From b2f680380ddf2f003882e59e00acd6c1952f91fc Mon Sep 17 00:00:00 2001 From: Benjamin LaHaise Date: Wed, 9 Mar 2016 15:05:56 -0500 Subject: x86/mm/32: Add support for 64-bit __get_user() on 32-bit kernels The existing __get_user() implementation does not support fetching 64-bit values on 32-bit x86. Implement this in a way that does not generate any incorrect warnings as cautioned by Russell King. Test code available at: http://www.kvack.org/~bcrl/x86_32-get_user.tar . Signed-off-by: Benjamin LaHaise Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- arch/x86/include/asm/uaccess.h | 23 +++++++++++++++++++++-- 1 file changed, 21 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h index a969ae607be8..8b3fb76b489b 100644 --- a/arch/x86/include/asm/uaccess.h +++ b/arch/x86/include/asm/uaccess.h @@ -333,7 +333,26 @@ do { \ } while (0) #ifdef CONFIG_X86_32 -#define __get_user_asm_u64(x, ptr, retval, errret) (x) = __get_user_bad() +#define __get_user_asm_u64(x, ptr, retval, errret) \ +({ \ + __typeof__(ptr) __ptr = (ptr); \ + asm volatile(ASM_STAC "\n" \ + "1: movl %2,%%eax\n" \ + "2: movl %3,%%edx\n" \ + "3: " ASM_CLAC "\n" \ + ".section .fixup,\"ax\"\n" \ + "4: mov %4,%0\n" \ + " xorl %%eax,%%eax\n" \ + " xorl %%edx,%%edx\n" \ + " jmp 3b\n" \ + ".previous\n" \ + _ASM_EXTABLE(1b, 4b) \ + _ASM_EXTABLE(2b, 4b) \ + : "=r" (retval), "=A"(x) \ + : "m" (__m(__ptr)), "m" __m(((u32 *)(__ptr)) + 1), \ + "i" (errret), "0" (retval)); \ +}) + #define __get_user_asm_ex_u64(x, ptr) (x) = __get_user_bad() #else #define __get_user_asm_u64(x, ptr, retval, errret) \ @@ -420,7 +439,7 @@ do { \ #define __get_user_nocheck(x, ptr, size) \ ({ \ int __gu_err; \ - unsigned long __gu_val; \ + __inttype(*(ptr)) __gu_val; \ __uaccess_begin(); \ __get_user_size(__gu_val, (ptr), (size), __gu_err, -EFAULT); \ __uaccess_end(); \ -- cgit v1.2.3 From 3282e6b8f89eaeaf4915ee6cc57bcf06d1d6cead Mon Sep 17 00:00:00 2001 From: Sudeep Holla Date: Wed, 4 May 2016 17:50:59 +0100 Subject: x86/topology: Remove redundant ENABLE_TOPO_DEFINES Commit c8e56d20f2d1 ("x86: Kill CONFIG_X86_HT") removed CONFIG_X86_HT and defined ENABLE_TOPO_DEFINES always if CONFIG_SMP, which makes ENABLE_TOPO_DEFINES redundant. This patch removes the redundant ENABLE_TOPO_DEFINES and instead uses CONFIG_SMP directly Signed-off-by: Sudeep Holla Acked-by: Borislav Petkov Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1462380659-5968-1-git-send-email-sudeep.holla@arm.com Signed-off-by: Ingo Molnar --- arch/x86/include/asm/topology.h | 12 +----------- 1 file changed, 1 insertion(+), 11 deletions(-) (limited to 'arch') diff --git a/arch/x86/include/asm/topology.h b/arch/x86/include/asm/topology.h index 7f991bd5031b..c9a4ed73aef4 100644 --- a/arch/x86/include/asm/topology.h +++ b/arch/x86/include/asm/topology.h @@ -25,16 +25,6 @@ #ifndef _ASM_X86_TOPOLOGY_H #define _ASM_X86_TOPOLOGY_H -#ifdef CONFIG_X86_32 -# ifdef CONFIG_SMP -# define ENABLE_TOPO_DEFINES -# endif -#else -# ifdef CONFIG_SMP -# define ENABLE_TOPO_DEFINES -# endif -#endif - /* * to preserve the visibility of NUMA_NO_NODE definition, * moved to there from here. 
May be used independent of @@ -123,7 +113,7 @@ extern const struct cpumask *cpu_coregroup_mask(int cpu); #define topology_physical_package_id(cpu) (cpu_data(cpu).phys_proc_id) #define topology_core_id(cpu) (cpu_data(cpu).cpu_core_id) -#ifdef ENABLE_TOPO_DEFINES +#ifdef CONFIG_SMP #define topology_core_cpumask(cpu) (per_cpu(cpu_core_map, cpu)) #define topology_sibling_cpumask(cpu) (per_cpu(cpu_sibling_map, cpu)) -- cgit v1.2.3 From f0133acc7d4835cfbb86393b7d2a4fba7519585b Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Sun, 8 May 2016 20:58:40 +0200 Subject: x86/cpu: Correct comments and messages in P4 erratum 037 handling code Remove the linebreak in the conditional and s/errata/erratum/ as the singular is "erratum". No functionality change. Signed-off-by: Borislav Petkov Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1462733920-7224-1-git-send-email-bp@alien8.de Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/intel.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index f71a34944b56..5354080f76c3 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c @@ -263,15 +263,14 @@ static void intel_workarounds(struct cpuinfo_x86 *c) } /* - * P4 Xeon errata 037 workaround. + * P4 Xeon erratum 037 workaround. * Hardware prefetcher may cause stale data to be loaded into the cache. */ if ((c->x86 == 15) && (c->x86_model == 1) && (c->x86_mask == 1)) { if (msr_set_bit(MSR_IA32_MISC_ENABLE, - MSR_IA32_MISC_ENABLE_PREFETCH_DISABLE_BIT) - > 0) { + MSR_IA32_MISC_ENABLE_PREFETCH_DISABLE_BIT) > 0) { pr_info("CPU: C0 stepping P4 Xeon detected.\n"); - pr_info("CPU: Disabling hardware prefetching (Errata 037)\n"); + pr_info("CPU: Disabling hardware prefetching (Erratum 037)\n"); } } -- cgit v1.2.3 From 67d7a982bab6702d84415ea889996fae72a7d3b2 Mon Sep 17 00:00:00 2001 From: Mathias Krause Date: Tue, 10 May 2016 23:07:02 +0200 Subject: x86/extable: Ensure entries are swapped completely when sorting The x86 exception table sorting was changed in this recent commit: 29934b0fb8ff ("x86/extable: use generic search and sort routines") ... to use the arch independent code in lib/extable.c. However, the patch was mangled somehow on its way into the kernel from the last version posted at: https://lkml.org/lkml/2016/1/27/232 The committed version kind of attempted to incorporate the changes of contemporary commit done in the x86 tree: 548acf19234d ("x86/mm: Expand the exception table logic to allow new handling options") ... as in _completely_ _ignoring_ the x86 specific 'handler' member of struct exception_table_entry. This effectively broke the sorting as entries will only be partly swapped now. Fortunately, the x86 Kconfig selects BUILDTIME_EXTABLE_SORT, so the exception table doesn't need to be sorted at runtime. However, in case that ever changes, we better not break the exception table sorting just because of that. Fix this by providing a swap_ex_entry_fixup() macro that takes care of the 'handler' member. Signed-off-by: Mathias Krause Reviewed-by: Ard Biesheuvel Cc: Andrew Morton Cc: Andy Lutomirski Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. 
Peter Anvin Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Tony Luck Link: http://lkml.kernel.org/r/1462914422-2911-1-git-send-email-minipli@googlemail.com Signed-off-by: Ingo Molnar --- arch/x86/include/asm/uaccess.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'arch') diff --git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h index 8b3fb76b489b..86c48f359686 100644 --- a/arch/x86/include/asm/uaccess.h +++ b/arch/x86/include/asm/uaccess.h @@ -108,6 +108,14 @@ struct exception_table_entry { #define ARCH_HAS_RELATIVE_EXTABLE +#define swap_ex_entry_fixup(a, b, tmp, delta) \ + do { \ + (a)->fixup = (b)->fixup + (delta); \ + (b)->fixup = (tmp).fixup - (delta); \ + (a)->handler = (b)->handler + (delta); \ + (b)->handler = (tmp).handler - (delta); \ + } while (0) + extern int fixup_exception(struct pt_regs *regs, int trapnr); extern bool ex_has_fault_handler(unsigned long ip); extern int early_fixup_exception(unsigned long *ip); -- cgit v1.2.3 From 20f362785869196fb61a76661a48321169a9046e Mon Sep 17 00:00:00 2001 From: Lukasz Odzioba Date: Mon, 16 May 2016 23:16:18 +0200 Subject: perf/x86/intel: Add 'static' keyword to locally used arrays Add the 'static' keyword to intel_bdw_event_constraints[], snb_events_attrs[], nhm_events_attrs[] and intel_skl_event_constraints arrays[], because they are only used locally. Signed-off-by: Lukasz Odzioba Signed-off-by: Peter Zijlstra (Intel) Cc: Alexander Shishkin Cc: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Thomas Gleixner Cc: Vince Weaver Cc: akpm@linux-foundation.org Cc: hpa@zytor.com Cc: kan.liang@intel.com Cc: lukasz.anaczkowski@intel.com Cc: zheng.z.yan@intel.com Link: http://lkml.kernel.org/r/1463433378-16816-1-git-send-email-lukasz.odzioba@intel.com Signed-off-by: Ingo Molnar --- arch/x86/events/intel/core.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'arch') diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c index 7c666958a625..ad08caf1a1b6 100644 --- a/arch/x86/events/intel/core.c +++ b/arch/x86/events/intel/core.c @@ -177,7 +177,7 @@ static struct event_constraint intel_slm_event_constraints[] __read_mostly = EVENT_CONSTRAINT_END }; -struct event_constraint intel_skl_event_constraints[] = { +static struct event_constraint intel_skl_event_constraints[] = { FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */ FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */ FIXED_EVENT_CONSTRAINT(0x0300, 2), /* CPU_CLK_UNHALTED.REF */ @@ -225,12 +225,12 @@ EVENT_ATTR_STR(mem-loads, mem_ld_nhm, "event=0x0b,umask=0x10,ldlat=3"); EVENT_ATTR_STR(mem-loads, mem_ld_snb, "event=0xcd,umask=0x1,ldlat=3"); EVENT_ATTR_STR(mem-stores, mem_st_snb, "event=0xcd,umask=0x2"); -struct attribute *nhm_events_attrs[] = { +static struct attribute *nhm_events_attrs[] = { EVENT_PTR(mem_ld_nhm), NULL, }; -struct attribute *snb_events_attrs[] = { +static struct attribute *snb_events_attrs[] = { EVENT_PTR(mem_ld_snb), EVENT_PTR(mem_st_snb), NULL, @@ -258,7 +258,7 @@ static struct event_constraint intel_hsw_event_constraints[] = { EVENT_CONSTRAINT_END }; -struct event_constraint intel_bdw_event_constraints[] = { +static struct event_constraint intel_bdw_event_constraints[] = { FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */ FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */ FIXED_EVENT_CONSTRAINT(0x0300, 2), /* CPU_CLK_UNHALTED.REF */ -- cgit v1.2.3 From 9c489fce7a4a46c8a408e16e126bf3225401c7b5 Mon 
Sep 17 00:00:00 2001 From: Lukasz Odzioba Date: Mon, 16 May 2016 23:16:59 +0200 Subject: perf/x86/intel: Change offcore response masks for Knights Landing Due to change in register definition we need to update OCR mask: MSR_OFFCORE_RESP0 reserved bits: 3,4,18,29,30,33,34, 8,11,14 MSR_OFFCORE_RESP1 reserved bits: 3,4,18,29,30,33,34, 38 Reported-by: Andi Kleen Signed-off-by: Lukasz Odzioba Signed-off-by: Peter Zijlstra (Intel) Cc: Alexander Shishkin Cc: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Thomas Gleixner Cc: Vince Weaver Cc: akpm@linux-foundation.org Cc: hpa@zytor.com Cc: kan.liang@intel.com Cc: lukasz.anaczkowski@intel.com Cc: zheng.z.yan@intel.com Link: http://lkml.kernel.org/r/1463433419-16893-1-git-send-email-lukasz.odzioba@intel.com Signed-off-by: Ingo Molnar --- arch/x86/events/intel/core.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'arch') diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c index ad08caf1a1b6..0941f846cc71 100644 --- a/arch/x86/events/intel/core.c +++ b/arch/x86/events/intel/core.c @@ -186,10 +186,8 @@ static struct event_constraint intel_skl_event_constraints[] = { }; static struct extra_reg intel_knl_extra_regs[] __read_mostly = { - INTEL_UEVENT_EXTRA_REG(0x01b7, - MSR_OFFCORE_RSP_0, 0x7f9ffbffffull, RSP_0), - INTEL_UEVENT_EXTRA_REG(0x02b7, - MSR_OFFCORE_RSP_1, 0x3f9ffbffffull, RSP_1), + INTEL_UEVENT_EXTRA_REG(0x01b7, MSR_OFFCORE_RSP_0, 0x799ffbb6e7ull, RSP_0), + INTEL_UEVENT_EXTRA_REG(0x02b7, MSR_OFFCORE_RSP_1, 0x399ffbffe7ull, RSP_1), EVENT_EXTRA_END }; -- cgit v1.2.3 From a54fa07930c0f7db55ecb4cc16b86d74101332c0 Mon Sep 17 00:00:00 2001 From: Kan Liang Date: Sun, 15 May 2016 23:18:24 -0700 Subject: perf/x86/intel/uncore: Locate specific box by checking full device info Some platforms, e.g. Knights Landing, use a common PCI device ID for multiple instances of an uncore PMU device type. So it is impossible to locate the specific instances only by PCI device ID. The current code specially handles Knights Landing by arbitrarily pointing an instance to an unused uncore box. However, we still have no idea which uncore device is mapped to which box. Furthermore, there could be more platforms which use a common PCI device ID for uncore devices. We have to specially handle them one by one. This patch records full device information (slot, func, and device ID) in id_table[]. So the probe function can point the instance to a specific uncore box by checking the full device information. 
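For illustration, a minimal userspace sketch of the driver_data bit layout this patch introduces (it mirrors the UNCORE_PCI_DEV_FULL_DATA()/UNCORE_PCI_DEV_DEV()/UNCORE_PCI_DEV_FUNC() helpers added below; the type value is an illustrative stand-in for the real enum, and the program itself is not kernel code):

/*
 * Sketch of the driver_data encoding: PCI slot, function, PMU type and
 * box index packed into one word, mirroring the helpers added below.
 */
#include <stdint.h>
#include <stdio.h>

#define UNCORE_PCI_DEV_FULL_DATA(dev, func, type, idx) \
        (((dev) << 24) | ((func) << 16) | ((type) << 8) | (idx))
#define UNCORE_PCI_DEV_DEV(data)        (((data) >> 24) & 0xff)
#define UNCORE_PCI_DEV_FUNC(data)       (((data) >> 16) & 0xff)
#define UNCORE_PCI_DEV_TYPE(data)       (((data) >> 8) & 0xff)
#define UNCORE_PCI_DEV_IDX(data)        ((data) & 0xff)

#define EXAMPLE_TYPE_MC_DCLK    2  /* illustrative stand-in for KNL_PCI_UNCORE_MC_DCLK */

int main(void)
{
        /* MC1 DClk channel 2 sits at slot 9, function 4, box index 5 */
        uint32_t data = UNCORE_PCI_DEV_FULL_DATA(9, 4, EXAMPLE_TYPE_MC_DCLK, 5);

        printf("dev=%u func=%u type=%u idx=%u\n",
               UNCORE_PCI_DEV_DEV(data), UNCORE_PCI_DEV_FUNC(data),
               UNCORE_PCI_DEV_TYPE(data), UNCORE_PCI_DEV_IDX(data));
        return 0;
}

With slot and function encoded, the probe routine can compare PCI_DEVFN(dev, func) against pdev->devfn and pick the right box, which is what the uncore_pci_probe() hunk below does.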
Tested-by: Lukasz Odzioba Signed-off-by: Kan Liang Signed-off-by: Peter Zijlstra (Intel) Acked-by: tglx@linutronix.de Cc: Alexander Shishkin Cc: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Thomas Gleixner Cc: Vince Weaver Cc: bp@suse.de Cc: harish.chegondi@intel.com Cc: hubert.chrzaniuk@intel.com Cc: lawrence.f.meadows@intel.com Link: http://lkml.kernel.org/r/1463379504-39003-1-git-send-email-kan.liang@intel.com Signed-off-by: Ingo Molnar --- arch/x86/events/intel/uncore.c | 43 +++++++++++----- arch/x86/events/intel/uncore.h | 4 ++ arch/x86/events/intel/uncore_snbep.c | 96 +++++++++++++++++++++++++++++++++--- 3 files changed, 122 insertions(+), 21 deletions(-) (limited to 'arch') diff --git a/arch/x86/events/intel/uncore.c b/arch/x86/events/intel/uncore.c index fce74062d981..65490589e52e 100644 --- a/arch/x86/events/intel/uncore.c +++ b/arch/x86/events/intel/uncore.c @@ -882,7 +882,7 @@ uncore_types_init(struct intel_uncore_type **types, bool setid) static int uncore_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) { struct intel_uncore_type *type; - struct intel_uncore_pmu *pmu; + struct intel_uncore_pmu *pmu = NULL; struct intel_uncore_box *box; int phys_id, pkg, ret; @@ -903,20 +903,37 @@ static int uncore_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id } type = uncore_pci_uncores[UNCORE_PCI_DEV_TYPE(id->driver_data)]; + /* - * for performance monitoring unit with multiple boxes, - * each box has a different function id. + * Some platforms, e.g. Knights Landing, use a common PCI device ID + * for multiple instances of an uncore PMU device type. We should check + * PCI slot and func to indicate the uncore box. */ - pmu = &type->pmus[UNCORE_PCI_DEV_IDX(id->driver_data)]; - /* Knights Landing uses a common PCI device ID for multiple instances of - * an uncore PMU device type. There is only one entry per device type in - * the knl_uncore_pci_ids table inspite of multiple devices present for - * some device types. Hence PCI device idx would be 0 for all devices. - * So increment pmu pointer to point to an unused array element. - */ - if (boot_cpu_data.x86_model == 87) { - while (pmu->func_id >= 0) - pmu++; + if (id->driver_data & ~0xffff) { + struct pci_driver *pci_drv = pdev->driver; + const struct pci_device_id *ids = pci_drv->id_table; + unsigned int devfn; + + while (ids && ids->vendor) { + if ((ids->vendor == pdev->vendor) && + (ids->device == pdev->device)) { + devfn = PCI_DEVFN(UNCORE_PCI_DEV_DEV(ids->driver_data), + UNCORE_PCI_DEV_FUNC(ids->driver_data)); + if (devfn == pdev->devfn) { + pmu = &type->pmus[UNCORE_PCI_DEV_IDX(ids->driver_data)]; + break; + } + } + ids++; + } + if (pmu == NULL) + return -ENODEV; + } else { + /* + * for performance monitoring unit with multiple boxes, + * each box has a different function id. 
+ */ + pmu = &type->pmus[UNCORE_PCI_DEV_IDX(id->driver_data)]; } if (WARN_ON_ONCE(pmu->boxes[pkg] != NULL)) diff --git a/arch/x86/events/intel/uncore.h b/arch/x86/events/intel/uncore.h index 79766b9a3580..66c3a3657a10 100644 --- a/arch/x86/events/intel/uncore.h +++ b/arch/x86/events/intel/uncore.h @@ -15,7 +15,11 @@ #define UNCORE_PMC_IDX_FIXED UNCORE_PMC_IDX_MAX_GENERIC #define UNCORE_PMC_IDX_MAX (UNCORE_PMC_IDX_FIXED + 1) +#define UNCORE_PCI_DEV_FULL_DATA(dev, func, type, idx) \ + ((dev << 24) | (func << 16) | (type << 8) | idx) #define UNCORE_PCI_DEV_DATA(type, idx) ((type << 8) | idx) +#define UNCORE_PCI_DEV_DEV(data) ((data >> 24) & 0xff) +#define UNCORE_PCI_DEV_FUNC(data) ((data >> 16) & 0xff) #define UNCORE_PCI_DEV_TYPE(data) ((data >> 8) & 0xff) #define UNCORE_PCI_DEV_IDX(data) (data & 0xff) #define UNCORE_EXTRA_PCI_DEV 0xff diff --git a/arch/x86/events/intel/uncore_snbep.c b/arch/x86/events/intel/uncore_snbep.c index b2625867ebd1..7336e55c248c 100644 --- a/arch/x86/events/intel/uncore_snbep.c +++ b/arch/x86/events/intel/uncore_snbep.c @@ -2164,21 +2164,101 @@ static struct intel_uncore_type *knl_pci_uncores[] = { */ static const struct pci_device_id knl_uncore_pci_ids[] = { - { /* MC UClk */ + { /* MC0 UClk */ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7841), - .driver_data = UNCORE_PCI_DEV_DATA(KNL_PCI_UNCORE_MC_UCLK, 0), + .driver_data = UNCORE_PCI_DEV_FULL_DATA(10, 0, KNL_PCI_UNCORE_MC_UCLK, 0), }, - { /* MC DClk Channel */ + { /* MC1 UClk */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7841), + .driver_data = UNCORE_PCI_DEV_FULL_DATA(11, 0, KNL_PCI_UNCORE_MC_UCLK, 1), + }, + { /* MC0 DClk CH 0 */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7843), + .driver_data = UNCORE_PCI_DEV_FULL_DATA(8, 2, KNL_PCI_UNCORE_MC_DCLK, 0), + }, + { /* MC0 DClk CH 1 */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7843), + .driver_data = UNCORE_PCI_DEV_FULL_DATA(8, 3, KNL_PCI_UNCORE_MC_DCLK, 1), + }, + { /* MC0 DClk CH 2 */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7843), + .driver_data = UNCORE_PCI_DEV_FULL_DATA(8, 4, KNL_PCI_UNCORE_MC_DCLK, 2), + }, + { /* MC1 DClk CH 0 */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7843), + .driver_data = UNCORE_PCI_DEV_FULL_DATA(9, 2, KNL_PCI_UNCORE_MC_DCLK, 3), + }, + { /* MC1 DClk CH 1 */ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7843), - .driver_data = UNCORE_PCI_DEV_DATA(KNL_PCI_UNCORE_MC_DCLK, 0), + .driver_data = UNCORE_PCI_DEV_FULL_DATA(9, 3, KNL_PCI_UNCORE_MC_DCLK, 4), + }, + { /* MC1 DClk CH 2 */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7843), + .driver_data = UNCORE_PCI_DEV_FULL_DATA(9, 4, KNL_PCI_UNCORE_MC_DCLK, 5), + }, + { /* EDC0 UClk */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7833), + .driver_data = UNCORE_PCI_DEV_FULL_DATA(15, 0, KNL_PCI_UNCORE_EDC_UCLK, 0), + }, + { /* EDC1 UClk */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7833), + .driver_data = UNCORE_PCI_DEV_FULL_DATA(16, 0, KNL_PCI_UNCORE_EDC_UCLK, 1), + }, + { /* EDC2 UClk */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7833), + .driver_data = UNCORE_PCI_DEV_FULL_DATA(17, 0, KNL_PCI_UNCORE_EDC_UCLK, 2), + }, + { /* EDC3 UClk */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7833), + .driver_data = UNCORE_PCI_DEV_FULL_DATA(18, 0, KNL_PCI_UNCORE_EDC_UCLK, 3), }, - { /* EDC UClk */ + { /* EDC4 UClk */ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7833), - .driver_data = UNCORE_PCI_DEV_DATA(KNL_PCI_UNCORE_EDC_UCLK, 0), + .driver_data = UNCORE_PCI_DEV_FULL_DATA(19, 0, KNL_PCI_UNCORE_EDC_UCLK, 4), + }, + { /* EDC5 UClk */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7833), + .driver_data = UNCORE_PCI_DEV_FULL_DATA(20, 0, KNL_PCI_UNCORE_EDC_UCLK, 5), + }, + { /* EDC6 UClk */ + 
PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7833), + .driver_data = UNCORE_PCI_DEV_FULL_DATA(21, 0, KNL_PCI_UNCORE_EDC_UCLK, 6), + }, + { /* EDC7 UClk */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7833), + .driver_data = UNCORE_PCI_DEV_FULL_DATA(22, 0, KNL_PCI_UNCORE_EDC_UCLK, 7), + }, + { /* EDC0 EClk */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7835), + .driver_data = UNCORE_PCI_DEV_FULL_DATA(24, 2, KNL_PCI_UNCORE_EDC_ECLK, 0), + }, + { /* EDC1 EClk */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7835), + .driver_data = UNCORE_PCI_DEV_FULL_DATA(25, 2, KNL_PCI_UNCORE_EDC_ECLK, 1), + }, + { /* EDC2 EClk */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7835), + .driver_data = UNCORE_PCI_DEV_FULL_DATA(26, 2, KNL_PCI_UNCORE_EDC_ECLK, 2), + }, + { /* EDC3 EClk */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7835), + .driver_data = UNCORE_PCI_DEV_FULL_DATA(27, 2, KNL_PCI_UNCORE_EDC_ECLK, 3), + }, + { /* EDC4 EClk */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7835), + .driver_data = UNCORE_PCI_DEV_FULL_DATA(28, 2, KNL_PCI_UNCORE_EDC_ECLK, 4), + }, + { /* EDC5 EClk */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7835), + .driver_data = UNCORE_PCI_DEV_FULL_DATA(29, 2, KNL_PCI_UNCORE_EDC_ECLK, 5), + }, + { /* EDC6 EClk */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7835), + .driver_data = UNCORE_PCI_DEV_FULL_DATA(30, 2, KNL_PCI_UNCORE_EDC_ECLK, 6), }, - { /* EDC EClk */ + { /* EDC7 EClk */ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7835), - .driver_data = UNCORE_PCI_DEV_DATA(KNL_PCI_UNCORE_EDC_ECLK, 0), + .driver_data = UNCORE_PCI_DEV_FULL_DATA(31, 2, KNL_PCI_UNCORE_EDC_ECLK, 7), }, { /* M2PCIe */ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7817), -- cgit v1.2.3 From 70b8301f6b8f7bc053377a9cbd0c4e42e29d9807 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Thu, 19 May 2016 17:09:55 -0700 Subject: x86/topology: Add topology_max_smt_threads() For SMT specific workarounds it is useful to know if SMT is active on any online CPU in the system. This currently requires a loop over all online CPUs. Add a global variable that is updated with the maximum number of smt threads on any CPU on online/offline, and use it for topology_max_smt_threads() The single call is easier to use than a loop. Not exported to user space because user space already can use the existing sibling interfaces to find this out. 
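As a sketch of the existing sibling interface the last paragraph refers to, the following userspace program derives the same maximum by counting bits in each CPU's thread_siblings mask (it assumes the usual /sys/devices/system/cpu/cpuN/topology layout and is only an illustration, not part of the patch):

/*
 * Userspace approximation of topology_max_smt_threads(), built on the
 * sysfs sibling masks that user space can already read.
 */
#include <ctype.h>
#include <stdio.h>

static int count_mask_bits(FILE *f)
{
        int c, bits = 0;

        while ((c = fgetc(f)) != EOF) {
                if (!isxdigit(c))
                        continue;       /* skip ',' separators and newline */
                int v = isdigit(c) ? c - '0' : tolower(c) - 'a' + 10;
                bits += __builtin_popcount(v);
        }
        return bits;
}

int main(void)
{
        int cpu, max_threads = 0;

        for (cpu = 0; ; cpu++) {
                char path[128];
                FILE *f;

                snprintf(path, sizeof(path),
                         "/sys/devices/system/cpu/cpu%d/topology/thread_siblings",
                         cpu);
                f = fopen(path, "r");
                if (!f)
                        break;          /* no more CPUs */
                int threads = count_mask_bits(f);  /* siblings incl. this CPU */
                fclose(f);
                if (threads > max_threads)
                        max_threads = threads;
        }
        printf("max SMT threads per core: %d\n", max_threads);
        return 0;
}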
Signed-off-by: Andi Kleen Signed-off-by: Peter Zijlstra (Intel) Cc: Alexander Shishkin Cc: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Thomas Gleixner Cc: Vince Weaver Cc: acme@kernel.org Cc: jolsa@kernel.org Link: http://lkml.kernel.org/r/1463703002-19686-2-git-send-email-andi@firstfloor.org Signed-off-by: Ingo Molnar --- arch/x86/include/asm/topology.h | 9 +++++++++ arch/x86/kernel/smpboot.c | 25 ++++++++++++++++++++++++- 2 files changed, 33 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/include/asm/topology.h b/arch/x86/include/asm/topology.h index 7f991bd5031b..e346572841a0 100644 --- a/arch/x86/include/asm/topology.h +++ b/arch/x86/include/asm/topology.h @@ -129,6 +129,14 @@ extern const struct cpumask *cpu_coregroup_mask(int cpu); extern unsigned int __max_logical_packages; #define topology_max_packages() (__max_logical_packages) + +extern int __max_smt_threads; + +static inline int topology_max_smt_threads(void) +{ + return __max_smt_threads; +} + int topology_update_package_map(unsigned int apicid, unsigned int cpu); extern int topology_phys_to_logical_pkg(unsigned int pkg); #else @@ -136,6 +144,7 @@ extern int topology_phys_to_logical_pkg(unsigned int pkg); static inline int topology_update_package_map(unsigned int apicid, unsigned int cpu) { return 0; } static inline int topology_phys_to_logical_pkg(unsigned int pkg) { return 0; } +static inline int topology_max_smt_threads(void) { return 1; } #endif static inline void arch_fix_phys_package_id(int num, u32 slot) diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index fafe8b923cac..2ed0ec1353f8 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -105,6 +105,9 @@ static unsigned int max_physical_pkg_id __read_mostly; unsigned int __max_logical_packages __read_mostly; EXPORT_SYMBOL(__max_logical_packages); +/* Maximum number of SMT threads on any online core */ +int __max_smt_threads __read_mostly; + static inline void smpboot_setup_warm_reset_vector(unsigned long start_eip) { unsigned long flags; @@ -493,7 +496,7 @@ void set_cpu_sibling_map(int cpu) bool has_mp = has_smt || boot_cpu_data.x86_max_cores > 1; struct cpuinfo_x86 *c = &cpu_data(cpu); struct cpuinfo_x86 *o; - int i; + int i, threads; cpumask_set_cpu(cpu, cpu_sibling_setup_mask); @@ -550,6 +553,10 @@ void set_cpu_sibling_map(int cpu) if (match_die(c, o) && !topology_same_node(c, o)) primarily_use_numa_for_topology(); } + + threads = cpumask_weight(topology_sibling_cpumask(cpu)); + if (threads > __max_smt_threads) + __max_smt_threads = threads; } /* maps the cpu to the sched domain representing multi-core */ @@ -1441,6 +1448,21 @@ __init void prefill_possible_map(void) #ifdef CONFIG_HOTPLUG_CPU +/* Recompute SMT state for all CPUs on offline */ +static void recompute_smt_state(void) +{ + int max_threads, cpu; + + max_threads = 0; + for_each_online_cpu (cpu) { + int threads = cpumask_weight(topology_sibling_cpumask(cpu)); + + if (threads > max_threads) + max_threads = threads; + } + __max_smt_threads = max_threads; +} + static void remove_siblinginfo(int cpu) { int sibling; @@ -1465,6 +1487,7 @@ static void remove_siblinginfo(int cpu) c->phys_proc_id = 0; c->cpu_core_id = 0; cpumask_clear_cpu(cpu, cpu_sibling_setup_mask); + recompute_smt_state(); } static void remove_cpu_from_maps(int cpu) -- cgit v1.2.3 From fc07e9f983b4b11922c22b6cccadc1f342f05a4c Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Thu, 19 May 2016 17:09:56 -0700 Subject: perf/x86: Support 
sysfs files depending on SMT status Add a way to show different sysfs events attributes depending on HyperThreading is on or off. This is difficult to determine early at boot, so we just do it dynamically when the sysfs attribute is read. Signed-off-by: Andi Kleen Signed-off-by: Peter Zijlstra (Intel) Cc: Alexander Shishkin Cc: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Thomas Gleixner Cc: Vince Weaver Cc: acme@kernel.org Cc: jolsa@kernel.org Link: http://lkml.kernel.org/r/1463703002-19686-3-git-send-email-andi@firstfloor.org Signed-off-by: Ingo Molnar --- arch/x86/events/core.c | 23 +++++++++++++++++++++++ arch/x86/events/perf_event.h | 10 ++++++++++ include/linux/perf_event.h | 7 +++++++ 3 files changed, 40 insertions(+) (limited to 'arch') diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c index 33787ee817f0..929655db5084 100644 --- a/arch/x86/events/core.c +++ b/arch/x86/events/core.c @@ -1622,6 +1622,29 @@ ssize_t events_sysfs_show(struct device *dev, struct device_attribute *attr, cha } EXPORT_SYMBOL_GPL(events_sysfs_show); +ssize_t events_ht_sysfs_show(struct device *dev, struct device_attribute *attr, + char *page) +{ + struct perf_pmu_events_ht_attr *pmu_attr = + container_of(attr, struct perf_pmu_events_ht_attr, attr); + + /* + * Report conditional events depending on Hyper-Threading. + * + * This is overly conservative as usually the HT special + * handling is not needed if the other CPU thread is idle. + * + * Note this does not (and cannot) handle the case when thread + * siblings are invisible, for example with virtualization + * if they are owned by some other guest. The user tool + * has to re-read when a thread sibling gets onlined later. + */ + return sprintf(page, "%s", + topology_max_smt_threads() > 1 ? 
+ pmu_attr->event_str_ht : + pmu_attr->event_str_noht); +} + EVENT_ATTR(cpu-cycles, CPU_CYCLES ); EVENT_ATTR(instructions, INSTRUCTIONS ); EVENT_ATTR(cache-references, CACHE_REFERENCES ); diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h index 8bd764df815d..e2d7285a2dac 100644 --- a/arch/x86/events/perf_event.h +++ b/arch/x86/events/perf_event.h @@ -668,6 +668,14 @@ static struct perf_pmu_events_attr event_attr_##v = { \ .event_str = str, \ }; +#define EVENT_ATTR_STR_HT(_name, v, noht, ht) \ +static struct perf_pmu_events_ht_attr event_attr_##v = { \ + .attr = __ATTR(_name, 0444, events_ht_sysfs_show, NULL),\ + .id = 0, \ + .event_str_noht = noht, \ + .event_str_ht = ht, \ +} + extern struct x86_pmu x86_pmu __read_mostly; static inline bool x86_pmu_has_lbr_callstack(void) @@ -803,6 +811,8 @@ struct attribute **merge_attr(struct attribute **a, struct attribute **b); ssize_t events_sysfs_show(struct device *dev, struct device_attribute *attr, char *page); +ssize_t events_ht_sysfs_show(struct device *dev, struct device_attribute *attr, + char *page); #ifdef CONFIG_CPU_SUP_AMD diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 92e9ce737432..a7593d653b40 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -1334,6 +1334,13 @@ struct perf_pmu_events_attr { const char *event_str; }; +struct perf_pmu_events_ht_attr { + struct device_attribute attr; + u64 id; + const char *event_str_ht; + const char *event_str_noht; +}; + ssize_t perf_event_sysfs_show(struct device *dev, struct device_attribute *attr, char *page); -- cgit v1.2.3 From a39fcae7a83629312cc06cee7a745b9a8203327f Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Thu, 19 May 2016 17:09:57 -0700 Subject: perf/x86/intel: Add topdown events to Intel Core Add declarations for the events needed for topdown to the Intel big core CPUs starting with Sandy Bridge. We need to report different values if HyperThreading is on or off. The only thing this patch does is to export some events in sysfs. topdown level 1 uses a set of abstracted metrics which are generic to out of order CPU cores (although some CPUs may not implement all of them): topdown-total-slots Available slots in the pipeline topdown-slots-issued Slots issued into the pipeline topdown-slots-retired Slots successfully retired topdown-fetch-bubbles Pipeline gaps in the frontend topdown-recovery-bubbles Pipeline gaps during recovery from misspeculation A slot is a single operation in the CPU pipe line. These metrics then allow to compute four useful metrics: FrontendBound, BackendBound, Retiring, BadSpeculation. The formulas to compute the metrics are generic, they only change based on the availability on the abstracted input values. The kernel declares the events supported by the current CPU and their scaling factors (such as the pipeline width) and perf stat then computes the formulas based on the available metrics. This is similar how existing perf metrics, such as TSC metrics or IPC, are implemented. This abstracts all CPU pipe line specific knowledge in the kernel driver, but still avoids the need for larger scale perf interface changes. For HyperThreading the any bit is needed to get accurate values when both threads are executing. This implies that the events can only be collected as root or with perf_event_paranoid=-1 for now. 
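To make the generic formulas concrete, here is a minimal sketch of the level-1 computation the perf tool performs from the five abstracted counts (the formulas are the ones from the paper cited below; the sample numbers are invented purely for illustration):

/*
 * Level-1 Top-Down breakdown from the five abstracted slot counts
 * listed above; a sketch of the user-space side, not patch code.
 */
#include <stdio.h>

struct topdown_slots {
        double total;            /* topdown-total-slots */
        double issued;           /* topdown-slots-issued */
        double retired;          /* topdown-slots-retired */
        double fetch_bubbles;    /* topdown-fetch-bubbles */
        double recovery_bubbles; /* topdown-recovery-bubbles */
};

static void topdown_level1(const struct topdown_slots *s)
{
        double frontend = s->fetch_bubbles / s->total;
        double bad_spec = (s->issued - s->retired + s->recovery_bubbles) / s->total;
        double retiring = s->retired / s->total;
        double backend  = 1.0 - frontend - bad_spec - retiring;

        printf("FrontendBound %.1f%%  BadSpeculation %.1f%%  "
               "Retiring %.1f%%  BackendBound %.1f%%\n",
               100 * frontend, 100 * bad_spec, 100 * retiring, 100 * backend);
}

int main(void)
{
        /* made-up sample counts, purely for illustration */
        struct topdown_slots s = {
                .total = 4e9, .issued = 2.2e9, .retired = 2.0e9,
                .fetch_bubbles = 0.8e9, .recovery_bubbles = 0.1e9,
        };
        topdown_level1(&s);
        return 0;
}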
The basic scheme is based on the following paper: Yasin, A Top Down Method for Performance analysis and Counter architecture ISPASS14 (pdf available via google) Signed-off-by: Andi Kleen Signed-off-by: Peter Zijlstra (Intel) Cc: Alexander Shishkin Cc: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Thomas Gleixner Cc: Vince Weaver Cc: acme@kernel.org Cc: jolsa@kernel.org Link: http://lkml.kernel.org/r/1463703002-19686-4-git-send-email-andi@firstfloor.org Signed-off-by: Ingo Molnar --- arch/x86/events/intel/core.c | 50 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 50 insertions(+) (limited to 'arch') diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c index 0941f846cc71..4f51bc44b89f 100644 --- a/arch/x86/events/intel/core.c +++ b/arch/x86/events/intel/core.c @@ -228,9 +228,46 @@ static struct attribute *nhm_events_attrs[] = { NULL, }; +/* + * topdown events for Intel Core CPUs. + * + * The events are all in slots, which is a free slot in a 4 wide + * pipeline. Some events are already reported in slots, for cycle + * events we multiply by the pipeline width (4). + * + * With Hyper Threading on, topdown metrics are either summed or averaged + * between the threads of a core: (count_t0 + count_t1). + * + * For the average case the metric is always scaled to pipeline width, + * so we use factor 2 ((count_t0 + count_t1) / 2 * 4) + */ + +EVENT_ATTR_STR_HT(topdown-total-slots, td_total_slots, + "event=0x3c,umask=0x0", /* cpu_clk_unhalted.thread */ + "event=0x3c,umask=0x0,any=1"); /* cpu_clk_unhalted.thread_any */ +EVENT_ATTR_STR_HT(topdown-total-slots.scale, td_total_slots_scale, "4", "2"); +EVENT_ATTR_STR(topdown-slots-issued, td_slots_issued, + "event=0xe,umask=0x1"); /* uops_issued.any */ +EVENT_ATTR_STR(topdown-slots-retired, td_slots_retired, + "event=0xc2,umask=0x2"); /* uops_retired.retire_slots */ +EVENT_ATTR_STR(topdown-fetch-bubbles, td_fetch_bubbles, + "event=0x9c,umask=0x1"); /* idq_uops_not_delivered_core */ +EVENT_ATTR_STR_HT(topdown-recovery-bubbles, td_recovery_bubbles, + "event=0xd,umask=0x3,cmask=1", /* int_misc.recovery_cycles */ + "event=0xd,umask=0x3,cmask=1,any=1"); /* int_misc.recovery_cycles_any */ +EVENT_ATTR_STR_HT(topdown-recovery-bubbles.scale, td_recovery_bubbles_scale, + "4", "2"); + static struct attribute *snb_events_attrs[] = { EVENT_PTR(mem_ld_snb), EVENT_PTR(mem_st_snb), + EVENT_PTR(td_slots_issued), + EVENT_PTR(td_slots_retired), + EVENT_PTR(td_fetch_bubbles), + EVENT_PTR(td_total_slots), + EVENT_PTR(td_total_slots_scale), + EVENT_PTR(td_recovery_bubbles), + EVENT_PTR(td_recovery_bubbles_scale), NULL, }; @@ -3435,6 +3472,13 @@ static struct attribute *hsw_events_attrs[] = { EVENT_PTR(cycles_ct), EVENT_PTR(mem_ld_hsw), EVENT_PTR(mem_st_hsw), + EVENT_PTR(td_slots_issued), + EVENT_PTR(td_slots_retired), + EVENT_PTR(td_fetch_bubbles), + EVENT_PTR(td_total_slots), + EVENT_PTR(td_total_slots_scale), + EVENT_PTR(td_recovery_bubbles), + EVENT_PTR(td_recovery_bubbles_scale), NULL }; @@ -3803,6 +3847,12 @@ __init int intel_pmu_init(void) memcpy(hw_cache_extra_regs, skl_hw_cache_extra_regs, sizeof(hw_cache_extra_regs)); intel_pmu_lbr_init_skl(); + /* INT_MISC.RECOVERY_CYCLES has umask 1 in Skylake */ + event_attr_td_recovery_bubbles.event_str_noht = + "event=0xd,umask=0x1,cmask=1"; + event_attr_td_recovery_bubbles.event_str_ht = + "event=0xd,umask=0x1,cmask=1,any=1"; + x86_pmu.event_constraints = intel_skl_event_constraints; x86_pmu.pebs_constraints = 
intel_skl_pebs_event_constraints; x86_pmu.extra_regs = intel_skl_extra_regs; -- cgit v1.2.3 From eb12b8ece71cfd4c96df37198b9903fc639768d8 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Thu, 19 May 2016 17:09:58 -0700 Subject: perf/x86/intel: Add topdown events to Intel Atom Add topdown event declarations to Silvermont / Airmont. These cores do not support the full Top Down metrics, but an useful subset (FrontendBound, Retiring, Backend Bound/Bad Speculation). The perf stat tool automatically handles the missing events and combines the available metrics. Signed-off-by: Andi Kleen Signed-off-by: Peter Zijlstra (Intel) Cc: Alexander Shishkin Cc: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Thomas Gleixner Cc: Vince Weaver Cc: acme@kernel.org Cc: jolsa@kernel.org Link: http://lkml.kernel.org/r/1463703002-19686-5-git-send-email-andi@firstfloor.org Signed-off-by: Ingo Molnar --- arch/x86/events/intel/core.c | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) (limited to 'arch') diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c index 4f51bc44b89f..593b1676b5d1 100644 --- a/arch/x86/events/intel/core.c +++ b/arch/x86/events/intel/core.c @@ -1367,6 +1367,29 @@ static __initconst const u64 atom_hw_cache_event_ids }, }; +EVENT_ATTR_STR(topdown-total-slots, td_total_slots_slm, "event=0x3c"); +EVENT_ATTR_STR(topdown-total-slots.scale, td_total_slots_scale_slm, "2"); +/* no_alloc_cycles.not_delivered */ +EVENT_ATTR_STR(topdown-fetch-bubbles, td_fetch_bubbles_slm, + "event=0xca,umask=0x50"); +EVENT_ATTR_STR(topdown-fetch-bubbles.scale, td_fetch_bubbles_scale_slm, "2"); +/* uops_retired.all */ +EVENT_ATTR_STR(topdown-slots-issued, td_slots_issued_slm, + "event=0xc2,umask=0x10"); +/* uops_retired.all */ +EVENT_ATTR_STR(topdown-slots-retired, td_slots_retired_slm, + "event=0xc2,umask=0x10"); + +static struct attribute *slm_events_attrs[] = { + EVENT_PTR(td_total_slots_slm), + EVENT_PTR(td_total_slots_scale_slm), + EVENT_PTR(td_fetch_bubbles_slm), + EVENT_PTR(td_fetch_bubbles_scale_slm), + EVENT_PTR(td_slots_issued_slm), + EVENT_PTR(td_slots_retired_slm), + NULL +}; + static struct extra_reg intel_slm_extra_regs[] __read_mostly = { /* must define OFFCORE_RSP_X first, see intel_fixup_er() */ @@ -3629,6 +3652,7 @@ __init int intel_pmu_init(void) x86_pmu.pebs_constraints = intel_slm_pebs_event_constraints; x86_pmu.extra_regs = intel_slm_extra_regs; x86_pmu.flags |= PMU_FL_HAS_RSP_1; + x86_pmu.cpu_events = slm_events_attrs; pr_cont("Silvermont events, "); break; -- cgit v1.2.3 From 030ba6cd105c68ce919c5e239853b567490cd059 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Thu, 19 May 2016 17:09:59 -0700 Subject: perf/x86/intel: Use new topology_max_smt_threads() in HT leak workaround Now that we have topology_max_smt_threads() use it to detect the HT workarounds for older CPUs. 
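A userspace sketch of the pattern this pair of patches relies on: the maximum is recomputed only when CPUs go online or offline (recompute_smt_state() in the earlier patch), so call sites like this workaround get a single cheap check instead of a cpumask walk at every call (the CPU counts below are made up for illustration):

/*
 * Keep a cached maximum, recomputed only on hotplug events, so
 * "is SMT active anywhere?" is a single load at the call site.
 */
#include <stdio.h>

#define NR_CPUS 8

static int online[NR_CPUS]      = { 1, 1, 1, 1, 1, 1, 1, 1 };
static int smt_threads[NR_CPUS] = { 2, 2, 2, 2, 2, 2, 2, 2 };
static int max_smt_threads;

/* analogue of recompute_smt_state(): run on online/offline events */
static void recompute_smt_state(void)
{
        int cpu, max = 0;

        for (cpu = 0; cpu < NR_CPUS; cpu++)
                if (online[cpu] && smt_threads[cpu] > max)
                        max = smt_threads[cpu];
        max_smt_threads = max;
}

/* analogue of topology_max_smt_threads(): cheap at every call site */
static int topology_max_smt_threads(void)
{
        return max_smt_threads;
}

int main(void)
{
        recompute_smt_state();
        printf("HT workaround needed: %s\n",
               topology_max_smt_threads() > 1 ? "yes" : "no");
        return 0;
}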
Signed-off-by: Andi Kleen Signed-off-by: Peter Zijlstra (Intel) Cc: Alexander Shishkin Cc: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Thomas Gleixner Cc: Vince Weaver Cc: acme@kernel.org Cc: jolsa@kernel.org Link: http://lkml.kernel.org/r/1463703002-19686-6-git-send-email-andi@firstfloor.org Signed-off-by: Ingo Molnar --- arch/x86/events/intel/core.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'arch') diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c index 593b1676b5d1..5081b4cdad0d 100644 --- a/arch/x86/events/intel/core.c +++ b/arch/x86/events/intel/core.c @@ -3989,16 +3989,14 @@ __init int intel_pmu_init(void) */ static __init int fixup_ht_bug(void) { - int cpu = smp_processor_id(); - int w, c; + int c; /* * problem not present on this CPU model, nothing to do */ if (!(x86_pmu.flags & PMU_FL_EXCL_ENABLED)) return 0; - w = cpumask_weight(topology_sibling_cpumask(cpu)); - if (w > 1) { + if (topology_max_smt_threads() > 1) { pr_info("PMU erratum BJ122, BV98, HSD29 worked around, HT is on\n"); return 0; } -- cgit v1.2.3 From cab43282682e0f46d6a74dd4f54f52595af5eefa Mon Sep 17 00:00:00 2001 From: Wenyou Yang Date: Wed, 8 Jun 2016 12:15:11 +0800 Subject: ARM: at91/dt: sama5d2: Use new compatible for ohci node Use compatible "atmel,sama5d2-ohci" to be capable of suspending ports while sleep to save the power consumption. Signed-off-by: Wenyou Yang Signed-off-by: Greg Kroah-Hartman --- arch/arm/boot/dts/sama5d2.dtsi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/arm/boot/dts/sama5d2.dtsi b/arch/arm/boot/dts/sama5d2.dtsi index 2827e7ab5ebc..5dd2734e67ba 100644 --- a/arch/arm/boot/dts/sama5d2.dtsi +++ b/arch/arm/boot/dts/sama5d2.dtsi @@ -232,7 +232,7 @@ }; usb1: ohci@00400000 { - compatible = "atmel,at91rm9200-ohci", "usb-ohci"; + compatible = "atmel,sama5d2-ohci", "usb-ohci"; reg = <0x00400000 0x100000>; interrupts = <41 IRQ_TYPE_LEVEL_HIGH 2>; clocks = <&uhphs_clk>, <&uhphs_clk>, <&uhpck>; -- cgit v1.2.3 From ef5f9f47d4ec4cf42bac48c7c4dafacc1b9f0630 Mon Sep 17 00:00:00 2001 From: Dave Hansen Date: Thu, 2 Jun 2016 17:19:29 -0700 Subject: perf/x86/intel: Use Intel family macros for core perf events Use the new model number macros instead of spelling things out in the comments. Note that this is missing a Nehalem model that is mentioned in intel_idle which is fixed up in a later patch. The resulting binary (arch/x86/events/intel/core.o) is exactly the same with and without this patch modulo some harmless changes to restoring %esi in the return path of functions, even those untouched by this patch. Signed-off-by: Dave Hansen Cc: Alexander Shishkin Cc: Andy Lutomirski Cc: Arnaldo Carvalho de Melo Cc: Borislav Petkov Cc: Brian Gerst Cc: Dave Hansen Cc: Denys Vlasenko Cc: H. 
Peter Anvin Cc: Jiri Olsa Cc: Kan Liang Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Thomas Gleixner Cc: Vince Weaver Cc: jacob.jun.pan@intel.com Link: http://lkml.kernel.org/r/20160603001929.C5F1C079@viggo.jf.intel.com Signed-off-by: Ingo Molnar --- arch/x86/events/intel/core.c | 87 ++++++++++++++++++++++---------------------- 1 file changed, 44 insertions(+), 43 deletions(-) (limited to 'arch') diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c index 5081b4cdad0d..3ed528c2370c 100644 --- a/arch/x86/events/intel/core.c +++ b/arch/x86/events/intel/core.c @@ -16,6 +16,7 @@ #include #include +#include #include #include "../perf_event.h" @@ -3319,11 +3320,11 @@ static int intel_snb_pebs_broken(int cpu) u32 rev = UINT_MAX; /* default to broken for unknown models */ switch (cpu_data(cpu).x86_model) { - case 42: /* SNB */ + case INTEL_FAM6_SANDYBRIDGE: rev = 0x28; break; - case 45: /* SNB-EP */ + case INTEL_FAM6_SANDYBRIDGE_X: switch (cpu_data(cpu).x86_mask) { case 6: rev = 0x618; break; case 7: rev = 0x70c; break; @@ -3573,15 +3574,15 @@ __init int intel_pmu_init(void) * Install the hw-cache-events table: */ switch (boot_cpu_data.x86_model) { - case 14: /* 65nm Core "Yonah" */ + case INTEL_FAM6_CORE_YONAH: pr_cont("Core events, "); break; - case 15: /* 65nm Core2 "Merom" */ + case INTEL_FAM6_CORE2_MEROM: x86_add_quirk(intel_clovertown_quirk); - case 22: /* 65nm Core2 "Merom-L" */ - case 23: /* 45nm Core2 "Penryn" */ - case 29: /* 45nm Core2 "Dunnington (MP) */ + case INTEL_FAM6_CORE2_MEROM_L: + case INTEL_FAM6_CORE2_PENRYN: + case INTEL_FAM6_CORE2_DUNNINGTON: memcpy(hw_cache_event_ids, core2_hw_cache_event_ids, sizeof(hw_cache_event_ids)); @@ -3592,9 +3593,9 @@ __init int intel_pmu_init(void) pr_cont("Core2 events, "); break; - case 30: /* 45nm Nehalem */ - case 26: /* 45nm Nehalem-EP */ - case 46: /* 45nm Nehalem-EX */ + case INTEL_FAM6_NEHALEM: + case INTEL_FAM6_NEHALEM_EP: + case INTEL_FAM6_NEHALEM_EX: memcpy(hw_cache_event_ids, nehalem_hw_cache_event_ids, sizeof(hw_cache_event_ids)); memcpy(hw_cache_extra_regs, nehalem_hw_cache_extra_regs, @@ -3622,11 +3623,11 @@ __init int intel_pmu_init(void) pr_cont("Nehalem events, "); break; - case 28: /* 45nm Atom "Pineview" */ - case 38: /* 45nm Atom "Lincroft" */ - case 39: /* 32nm Atom "Penwell" */ - case 53: /* 32nm Atom "Cloverview" */ - case 54: /* 32nm Atom "Cedarview" */ + case INTEL_FAM6_ATOM_PINEVIEW: + case INTEL_FAM6_ATOM_LINCROFT: + case INTEL_FAM6_ATOM_PENWELL: + case INTEL_FAM6_ATOM_CLOVERVIEW: + case INTEL_FAM6_ATOM_CEDARVIEW: memcpy(hw_cache_event_ids, atom_hw_cache_event_ids, sizeof(hw_cache_event_ids)); @@ -3638,9 +3639,9 @@ __init int intel_pmu_init(void) pr_cont("Atom events, "); break; - case 55: /* 22nm Atom "Silvermont" */ - case 76: /* 14nm Atom "Airmont" */ - case 77: /* 22nm Atom "Silvermont Avoton/Rangely" */ + case INTEL_FAM6_ATOM_SILVERMONT1: + case INTEL_FAM6_ATOM_SILVERMONT2: + case INTEL_FAM6_ATOM_AIRMONT: memcpy(hw_cache_event_ids, slm_hw_cache_event_ids, sizeof(hw_cache_event_ids)); memcpy(hw_cache_extra_regs, slm_hw_cache_extra_regs, @@ -3656,8 +3657,8 @@ __init int intel_pmu_init(void) pr_cont("Silvermont events, "); break; - case 92: /* 14nm Atom "Goldmont" */ - case 95: /* 14nm Atom "Goldmont Denverton" */ + case INTEL_FAM6_ATOM_GOLDMONT: + case INTEL_FAM6_ATOM_DENVERTON: memcpy(hw_cache_event_ids, glm_hw_cache_event_ids, sizeof(hw_cache_event_ids)); memcpy(hw_cache_extra_regs, glm_hw_cache_extra_regs, @@ -3680,9 +3681,9 @@ __init int intel_pmu_init(void) pr_cont("Goldmont 
events, "); break; - case 37: /* 32nm Westmere */ - case 44: /* 32nm Westmere-EP */ - case 47: /* 32nm Westmere-EX */ + case INTEL_FAM6_WESTMERE: + case INTEL_FAM6_WESTMERE_EP: + case INTEL_FAM6_WESTMERE_EX: memcpy(hw_cache_event_ids, westmere_hw_cache_event_ids, sizeof(hw_cache_event_ids)); memcpy(hw_cache_extra_regs, nehalem_hw_cache_extra_regs, @@ -3709,8 +3710,8 @@ __init int intel_pmu_init(void) pr_cont("Westmere events, "); break; - case 42: /* 32nm SandyBridge */ - case 45: /* 32nm SandyBridge-E/EN/EP */ + case INTEL_FAM6_SANDYBRIDGE: + case INTEL_FAM6_SANDYBRIDGE_X: x86_add_quirk(intel_sandybridge_quirk); x86_add_quirk(intel_ht_bug); memcpy(hw_cache_event_ids, snb_hw_cache_event_ids, @@ -3723,7 +3724,7 @@ __init int intel_pmu_init(void) x86_pmu.event_constraints = intel_snb_event_constraints; x86_pmu.pebs_constraints = intel_snb_pebs_event_constraints; x86_pmu.pebs_aliases = intel_pebs_aliases_snb; - if (boot_cpu_data.x86_model == 45) + if (boot_cpu_data.x86_model == INTEL_FAM6_SANDYBRIDGE_X) x86_pmu.extra_regs = intel_snbep_extra_regs; else x86_pmu.extra_regs = intel_snb_extra_regs; @@ -3745,8 +3746,8 @@ __init int intel_pmu_init(void) pr_cont("SandyBridge events, "); break; - case 58: /* 22nm IvyBridge */ - case 62: /* 22nm IvyBridge-EP/EX */ + case INTEL_FAM6_IVYBRIDGE: + case INTEL_FAM6_IVYBRIDGE_X: x86_add_quirk(intel_ht_bug); memcpy(hw_cache_event_ids, snb_hw_cache_event_ids, sizeof(hw_cache_event_ids)); @@ -3762,7 +3763,7 @@ __init int intel_pmu_init(void) x86_pmu.pebs_constraints = intel_ivb_pebs_event_constraints; x86_pmu.pebs_aliases = intel_pebs_aliases_ivb; x86_pmu.pebs_prec_dist = true; - if (boot_cpu_data.x86_model == 62) + if (boot_cpu_data.x86_model == INTEL_FAM6_IVYBRIDGE_X) x86_pmu.extra_regs = intel_snbep_extra_regs; else x86_pmu.extra_regs = intel_snb_extra_regs; @@ -3780,10 +3781,10 @@ __init int intel_pmu_init(void) break; - case 60: /* 22nm Haswell Core */ - case 63: /* 22nm Haswell Server */ - case 69: /* 22nm Haswell ULT */ - case 70: /* 22nm Haswell + GT3e (Intel Iris Pro graphics) */ + case INTEL_FAM6_HASWELL_CORE: + case INTEL_FAM6_HASWELL_X: + case INTEL_FAM6_HASWELL_ULT: + case INTEL_FAM6_HASWELL_GT3E: x86_add_quirk(intel_ht_bug); x86_pmu.late_ack = true; memcpy(hw_cache_event_ids, hsw_hw_cache_event_ids, sizeof(hw_cache_event_ids)); @@ -3807,10 +3808,10 @@ __init int intel_pmu_init(void) pr_cont("Haswell events, "); break; - case 61: /* 14nm Broadwell Core-M */ - case 86: /* 14nm Broadwell Xeon D */ - case 71: /* 14nm Broadwell + GT3e (Intel Iris Pro graphics) */ - case 79: /* 14nm Broadwell Server */ + case INTEL_FAM6_BROADWELL_CORE: + case INTEL_FAM6_BROADWELL_XEON_D: + case INTEL_FAM6_BROADWELL_GT3E: + case INTEL_FAM6_BROADWELL_X: x86_pmu.late_ack = true; memcpy(hw_cache_event_ids, hsw_hw_cache_event_ids, sizeof(hw_cache_event_ids)); memcpy(hw_cache_extra_regs, hsw_hw_cache_extra_regs, sizeof(hw_cache_extra_regs)); @@ -3843,7 +3844,7 @@ __init int intel_pmu_init(void) pr_cont("Broadwell events, "); break; - case 87: /* Knights Landing Xeon Phi */ + case INTEL_FAM6_XEON_PHI_KNL: memcpy(hw_cache_event_ids, slm_hw_cache_event_ids, sizeof(hw_cache_event_ids)); memcpy(hw_cache_extra_regs, @@ -3861,11 +3862,11 @@ __init int intel_pmu_init(void) pr_cont("Knights Landing events, "); break; - case 142: /* 14nm Kabylake Mobile */ - case 158: /* 14nm Kabylake Desktop */ - case 78: /* 14nm Skylake Mobile */ - case 94: /* 14nm Skylake Desktop */ - case 85: /* 14nm Skylake Server */ + case INTEL_FAM6_SKYLAKE_MOBILE: + case INTEL_FAM6_SKYLAKE_DESKTOP: + case 
INTEL_FAM6_SKYLAKE_X: + case INTEL_FAM6_KABYLAKE_MOBILE: + case INTEL_FAM6_KABYLAKE_DESKTOP: x86_pmu.late_ack = true; memcpy(hw_cache_event_ids, skl_hw_cache_event_ids, sizeof(hw_cache_event_ids)); memcpy(hw_cache_extra_regs, skl_hw_cache_extra_regs, sizeof(hw_cache_extra_regs)); -- cgit v1.2.3 From 7f2236d0bf9a33bb539551b653ae842430654240 Mon Sep 17 00:00:00 2001 From: Dave Hansen Date: Thu, 2 Jun 2016 17:19:30 -0700 Subject: perf/x86/rapl: Use Intel family macros for RAPL Use the new INTEL_FAM6_* macros for rapl.c. Note that this is missing at least one Westmere model and Skylake Server which will we fixed later in this series. The resulting binary structure 'rapl_cpu_match' is the same before and after this patch. Signed-off-by: Dave Hansen Cc: Alexander Shishkin Cc: Andy Lutomirski Cc: Arnaldo Carvalho de Melo Cc: Borislav Petkov Cc: Brian Gerst Cc: Dave Hansen Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Jiri Olsa Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Srinivas Pandruvada Cc: Stephane Eranian Cc: Thomas Gleixner Cc: Vince Weaver Cc: jacob.jun.pan@intel.com Link: http://lkml.kernel.org/r/20160603001930.6AC50BE3@viggo.jf.intel.com Signed-off-by: Ingo Molnar --- arch/x86/events/intel/rapl.c | 31 ++++++++++++++++--------------- 1 file changed, 16 insertions(+), 15 deletions(-) (limited to 'arch') diff --git a/arch/x86/events/intel/rapl.c b/arch/x86/events/intel/rapl.c index e30eef4f29a6..8012fe6c7c8b 100644 --- a/arch/x86/events/intel/rapl.c +++ b/arch/x86/events/intel/rapl.c @@ -55,6 +55,7 @@ #include #include #include +#include #include "../perf_event.h" MODULE_LICENSE("GPL"); @@ -786,26 +787,26 @@ static const struct intel_rapl_init_fun skl_rapl_init __initconst = { }; static const struct x86_cpu_id rapl_cpu_match[] __initconst = { - X86_RAPL_MODEL_MATCH(42, snb_rapl_init), /* Sandy Bridge */ - X86_RAPL_MODEL_MATCH(45, snbep_rapl_init), /* Sandy Bridge-EP */ + X86_RAPL_MODEL_MATCH(INTEL_FAM6_SANDYBRIDGE, snb_rapl_init), + X86_RAPL_MODEL_MATCH(INTEL_FAM6_SANDYBRIDGE_X, snbep_rapl_init), - X86_RAPL_MODEL_MATCH(58, snb_rapl_init), /* Ivy Bridge */ - X86_RAPL_MODEL_MATCH(62, snbep_rapl_init), /* IvyTown */ + X86_RAPL_MODEL_MATCH(INTEL_FAM6_IVYBRIDGE, snb_rapl_init), + X86_RAPL_MODEL_MATCH(INTEL_FAM6_IVYBRIDGE_X, snbep_rapl_init), - X86_RAPL_MODEL_MATCH(60, hsw_rapl_init), /* Haswell */ - X86_RAPL_MODEL_MATCH(63, hsx_rapl_init), /* Haswell-Server */ - X86_RAPL_MODEL_MATCH(69, hsw_rapl_init), /* Haswell-Celeron */ - X86_RAPL_MODEL_MATCH(70, hsw_rapl_init), /* Haswell GT3e */ + X86_RAPL_MODEL_MATCH(INTEL_FAM6_HASWELL_CORE, hsw_rapl_init), + X86_RAPL_MODEL_MATCH(INTEL_FAM6_HASWELL_X, hsw_rapl_init), + X86_RAPL_MODEL_MATCH(INTEL_FAM6_HASWELL_ULT, hsw_rapl_init), + X86_RAPL_MODEL_MATCH(INTEL_FAM6_HASWELL_GT3E, hsw_rapl_init), - X86_RAPL_MODEL_MATCH(61, hsw_rapl_init), /* Broadwell */ - X86_RAPL_MODEL_MATCH(71, hsw_rapl_init), /* Broadwell-H */ - X86_RAPL_MODEL_MATCH(79, hsx_rapl_init), /* Broadwell-Server */ - X86_RAPL_MODEL_MATCH(86, hsx_rapl_init), /* Broadwell Xeon D */ + X86_RAPL_MODEL_MATCH(INTEL_FAM6_BROADWELL_CORE, hsw_rapl_init), + X86_RAPL_MODEL_MATCH(INTEL_FAM6_BROADWELL_GT3E, hsw_rapl_init), + X86_RAPL_MODEL_MATCH(INTEL_FAM6_BROADWELL_X, hsw_rapl_init), + X86_RAPL_MODEL_MATCH(INTEL_FAM6_BROADWELL_XEON_D, hsw_rapl_init), - X86_RAPL_MODEL_MATCH(87, knl_rapl_init), /* Knights Landing */ + X86_RAPL_MODEL_MATCH(INTEL_FAM6_XEON_PHI_KNL, knl_rapl_init), - X86_RAPL_MODEL_MATCH(78, skl_rapl_init), /* Skylake */ - X86_RAPL_MODEL_MATCH(94, skl_rapl_init), /* Skylake H/S */ + 
X86_RAPL_MODEL_MATCH(INTEL_FAM6_SKYLAKE_MOBILE, skl_rapl_init), + X86_RAPL_MODEL_MATCH(INTEL_FAM6_SKYLAKE_DESKTOP, skl_rapl_init), {}, }; -- cgit v1.2.3 From 353bf605a771e3c86b21de017e9525aba7d64770 Mon Sep 17 00:00:00 2001 From: Dave Hansen Date: Thu, 2 Jun 2016 17:19:33 -0700 Subject: perf/x86/msr: Use Intel family macros for MSR events code Use the new INTEL_MODEL_* macros for arch/x86/events/msr.c. This code appears to be missing handling for "WESTMERE2" and "SKYLAKE_X". Signed-off-by: Dave Hansen Cc: Alexander Shishkin Cc: Andy Lutomirski Cc: Andy Lutomirski Cc: Arnaldo Carvalho de Melo Cc: Borislav Petkov Cc: Brian Gerst Cc: Dave Hansen Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Jiri Olsa Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Thomas Gleixner Cc: Vince Weaver Cc: jacob.jun.pan@intel.com Link: http://lkml.kernel.org/r/20160603001933.99A402B0@viggo.jf.intel.com Signed-off-by: Ingo Molnar --- arch/x86/events/msr.c | 47 ++++++++++++++++++++++++----------------------- 1 file changed, 24 insertions(+), 23 deletions(-) (limited to 'arch') diff --git a/arch/x86/events/msr.c b/arch/x86/events/msr.c index 85ef3c2e80e0..83cf13e368cd 100644 --- a/arch/x86/events/msr.c +++ b/arch/x86/events/msr.c @@ -1,4 +1,5 @@ #include +#include enum perf_msr_id { PERF_MSR_TSC = 0, @@ -34,39 +35,39 @@ static bool test_intel(int idx) return false; switch (boot_cpu_data.x86_model) { - case 30: /* 45nm Nehalem */ - case 26: /* 45nm Nehalem-EP */ - case 46: /* 45nm Nehalem-EX */ + case INTEL_FAM6_NEHALEM: + case INTEL_FAM6_NEHALEM_EP: + case INTEL_FAM6_NEHALEM_EX: - case 37: /* 32nm Westmere */ - case 44: /* 32nm Westmere-EP */ - case 47: /* 32nm Westmere-EX */ + case INTEL_FAM6_WESTMERE: + case INTEL_FAM6_WESTMERE_EP: + case INTEL_FAM6_WESTMERE_EX: - case 42: /* 32nm SandyBridge */ - case 45: /* 32nm SandyBridge-E/EN/EP */ + case INTEL_FAM6_SANDYBRIDGE: + case INTEL_FAM6_SANDYBRIDGE_X: - case 58: /* 22nm IvyBridge */ - case 62: /* 22nm IvyBridge-EP/EX */ + case INTEL_FAM6_IVYBRIDGE: + case INTEL_FAM6_IVYBRIDGE_X: - case 60: /* 22nm Haswell Core */ - case 63: /* 22nm Haswell Server */ - case 69: /* 22nm Haswell ULT */ - case 70: /* 22nm Haswell + GT3e (Intel Iris Pro graphics) */ + case INTEL_FAM6_HASWELL_CORE: + case INTEL_FAM6_HASWELL_X: + case INTEL_FAM6_HASWELL_ULT: + case INTEL_FAM6_HASWELL_GT3E: - case 61: /* 14nm Broadwell Core-M */ - case 86: /* 14nm Broadwell Xeon D */ - case 71: /* 14nm Broadwell + GT3e (Intel Iris Pro graphics) */ - case 79: /* 14nm Broadwell Server */ + case INTEL_FAM6_BROADWELL_CORE: + case INTEL_FAM6_BROADWELL_XEON_D: + case INTEL_FAM6_BROADWELL_GT3E: + case INTEL_FAM6_BROADWELL_X: - case 55: /* 22nm Atom "Silvermont" */ - case 77: /* 22nm Atom "Silvermont Avoton/Rangely" */ - case 76: /* 14nm Atom "Airmont" */ + case INTEL_FAM6_ATOM_SILVERMONT1: + case INTEL_FAM6_ATOM_SILVERMONT2: + case INTEL_FAM6_ATOM_AIRMONT: if (idx == PERF_MSR_SMI) return true; break; - case 78: /* 14nm Skylake Mobile */ - case 94: /* 14nm Skylake Desktop */ + case INTEL_FAM6_SKYLAKE_MOBILE: + case INTEL_FAM6_SKYLAKE_DESKTOP: if (idx == PERF_MSR_SMI || idx == PERF_MSR_PPERF) return true; break; -- cgit v1.2.3 From 5134596caee9e834d2486edc45efad4c9e6effc3 Mon Sep 17 00:00:00 2001 From: Dave Hansen Date: Thu, 2 Jun 2016 17:19:35 -0700 Subject: perf/x86/msr: Add missing Intel models This patch presumes that Kabylake and Skylake Server will be the same as the existing Skylake parts and adds them to the MSR events code. Also add handling for "WESTMERE2". 
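All of these model tables key off boot_cpu_data.x86_model; as background, a hedged userspace sketch of how that number is derived from CPUID leaf 1 follows (it uses GCC's <cpuid.h>; the Skylake example in the comment matches the tables above, everything else is illustrative and not part of the patch):

/*
 * Derive the display family/model from CPUID leaf 1.  Family 6 parts
 * combine the model and extended model fields, which is why the
 * INTEL_FAM6_* names map to values like 94.
 */
#include <cpuid.h>
#include <stdio.h>

int main(void)
{
        unsigned int eax, ebx, ecx, edx;

        if (!__get_cpuid(1, &eax, &ebx, &ecx, &edx))
                return 1;

        unsigned int family = (eax >> 8) & 0xf;
        unsigned int model  = (eax >> 4) & 0xf;

        if (family == 0xf)
                family += (eax >> 20) & 0xff;           /* extended family */
        if (family == 0x6 || family == 0xf)
                model |= ((eax >> 16) & 0xf) << 4;      /* extended model */

        /* e.g. family 6, model 94 is what INTEL_FAM6_SKYLAKE_DESKTOP names */
        printf("family %u, model %u (0x%x)\n", family, model, model);
        return 0;
}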
Signed-off-by: Dave Hansen Cc: Alexander Shishkin Cc: Andy Lutomirski Cc: Arnaldo Carvalho de Melo Cc: Borislav Petkov Cc: Brian Gerst Cc: Dave Hansen Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Jiri Olsa Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Thomas Gleixner Cc: Vince Weaver Cc: jacob.jun.pan@intel.com Link: http://lkml.kernel.org/r/20160603001935.FE6B3847@viggo.jf.intel.com Signed-off-by: Ingo Molnar --- arch/x86/events/msr.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'arch') diff --git a/arch/x86/events/msr.c b/arch/x86/events/msr.c index 83cf13e368cd..50b3a056f96b 100644 --- a/arch/x86/events/msr.c +++ b/arch/x86/events/msr.c @@ -40,6 +40,7 @@ static bool test_intel(int idx) case INTEL_FAM6_NEHALEM_EX: case INTEL_FAM6_WESTMERE: + case INTEL_FAM6_WESTMERE2: case INTEL_FAM6_WESTMERE_EP: case INTEL_FAM6_WESTMERE_EX: @@ -68,6 +69,9 @@ static bool test_intel(int idx) case INTEL_FAM6_SKYLAKE_MOBILE: case INTEL_FAM6_SKYLAKE_DESKTOP: + case INTEL_FAM6_SKYLAKE_X: + case INTEL_FAM6_KABYLAKE_MOBILE: + case INTEL_FAM6_KABYLAKE_DESKTOP: if (idx == PERF_MSR_SMI || idx == PERF_MSR_PPERF) return true; break; -- cgit v1.2.3 From bf4ad54199333d10c212499b57f26ffeb8222c81 Mon Sep 17 00:00:00 2001 From: Dave Hansen Date: Thu, 2 Jun 2016 17:19:40 -0700 Subject: perf/x86/cstate: Use Intel Model name macros This should be getting old by now. Use the new macros intead of open-coded magic numbers. Signed-off-by: Dave Hansen Cc: Alexander Shishkin Cc: Andy Lutomirski Cc: Arnaldo Carvalho de Melo Cc: Borislav Petkov Cc: Brian Gerst Cc: Dave Hansen Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Jiri Olsa Cc: Kan Liang Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Thomas Gleixner Cc: Vince Weaver Cc: jacob.jun.pan@intel.com Link: http://lkml.kernel.org/r/20160603001940.FE69D646@viggo.jf.intel.com Signed-off-by: Ingo Molnar --- arch/x86/events/intel/cstate.c | 47 +++++++++++++++++++++--------------------- 1 file changed, 24 insertions(+), 23 deletions(-) (limited to 'arch') diff --git a/arch/x86/events/intel/cstate.c b/arch/x86/events/intel/cstate.c index 9ba4e4136a15..4c7638b91fa5 100644 --- a/arch/x86/events/intel/cstate.c +++ b/arch/x86/events/intel/cstate.c @@ -89,6 +89,7 @@ #include #include #include +#include #include "../perf_event.h" MODULE_LICENSE("GPL"); @@ -511,37 +512,37 @@ static const struct cstate_model slm_cstates __initconst = { { X86_VENDOR_INTEL, 6, model, X86_FEATURE_ANY, (unsigned long) &(states) } static const struct x86_cpu_id intel_cstates_match[] __initconst = { - X86_CSTATES_MODEL(30, nhm_cstates), /* 45nm Nehalem */ - X86_CSTATES_MODEL(26, nhm_cstates), /* 45nm Nehalem-EP */ - X86_CSTATES_MODEL(46, nhm_cstates), /* 45nm Nehalem-EX */ + X86_CSTATES_MODEL(INTEL_FAM6_NEHALEM, nhm_cstates), + X86_CSTATES_MODEL(INTEL_FAM6_NEHALEM_EP, nhm_cstates), + X86_CSTATES_MODEL(INTEL_FAM6_NEHALEM_EX, nhm_cstates), - X86_CSTATES_MODEL(37, nhm_cstates), /* 32nm Westmere */ - X86_CSTATES_MODEL(44, nhm_cstates), /* 32nm Westmere-EP */ - X86_CSTATES_MODEL(47, nhm_cstates), /* 32nm Westmere-EX */ + X86_CSTATES_MODEL(INTEL_FAM6_WESTMERE, nhm_cstates), + X86_CSTATES_MODEL(INTEL_FAM6_WESTMERE_EP, nhm_cstates), + X86_CSTATES_MODEL(INTEL_FAM6_WESTMERE_EX, nhm_cstates), - X86_CSTATES_MODEL(42, snb_cstates), /* 32nm SandyBridge */ - X86_CSTATES_MODEL(45, snb_cstates), /* 32nm SandyBridge-E/EN/EP */ + X86_CSTATES_MODEL(INTEL_FAM6_SANDYBRIDGE, snb_cstates), + X86_CSTATES_MODEL(INTEL_FAM6_SANDYBRIDGE_X, snb_cstates), - X86_CSTATES_MODEL(58, snb_cstates), /* 22nm IvyBridge */ - 
X86_CSTATES_MODEL(62, snb_cstates), /* 22nm IvyBridge-EP/EX */ + X86_CSTATES_MODEL(INTEL_FAM6_IVYBRIDGE, snb_cstates), + X86_CSTATES_MODEL(INTEL_FAM6_IVYBRIDGE_X, snb_cstates), - X86_CSTATES_MODEL(60, snb_cstates), /* 22nm Haswell Core */ - X86_CSTATES_MODEL(63, snb_cstates), /* 22nm Haswell Server */ - X86_CSTATES_MODEL(70, snb_cstates), /* 22nm Haswell + GT3e */ + X86_CSTATES_MODEL(INTEL_FAM6_HASWELL_CORE, snb_cstates), + X86_CSTATES_MODEL(INTEL_FAM6_HASWELL_X, snb_cstates), + X86_CSTATES_MODEL(INTEL_FAM6_HASWELL_GT3E, snb_cstates), - X86_CSTATES_MODEL(69, hswult_cstates), /* 22nm Haswell ULT */ + X86_CSTATES_MODEL(INTEL_FAM6_HASWELL_ULT, hswult_cstates), - X86_CSTATES_MODEL(55, slm_cstates), /* 22nm Atom Silvermont */ - X86_CSTATES_MODEL(77, slm_cstates), /* 22nm Atom Avoton/Rangely */ - X86_CSTATES_MODEL(76, slm_cstates), /* 22nm Atom Airmont */ + X86_CSTATES_MODEL(INTEL_FAM6_ATOM_SILVERMONT1, slm_cstates), + X86_CSTATES_MODEL(INTEL_FAM6_ATOM_SILVERMONT2, slm_cstates), + X86_CSTATES_MODEL(INTEL_FAM6_ATOM_AIRMONT, slm_cstates), - X86_CSTATES_MODEL(61, snb_cstates), /* 14nm Broadwell Core-M */ - X86_CSTATES_MODEL(86, snb_cstates), /* 14nm Broadwell Xeon D */ - X86_CSTATES_MODEL(71, snb_cstates), /* 14nm Broadwell + GT3e */ - X86_CSTATES_MODEL(79, snb_cstates), /* 14nm Broadwell Server */ + X86_CSTATES_MODEL(INTEL_FAM6_BROADWELL_CORE, snb_cstates), + X86_CSTATES_MODEL(INTEL_FAM6_BROADWELL_XEON_D, snb_cstates), + X86_CSTATES_MODEL(INTEL_FAM6_BROADWELL_GT3E, snb_cstates), + X86_CSTATES_MODEL(INTEL_FAM6_BROADWELL_X, snb_cstates), - X86_CSTATES_MODEL(78, snb_cstates), /* 14nm Skylake Mobile */ - X86_CSTATES_MODEL(94, snb_cstates), /* 14nm Skylake Desktop */ + X86_CSTATES_MODEL(INTEL_FAM6_SKYLAKE_MOBILE, snb_cstates), + X86_CSTATES_MODEL(INTEL_FAM6_SKYLAKE_DESKTOP, snb_cstates), { }, }; MODULE_DEVICE_TABLE(x86cpu, intel_cstates_match); -- cgit v1.2.3 From a07301ab3dabd1e31696c1bf1775aba24eb7573d Mon Sep 17 00:00:00 2001 From: Dave Hansen Date: Thu, 2 Jun 2016 17:19:42 -0700 Subject: perf/x86/uncore: Use Intel family name macros for uncore Another straightforward replacement of magic numbers Signed-off-by: Dave Hansen Cc: Alexander Shishkin Cc: Andy Lutomirski Cc: Arnaldo Carvalho de Melo Cc: Borislav Petkov Cc: Brian Gerst Cc: Dave Hansen Cc: Denys Vlasenko Cc: H. 
Peter Anvin Cc: Jiri Olsa Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Thomas Gleixner Cc: Vince Weaver Cc: jacob.jun.pan@intel.com Link: http://lkml.kernel.org/r/20160603001942.537570B6@viggo.jf.intel.com Signed-off-by: Ingo Molnar --- arch/x86/events/intel/uncore.c | 41 +++++++++++++++++++++-------------------- 1 file changed, 21 insertions(+), 20 deletions(-) (limited to 'arch') diff --git a/arch/x86/events/intel/uncore.c b/arch/x86/events/intel/uncore.c index 65490589e52e..4e70d2721249 100644 --- a/arch/x86/events/intel/uncore.c +++ b/arch/x86/events/intel/uncore.c @@ -1,4 +1,5 @@ #include +#include #include "uncore.h" static struct intel_uncore_type *empty_uncore[] = { NULL, }; @@ -1382,26 +1383,26 @@ static const struct intel_uncore_init_fun skl_uncore_init __initconst = { }; static const struct x86_cpu_id intel_uncore_match[] __initconst = { - X86_UNCORE_MODEL_MATCH(26, nhm_uncore_init), /* Nehalem */ - X86_UNCORE_MODEL_MATCH(30, nhm_uncore_init), - X86_UNCORE_MODEL_MATCH(37, nhm_uncore_init), /* Westmere */ - X86_UNCORE_MODEL_MATCH(44, nhm_uncore_init), - X86_UNCORE_MODEL_MATCH(42, snb_uncore_init), /* Sandy Bridge */ - X86_UNCORE_MODEL_MATCH(58, ivb_uncore_init), /* Ivy Bridge */ - X86_UNCORE_MODEL_MATCH(60, hsw_uncore_init), /* Haswell */ - X86_UNCORE_MODEL_MATCH(69, hsw_uncore_init), /* Haswell Celeron */ - X86_UNCORE_MODEL_MATCH(70, hsw_uncore_init), /* Haswell */ - X86_UNCORE_MODEL_MATCH(61, bdw_uncore_init), /* Broadwell */ - X86_UNCORE_MODEL_MATCH(71, bdw_uncore_init), /* Broadwell */ - X86_UNCORE_MODEL_MATCH(45, snbep_uncore_init), /* Sandy Bridge-EP */ - X86_UNCORE_MODEL_MATCH(46, nhmex_uncore_init), /* Nehalem-EX */ - X86_UNCORE_MODEL_MATCH(47, nhmex_uncore_init), /* Westmere-EX aka. Xeon E7 */ - X86_UNCORE_MODEL_MATCH(62, ivbep_uncore_init), /* Ivy Bridge-EP */ - X86_UNCORE_MODEL_MATCH(63, hswep_uncore_init), /* Haswell-EP */ - X86_UNCORE_MODEL_MATCH(79, bdx_uncore_init), /* BDX-EP */ - X86_UNCORE_MODEL_MATCH(86, bdx_uncore_init), /* BDX-DE */ - X86_UNCORE_MODEL_MATCH(87, knl_uncore_init), /* Knights Landing */ - X86_UNCORE_MODEL_MATCH(94, skl_uncore_init), /* SkyLake */ + X86_UNCORE_MODEL_MATCH(INTEL_FAM6_NEHALEM_EP, nhm_uncore_init), + X86_UNCORE_MODEL_MATCH(INTEL_FAM6_NEHALEM, nhm_uncore_init), + X86_UNCORE_MODEL_MATCH(INTEL_FAM6_WESTMERE, nhm_uncore_init), + X86_UNCORE_MODEL_MATCH(INTEL_FAM6_WESTMERE_EP, nhm_uncore_init), + X86_UNCORE_MODEL_MATCH(INTEL_FAM6_SANDYBRIDGE, snb_uncore_init), + X86_UNCORE_MODEL_MATCH(INTEL_FAM6_IVYBRIDGE, ivb_uncore_init), + X86_UNCORE_MODEL_MATCH(INTEL_FAM6_HASWELL_CORE, hsw_uncore_init), + X86_UNCORE_MODEL_MATCH(INTEL_FAM6_HASWELL_ULT, hsw_uncore_init), + X86_UNCORE_MODEL_MATCH(INTEL_FAM6_HASWELL_GT3E, hsw_uncore_init), + X86_UNCORE_MODEL_MATCH(INTEL_FAM6_BROADWELL_CORE, bdw_uncore_init), + X86_UNCORE_MODEL_MATCH(INTEL_FAM6_BROADWELL_GT3E, bdw_uncore_init), + X86_UNCORE_MODEL_MATCH(INTEL_FAM6_SANDYBRIDGE_X, snbep_uncore_init), + X86_UNCORE_MODEL_MATCH(INTEL_FAM6_NEHALEM_EX, nhmex_uncore_init), + X86_UNCORE_MODEL_MATCH(INTEL_FAM6_WESTMERE_EX, nhmex_uncore_init), + X86_UNCORE_MODEL_MATCH(INTEL_FAM6_IVYBRIDGE_X, ivbep_uncore_init), + X86_UNCORE_MODEL_MATCH(INTEL_FAM6_HASWELL_X, hswep_uncore_init), + X86_UNCORE_MODEL_MATCH(INTEL_FAM6_BROADWELL_X, bdx_uncore_init), + X86_UNCORE_MODEL_MATCH(INTEL_FAM6_BROADWELL_XEON_D, bdx_uncore_init), + X86_UNCORE_MODEL_MATCH(INTEL_FAM6_XEON_PHI_KNL, knl_uncore_init), + X86_UNCORE_MODEL_MATCH(INTEL_FAM6_SKYLAKE_DESKTOP,skl_uncore_init), {}, }; -- cgit v1.2.3 From 
348c5ac6c7dc117e1de095bf07c86c31101d56f3 Mon Sep 17 00:00:00 2001 From: Jacob Pan Date: Thu, 2 Jun 2016 17:19:53 -0700 Subject: perf/x86/rapl: Add Skylake server model detection SKX uses similar RAPL interface as Broadwell server. Signed-off-by: Jacob Pan Signed-off-by: Dave Hansen Cc: Alexander Shishkin Cc: Andy Lutomirski Cc: Arnaldo Carvalho de Melo Cc: Borislav Petkov Cc: Brian Gerst Cc: Dave Hansen Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Jiri Olsa Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Thomas Gleixner Cc: Vince Weaver Cc: jacob.jun.pan@intel.com Link: http://lkml.kernel.org/r/20160603001953.38848836@viggo.jf.intel.com Signed-off-by: Ingo Molnar --- arch/x86/events/intel/rapl.c | 1 + 1 file changed, 1 insertion(+) (limited to 'arch') diff --git a/arch/x86/events/intel/rapl.c b/arch/x86/events/intel/rapl.c index 8012fe6c7c8b..d0c58b35155f 100644 --- a/arch/x86/events/intel/rapl.c +++ b/arch/x86/events/intel/rapl.c @@ -807,6 +807,7 @@ static const struct x86_cpu_id rapl_cpu_match[] __initconst = { X86_RAPL_MODEL_MATCH(INTEL_FAM6_SKYLAKE_MOBILE, skl_rapl_init), X86_RAPL_MODEL_MATCH(INTEL_FAM6_SKYLAKE_DESKTOP, skl_rapl_init), + X86_RAPL_MODEL_MATCH(INTEL_FAM6_SKYLAKE_X, hsx_rapl_init), {}, }; -- cgit v1.2.3 From d5e0c89a8ccde900c3245474915ea0f518abdb79 Mon Sep 17 00:00:00 2001 From: Dave Hansen Date: Thu, 2 Jun 2016 17:19:39 -0700 Subject: x86/platform: Use new Intel model number macros Remove the open-coded model numbers. Signed-off-by: Dave Hansen Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Dave Hansen Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Jacob Pan Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Rafael J. Wysocki Cc: Srinivas Pandruvada Cc: Thomas Gleixner Cc: jacob.jun.pan@intel.com Link: http://lkml.kernel.org/r/20160603001939.D1D7FC2F@viggo.jf.intel.com Signed-off-by: Ingo Molnar --- arch/x86/platform/atom/punit_atom_debug.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/x86/platform/atom/punit_atom_debug.c b/arch/x86/platform/atom/punit_atom_debug.c index 81c769e80614..109782996867 100644 --- a/arch/x86/platform/atom/punit_atom_debug.c +++ b/arch/x86/platform/atom/punit_atom_debug.c @@ -23,6 +23,7 @@ #include #include #include +#include #include /* Power gate status reg */ @@ -143,8 +144,8 @@ static void punit_dbgfs_unregister(void) (kernel_ulong_t)&drv_data } static const struct x86_cpu_id intel_punit_cpu_ids[] = { - ICPU(55, punit_device_byt), /* Valleyview, Bay Trail */ - ICPU(76, punit_device_cht), /* Braswell, Cherry Trail */ + ICPU(INTEL_FAM6_ATOM_SILVERMONT1, punit_device_byt), + ICPU(INTEL_FAM6_ATOM_AIRMONT, punit_device_cht), {} }; -- cgit v1.2.3 From d1898b733619bd46194bd25aa6452d238ff2dc4e Mon Sep 17 00:00:00 2001 From: Dave Hansen Date: Wed, 1 Jun 2016 10:42:20 -0700 Subject: x86/fpu: Add tracepoints to dump FPU state at key points I've been carrying this patch around for a bit and it's helped me solve at least a couple FPU-related bugs. In addition to using it for debugging, I also drug it out because using AVX (and AVX2/AVX-512) can have serious power consequences for a modern core. It's very important to be able to figure out who is using it. It's also insanely useful to go out and see who is using a given feature, like MPX or Memory Protection Keys. If you, for instance, want to find all processes using protection keys, you can do: echo 'xfeatures & 0x200' > filter Since 0x200 is the protection keys feature bit. Note that this touches the KVM code. 
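For reference, the filter expression above corresponds to a one-line C predicate on the xfeatures word these tracepoints record; a trivial sketch, with a made-up helper name and the protection-keys bit (0x200, i.e. bit 9) written out explicitly:

/*
 * Illustrative only: "is this task's saved FPU state using protection keys?"
 * Mirrors the 'xfeatures & 0x200' ftrace filter from the changelog above.
 */
static inline bool fpu_state_uses_pkeys(const struct fpu *fpu)
{
	return fpu->state.xsave.header.xfeatures & (1ULL << 9);	/* 0x200 */
}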
KVM did a CREATE_TRACE_POINTS and then included a bunch of random headers. If anyone one of those included other tracepoints, it would have defined the *OTHER* tracepoints. That's bogus, so move it to the right place. Signed-off-by: Dave Hansen Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Dave Hansen Cc: Denys Vlasenko Cc: Fenghua Yu Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Quentin Casasnovas Cc: Steven Rostedt Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/20160601174220.3CDFB90E@viggo.jf.intel.com Signed-off-by: Ingo Molnar --- arch/x86/include/asm/fpu/internal.h | 5 ++ arch/x86/include/asm/trace/fpu.h | 119 ++++++++++++++++++++++++++++++++++++ arch/x86/kernel/fpu/core.c | 18 ++++++ arch/x86/kernel/fpu/signal.c | 3 + arch/x86/kvm/x86.c | 6 +- 5 files changed, 148 insertions(+), 3 deletions(-) create mode 100644 arch/x86/include/asm/trace/fpu.h (limited to 'arch') diff --git a/arch/x86/include/asm/fpu/internal.h b/arch/x86/include/asm/fpu/internal.h index 31ac8e6d9f36..116b58347501 100644 --- a/arch/x86/include/asm/fpu/internal.h +++ b/arch/x86/include/asm/fpu/internal.h @@ -18,6 +18,7 @@ #include #include #include +#include /* * High level FPU state handling functions: @@ -524,6 +525,7 @@ static inline void __fpregs_deactivate(struct fpu *fpu) fpu->fpregs_active = 0; this_cpu_write(fpu_fpregs_owner_ctx, NULL); + trace_x86_fpu_regs_deactivated(fpu); } /* Must be paired with a 'clts' (fpregs_activate_hw()) before! */ @@ -533,6 +535,7 @@ static inline void __fpregs_activate(struct fpu *fpu) fpu->fpregs_active = 1; this_cpu_write(fpu_fpregs_owner_ctx, fpu); + trace_x86_fpu_regs_activated(fpu); } /* @@ -604,11 +607,13 @@ switch_fpu_prepare(struct fpu *old_fpu, struct fpu *new_fpu, int cpu) /* But leave fpu_fpregs_owner_ctx! */ old_fpu->fpregs_active = 0; + trace_x86_fpu_regs_deactivated(old_fpu); /* Don't change CR0.TS if we just switch! 
*/ if (fpu.preload) { new_fpu->counter++; __fpregs_activate(new_fpu); + trace_x86_fpu_regs_activated(new_fpu); prefetch(&new_fpu->state); } else { __fpregs_deactivate_hw(); diff --git a/arch/x86/include/asm/trace/fpu.h b/arch/x86/include/asm/trace/fpu.h new file mode 100644 index 000000000000..9217ab1f5bf6 --- /dev/null +++ b/arch/x86/include/asm/trace/fpu.h @@ -0,0 +1,119 @@ +#undef TRACE_SYSTEM +#define TRACE_SYSTEM x86_fpu + +#if !defined(_TRACE_FPU_H) || defined(TRACE_HEADER_MULTI_READ) +#define _TRACE_FPU_H + +#include + +DECLARE_EVENT_CLASS(x86_fpu, + TP_PROTO(struct fpu *fpu), + TP_ARGS(fpu), + + TP_STRUCT__entry( + __field(struct fpu *, fpu) + __field(bool, fpregs_active) + __field(bool, fpstate_active) + __field(int, counter) + __field(u64, xfeatures) + __field(u64, xcomp_bv) + ), + + TP_fast_assign( + __entry->fpu = fpu; + __entry->fpregs_active = fpu->fpregs_active; + __entry->fpstate_active = fpu->fpstate_active; + __entry->counter = fpu->counter; + if (boot_cpu_has(X86_FEATURE_OSXSAVE)) { + __entry->xfeatures = fpu->state.xsave.header.xfeatures; + __entry->xcomp_bv = fpu->state.xsave.header.xcomp_bv; + } + ), + TP_printk("x86/fpu: %p fpregs_active: %d fpstate_active: %d counter: %d xfeatures: %llx xcomp_bv: %llx", + __entry->fpu, + __entry->fpregs_active, + __entry->fpstate_active, + __entry->counter, + __entry->xfeatures, + __entry->xcomp_bv + ) +); + +DEFINE_EVENT(x86_fpu, x86_fpu_state, + TP_PROTO(struct fpu *fpu), + TP_ARGS(fpu) +); + +DEFINE_EVENT(x86_fpu, x86_fpu_before_save, + TP_PROTO(struct fpu *fpu), + TP_ARGS(fpu) +); + +DEFINE_EVENT(x86_fpu, x86_fpu_after_save, + TP_PROTO(struct fpu *fpu), + TP_ARGS(fpu) +); + +DEFINE_EVENT(x86_fpu, x86_fpu_before_restore, + TP_PROTO(struct fpu *fpu), + TP_ARGS(fpu) +); + +DEFINE_EVENT(x86_fpu, x86_fpu_after_restore, + TP_PROTO(struct fpu *fpu), + TP_ARGS(fpu) +); + +DEFINE_EVENT(x86_fpu, x86_fpu_regs_activated, + TP_PROTO(struct fpu *fpu), + TP_ARGS(fpu) +); + +DEFINE_EVENT(x86_fpu, x86_fpu_regs_deactivated, + TP_PROTO(struct fpu *fpu), + TP_ARGS(fpu) +); + +DEFINE_EVENT(x86_fpu, x86_fpu_activate_state, + TP_PROTO(struct fpu *fpu), + TP_ARGS(fpu) +); + +DEFINE_EVENT(x86_fpu, x86_fpu_deactivate_state, + TP_PROTO(struct fpu *fpu), + TP_ARGS(fpu) +); + +DEFINE_EVENT(x86_fpu, x86_fpu_init_state, + TP_PROTO(struct fpu *fpu), + TP_ARGS(fpu) +); + +DEFINE_EVENT(x86_fpu, x86_fpu_dropped, + TP_PROTO(struct fpu *fpu), + TP_ARGS(fpu) +); + +DEFINE_EVENT(x86_fpu, x86_fpu_copy_src, + TP_PROTO(struct fpu *fpu), + TP_ARGS(fpu) +); + +DEFINE_EVENT(x86_fpu, x86_fpu_copy_dst, + TP_PROTO(struct fpu *fpu), + TP_ARGS(fpu) +); + +DEFINE_EVENT(x86_fpu, x86_fpu_xstate_check_failed, + TP_PROTO(struct fpu *fpu), + TP_ARGS(fpu) +); + +#undef TRACE_INCLUDE_PATH +#define TRACE_INCLUDE_PATH asm/trace/ +#undef TRACE_INCLUDE_FILE +#define TRACE_INCLUDE_FILE fpu +#endif /* _TRACE_FPU_H */ + +/* This part must be outside protection */ +#include diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c index 97027545a72d..7d564742e499 100644 --- a/arch/x86/kernel/fpu/core.c +++ b/arch/x86/kernel/fpu/core.c @@ -12,6 +12,9 @@ #include +#define CREATE_TRACE_POINTS +#include + /* * Represents the initial FPU state. 
It's mostly (but not completely) zeroes, * depending on the FPU hardware format: @@ -192,6 +195,7 @@ void fpu__save(struct fpu *fpu) WARN_ON_FPU(fpu != ¤t->thread.fpu); preempt_disable(); + trace_x86_fpu_before_save(fpu); if (fpu->fpregs_active) { if (!copy_fpregs_to_fpstate(fpu)) { if (use_eager_fpu()) @@ -200,6 +204,7 @@ void fpu__save(struct fpu *fpu) fpregs_deactivate(fpu); } } + trace_x86_fpu_after_save(fpu); preempt_enable(); } EXPORT_SYMBOL_GPL(fpu__save); @@ -275,6 +280,9 @@ int fpu__copy(struct fpu *dst_fpu, struct fpu *src_fpu) } preempt_enable(); + trace_x86_fpu_copy_src(src_fpu); + trace_x86_fpu_copy_dst(dst_fpu); + return 0; } @@ -288,7 +296,9 @@ void fpu__activate_curr(struct fpu *fpu) if (!fpu->fpstate_active) { fpstate_init(&fpu->state); + trace_x86_fpu_init_state(fpu); + trace_x86_fpu_activate_state(fpu); /* Safe to do for the current task: */ fpu->fpstate_active = 1; } @@ -314,7 +324,9 @@ void fpu__activate_fpstate_read(struct fpu *fpu) } else { if (!fpu->fpstate_active) { fpstate_init(&fpu->state); + trace_x86_fpu_init_state(fpu); + trace_x86_fpu_activate_state(fpu); /* Safe to do for current and for stopped child tasks: */ fpu->fpstate_active = 1; } @@ -347,7 +359,9 @@ void fpu__activate_fpstate_write(struct fpu *fpu) fpu->last_cpu = -1; } else { fpstate_init(&fpu->state); + trace_x86_fpu_init_state(fpu); + trace_x86_fpu_activate_state(fpu); /* Safe to do for stopped child tasks: */ fpu->fpstate_active = 1; } @@ -432,9 +446,11 @@ void fpu__restore(struct fpu *fpu) /* Avoid __kernel_fpu_begin() right after fpregs_activate() */ kernel_fpu_disable(); + trace_x86_fpu_before_restore(fpu); fpregs_activate(fpu); copy_kernel_to_fpregs(&fpu->state); fpu->counter++; + trace_x86_fpu_after_restore(fpu); kernel_fpu_enable(); } EXPORT_SYMBOL_GPL(fpu__restore); @@ -463,6 +479,8 @@ void fpu__drop(struct fpu *fpu) fpu->fpstate_active = 0; + trace_x86_fpu_dropped(fpu); + preempt_enable(); } diff --git a/arch/x86/kernel/fpu/signal.c b/arch/x86/kernel/fpu/signal.c index 31c6a60505e6..c6f2a3cee2c2 100644 --- a/arch/x86/kernel/fpu/signal.c +++ b/arch/x86/kernel/fpu/signal.c @@ -10,6 +10,7 @@ #include #include +#include static struct _fpx_sw_bytes fx_sw_reserved, fx_sw_reserved_ia32; @@ -282,6 +283,7 @@ static int __fpu__restore_sig(void __user *buf, void __user *buf_fx, int size) */ state_size = sizeof(struct fxregs_state); fx_only = 1; + trace_x86_fpu_xstate_check_failed(fpu); } else { state_size = fx_sw_user.xstate_size; xfeatures = fx_sw_user.xfeatures; @@ -311,6 +313,7 @@ static int __fpu__restore_sig(void __user *buf, void __user *buf_fx, int size) if (__copy_from_user(&fpu->state.xsave, buf_fx, state_size) || __copy_from_user(&env, buf, sizeof(env))) { fpstate_init(&fpu->state); + trace_x86_fpu_init_state(fpu); err = -1; } else { sanitize_restored_xstate(tsk, &env, xfeatures, fx_only); diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 902d9da12392..1ba3b7d3cae9 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -55,9 +55,6 @@ #include #include -#define CREATE_TRACE_POINTS -#include "trace.h" - #include #include #include @@ -68,6 +65,9 @@ #include #include +#define CREATE_TRACE_POINTS +#include "trace.h" + #define MAX_IO_MSRS 256 #define KVM_MAX_MCE_BANKS 32 #define KVM_MCE_CAP_SUPPORTED (MCG_CTL_P | MCG_SER_P) -- cgit v1.2.3 From f5967101e9de12addcda4510dfbac66d7c5779c3 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Mon, 30 May 2016 12:56:27 +0200 Subject: x86/hweight: Get rid of the special calling convention People complained about ARCH_HWEIGHT_CFLAGS and how 
it throws a wrench into kcov, lto, etc, experimentations. Add asm versions for __sw_hweight{32,64}() and do explicit saving and restoring of clobbered registers. This gets rid of the special calling convention. We get to call those functions on !X86_FEATURE_POPCNT CPUs. We still need to hardcode POPCNT and register operands as some old gas versions which we support, do not know about POPCNT. Btw, remove redundant REX prefix from 32-bit POPCNT because alternatives can do padding now. Suggested-by: H. Peter Anvin Signed-off-by: Borislav Petkov Acked-by: Peter Zijlstra (Intel) Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1464605787-20603-1-git-send-email-bp@alien8.de Signed-off-by: Ingo Molnar --- arch/x86/Kconfig | 5 --- arch/x86/include/asm/arch_hweight.h | 24 +++++------- arch/x86/kernel/i386_ksyms_32.c | 2 + arch/x86/kernel/x8664_ksyms_64.c | 3 ++ arch/x86/lib/Makefile | 2 +- arch/x86/lib/hweight.S | 77 +++++++++++++++++++++++++++++++++++++ lib/Makefile | 5 --- lib/hweight.c | 4 ++ 8 files changed, 97 insertions(+), 25 deletions(-) create mode 100644 arch/x86/lib/hweight.S (limited to 'arch') diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 0a7b885964ba..729d41d9ced3 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -294,11 +294,6 @@ config X86_32_LAZY_GS def_bool y depends on X86_32 && !CC_STACKPROTECTOR -config ARCH_HWEIGHT_CFLAGS - string - default "-fcall-saved-ecx -fcall-saved-edx" if X86_32 - default "-fcall-saved-rdi -fcall-saved-rsi -fcall-saved-rdx -fcall-saved-rcx -fcall-saved-r8 -fcall-saved-r9 -fcall-saved-r10 -fcall-saved-r11" if X86_64 - config ARCH_SUPPORTS_UPROBES def_bool y diff --git a/arch/x86/include/asm/arch_hweight.h b/arch/x86/include/asm/arch_hweight.h index 02e799fa43d1..e7cd63175de4 100644 --- a/arch/x86/include/asm/arch_hweight.h +++ b/arch/x86/include/asm/arch_hweight.h @@ -4,8 +4,8 @@ #include #ifdef CONFIG_64BIT -/* popcnt %edi, %eax -- redundant REX prefix for alignment */ -#define POPCNT32 ".byte 0xf3,0x40,0x0f,0xb8,0xc7" +/* popcnt %edi, %eax */ +#define POPCNT32 ".byte 0xf3,0x0f,0xb8,0xc7" /* popcnt %rdi, %rax */ #define POPCNT64 ".byte 0xf3,0x48,0x0f,0xb8,0xc7" #define REG_IN "D" @@ -17,19 +17,15 @@ #define REG_OUT "a" #endif -/* - * __sw_hweightXX are called from within the alternatives below - * and callee-clobbered registers need to be taken care of. See - * ARCH_HWEIGHT_CFLAGS in for the respective - * compiler switches. 
- */ +#define __HAVE_ARCH_SW_HWEIGHT + static __always_inline unsigned int __arch_hweight32(unsigned int w) { - unsigned int res = 0; + unsigned int res; asm (ALTERNATIVE("call __sw_hweight32", POPCNT32, X86_FEATURE_POPCNT) - : "="REG_OUT (res) - : REG_IN (w)); + : "="REG_OUT (res) + : REG_IN (w)); return res; } @@ -53,11 +49,11 @@ static inline unsigned long __arch_hweight64(__u64 w) #else static __always_inline unsigned long __arch_hweight64(__u64 w) { - unsigned long res = 0; + unsigned long res; asm (ALTERNATIVE("call __sw_hweight64", POPCNT64, X86_FEATURE_POPCNT) - : "="REG_OUT (res) - : REG_IN (w)); + : "="REG_OUT (res) + : REG_IN (w)); return res; } diff --git a/arch/x86/kernel/i386_ksyms_32.c b/arch/x86/kernel/i386_ksyms_32.c index 64341aa485ae..d40ee8a38fed 100644 --- a/arch/x86/kernel/i386_ksyms_32.c +++ b/arch/x86/kernel/i386_ksyms_32.c @@ -42,3 +42,5 @@ EXPORT_SYMBOL(empty_zero_page); EXPORT_SYMBOL(___preempt_schedule); EXPORT_SYMBOL(___preempt_schedule_notrace); #endif + +EXPORT_SYMBOL(__sw_hweight32); diff --git a/arch/x86/kernel/x8664_ksyms_64.c b/arch/x86/kernel/x8664_ksyms_64.c index cd05942bc918..f1aebfb49c36 100644 --- a/arch/x86/kernel/x8664_ksyms_64.c +++ b/arch/x86/kernel/x8664_ksyms_64.c @@ -44,6 +44,9 @@ EXPORT_SYMBOL(clear_page); EXPORT_SYMBOL(csum_partial); +EXPORT_SYMBOL(__sw_hweight32); +EXPORT_SYMBOL(__sw_hweight64); + /* * Export string functions. We normally rely on gcc builtin for most of these, * but gcc sometimes decides not to inline them. diff --git a/arch/x86/lib/Makefile b/arch/x86/lib/Makefile index 72a576752a7e..ec969cc3eb20 100644 --- a/arch/x86/lib/Makefile +++ b/arch/x86/lib/Makefile @@ -25,7 +25,7 @@ lib-y += memcpy_$(BITS).o lib-$(CONFIG_RWSEM_XCHGADD_ALGORITHM) += rwsem.o lib-$(CONFIG_INSTRUCTION_DECODER) += insn.o inat.o -obj-y += msr.o msr-reg.o msr-reg-export.o +obj-y += msr.o msr-reg.o msr-reg-export.o hweight.o ifeq ($(CONFIG_X86_32),y) obj-y += atomic64_32.o diff --git a/arch/x86/lib/hweight.S b/arch/x86/lib/hweight.S new file mode 100644 index 000000000000..02de3d74d2c5 --- /dev/null +++ b/arch/x86/lib/hweight.S @@ -0,0 +1,77 @@ +#include + +#include + +/* + * unsigned int __sw_hweight32(unsigned int w) + * %rdi: w + */ +ENTRY(__sw_hweight32) + +#ifdef CONFIG_X86_64 + movl %edi, %eax # w +#endif + __ASM_SIZE(push,) %__ASM_REG(dx) + movl %eax, %edx # w -> t + shrl %edx # t >>= 1 + andl $0x55555555, %edx # t &= 0x55555555 + subl %edx, %eax # w -= t + + movl %eax, %edx # w -> t + shrl $2, %eax # w_tmp >>= 2 + andl $0x33333333, %edx # t &= 0x33333333 + andl $0x33333333, %eax # w_tmp &= 0x33333333 + addl %edx, %eax # w = w_tmp + t + + movl %eax, %edx # w -> t + shrl $4, %edx # t >>= 4 + addl %edx, %eax # w_tmp += t + andl $0x0f0f0f0f, %eax # w_tmp &= 0x0f0f0f0f + imull $0x01010101, %eax, %eax # w_tmp *= 0x01010101 + shrl $24, %eax # w = w_tmp >> 24 + __ASM_SIZE(pop,) %__ASM_REG(dx) + ret +ENDPROC(__sw_hweight32) + +ENTRY(__sw_hweight64) +#ifdef CONFIG_X86_64 + pushq %rdx + + movq %rdi, %rdx # w -> t + movabsq $0x5555555555555555, %rax + shrq %rdx # t >>= 1 + andq %rdx, %rax # t &= 0x5555555555555555 + movabsq $0x3333333333333333, %rdx + subq %rax, %rdi # w -= t + + movq %rdi, %rax # w -> t + shrq $2, %rdi # w_tmp >>= 2 + andq %rdx, %rax # t &= 0x3333333333333333 + andq %rdi, %rdx # w_tmp &= 0x3333333333333333 + addq %rdx, %rax # w = w_tmp + t + + movq %rax, %rdx # w -> t + shrq $4, %rdx # t >>= 4 + addq %rdx, %rax # w_tmp += t + movabsq $0x0f0f0f0f0f0f0f0f, %rdx + andq %rdx, %rax # w_tmp &= 0x0f0f0f0f0f0f0f0f + movabsq $0x0101010101010101, 
%rdx + imulq %rdx, %rax # w_tmp *= 0x0101010101010101 + shrq $56, %rax # w = w_tmp >> 56 + + popq %rdx + ret +#else /* CONFIG_X86_32 */ + /* We're getting an u64 arg in (%eax,%edx): unsigned long hweight64(__u64 w) */ + pushl %ecx + + call __sw_hweight32 + movl %eax, %ecx # stash away result + movl %edx, %eax # second part of input + call __sw_hweight32 + addl %ecx, %eax # result + + popl %ecx + ret +#endif +ENDPROC(__sw_hweight64) diff --git a/lib/Makefile b/lib/Makefile index ff6a7a6c6395..07d06a8b9788 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -15,9 +15,6 @@ KCOV_INSTRUMENT_rbtree.o := n KCOV_INSTRUMENT_list_debug.o := n KCOV_INSTRUMENT_debugobjects.o := n KCOV_INSTRUMENT_dynamic_debug.o := n -# Kernel does not boot if we instrument this file as it uses custom calling -# convention (see CONFIG_ARCH_HWEIGHT_CFLAGS). -KCOV_INSTRUMENT_hweight.o := n lib-y := ctype.o string.o vsprintf.o cmdline.o \ rbtree.o radix-tree.o dump_stack.o timerqueue.o\ @@ -74,8 +71,6 @@ obj-$(CONFIG_HAS_IOMEM) += iomap_copy.o devres.o obj-$(CONFIG_CHECK_SIGNATURE) += check_signature.o obj-$(CONFIG_DEBUG_LOCKING_API_SELFTESTS) += locking-selftest.o -GCOV_PROFILE_hweight.o := n -CFLAGS_hweight.o = $(subst $(quote),,$(CONFIG_ARCH_HWEIGHT_CFLAGS)) obj-$(CONFIG_GENERIC_HWEIGHT) += hweight.o obj-$(CONFIG_BTREE) += btree.o diff --git a/lib/hweight.c b/lib/hweight.c index 9a5c1f221558..43273a7d83cf 100644 --- a/lib/hweight.c +++ b/lib/hweight.c @@ -9,6 +9,7 @@ * The Hamming Weight of a number is the total number of bits set in it. */ +#ifndef __HAVE_ARCH_SW_HWEIGHT unsigned int __sw_hweight32(unsigned int w) { #ifdef CONFIG_ARCH_HAS_FAST_MULTIPLIER @@ -25,6 +26,7 @@ unsigned int __sw_hweight32(unsigned int w) #endif } EXPORT_SYMBOL(__sw_hweight32); +#endif unsigned int __sw_hweight16(unsigned int w) { @@ -43,6 +45,7 @@ unsigned int __sw_hweight8(unsigned int w) } EXPORT_SYMBOL(__sw_hweight8); +#ifndef __HAVE_ARCH_SW_HWEIGHT unsigned long __sw_hweight64(__u64 w) { #if BITS_PER_LONG == 32 @@ -65,3 +68,4 @@ unsigned long __sw_hweight64(__u64 w) #endif } EXPORT_SYMBOL(__sw_hweight64); +#endif -- cgit v1.2.3 From 8ee62b1870be8e630158701632a533d0378e15b8 Mon Sep 17 00:00:00 2001 From: Jason Low Date: Fri, 3 Jun 2016 22:26:02 -0700 Subject: locking/rwsem: Convert sem->count to 'atomic_long_t' Convert the rwsem count variable to an atomic_long_t since we use it as an atomic variable. This also allows us to remove the rwsem_atomic_{add,update}() "abstraction" which would now be an unnecesary level of indirection. In follow up patches, we also remove the rwsem_atomic_{add,update}() definitions across the various architectures. Suggested-by: Peter Zijlstra Signed-off-by: Jason Low [ Build warning fixes on various architectures. ] Signed-off-by: Peter Zijlstra (Intel) Cc: Andrew Morton Cc: Davidlohr Bueso Cc: Fenghua Yu Cc: Heiko Carstens Cc: Jason Low Cc: Linus Torvalds Cc: Martin Schwidefsky Cc: Paul E. 
McKenney Cc: Peter Hurley Cc: Terry Rudd Cc: Thomas Gleixner Cc: Tim Chen Cc: Tony Luck Cc: Waiman Long Link: http://lkml.kernel.org/r/1465017963-4839-2-git-send-email-jason.low2@hpe.com Signed-off-by: Ingo Molnar --- arch/alpha/include/asm/rwsem.h | 26 +++++++++++++------------- arch/ia64/include/asm/rwsem.h | 24 ++++++++++++------------ include/asm-generic/rwsem.h | 6 +++--- include/linux/rwsem.h | 8 +++++--- kernel/locking/rwsem-xadd.c | 32 +++++++++++++++++--------------- 5 files changed, 50 insertions(+), 46 deletions(-) (limited to 'arch') diff --git a/arch/alpha/include/asm/rwsem.h b/arch/alpha/include/asm/rwsem.h index 0131a7058778..b40021aabb9f 100644 --- a/arch/alpha/include/asm/rwsem.h +++ b/arch/alpha/include/asm/rwsem.h @@ -25,8 +25,8 @@ static inline void __down_read(struct rw_semaphore *sem) { long oldcount; #ifndef CONFIG_SMP - oldcount = sem->count; - sem->count += RWSEM_ACTIVE_READ_BIAS; + oldcount = sem->count.counter; + sem->count.counter += RWSEM_ACTIVE_READ_BIAS; #else long temp; __asm__ __volatile__( @@ -52,13 +52,13 @@ static inline int __down_read_trylock(struct rw_semaphore *sem) { long old, new, res; - res = sem->count; + res = atomic_long_read(&sem->count); do { new = res + RWSEM_ACTIVE_READ_BIAS; if (new <= 0) break; old = res; - res = cmpxchg(&sem->count, old, new); + res = atomic_long_cmpxchg(&sem->count, old, new); } while (res != old); return res >= 0 ? 1 : 0; } @@ -67,8 +67,8 @@ static inline long ___down_write(struct rw_semaphore *sem) { long oldcount; #ifndef CONFIG_SMP - oldcount = sem->count; - sem->count += RWSEM_ACTIVE_WRITE_BIAS; + oldcount = sem->count.counter; + sem->count.counter += RWSEM_ACTIVE_WRITE_BIAS; #else long temp; __asm__ __volatile__( @@ -106,7 +106,7 @@ static inline int __down_write_killable(struct rw_semaphore *sem) */ static inline int __down_write_trylock(struct rw_semaphore *sem) { - long ret = cmpxchg(&sem->count, RWSEM_UNLOCKED_VALUE, + long ret = atomic_long_cmpxchg(&sem->count, RWSEM_UNLOCKED_VALUE, RWSEM_ACTIVE_WRITE_BIAS); if (ret == RWSEM_UNLOCKED_VALUE) return 1; @@ -117,8 +117,8 @@ static inline void __up_read(struct rw_semaphore *sem) { long oldcount; #ifndef CONFIG_SMP - oldcount = sem->count; - sem->count -= RWSEM_ACTIVE_READ_BIAS; + oldcount = sem->count.counter; + sem->count.counter -= RWSEM_ACTIVE_READ_BIAS; #else long temp; __asm__ __volatile__( @@ -142,8 +142,8 @@ static inline void __up_write(struct rw_semaphore *sem) { long count; #ifndef CONFIG_SMP - sem->count -= RWSEM_ACTIVE_WRITE_BIAS; - count = sem->count; + sem->count.counter -= RWSEM_ACTIVE_WRITE_BIAS; + count = sem->count.counter; #else long temp; __asm__ __volatile__( @@ -171,8 +171,8 @@ static inline void __downgrade_write(struct rw_semaphore *sem) { long oldcount; #ifndef CONFIG_SMP - oldcount = sem->count; - sem->count -= RWSEM_WAITING_BIAS; + oldcount = sem->count.counter; + sem->count.counter -= RWSEM_WAITING_BIAS; #else long temp; __asm__ __volatile__( diff --git a/arch/ia64/include/asm/rwsem.h b/arch/ia64/include/asm/rwsem.h index 8b23e070b844..c5d544f188ed 100644 --- a/arch/ia64/include/asm/rwsem.h +++ b/arch/ia64/include/asm/rwsem.h @@ -40,7 +40,7 @@ static inline void __down_read (struct rw_semaphore *sem) { - long result = ia64_fetchadd8_acq((unsigned long *)&sem->count, 1); + long result = ia64_fetchadd8_acq((unsigned long *)&sem->count.counter, 1); if (result < 0) rwsem_down_read_failed(sem); @@ -55,9 +55,9 @@ ___down_write (struct rw_semaphore *sem) long old, new; do { - old = sem->count; + old = atomic_long_read(&sem->count); new = old + 
RWSEM_ACTIVE_WRITE_BIAS; - } while (cmpxchg_acq(&sem->count, old, new) != old); + } while (atomic_long_cmpxchg_acquire(&sem->count, old, new) != old); return old; } @@ -85,7 +85,7 @@ __down_write_killable (struct rw_semaphore *sem) static inline void __up_read (struct rw_semaphore *sem) { - long result = ia64_fetchadd8_rel((unsigned long *)&sem->count, -1); + long result = ia64_fetchadd8_rel((unsigned long *)&sem->count.counter, -1); if (result < 0 && (--result & RWSEM_ACTIVE_MASK) == 0) rwsem_wake(sem); @@ -100,9 +100,9 @@ __up_write (struct rw_semaphore *sem) long old, new; do { - old = sem->count; + old = atomic_long_read(&sem->count); new = old - RWSEM_ACTIVE_WRITE_BIAS; - } while (cmpxchg_rel(&sem->count, old, new) != old); + } while (atomic_long_cmpxchg_release(&sem->count, old, new) != old); if (new < 0 && (new & RWSEM_ACTIVE_MASK) == 0) rwsem_wake(sem); @@ -115,8 +115,8 @@ static inline int __down_read_trylock (struct rw_semaphore *sem) { long tmp; - while ((tmp = sem->count) >= 0) { - if (tmp == cmpxchg_acq(&sem->count, tmp, tmp+1)) { + while ((tmp = atomic_long_read(&sem->count)) >= 0) { + if (tmp == atomic_long_cmpxchg_acquire(&sem->count, tmp, tmp+1)) { return 1; } } @@ -129,8 +129,8 @@ __down_read_trylock (struct rw_semaphore *sem) static inline int __down_write_trylock (struct rw_semaphore *sem) { - long tmp = cmpxchg_acq(&sem->count, RWSEM_UNLOCKED_VALUE, - RWSEM_ACTIVE_WRITE_BIAS); + long tmp = atomic_long_cmpxchg_acquire(&sem->count, + RWSEM_UNLOCKED_VALUE, RWSEM_ACTIVE_WRITE_BIAS); return tmp == RWSEM_UNLOCKED_VALUE; } @@ -143,9 +143,9 @@ __downgrade_write (struct rw_semaphore *sem) long old, new; do { - old = sem->count; + old = atomic_long_read(&sem->count); new = old - RWSEM_WAITING_BIAS; - } while (cmpxchg_rel(&sem->count, old, new) != old); + } while (atomic_long_cmpxchg_release(&sem->count, old, new) != old); if (old < 0) rwsem_downgrade_wake(sem); diff --git a/include/asm-generic/rwsem.h b/include/asm-generic/rwsem.h index 3fc94a046bf5..a3a93eca766c 100644 --- a/include/asm-generic/rwsem.h +++ b/include/asm-generic/rwsem.h @@ -41,8 +41,8 @@ static inline int __down_read_trylock(struct rw_semaphore *sem) { long tmp; - while ((tmp = sem->count) >= 0) { - if (tmp == cmpxchg_acquire(&sem->count, tmp, + while ((tmp = atomic_long_read(&sem->count)) >= 0) { + if (tmp == atomic_long_cmpxchg_acquire(&sem->count, tmp, tmp + RWSEM_ACTIVE_READ_BIAS)) { return 1; } @@ -79,7 +79,7 @@ static inline int __down_write_trylock(struct rw_semaphore *sem) { long tmp; - tmp = cmpxchg_acquire(&sem->count, RWSEM_UNLOCKED_VALUE, + tmp = atomic_long_cmpxchg_acquire(&sem->count, RWSEM_UNLOCKED_VALUE, RWSEM_ACTIVE_WRITE_BIAS); return tmp == RWSEM_UNLOCKED_VALUE; } diff --git a/include/linux/rwsem.h b/include/linux/rwsem.h index d37fbb34d06f..dd1d14250340 100644 --- a/include/linux/rwsem.h +++ b/include/linux/rwsem.h @@ -23,10 +23,11 @@ struct rw_semaphore; #ifdef CONFIG_RWSEM_GENERIC_SPINLOCK #include /* use a generic implementation */ +#define __RWSEM_INIT_COUNT(name) .count = RWSEM_UNLOCKED_VALUE #else /* All arch specific implementations share the same struct */ struct rw_semaphore { - long count; + atomic_long_t count; struct list_head wait_list; raw_spinlock_t wait_lock; #ifdef CONFIG_RWSEM_SPIN_ON_OWNER @@ -54,9 +55,10 @@ extern struct rw_semaphore *rwsem_downgrade_wake(struct rw_semaphore *sem); /* In all implementations count != 0 means locked */ static inline int rwsem_is_locked(struct rw_semaphore *sem) { - return sem->count != 0; + return atomic_long_read(&sem->count) != 0; } 
+#define __RWSEM_INIT_COUNT(name) .count = ATOMIC_LONG_INIT(RWSEM_UNLOCKED_VALUE) #endif /* Common initializer macros and functions */ @@ -74,7 +76,7 @@ static inline int rwsem_is_locked(struct rw_semaphore *sem) #endif #define __RWSEM_INITIALIZER(name) \ - { .count = RWSEM_UNLOCKED_VALUE, \ + { __RWSEM_INIT_COUNT(name), \ .wait_list = LIST_HEAD_INIT((name).wait_list), \ .wait_lock = __RAW_SPIN_LOCK_UNLOCKED(name.wait_lock) \ __RWSEM_OPT_INIT(name) \ diff --git a/kernel/locking/rwsem-xadd.c b/kernel/locking/rwsem-xadd.c index b957da7fcb19..63b40a5c62ec 100644 --- a/kernel/locking/rwsem-xadd.c +++ b/kernel/locking/rwsem-xadd.c @@ -80,7 +80,7 @@ void __init_rwsem(struct rw_semaphore *sem, const char *name, debug_check_no_locks_freed((void *)sem, sizeof(*sem)); lockdep_init_map(&sem->dep_map, name, key, 0); #endif - sem->count = RWSEM_UNLOCKED_VALUE; + atomic_long_set(&sem->count, RWSEM_UNLOCKED_VALUE); raw_spin_lock_init(&sem->wait_lock); INIT_LIST_HEAD(&sem->wait_list); #ifdef CONFIG_RWSEM_SPIN_ON_OWNER @@ -153,10 +153,11 @@ __rwsem_mark_wake(struct rw_semaphore *sem, if (wake_type != RWSEM_WAKE_READ_OWNED) { adjustment = RWSEM_ACTIVE_READ_BIAS; try_reader_grant: - oldcount = rwsem_atomic_update(adjustment, sem) - adjustment; + oldcount = atomic_long_add_return(adjustment, &sem->count) - adjustment; + if (unlikely(oldcount < RWSEM_WAITING_BIAS)) { /* A writer stole the lock. Undo our reader grant. */ - if (rwsem_atomic_update(-adjustment, sem) & + if (atomic_long_sub_return(adjustment, &sem->count) & RWSEM_ACTIVE_MASK) goto out; /* Last active locker left. Retry waking readers. */ @@ -186,7 +187,7 @@ __rwsem_mark_wake(struct rw_semaphore *sem, adjustment -= RWSEM_WAITING_BIAS; if (adjustment) - rwsem_atomic_add(adjustment, sem); + atomic_long_add(adjustment, &sem->count); next = sem->wait_list.next; loop = woken; @@ -233,7 +234,7 @@ struct rw_semaphore __sched *rwsem_down_read_failed(struct rw_semaphore *sem) list_add_tail(&waiter.list, &sem->wait_list); /* we're now waiting on the lock, but no longer actively locking */ - count = rwsem_atomic_update(adjustment, sem); + count = atomic_long_add_return(adjustment, &sem->count); /* If there are no active locks, wake the front queued process(es). 
* @@ -282,7 +283,8 @@ static inline bool rwsem_try_write_lock(long count, struct rw_semaphore *sem) RWSEM_ACTIVE_WRITE_BIAS : RWSEM_ACTIVE_WRITE_BIAS + RWSEM_WAITING_BIAS; - if (cmpxchg_acquire(&sem->count, RWSEM_WAITING_BIAS, count) == RWSEM_WAITING_BIAS) { + if (atomic_long_cmpxchg_acquire(&sem->count, RWSEM_WAITING_BIAS, count) + == RWSEM_WAITING_BIAS) { rwsem_set_owner(sem); return true; } @@ -296,13 +298,13 @@ static inline bool rwsem_try_write_lock(long count, struct rw_semaphore *sem) */ static inline bool rwsem_try_write_lock_unqueued(struct rw_semaphore *sem) { - long old, count = READ_ONCE(sem->count); + long old, count = atomic_long_read(&sem->count); while (true) { if (!(count == 0 || count == RWSEM_WAITING_BIAS)) return false; - old = cmpxchg_acquire(&sem->count, count, + old = atomic_long_cmpxchg_acquire(&sem->count, count, count + RWSEM_ACTIVE_WRITE_BIAS); if (old == count) { rwsem_set_owner(sem); @@ -324,7 +326,7 @@ static inline bool rwsem_can_spin_on_owner(struct rw_semaphore *sem) rcu_read_lock(); owner = READ_ONCE(sem->owner); if (!owner) { - long count = READ_ONCE(sem->count); + long count = atomic_long_read(&sem->count); /* * If sem->owner is not set, yet we have just recently entered the * slowpath with the lock being active, then there is a possibility @@ -375,7 +377,7 @@ bool rwsem_spin_on_owner(struct rw_semaphore *sem, struct task_struct *owner) * held by readers. Check the counter to verify the * state. */ - count = READ_ONCE(sem->count); + count = atomic_long_read(&sem->count); return (count == 0 || count == RWSEM_WAITING_BIAS); } @@ -460,7 +462,7 @@ __rwsem_down_write_failed_common(struct rw_semaphore *sem, int state) WAKE_Q(wake_q); /* undo write bias from down_write operation, stop active locking */ - count = rwsem_atomic_update(-RWSEM_ACTIVE_WRITE_BIAS, sem); + count = atomic_long_sub_return(RWSEM_ACTIVE_WRITE_BIAS, &sem->count); /* do optimistic spinning and steal lock if possible */ if (rwsem_optimistic_spin(sem)) @@ -483,7 +485,7 @@ __rwsem_down_write_failed_common(struct rw_semaphore *sem, int state) /* we're now waiting on the lock, but no longer actively locking */ if (waiting) { - count = READ_ONCE(sem->count); + count = atomic_long_read(&sem->count); /* * If there were already threads queued before us and there are @@ -505,7 +507,7 @@ __rwsem_down_write_failed_common(struct rw_semaphore *sem, int state) } } else - count = rwsem_atomic_update(RWSEM_WAITING_BIAS, sem); + count = atomic_long_add_return(RWSEM_WAITING_BIAS, &sem->count); /* wait until we successfully acquire the lock */ set_current_state(state); @@ -521,7 +523,7 @@ __rwsem_down_write_failed_common(struct rw_semaphore *sem, int state) schedule(); set_current_state(state); - } while ((count = sem->count) & RWSEM_ACTIVE_MASK); + } while ((count = atomic_long_read(&sem->count)) & RWSEM_ACTIVE_MASK); raw_spin_lock_irq(&sem->wait_lock); } @@ -536,7 +538,7 @@ out_nolock: raw_spin_lock_irq(&sem->wait_lock); list_del(&waiter.list); if (list_empty(&sem->wait_list)) - rwsem_atomic_update(-RWSEM_WAITING_BIAS, sem); + atomic_long_add(-RWSEM_WAITING_BIAS, &sem->count); else __rwsem_mark_wake(sem, RWSEM_WAKE_ANY, &wake_q); raw_spin_unlock_irq(&sem->wait_lock); -- cgit v1.2.3 From d157bd860f1c828593730dca594d0ce51956833b Mon Sep 17 00:00:00 2001 From: Jason Low Date: Mon, 16 May 2016 17:38:02 -0700 Subject: locking/rwsem: Remove rwsem_atomic_add() and rwsem_atomic_update() The rwsem-xadd count has been converted to an atomic variable and the rwsem code now directly uses atomic_long_add() and 
atomic_long_add_return(), so we can remove the arch implementations of rwsem_atomic_add() and rwsem_atomic_update(). Signed-off-by: Jason Low Signed-off-by: Peter Zijlstra (Intel) Cc: Andrew Morton Cc: Arnd Bergmann Cc: Christoph Lameter Cc: Davidlohr Bueso Cc: Fenghua Yu Cc: Heiko Carstens Cc: Ivan Kokshaysky Cc: Jason Low Cc: Linus Torvalds Cc: Martin Schwidefsky Cc: Matt Turner Cc: Paul E. McKenney Cc: Peter Hurley Cc: Peter Zijlstra Cc: Richard Henderson Cc: Terry Rudd Cc: Thomas Gleixner Cc: Tim Chen Cc: Tony Luck Cc: Waiman Long Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- arch/alpha/include/asm/rwsem.h | 42 ------------------------------------------ arch/ia64/include/asm/rwsem.h | 7 ------- arch/s390/include/asm/rwsem.h | 37 ------------------------------------- arch/x86/include/asm/rwsem.h | 18 ------------------ include/asm-generic/rwsem.h | 16 ---------------- 5 files changed, 120 deletions(-) (limited to 'arch') diff --git a/arch/alpha/include/asm/rwsem.h b/arch/alpha/include/asm/rwsem.h index b40021aabb9f..77873d0ad293 100644 --- a/arch/alpha/include/asm/rwsem.h +++ b/arch/alpha/include/asm/rwsem.h @@ -191,47 +191,5 @@ static inline void __downgrade_write(struct rw_semaphore *sem) rwsem_downgrade_wake(sem); } -static inline void rwsem_atomic_add(long val, struct rw_semaphore *sem) -{ -#ifndef CONFIG_SMP - sem->count += val; -#else - long temp; - __asm__ __volatile__( - "1: ldq_l %0,%1\n" - " addq %0,%2,%0\n" - " stq_c %0,%1\n" - " beq %0,2f\n" - ".subsection 2\n" - "2: br 1b\n" - ".previous" - :"=&r" (temp), "=m" (sem->count) - :"Ir" (val), "m" (sem->count)); -#endif -} - -static inline long rwsem_atomic_update(long val, struct rw_semaphore *sem) -{ -#ifndef CONFIG_SMP - sem->count += val; - return sem->count; -#else - long ret, temp; - __asm__ __volatile__( - "1: ldq_l %0,%1\n" - " addq %0,%3,%2\n" - " addq %0,%3,%0\n" - " stq_c %2,%1\n" - " beq %2,2f\n" - ".subsection 2\n" - "2: br 1b\n" - ".previous" - :"=&r" (ret), "=m" (sem->count), "=&r" (temp) - :"Ir" (val), "m" (sem->count)); - - return ret; -#endif -} - #endif /* __KERNEL__ */ #endif /* _ALPHA_RWSEM_H */ diff --git a/arch/ia64/include/asm/rwsem.h b/arch/ia64/include/asm/rwsem.h index c5d544f188ed..8fa98dd303b4 100644 --- a/arch/ia64/include/asm/rwsem.h +++ b/arch/ia64/include/asm/rwsem.h @@ -151,11 +151,4 @@ __downgrade_write (struct rw_semaphore *sem) rwsem_downgrade_wake(sem); } -/* - * Implement atomic add functionality. These used to be "inline" functions, but GCC v3.1 - * doesn't quite optimize this stuff right and ends up with bad calls to fetchandadd. 
- */ -#define rwsem_atomic_add(delta, sem) atomic64_add(delta, (atomic64_t *)(&(sem)->count)) -#define rwsem_atomic_update(delta, sem) atomic64_add_return(delta, (atomic64_t *)(&(sem)->count)) - #endif /* _ASM_IA64_RWSEM_H */ diff --git a/arch/s390/include/asm/rwsem.h b/arch/s390/include/asm/rwsem.h index c75e4471e618..597e7e96b59e 100644 --- a/arch/s390/include/asm/rwsem.h +++ b/arch/s390/include/asm/rwsem.h @@ -207,41 +207,4 @@ static inline void __downgrade_write(struct rw_semaphore *sem) rwsem_downgrade_wake(sem); } -/* - * implement atomic add functionality - */ -static inline void rwsem_atomic_add(long delta, struct rw_semaphore *sem) -{ - signed long old, new; - - asm volatile( - " lg %0,%2\n" - "0: lgr %1,%0\n" - " agr %1,%4\n" - " csg %0,%1,%2\n" - " jl 0b" - : "=&d" (old), "=&d" (new), "=Q" (sem->count) - : "Q" (sem->count), "d" (delta) - : "cc", "memory"); -} - -/* - * implement exchange and add functionality - */ -static inline long rwsem_atomic_update(long delta, struct rw_semaphore *sem) -{ - signed long old, new; - - asm volatile( - " lg %0,%2\n" - "0: lgr %1,%0\n" - " agr %1,%4\n" - " csg %0,%1,%2\n" - " jl 0b" - : "=&d" (old), "=&d" (new), "=Q" (sem->count) - : "Q" (sem->count), "d" (delta) - : "cc", "memory"); - return new; -} - #endif /* _S390_RWSEM_H */ diff --git a/arch/x86/include/asm/rwsem.h b/arch/x86/include/asm/rwsem.h index 453744c1d347..089ced4edbbc 100644 --- a/arch/x86/include/asm/rwsem.h +++ b/arch/x86/include/asm/rwsem.h @@ -213,23 +213,5 @@ static inline void __downgrade_write(struct rw_semaphore *sem) : "memory", "cc"); } -/* - * implement atomic add functionality - */ -static inline void rwsem_atomic_add(long delta, struct rw_semaphore *sem) -{ - asm volatile(LOCK_PREFIX _ASM_ADD "%1,%0" - : "+m" (sem->count) - : "er" (delta)); -} - -/* - * implement exchange and add functionality - */ -static inline long rwsem_atomic_update(long delta, struct rw_semaphore *sem) -{ - return delta + xadd(&sem->count, delta); -} - #endif /* __KERNEL__ */ #endif /* _ASM_X86_RWSEM_H */ diff --git a/include/asm-generic/rwsem.h b/include/asm-generic/rwsem.h index a3a93eca766c..5be122e3d326 100644 --- a/include/asm-generic/rwsem.h +++ b/include/asm-generic/rwsem.h @@ -106,14 +106,6 @@ static inline void __up_write(struct rw_semaphore *sem) rwsem_wake(sem); } -/* - * implement atomic add functionality - */ -static inline void rwsem_atomic_add(long delta, struct rw_semaphore *sem) -{ - atomic_long_add(delta, (atomic_long_t *)&sem->count); -} - /* * downgrade write lock to read lock */ @@ -134,13 +126,5 @@ static inline void __downgrade_write(struct rw_semaphore *sem) rwsem_downgrade_wake(sem); } -/* - * implement exchange and add functionality - */ -static inline long rwsem_atomic_update(long delta, struct rw_semaphore *sem) -{ - return atomic_long_add_return(delta, (atomic_long_t *)&sem->count); -} - #endif /* __KERNEL__ */ #endif /* _ASM_GENERIC_RWSEM_H */ -- cgit v1.2.3 From 6428671bae97caa7040e24e79e969fd87908f4f3 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 1 Jun 2016 20:58:15 +0200 Subject: locking/mutex: Optimize mutex_trylock() fast-path A while back Viro posted a number of 'interesting' mutex_is_locked() users on IRC, one of those was RCU. RCU seems to use mutex_is_locked() to avoid doing mutex_trylock(), the regular load before modify pattern. While the use isn't wrong per se, its curious in that its needed at all, mutex_trylock() should be good enough on its own to avoid the pointless cacheline bounces. 
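What such a cheap trylock fast path amounts to is the classic check-then-cmpxchg (test-and-test-and-set) pattern; a minimal sketch, using a hypothetical example_mutex_trylock() over generic kernel atomics purely for illustration (1 == unlocked, as in the mutex fast path):

/*
 * Illustrative only: read the counter first so the cacheline is only
 * dirtied (cmpxchg'd) when there is a realistic chance of taking the lock.
 */
static inline bool example_mutex_trylock(atomic_t *count)
{
	if (atomic_read(count) != 1)		/* cheap shared read, no bouncing */
		return false;
	return atomic_cmpxchg(count, 1, 0) == 1;	/* 1 -> 0: we own it */
}

The per-architecture hunks below add essentially this early atomic_read() check in front of the existing cmpxchg/xchg fast paths.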
So fix those and remove the mutex_is_locked() (ab)use from RCU. Reported-by: Al Viro Signed-off-by: Peter Zijlstra (Intel) Acked-by: Paul McKenney Acked-by: Davidlohr Bueso Cc: Andrew Morton Cc: Linus Torvalds Cc: Paul E. McKenney Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Waiman Long Link: http://lkml.kernel.org/r/20160601185815.GW3190@twins.programming.kicks-ass.net Signed-off-by: Ingo Molnar --- arch/ia64/include/asm/mutex.h | 2 +- arch/powerpc/include/asm/mutex.h | 2 +- arch/x86/include/asm/mutex_32.h | 2 +- arch/x86/include/asm/mutex_64.h | 6 +++--- include/asm-generic/mutex-dec.h | 2 +- include/asm-generic/mutex-xchg.h | 6 +++++- kernel/rcu/tree.c | 1 - 7 files changed, 12 insertions(+), 9 deletions(-) (limited to 'arch') diff --git a/arch/ia64/include/asm/mutex.h b/arch/ia64/include/asm/mutex.h index f41e66d65e31..28cb819e0ff9 100644 --- a/arch/ia64/include/asm/mutex.h +++ b/arch/ia64/include/asm/mutex.h @@ -82,7 +82,7 @@ __mutex_fastpath_unlock(atomic_t *count, void (*fail_fn)(atomic_t *)) static inline int __mutex_fastpath_trylock(atomic_t *count, int (*fail_fn)(atomic_t *)) { - if (cmpxchg_acq(count, 1, 0) == 1) + if (atomic_read(count) == 1 && cmpxchg_acq(count, 1, 0) == 1) return 1; return 0; } diff --git a/arch/powerpc/include/asm/mutex.h b/arch/powerpc/include/asm/mutex.h index 127ab23e1f6c..078155fa1189 100644 --- a/arch/powerpc/include/asm/mutex.h +++ b/arch/powerpc/include/asm/mutex.h @@ -124,7 +124,7 @@ __mutex_fastpath_unlock(atomic_t *count, void (*fail_fn)(atomic_t *)) static inline int __mutex_fastpath_trylock(atomic_t *count, int (*fail_fn)(atomic_t *)) { - if (likely(__mutex_cmpxchg_lock(count, 1, 0) == 1)) + if (likely(atomic_read(count) == 1 && __mutex_cmpxchg_lock(count, 1, 0) == 1)) return 1; return 0; } diff --git a/arch/x86/include/asm/mutex_32.h b/arch/x86/include/asm/mutex_32.h index 85e6cda45a02..e9355a84fc67 100644 --- a/arch/x86/include/asm/mutex_32.h +++ b/arch/x86/include/asm/mutex_32.h @@ -101,7 +101,7 @@ static inline int __mutex_fastpath_trylock(atomic_t *count, int (*fail_fn)(atomic_t *)) { /* cmpxchg because it never induces a false contention state. 
*/ - if (likely(atomic_cmpxchg(count, 1, 0) == 1)) + if (likely(atomic_read(count) == 1 && atomic_cmpxchg(count, 1, 0) == 1)) return 1; return 0; diff --git a/arch/x86/include/asm/mutex_64.h b/arch/x86/include/asm/mutex_64.h index 07537a44216e..d9850758464e 100644 --- a/arch/x86/include/asm/mutex_64.h +++ b/arch/x86/include/asm/mutex_64.h @@ -118,10 +118,10 @@ do { \ static inline int __mutex_fastpath_trylock(atomic_t *count, int (*fail_fn)(atomic_t *)) { - if (likely(atomic_cmpxchg(count, 1, 0) == 1)) + if (likely(atomic_read(count) == 1 && atomic_cmpxchg(count, 1, 0) == 1)) return 1; - else - return 0; + + return 0; } #endif /* _ASM_X86_MUTEX_64_H */ diff --git a/include/asm-generic/mutex-dec.h b/include/asm-generic/mutex-dec.h index fd694cfd678a..c54829d3de37 100644 --- a/include/asm-generic/mutex-dec.h +++ b/include/asm-generic/mutex-dec.h @@ -80,7 +80,7 @@ __mutex_fastpath_unlock(atomic_t *count, void (*fail_fn)(atomic_t *)) static inline int __mutex_fastpath_trylock(atomic_t *count, int (*fail_fn)(atomic_t *)) { - if (likely(atomic_cmpxchg_acquire(count, 1, 0) == 1)) + if (likely(atomic_read(count) == 1 && atomic_cmpxchg_acquire(count, 1, 0) == 1)) return 1; return 0; } diff --git a/include/asm-generic/mutex-xchg.h b/include/asm-generic/mutex-xchg.h index a6b4a7bd6ac9..3269ec4e195f 100644 --- a/include/asm-generic/mutex-xchg.h +++ b/include/asm-generic/mutex-xchg.h @@ -91,8 +91,12 @@ __mutex_fastpath_unlock(atomic_t *count, void (*fail_fn)(atomic_t *)) static inline int __mutex_fastpath_trylock(atomic_t *count, int (*fail_fn)(atomic_t *)) { - int prev = atomic_xchg_acquire(count, 0); + int prev; + if (atomic_read(count) != 1) + return 0; + + prev = atomic_xchg_acquire(count, 0); if (unlikely(prev < 0)) { /* * The lock was marked contended so we must restore that diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index c7f1bc4f817c..b7326893221f 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -3681,7 +3681,6 @@ static bool exp_funnel_lock(struct rcu_state *rsp, unsigned long s) if (ULONG_CMP_LT(READ_ONCE(rnp->exp_seq_rq), s) && (rnp == rnp_root || ULONG_CMP_LT(READ_ONCE(rnp_root->exp_seq_rq), s)) && - !mutex_is_locked(&rsp->exp_mutex) && mutex_trylock(&rsp->exp_mutex)) goto fastpath; -- cgit v1.2.3 From 2823d4da5d8a0c222747b24eceb65f5b30717d02 Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Wed, 8 Jun 2016 12:38:37 -0700 Subject: x86, bitops: remove use of "sbb" to return CF Use SETC instead of SBB to return the value of CF from assembly. Using SETcc enables uniformity with other flags-returning pieces of assembly code. Signed-off-by: H. 
Peter Anvin Link: http://lkml.kernel.org/r/1465414726-197858-2-git-send-email-hpa@linux.intel.com Reviewed-by: Andy Lutomirski Reviewed-by: Borislav Petkov Acked-by: Peter Zijlstra (Intel) --- arch/x86/include/asm/bitops.h | 24 ++++++++++++------------ arch/x86/include/asm/percpu.h | 12 ++++++------ arch/x86/include/asm/signal.h | 6 +++--- arch/x86/include/asm/sync_bitops.h | 18 +++++++++--------- arch/x86/kernel/vm86_32.c | 5 +---- 5 files changed, 31 insertions(+), 34 deletions(-) (limited to 'arch') diff --git a/arch/x86/include/asm/bitops.h b/arch/x86/include/asm/bitops.h index 7766d1cf096e..b2b797d1f49a 100644 --- a/arch/x86/include/asm/bitops.h +++ b/arch/x86/include/asm/bitops.h @@ -230,11 +230,11 @@ test_and_set_bit_lock(long nr, volatile unsigned long *addr) */ static __always_inline int __test_and_set_bit(long nr, volatile unsigned long *addr) { - int oldbit; + unsigned char oldbit; asm("bts %2,%1\n\t" - "sbb %0,%0" - : "=r" (oldbit), ADDR + "setc %0" + : "=qm" (oldbit), ADDR : "Ir" (nr)); return oldbit; } @@ -270,11 +270,11 @@ static __always_inline int test_and_clear_bit(long nr, volatile unsigned long *a */ static __always_inline int __test_and_clear_bit(long nr, volatile unsigned long *addr) { - int oldbit; + unsigned char oldbit; asm volatile("btr %2,%1\n\t" - "sbb %0,%0" - : "=r" (oldbit), ADDR + "setc %0" + : "=qm" (oldbit), ADDR : "Ir" (nr)); return oldbit; } @@ -282,11 +282,11 @@ static __always_inline int __test_and_clear_bit(long nr, volatile unsigned long /* WARNING: non atomic and it can be reordered! */ static __always_inline int __test_and_change_bit(long nr, volatile unsigned long *addr) { - int oldbit; + unsigned char oldbit; asm volatile("btc %2,%1\n\t" - "sbb %0,%0" - : "=r" (oldbit), ADDR + "setc %0" + : "=qm" (oldbit), ADDR : "Ir" (nr) : "memory"); return oldbit; @@ -313,11 +313,11 @@ static __always_inline int constant_test_bit(long nr, const volatile unsigned lo static __always_inline int variable_test_bit(long nr, volatile const unsigned long *addr) { - int oldbit; + unsigned char oldbit; asm volatile("bt %2,%1\n\t" - "sbb %0,%0" - : "=r" (oldbit) + "setc %0" + : "=qm" (oldbit) : "m" (*(unsigned long *)addr), "Ir" (nr)); return oldbit; diff --git a/arch/x86/include/asm/percpu.h b/arch/x86/include/asm/percpu.h index e0ba66ca68c6..65039e9571db 100644 --- a/arch/x86/include/asm/percpu.h +++ b/arch/x86/include/asm/percpu.h @@ -510,9 +510,9 @@ do { \ /* This is not atomic against other CPUs -- CPU preemption needs to be off */ #define x86_test_and_clear_bit_percpu(bit, var) \ ({ \ - int old__; \ - asm volatile("btr %2,"__percpu_arg(1)"\n\tsbbl %0,%0" \ - : "=r" (old__), "+m" (var) \ + unsigned char old__; \ + asm volatile("btr %2,"__percpu_arg(1)"\n\tsetc %0" \ + : "=qm" (old__), "+m" (var) \ : "dIr" (bit)); \ old__; \ }) @@ -532,11 +532,11 @@ static __always_inline int x86_this_cpu_constant_test_bit(unsigned int nr, static inline int x86_this_cpu_variable_test_bit(int nr, const unsigned long __percpu *addr) { - int oldbit; + unsigned char oldbit; asm volatile("bt "__percpu_arg(2)",%1\n\t" - "sbb %0,%0" - : "=r" (oldbit) + "setc %0" + : "=qm" (oldbit) : "m" (*(unsigned long *)addr), "Ir" (nr)); return oldbit; diff --git a/arch/x86/include/asm/signal.h b/arch/x86/include/asm/signal.h index 2138c9ae19ee..dd1e7d6387ab 100644 --- a/arch/x86/include/asm/signal.h +++ b/arch/x86/include/asm/signal.h @@ -81,9 +81,9 @@ static inline int __const_sigismember(sigset_t *set, int _sig) static inline int __gen_sigismember(sigset_t *set, int _sig) { - int ret; - asm("btl 
%2,%1\n\tsbbl %0,%0" - : "=r"(ret) : "m"(*set), "Ir"(_sig-1) : "cc"); + unsigned char ret; + asm("btl %2,%1\n\tsetc %0" + : "=qm"(ret) : "m"(*set), "Ir"(_sig-1) : "cc"); return ret; } diff --git a/arch/x86/include/asm/sync_bitops.h b/arch/x86/include/asm/sync_bitops.h index f28a24b51dc7..cbf8847d02a0 100644 --- a/arch/x86/include/asm/sync_bitops.h +++ b/arch/x86/include/asm/sync_bitops.h @@ -79,10 +79,10 @@ static inline void sync_change_bit(long nr, volatile unsigned long *addr) */ static inline int sync_test_and_set_bit(long nr, volatile unsigned long *addr) { - int oldbit; + unsigned char oldbit; - asm volatile("lock; bts %2,%1\n\tsbbl %0,%0" - : "=r" (oldbit), "+m" (ADDR) + asm volatile("lock; bts %2,%1\n\tsetc %0" + : "=qm" (oldbit), "+m" (ADDR) : "Ir" (nr) : "memory"); return oldbit; } @@ -97,10 +97,10 @@ static inline int sync_test_and_set_bit(long nr, volatile unsigned long *addr) */ static inline int sync_test_and_clear_bit(long nr, volatile unsigned long *addr) { - int oldbit; + unsigned char oldbit; - asm volatile("lock; btr %2,%1\n\tsbbl %0,%0" - : "=r" (oldbit), "+m" (ADDR) + asm volatile("lock; btr %2,%1\n\tsetc %0" + : "=qm" (oldbit), "+m" (ADDR) : "Ir" (nr) : "memory"); return oldbit; } @@ -115,10 +115,10 @@ static inline int sync_test_and_clear_bit(long nr, volatile unsigned long *addr) */ static inline int sync_test_and_change_bit(long nr, volatile unsigned long *addr) { - int oldbit; + unsigned char oldbit; - asm volatile("lock; btc %2,%1\n\tsbbl %0,%0" - : "=r" (oldbit), "+m" (ADDR) + asm volatile("lock; btc %2,%1\n\tsetc %0" + : "=qm" (oldbit), "+m" (ADDR) : "Ir" (nr) : "memory"); return oldbit; } diff --git a/arch/x86/kernel/vm86_32.c b/arch/x86/kernel/vm86_32.c index 3dce1ca0a653..01f30e56f99e 100644 --- a/arch/x86/kernel/vm86_32.c +++ b/arch/x86/kernel/vm86_32.c @@ -440,10 +440,7 @@ static inline unsigned long get_vflags(struct kernel_vm86_regs *regs) static inline int is_revectored(int nr, struct revectored_struct *bitmap) { - __asm__ __volatile__("btl %2,%1\n\tsbbl %0,%0" - :"=r" (nr) - :"m" (*bitmap), "r" (nr)); - return nr; + return test_bit(nr, bitmap->__map); } #define val_byte(val, n) (((__u8 *)&val)[n]) -- cgit v1.2.3 From 117780eef7740729e803bdcc0d5f2f48137ea8e3 Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Wed, 8 Jun 2016 12:38:38 -0700 Subject: x86, asm: use bool for bitops and other assembly outputs The gcc people have confirmed that using "bool" when combined with inline assembly always is treated as a byte-sized operand that can be assumed to be 0 or 1, which is exactly what the SET instruction emits. Change the output types and intermediate variables of as many operations as practical to "bool". Signed-off-by: H. 
Peter Anvin Link: http://lkml.kernel.org/r/1465414726-197858-3-git-send-email-hpa@linux.intel.com Reviewed-by: Andy Lutomirski Reviewed-by: Borislav Petkov Acked-by: Peter Zijlstra (Intel) --- arch/x86/boot/bitops.h | 8 +++++--- arch/x86/boot/boot.h | 8 ++++---- arch/x86/boot/string.c | 2 +- arch/x86/include/asm/apm.h | 6 +++--- arch/x86/include/asm/archrandom.h | 16 ++++++++-------- arch/x86/include/asm/atomic.h | 8 ++++---- arch/x86/include/asm/atomic64_64.h | 10 +++++----- arch/x86/include/asm/bitops.h | 28 ++++++++++++++-------------- arch/x86/include/asm/local.h | 8 ++++---- arch/x86/include/asm/percpu.h | 8 ++++---- arch/x86/include/asm/rmwcc.h | 4 ++-- arch/x86/include/asm/rwsem.h | 17 +++++++++-------- include/linux/random.h | 12 ++++++------ 13 files changed, 69 insertions(+), 66 deletions(-) (limited to 'arch') diff --git a/arch/x86/boot/bitops.h b/arch/x86/boot/bitops.h index 878e4b9940d9..0d41d68131cc 100644 --- a/arch/x86/boot/bitops.h +++ b/arch/x86/boot/bitops.h @@ -16,14 +16,16 @@ #define BOOT_BITOPS_H #define _LINUX_BITOPS_H /* Inhibit inclusion of */ -static inline int constant_test_bit(int nr, const void *addr) +#include + +static inline bool constant_test_bit(int nr, const void *addr) { const u32 *p = (const u32 *)addr; return ((1UL << (nr & 31)) & (p[nr >> 5])) != 0; } -static inline int variable_test_bit(int nr, const void *addr) +static inline bool variable_test_bit(int nr, const void *addr) { - u8 v; + bool v; const u32 *p = (const u32 *)addr; asm("btl %2,%1; setc %0" : "=qm" (v) : "m" (*p), "Ir" (nr)); diff --git a/arch/x86/boot/boot.h b/arch/x86/boot/boot.h index 9011a88353de..2edb2d53c3a2 100644 --- a/arch/x86/boot/boot.h +++ b/arch/x86/boot/boot.h @@ -176,16 +176,16 @@ static inline void wrgs32(u32 v, addr_t addr) } /* Note: these only return true/false, not a signed return value! */ -static inline int memcmp_fs(const void *s1, addr_t s2, size_t len) +static inline bool memcmp_fs(const void *s1, addr_t s2, size_t len) { - u8 diff; + bool diff; asm volatile("fs; repe; cmpsb; setnz %0" : "=qm" (diff), "+D" (s1), "+S" (s2), "+c" (len)); return diff; } -static inline int memcmp_gs(const void *s1, addr_t s2, size_t len) +static inline bool memcmp_gs(const void *s1, addr_t s2, size_t len) { - u8 diff; + bool diff; asm volatile("gs; repe; cmpsb; setnz %0" : "=qm" (diff), "+D" (s1), "+S" (s2), "+c" (len)); return diff; diff --git a/arch/x86/boot/string.c b/arch/x86/boot/string.c index 318b8465d302..cc3bd583dce1 100644 --- a/arch/x86/boot/string.c +++ b/arch/x86/boot/string.c @@ -17,7 +17,7 @@ int memcmp(const void *s1, const void *s2, size_t len) { - u8 diff; + bool diff; asm("repe; cmpsb; setnz %0" : "=qm" (diff), "+D" (s1), "+S" (s2), "+c" (len)); return diff; diff --git a/arch/x86/include/asm/apm.h b/arch/x86/include/asm/apm.h index 20370c6db74b..93eebc636c76 100644 --- a/arch/x86/include/asm/apm.h +++ b/arch/x86/include/asm/apm.h @@ -45,11 +45,11 @@ static inline void apm_bios_call_asm(u32 func, u32 ebx_in, u32 ecx_in, : "memory", "cc"); } -static inline u8 apm_bios_call_simple_asm(u32 func, u32 ebx_in, - u32 ecx_in, u32 *eax) +static inline bool apm_bios_call_simple_asm(u32 func, u32 ebx_in, + u32 ecx_in, u32 *eax) { int cx, dx, si; - u8 error; + bool error; /* * N.B. 
We do NOT need a cld after the BIOS call diff --git a/arch/x86/include/asm/archrandom.h b/arch/x86/include/asm/archrandom.h index 69f1366f1aa3..ab6f599ce2fd 100644 --- a/arch/x86/include/asm/archrandom.h +++ b/arch/x86/include/asm/archrandom.h @@ -43,7 +43,7 @@ #ifdef CONFIG_ARCH_RANDOM /* Instead of arch_get_random_long() when alternatives haven't run. */ -static inline int rdrand_long(unsigned long *v) +static inline bool rdrand_long(unsigned long *v) { int ok; asm volatile("1: " RDRAND_LONG "\n\t" @@ -53,13 +53,13 @@ static inline int rdrand_long(unsigned long *v) "2:" : "=r" (ok), "=a" (*v) : "0" (RDRAND_RETRY_LOOPS)); - return ok; + return !!ok; } /* A single attempt at RDSEED */ static inline bool rdseed_long(unsigned long *v) { - unsigned char ok; + bool ok; asm volatile(RDSEED_LONG "\n\t" "setc %0" : "=qm" (ok), "=a" (*v)); @@ -67,7 +67,7 @@ static inline bool rdseed_long(unsigned long *v) } #define GET_RANDOM(name, type, rdrand, nop) \ -static inline int name(type *v) \ +static inline bool name(type *v) \ { \ int ok; \ alternative_io("movl $0, %0\n\t" \ @@ -80,13 +80,13 @@ static inline int name(type *v) \ X86_FEATURE_RDRAND, \ ASM_OUTPUT2("=r" (ok), "=a" (*v)), \ "0" (RDRAND_RETRY_LOOPS)); \ - return ok; \ + return !!ok; \ } #define GET_SEED(name, type, rdseed, nop) \ -static inline int name(type *v) \ +static inline bool name(type *v) \ { \ - unsigned char ok; \ + bool ok; \ alternative_io("movb $0, %0\n\t" \ nop, \ rdseed "\n\t" \ @@ -119,7 +119,7 @@ GET_SEED(arch_get_random_seed_int, unsigned int, RDSEED_INT, ASM_NOP4); #else -static inline int rdrand_long(unsigned long *v) +static inline bool rdrand_long(unsigned long *v) { return 0; } diff --git a/arch/x86/include/asm/atomic.h b/arch/x86/include/asm/atomic.h index 3e8674288198..17d881248e6c 100644 --- a/arch/x86/include/asm/atomic.h +++ b/arch/x86/include/asm/atomic.h @@ -75,7 +75,7 @@ static __always_inline void atomic_sub(int i, atomic_t *v) * true if the result is zero, or false for all * other cases. */ -static __always_inline int atomic_sub_and_test(int i, atomic_t *v) +static __always_inline bool atomic_sub_and_test(int i, atomic_t *v) { GEN_BINARY_RMWcc(LOCK_PREFIX "subl", v->counter, "er", i, "%0", "e"); } @@ -112,7 +112,7 @@ static __always_inline void atomic_dec(atomic_t *v) * returns true if the result is 0, or false for all other * cases. */ -static __always_inline int atomic_dec_and_test(atomic_t *v) +static __always_inline bool atomic_dec_and_test(atomic_t *v) { GEN_UNARY_RMWcc(LOCK_PREFIX "decl", v->counter, "%0", "e"); } @@ -125,7 +125,7 @@ static __always_inline int atomic_dec_and_test(atomic_t *v) * and returns true if the result is zero, or false for all * other cases. */ -static __always_inline int atomic_inc_and_test(atomic_t *v) +static __always_inline bool atomic_inc_and_test(atomic_t *v) { GEN_UNARY_RMWcc(LOCK_PREFIX "incl", v->counter, "%0", "e"); } @@ -139,7 +139,7 @@ static __always_inline int atomic_inc_and_test(atomic_t *v) * if the result is negative, or false when * result is greater than or equal to zero. 
*/ -static __always_inline int atomic_add_negative(int i, atomic_t *v) +static __always_inline bool atomic_add_negative(int i, atomic_t *v) { GEN_BINARY_RMWcc(LOCK_PREFIX "addl", v->counter, "er", i, "%0", "s"); } diff --git a/arch/x86/include/asm/atomic64_64.h b/arch/x86/include/asm/atomic64_64.h index 037351022f54..4f881d7f0c39 100644 --- a/arch/x86/include/asm/atomic64_64.h +++ b/arch/x86/include/asm/atomic64_64.h @@ -70,7 +70,7 @@ static inline void atomic64_sub(long i, atomic64_t *v) * true if the result is zero, or false for all * other cases. */ -static inline int atomic64_sub_and_test(long i, atomic64_t *v) +static inline bool atomic64_sub_and_test(long i, atomic64_t *v) { GEN_BINARY_RMWcc(LOCK_PREFIX "subq", v->counter, "er", i, "%0", "e"); } @@ -109,7 +109,7 @@ static __always_inline void atomic64_dec(atomic64_t *v) * returns true if the result is 0, or false for all other * cases. */ -static inline int atomic64_dec_and_test(atomic64_t *v) +static inline bool atomic64_dec_and_test(atomic64_t *v) { GEN_UNARY_RMWcc(LOCK_PREFIX "decq", v->counter, "%0", "e"); } @@ -122,7 +122,7 @@ static inline int atomic64_dec_and_test(atomic64_t *v) * and returns true if the result is zero, or false for all * other cases. */ -static inline int atomic64_inc_and_test(atomic64_t *v) +static inline bool atomic64_inc_and_test(atomic64_t *v) { GEN_UNARY_RMWcc(LOCK_PREFIX "incq", v->counter, "%0", "e"); } @@ -136,7 +136,7 @@ static inline int atomic64_inc_and_test(atomic64_t *v) * if the result is negative, or false when * result is greater than or equal to zero. */ -static inline int atomic64_add_negative(long i, atomic64_t *v) +static inline bool atomic64_add_negative(long i, atomic64_t *v) { GEN_BINARY_RMWcc(LOCK_PREFIX "addq", v->counter, "er", i, "%0", "s"); } @@ -180,7 +180,7 @@ static inline long atomic64_xchg(atomic64_t *v, long new) * Atomically adds @a to @v, so long as it was not @u. * Returns the old value of @v. */ -static inline int atomic64_add_unless(atomic64_t *v, long a, long u) +static inline bool atomic64_add_unless(atomic64_t *v, long a, long u) { long c, old; c = atomic64_read(v); diff --git a/arch/x86/include/asm/bitops.h b/arch/x86/include/asm/bitops.h index b2b797d1f49a..8cbb7f495546 100644 --- a/arch/x86/include/asm/bitops.h +++ b/arch/x86/include/asm/bitops.h @@ -201,7 +201,7 @@ static __always_inline void change_bit(long nr, volatile unsigned long *addr) * This operation is atomic and cannot be reordered. * It also implies a memory barrier. */ -static __always_inline int test_and_set_bit(long nr, volatile unsigned long *addr) +static __always_inline bool test_and_set_bit(long nr, volatile unsigned long *addr) { GEN_BINARY_RMWcc(LOCK_PREFIX "bts", *addr, "Ir", nr, "%0", "c"); } @@ -213,7 +213,7 @@ static __always_inline int test_and_set_bit(long nr, volatile unsigned long *add * * This is the same as test_and_set_bit on x86. */ -static __always_inline int +static __always_inline bool test_and_set_bit_lock(long nr, volatile unsigned long *addr) { return test_and_set_bit(nr, addr); @@ -228,9 +228,9 @@ test_and_set_bit_lock(long nr, volatile unsigned long *addr) * If two examples of this operation race, one can appear to succeed * but actually fail. You must protect multiple accesses with a lock. 
*/ -static __always_inline int __test_and_set_bit(long nr, volatile unsigned long *addr) +static __always_inline bool __test_and_set_bit(long nr, volatile unsigned long *addr) { - unsigned char oldbit; + bool oldbit; asm("bts %2,%1\n\t" "setc %0" @@ -247,7 +247,7 @@ static __always_inline int __test_and_set_bit(long nr, volatile unsigned long *a * This operation is atomic and cannot be reordered. * It also implies a memory barrier. */ -static __always_inline int test_and_clear_bit(long nr, volatile unsigned long *addr) +static __always_inline bool test_and_clear_bit(long nr, volatile unsigned long *addr) { GEN_BINARY_RMWcc(LOCK_PREFIX "btr", *addr, "Ir", nr, "%0", "c"); } @@ -268,9 +268,9 @@ static __always_inline int test_and_clear_bit(long nr, volatile unsigned long *a * accessed from a hypervisor on the same CPU if running in a VM: don't change * this without also updating arch/x86/kernel/kvm.c */ -static __always_inline int __test_and_clear_bit(long nr, volatile unsigned long *addr) +static __always_inline bool __test_and_clear_bit(long nr, volatile unsigned long *addr) { - unsigned char oldbit; + bool oldbit; asm volatile("btr %2,%1\n\t" "setc %0" @@ -280,9 +280,9 @@ static __always_inline int __test_and_clear_bit(long nr, volatile unsigned long } /* WARNING: non atomic and it can be reordered! */ -static __always_inline int __test_and_change_bit(long nr, volatile unsigned long *addr) +static __always_inline bool __test_and_change_bit(long nr, volatile unsigned long *addr) { - unsigned char oldbit; + bool oldbit; asm volatile("btc %2,%1\n\t" "setc %0" @@ -300,20 +300,20 @@ static __always_inline int __test_and_change_bit(long nr, volatile unsigned long * This operation is atomic and cannot be reordered. * It also implies a memory barrier. */ -static __always_inline int test_and_change_bit(long nr, volatile unsigned long *addr) +static __always_inline bool test_and_change_bit(long nr, volatile unsigned long *addr) { GEN_BINARY_RMWcc(LOCK_PREFIX "btc", *addr, "Ir", nr, "%0", "c"); } -static __always_inline int constant_test_bit(long nr, const volatile unsigned long *addr) +static __always_inline bool constant_test_bit(long nr, const volatile unsigned long *addr) { return ((1UL << (nr & (BITS_PER_LONG-1))) & (addr[nr >> _BITOPS_LONG_SHIFT])) != 0; } -static __always_inline int variable_test_bit(long nr, volatile const unsigned long *addr) +static __always_inline bool variable_test_bit(long nr, volatile const unsigned long *addr) { - unsigned char oldbit; + bool oldbit; asm volatile("bt %2,%1\n\t" "setc %0" @@ -329,7 +329,7 @@ static __always_inline int variable_test_bit(long nr, volatile const unsigned lo * @nr: bit number to test * @addr: Address to start counting from */ -static int test_bit(int nr, const volatile unsigned long *addr); +static bool test_bit(int nr, const volatile unsigned long *addr); #endif #define test_bit(nr, addr) \ diff --git a/arch/x86/include/asm/local.h b/arch/x86/include/asm/local.h index 4ad6560847b1..0cdc65b0d14d 100644 --- a/arch/x86/include/asm/local.h +++ b/arch/x86/include/asm/local.h @@ -50,7 +50,7 @@ static inline void local_sub(long i, local_t *l) * true if the result is zero, or false for all * other cases. */ -static inline int local_sub_and_test(long i, local_t *l) +static inline bool local_sub_and_test(long i, local_t *l) { GEN_BINARY_RMWcc(_ASM_SUB, l->a.counter, "er", i, "%0", "e"); } @@ -63,7 +63,7 @@ static inline int local_sub_and_test(long i, local_t *l) * returns true if the result is 0, or false for all other * cases. 
*/ -static inline int local_dec_and_test(local_t *l) +static inline bool local_dec_and_test(local_t *l) { GEN_UNARY_RMWcc(_ASM_DEC, l->a.counter, "%0", "e"); } @@ -76,7 +76,7 @@ static inline int local_dec_and_test(local_t *l) * and returns true if the result is zero, or false for all * other cases. */ -static inline int local_inc_and_test(local_t *l) +static inline bool local_inc_and_test(local_t *l) { GEN_UNARY_RMWcc(_ASM_INC, l->a.counter, "%0", "e"); } @@ -90,7 +90,7 @@ static inline int local_inc_and_test(local_t *l) * if the result is negative, or false when * result is greater than or equal to zero. */ -static inline int local_add_negative(long i, local_t *l) +static inline bool local_add_negative(long i, local_t *l) { GEN_BINARY_RMWcc(_ASM_ADD, l->a.counter, "er", i, "%0", "s"); } diff --git a/arch/x86/include/asm/percpu.h b/arch/x86/include/asm/percpu.h index 65039e9571db..184d7f3ecb9f 100644 --- a/arch/x86/include/asm/percpu.h +++ b/arch/x86/include/asm/percpu.h @@ -510,14 +510,14 @@ do { \ /* This is not atomic against other CPUs -- CPU preemption needs to be off */ #define x86_test_and_clear_bit_percpu(bit, var) \ ({ \ - unsigned char old__; \ + bool old__; \ asm volatile("btr %2,"__percpu_arg(1)"\n\tsetc %0" \ : "=qm" (old__), "+m" (var) \ : "dIr" (bit)); \ old__; \ }) -static __always_inline int x86_this_cpu_constant_test_bit(unsigned int nr, +static __always_inline bool x86_this_cpu_constant_test_bit(unsigned int nr, const unsigned long __percpu *addr) { unsigned long __percpu *a = (unsigned long *)addr + nr / BITS_PER_LONG; @@ -529,10 +529,10 @@ static __always_inline int x86_this_cpu_constant_test_bit(unsigned int nr, #endif } -static inline int x86_this_cpu_variable_test_bit(int nr, +static inline bool x86_this_cpu_variable_test_bit(int nr, const unsigned long __percpu *addr) { - unsigned char oldbit; + bool oldbit; asm volatile("bt "__percpu_arg(2)",%1\n\t" "setc %0" diff --git a/arch/x86/include/asm/rmwcc.h b/arch/x86/include/asm/rmwcc.h index 8f7866a5b9a4..a15b73d90be3 100644 --- a/arch/x86/include/asm/rmwcc.h +++ b/arch/x86/include/asm/rmwcc.h @@ -23,11 +23,11 @@ cc_label: \ #define __GEN_RMWcc(fullop, var, cc, ...) \ do { \ - char c; \ + bool c; \ asm volatile (fullop "; set" cc " %1" \ : "+m" (var), "=qm" (c) \ : __VA_ARGS__ : "memory"); \ - return c != 0; \ + return c; \ } while (0) #define GEN_UNARY_RMWcc(op, var, arg0, cc) \ diff --git a/arch/x86/include/asm/rwsem.h b/arch/x86/include/asm/rwsem.h index 453744c1d347..c5087706c02e 100644 --- a/arch/x86/include/asm/rwsem.h +++ b/arch/x86/include/asm/rwsem.h @@ -77,7 +77,7 @@ static inline void __down_read(struct rw_semaphore *sem) /* * trylock for reading -- returns 1 if successful, 0 if contention */ -static inline int __down_read_trylock(struct rw_semaphore *sem) +static inline bool __down_read_trylock(struct rw_semaphore *sem) { long result, tmp; asm volatile("# beginning __down_read_trylock\n\t" @@ -93,7 +93,7 @@ static inline int __down_read_trylock(struct rw_semaphore *sem) : "+m" (sem->count), "=&a" (result), "=&r" (tmp) : "i" (RWSEM_ACTIVE_READ_BIAS) : "memory", "cc"); - return result >= 0 ? 
1 : 0; + return result >= 0; } /* @@ -134,9 +134,10 @@ static inline int __down_write_killable(struct rw_semaphore *sem) /* * trylock for writing -- returns 1 if successful, 0 if contention */ -static inline int __down_write_trylock(struct rw_semaphore *sem) +static inline bool __down_write_trylock(struct rw_semaphore *sem) { - long result, tmp; + bool result; + long tmp0, tmp1; asm volatile("# beginning __down_write_trylock\n\t" " mov %0,%1\n\t" "1:\n\t" @@ -144,14 +145,14 @@ static inline int __down_write_trylock(struct rw_semaphore *sem) /* was the active mask 0 before? */ " jnz 2f\n\t" " mov %1,%2\n\t" - " add %3,%2\n\t" + " add %4,%2\n\t" LOCK_PREFIX " cmpxchg %2,%0\n\t" " jnz 1b\n\t" "2:\n\t" - " sete %b1\n\t" - " movzbl %b1, %k1\n\t" + " sete %3\n\t" "# ending __down_write_trylock\n\t" - : "+m" (sem->count), "=&a" (result), "=&r" (tmp) + : "+m" (sem->count), "=&a" (tmp0), "=&r" (tmp1), + "=qm" (result) : "er" (RWSEM_ACTIVE_WRITE_BIAS) : "memory", "cc"); return result; diff --git a/include/linux/random.h b/include/linux/random.h index e47e533742b5..3d6e9815cd85 100644 --- a/include/linux/random.h +++ b/include/linux/random.h @@ -95,27 +95,27 @@ static inline void prandom_seed_state(struct rnd_state *state, u64 seed) #ifdef CONFIG_ARCH_RANDOM # include #else -static inline int arch_get_random_long(unsigned long *v) +static inline bool arch_get_random_long(unsigned long *v) { return 0; } -static inline int arch_get_random_int(unsigned int *v) +static inline bool arch_get_random_int(unsigned int *v) { return 0; } -static inline int arch_has_random(void) +static inline bool arch_has_random(void) { return 0; } -static inline int arch_get_random_seed_long(unsigned long *v) +static inline bool arch_get_random_seed_long(unsigned long *v) { return 0; } -static inline int arch_get_random_seed_int(unsigned int *v) +static inline bool arch_get_random_seed_int(unsigned int *v) { return 0; } -static inline int arch_has_random_seed(void) +static inline bool arch_has_random_seed(void) { return 0; } -- cgit v1.2.3 From 18fe58229d80c7f4f138a07e84ba608e1ebd232b Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Wed, 8 Jun 2016 12:38:39 -0700 Subject: x86, asm: change the GEN_*_RMWcc() macros to not quote the condition Change the lexical defintion of the GEN_*_RMWcc() macros to not take the condition code as a quoted string. This will help support changing them to use the new __GCC_ASM_FLAG_OUTPUTS__ feature in a subsequent patch. Signed-off-by: H. 
Peter Anvin Link: http://lkml.kernel.org/r/1465414726-197858-4-git-send-email-hpa@linux.intel.com Reviewed-by: Andy Lutomirski Reviewed-by: Borislav Petkov Acked-by: Peter Zijlstra (Intel) --- arch/x86/include/asm/atomic.h | 8 ++++---- arch/x86/include/asm/atomic64_64.h | 8 ++++---- arch/x86/include/asm/bitops.h | 6 +++--- arch/x86/include/asm/local.h | 8 ++++---- arch/x86/include/asm/preempt.h | 2 +- arch/x86/include/asm/rmwcc.h | 4 ++-- 6 files changed, 18 insertions(+), 18 deletions(-) (limited to 'arch') diff --git a/arch/x86/include/asm/atomic.h b/arch/x86/include/asm/atomic.h index 17d881248e6c..7322c1566f63 100644 --- a/arch/x86/include/asm/atomic.h +++ b/arch/x86/include/asm/atomic.h @@ -77,7 +77,7 @@ static __always_inline void atomic_sub(int i, atomic_t *v) */ static __always_inline bool atomic_sub_and_test(int i, atomic_t *v) { - GEN_BINARY_RMWcc(LOCK_PREFIX "subl", v->counter, "er", i, "%0", "e"); + GEN_BINARY_RMWcc(LOCK_PREFIX "subl", v->counter, "er", i, "%0", e); } /** @@ -114,7 +114,7 @@ static __always_inline void atomic_dec(atomic_t *v) */ static __always_inline bool atomic_dec_and_test(atomic_t *v) { - GEN_UNARY_RMWcc(LOCK_PREFIX "decl", v->counter, "%0", "e"); + GEN_UNARY_RMWcc(LOCK_PREFIX "decl", v->counter, "%0", e); } /** @@ -127,7 +127,7 @@ static __always_inline bool atomic_dec_and_test(atomic_t *v) */ static __always_inline bool atomic_inc_and_test(atomic_t *v) { - GEN_UNARY_RMWcc(LOCK_PREFIX "incl", v->counter, "%0", "e"); + GEN_UNARY_RMWcc(LOCK_PREFIX "incl", v->counter, "%0", e); } /** @@ -141,7 +141,7 @@ static __always_inline bool atomic_inc_and_test(atomic_t *v) */ static __always_inline bool atomic_add_negative(int i, atomic_t *v) { - GEN_BINARY_RMWcc(LOCK_PREFIX "addl", v->counter, "er", i, "%0", "s"); + GEN_BINARY_RMWcc(LOCK_PREFIX "addl", v->counter, "er", i, "%0", s); } /** diff --git a/arch/x86/include/asm/atomic64_64.h b/arch/x86/include/asm/atomic64_64.h index 4f881d7f0c39..57bf925710d9 100644 --- a/arch/x86/include/asm/atomic64_64.h +++ b/arch/x86/include/asm/atomic64_64.h @@ -72,7 +72,7 @@ static inline void atomic64_sub(long i, atomic64_t *v) */ static inline bool atomic64_sub_and_test(long i, atomic64_t *v) { - GEN_BINARY_RMWcc(LOCK_PREFIX "subq", v->counter, "er", i, "%0", "e"); + GEN_BINARY_RMWcc(LOCK_PREFIX "subq", v->counter, "er", i, "%0", e); } /** @@ -111,7 +111,7 @@ static __always_inline void atomic64_dec(atomic64_t *v) */ static inline bool atomic64_dec_and_test(atomic64_t *v) { - GEN_UNARY_RMWcc(LOCK_PREFIX "decq", v->counter, "%0", "e"); + GEN_UNARY_RMWcc(LOCK_PREFIX "decq", v->counter, "%0", e); } /** @@ -124,7 +124,7 @@ static inline bool atomic64_dec_and_test(atomic64_t *v) */ static inline bool atomic64_inc_and_test(atomic64_t *v) { - GEN_UNARY_RMWcc(LOCK_PREFIX "incq", v->counter, "%0", "e"); + GEN_UNARY_RMWcc(LOCK_PREFIX "incq", v->counter, "%0", e); } /** @@ -138,7 +138,7 @@ static inline bool atomic64_inc_and_test(atomic64_t *v) */ static inline bool atomic64_add_negative(long i, atomic64_t *v) { - GEN_BINARY_RMWcc(LOCK_PREFIX "addq", v->counter, "er", i, "%0", "s"); + GEN_BINARY_RMWcc(LOCK_PREFIX "addq", v->counter, "er", i, "%0", s); } /** diff --git a/arch/x86/include/asm/bitops.h b/arch/x86/include/asm/bitops.h index 8cbb7f495546..ed8f4851262f 100644 --- a/arch/x86/include/asm/bitops.h +++ b/arch/x86/include/asm/bitops.h @@ -203,7 +203,7 @@ static __always_inline void change_bit(long nr, volatile unsigned long *addr) */ static __always_inline bool test_and_set_bit(long nr, volatile unsigned long *addr) { - 
GEN_BINARY_RMWcc(LOCK_PREFIX "bts", *addr, "Ir", nr, "%0", "c"); + GEN_BINARY_RMWcc(LOCK_PREFIX "bts", *addr, "Ir", nr, "%0", c); } /** @@ -249,7 +249,7 @@ static __always_inline bool __test_and_set_bit(long nr, volatile unsigned long * */ static __always_inline bool test_and_clear_bit(long nr, volatile unsigned long *addr) { - GEN_BINARY_RMWcc(LOCK_PREFIX "btr", *addr, "Ir", nr, "%0", "c"); + GEN_BINARY_RMWcc(LOCK_PREFIX "btr", *addr, "Ir", nr, "%0", c); } /** @@ -302,7 +302,7 @@ static __always_inline bool __test_and_change_bit(long nr, volatile unsigned lon */ static __always_inline bool test_and_change_bit(long nr, volatile unsigned long *addr) { - GEN_BINARY_RMWcc(LOCK_PREFIX "btc", *addr, "Ir", nr, "%0", "c"); + GEN_BINARY_RMWcc(LOCK_PREFIX "btc", *addr, "Ir", nr, "%0", c); } static __always_inline bool constant_test_bit(long nr, const volatile unsigned long *addr) diff --git a/arch/x86/include/asm/local.h b/arch/x86/include/asm/local.h index 0cdc65b0d14d..7511978093eb 100644 --- a/arch/x86/include/asm/local.h +++ b/arch/x86/include/asm/local.h @@ -52,7 +52,7 @@ static inline void local_sub(long i, local_t *l) */ static inline bool local_sub_and_test(long i, local_t *l) { - GEN_BINARY_RMWcc(_ASM_SUB, l->a.counter, "er", i, "%0", "e"); + GEN_BINARY_RMWcc(_ASM_SUB, l->a.counter, "er", i, "%0", e); } /** @@ -65,7 +65,7 @@ static inline bool local_sub_and_test(long i, local_t *l) */ static inline bool local_dec_and_test(local_t *l) { - GEN_UNARY_RMWcc(_ASM_DEC, l->a.counter, "%0", "e"); + GEN_UNARY_RMWcc(_ASM_DEC, l->a.counter, "%0", e); } /** @@ -78,7 +78,7 @@ static inline bool local_dec_and_test(local_t *l) */ static inline bool local_inc_and_test(local_t *l) { - GEN_UNARY_RMWcc(_ASM_INC, l->a.counter, "%0", "e"); + GEN_UNARY_RMWcc(_ASM_INC, l->a.counter, "%0", e); } /** @@ -92,7 +92,7 @@ static inline bool local_inc_and_test(local_t *l) */ static inline bool local_add_negative(long i, local_t *l) { - GEN_BINARY_RMWcc(_ASM_ADD, l->a.counter, "er", i, "%0", "s"); + GEN_BINARY_RMWcc(_ASM_ADD, l->a.counter, "er", i, "%0", s); } /** diff --git a/arch/x86/include/asm/preempt.h b/arch/x86/include/asm/preempt.h index d397deb58146..17f218645701 100644 --- a/arch/x86/include/asm/preempt.h +++ b/arch/x86/include/asm/preempt.h @@ -81,7 +81,7 @@ static __always_inline void __preempt_count_sub(int val) */ static __always_inline bool __preempt_count_dec_and_test(void) { - GEN_UNARY_RMWcc("decl", __preempt_count, __percpu_arg(0), "e"); + GEN_UNARY_RMWcc("decl", __preempt_count, __percpu_arg(0), e); } /* diff --git a/arch/x86/include/asm/rmwcc.h b/arch/x86/include/asm/rmwcc.h index a15b73d90be3..e3264c414c4a 100644 --- a/arch/x86/include/asm/rmwcc.h +++ b/arch/x86/include/asm/rmwcc.h @@ -5,7 +5,7 @@ #define __GEN_RMWcc(fullop, var, cc, ...) \ do { \ - asm_volatile_goto (fullop "; j" cc " %l[cc_label]" \ + asm_volatile_goto (fullop "; j" #cc " %l[cc_label]" \ : : "m" (var), ## __VA_ARGS__ \ : "memory" : cc_label); \ return 0; \ @@ -24,7 +24,7 @@ cc_label: \ #define __GEN_RMWcc(fullop, var, cc, ...) \ do { \ bool c; \ - asm volatile (fullop "; set" cc " %1" \ + asm volatile (fullop "; set" #cc " %1" \ : "+m" (var), "=qm" (c) \ : __VA_ARGS__ : "memory"); \ return c; \ -- cgit v1.2.3 From ff3554b409b82d349f71e9d7082648b7b0a1a5bb Mon Sep 17 00:00:00 2001 From: "H. 
Peter Anvin" Date: Wed, 8 Jun 2016 12:38:40 -0700 Subject: x86, asm: define CC_SET() and CC_OUT() macros The CC_SET() and CC_OUT() macros can be used together to take advantage of the new __GCC_ASM_FLAG_OUTPUTS__ feature in gcc 6+ while remaining backwards compatible. CC_SET() generates a SET instruction on older compilers; CC_OUT() makes sure the output is received in the correct variable. Signed-off-by: H. Peter Anvin Link: http://lkml.kernel.org/r/1465414726-197858-5-git-send-email-hpa@linux.intel.com Reviewed-by: Andy Lutomirski Reviewed-by: Borislav Petkov Acked-by: Peter Zijlstra (Intel) --- arch/x86/include/asm/asm.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'arch') diff --git a/arch/x86/include/asm/asm.h b/arch/x86/include/asm/asm.h index f5063b6659eb..7acb51c49fec 100644 --- a/arch/x86/include/asm/asm.h +++ b/arch/x86/include/asm/asm.h @@ -42,6 +42,18 @@ #define _ASM_SI __ASM_REG(si) #define _ASM_DI __ASM_REG(di) +/* + * Macros to generate condition code outputs from inline assembly, + * The output operand must be type "bool". + */ +#ifdef __GCC_ASM_FLAG_OUTPUTS__ +# define CC_SET(c) "\n\t/* output condition code " #c "*/\n" +# define CC_OUT(c) "=@cc" #c +#else +# define CC_SET(c) "\n\tset" #c " %[_cc_" #c "]\n" +# define CC_OUT(c) [_cc_ ## c] "=qm" +#endif + /* Exception table entry */ #ifdef __ASSEMBLY__ # define _ASM_EXTABLE_HANDLE(from, to, handler) \ -- cgit v1.2.3 From ba741e356c49bfce0adcfa851080666870867f6b Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Wed, 8 Jun 2016 12:38:41 -0700 Subject: x86, asm: change GEN_*_RMWcc() to use CC_SET()/CC_OUT() Change the GEN_*_RMWcc() macros to use the CC_SET()/CC_OUT() macros defined in , and disable the use of asm goto if __GCC_ASM_FLAG_OUTPUTS__ is enabled. This allows gcc to receive the flags output directly in gcc 6+. Signed-off-by: H. Peter Anvin Link: http://lkml.kernel.org/r/1465414726-197858-6-git-send-email-hpa@linux.intel.com Reviewed-by: Andy Lutomirski Reviewed-by: Borislav Petkov Acked-by: Peter Zijlstra (Intel) --- arch/x86/include/asm/rmwcc.h | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) (limited to 'arch') diff --git a/arch/x86/include/asm/rmwcc.h b/arch/x86/include/asm/rmwcc.h index e3264c414c4a..661dd305694a 100644 --- a/arch/x86/include/asm/rmwcc.h +++ b/arch/x86/include/asm/rmwcc.h @@ -1,7 +1,9 @@ #ifndef _ASM_X86_RMWcc #define _ASM_X86_RMWcc -#ifdef CC_HAVE_ASM_GOTO +#if !defined(__GCC_ASM_FLAG_OUTPUTS__) && defined(CC_HAVE_ASM_GOTO) + +/* Use asm goto */ #define __GEN_RMWcc(fullop, var, cc, ...) \ do { \ @@ -19,13 +21,15 @@ cc_label: \ #define GEN_BINARY_RMWcc(op, var, vcon, val, arg0, cc) \ __GEN_RMWcc(op " %1, " arg0, var, cc, vcon (val)) -#else /* !CC_HAVE_ASM_GOTO */ +#else /* defined(__GCC_ASM_FLAG_OUTPUTS__) || !defined(CC_HAVE_ASM_GOTO) */ + +/* Use flags output or a set instruction */ #define __GEN_RMWcc(fullop, var, cc, ...) \ do { \ bool c; \ - asm volatile (fullop "; set" #cc " %1" \ - : "+m" (var), "=qm" (c) \ + asm volatile (fullop ";" CC_SET(cc) \ + : "+m" (var), CC_OUT(cc) (c) \ : __VA_ARGS__ : "memory"); \ return c; \ } while (0) @@ -36,6 +40,6 @@ do { \ #define GEN_BINARY_RMWcc(op, var, vcon, val, arg0, cc) \ __GEN_RMWcc(op " %2, " arg0, var, cc, vcon (val)) -#endif /* CC_HAVE_ASM_GOTO */ +#endif /* defined(__GCC_ASM_FLAG_OUTPUTS__) || !defined(CC_HAVE_ASM_GOTO) */ #endif /* _ASM_X86_RMWcc */ -- cgit v1.2.3 From 86b61240d4c233b440cd29daf0baa440daf4a148 Mon Sep 17 00:00:00 2001 From: "H. 
Peter Anvin" Date: Wed, 8 Jun 2016 12:38:42 -0700 Subject: x86, asm: Use CC_SET()/CC_OUT() in Remove open-coded uses of set instructions to use CC_SET()/CC_OUT() in . Signed-off-by: H. Peter Anvin Link: http://lkml.kernel.org/r/1465414726-197858-7-git-send-email-hpa@linux.intel.com Reviewed-by: Andy Lutomirski Reviewed-by: Borislav Petkov Acked-by: Peter Zijlstra (Intel) --- arch/x86/include/asm/bitops.h | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) (limited to 'arch') diff --git a/arch/x86/include/asm/bitops.h b/arch/x86/include/asm/bitops.h index ed8f4851262f..68557f52b961 100644 --- a/arch/x86/include/asm/bitops.h +++ b/arch/x86/include/asm/bitops.h @@ -233,8 +233,8 @@ static __always_inline bool __test_and_set_bit(long nr, volatile unsigned long * bool oldbit; asm("bts %2,%1\n\t" - "setc %0" - : "=qm" (oldbit), ADDR + CC_SET(c) + : CC_OUT(c) (oldbit), ADDR : "Ir" (nr)); return oldbit; } @@ -273,8 +273,8 @@ static __always_inline bool __test_and_clear_bit(long nr, volatile unsigned long bool oldbit; asm volatile("btr %2,%1\n\t" - "setc %0" - : "=qm" (oldbit), ADDR + CC_SET(c) + : CC_OUT(c) (oldbit), ADDR : "Ir" (nr)); return oldbit; } @@ -285,8 +285,8 @@ static __always_inline bool __test_and_change_bit(long nr, volatile unsigned lon bool oldbit; asm volatile("btc %2,%1\n\t" - "setc %0" - : "=qm" (oldbit), ADDR + CC_SET(c) + : CC_OUT(c) (oldbit), ADDR : "Ir" (nr) : "memory"); return oldbit; @@ -316,8 +316,8 @@ static __always_inline bool variable_test_bit(long nr, volatile const unsigned l bool oldbit; asm volatile("bt %2,%1\n\t" - "setc %0" - : "=qm" (oldbit) + CC_SET(c) + : CC_OUT(c) (oldbit) : "m" (*(unsigned long *)addr), "Ir" (nr)); return oldbit; -- cgit v1.2.3 From 64be6d36f5674f3424d1901772f76e21874f4954 Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Wed, 8 Jun 2016 12:38:43 -0700 Subject: x86, asm: Use CC_SET()/CC_OUT() in Remove open-coded uses of set instructions to use CC_SET()/CC_OUT() in . Signed-off-by: H. Peter Anvin Link: http://lkml.kernel.org/r/1465414726-197858-8-git-send-email-hpa@linux.intel.com Reviewed-by: Andy Lutomirski Reviewed-by: Borislav Petkov Acked-by: Peter Zijlstra (Intel) --- arch/x86/include/asm/percpu.h | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) (limited to 'arch') diff --git a/arch/x86/include/asm/percpu.h b/arch/x86/include/asm/percpu.h index 184d7f3ecb9f..e02e3f80d363 100644 --- a/arch/x86/include/asm/percpu.h +++ b/arch/x86/include/asm/percpu.h @@ -511,8 +511,9 @@ do { \ #define x86_test_and_clear_bit_percpu(bit, var) \ ({ \ bool old__; \ - asm volatile("btr %2,"__percpu_arg(1)"\n\tsetc %0" \ - : "=qm" (old__), "+m" (var) \ + asm volatile("btr %2,"__percpu_arg(1)"\n\t" \ + CC_SET(c) \ + : CC_OUT(c) (old__), "+m" (var) \ : "dIr" (bit)); \ old__; \ }) @@ -535,8 +536,8 @@ static inline bool x86_this_cpu_variable_test_bit(int nr, bool oldbit; asm volatile("bt "__percpu_arg(2)",%1\n\t" - "setc %0" - : "=qm" (oldbit) + CC_SET(c) + : CC_OUT(c) (oldbit) : "m" (*(unsigned long *)addr), "Ir" (nr)); return oldbit; -- cgit v1.2.3 From 35ccfb7114e2f0f454f264c049b03c31f4c6bbc0 Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Wed, 8 Jun 2016 12:38:44 -0700 Subject: x86, asm: Use CC_SET()/CC_OUT() in Remove open-coded uses of set instructions to use CC_SET()/CC_OUT() in . Signed-off-by: H. 
Peter Anvin Link: http://lkml.kernel.org/r/1465414726-197858-9-git-send-email-hpa@linux.intel.com Reviewed-by: Andy Lutomirski Reviewed-by: Borislav Petkov Acked-by: Peter Zijlstra (Intel) --- arch/x86/include/asm/rwsem.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/x86/include/asm/rwsem.h b/arch/x86/include/asm/rwsem.h index c5087706c02e..1e8be263065e 100644 --- a/arch/x86/include/asm/rwsem.h +++ b/arch/x86/include/asm/rwsem.h @@ -149,10 +149,10 @@ static inline bool __down_write_trylock(struct rw_semaphore *sem) LOCK_PREFIX " cmpxchg %2,%0\n\t" " jnz 1b\n\t" "2:\n\t" - " sete %3\n\t" + CC_SET(e) "# ending __down_write_trylock\n\t" : "+m" (sem->count), "=&a" (tmp0), "=&r" (tmp1), - "=qm" (result) + CC_OUT(e) (result) : "er" (RWSEM_ACTIVE_WRITE_BIAS) : "memory", "cc"); return result; -- cgit v1.2.3 From 66928b4eb92dfb6d87c204238057b9278b36452b Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Wed, 8 Jun 2016 12:38:45 -0700 Subject: x86, asm, boot: Use CC_SET()/CC_OUT() in arch/x86/boot/boot.h Remove open-coded uses of set instructions to use CC_SET()/CC_OUT() in arch/x86/boot/boot.h. Signed-off-by: H. Peter Anvin Link: http://lkml.kernel.org/r/1465414726-197858-10-git-send-email-hpa@linux.intel.com Reviewed-by: Andy Lutomirski Reviewed-by: Borislav Petkov Acked-by: Peter Zijlstra (Intel) --- arch/x86/boot/boot.h | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) (limited to 'arch') diff --git a/arch/x86/boot/boot.h b/arch/x86/boot/boot.h index 2edb2d53c3a2..7c1495f2b799 100644 --- a/arch/x86/boot/boot.h +++ b/arch/x86/boot/boot.h @@ -24,6 +24,7 @@ #include #include #include +#include #include "bitops.h" #include "ctype.h" #include "cpuflags.h" @@ -179,15 +180,15 @@ static inline void wrgs32(u32 v, addr_t addr) static inline bool memcmp_fs(const void *s1, addr_t s2, size_t len) { bool diff; - asm volatile("fs; repe; cmpsb; setnz %0" - : "=qm" (diff), "+D" (s1), "+S" (s2), "+c" (len)); + asm volatile("fs; repe; cmpsb" CC_SET(nz) + : CC_OUT(nz) (diff), "+D" (s1), "+S" (s2), "+c" (len)); return diff; } static inline bool memcmp_gs(const void *s1, addr_t s2, size_t len) { bool diff; - asm volatile("gs; repe; cmpsb; setnz %0" - : "=qm" (diff), "+D" (s1), "+S" (s2), "+c" (len)); + asm volatile("gs; repe; cmpsb" CC_SET(nz) + : CC_OUT(nz) (diff), "+D" (s1), "+S" (s2), "+c" (len)); return diff; } -- cgit v1.2.3 From 3b290398638ee4e57f1fb2e35c02005cba9a737f Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Wed, 8 Jun 2016 12:38:46 -0700 Subject: x86, asm: Use CC_SET()/CC_OUT() and static_cpu_has() in archrandom.h Use CC_SET()/CC_OUT() and static_cpu_has(). This produces code good enough to eliminate ad hoc use of alternatives in , greatly simplifying the code. While we are at it, make x86_init_rdrand() compile out completely if we don't need it. Signed-off-by: H. Peter Anvin Link: http://lkml.kernel.org/r/1465414726-197858-11-git-send-email-hpa@linux.intel.com v2: fix a conflict between and discovered by Ingo Molnar. There are a few places in x86-specific code where we need all of even when CONFIG_ARCH_RANDOM is disabled, so does not suffice. 
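For reference, the conversion pattern applied throughout this series can be summarized by the following minimal sketch. It is illustrative only and not taken from any of the patches; demo_test_and_set_bit() is a made-up name mirroring __test_and_set_bit(), and it assumes the CC_SET()/CC_OUT() definitions from <asm/asm.h> introduced earlier in the series.

/*
 * Minimal sketch (not part of any patch above): a flag-returning
 * helper written against CC_SET()/CC_OUT().  On gcc 6+ the carry
 * flag is handed to the compiler directly via the "=@ccc" flags
 * output; on older compilers the macros fall back to an explicit
 * SETC into a "bool"-typed operand.
 */
static __always_inline bool demo_test_and_set_bit(long nr,
						  volatile unsigned long *addr)
{
	bool oldbit;

	asm("bts %2,%1\n\t"
	    CC_SET(c)
	    : CC_OUT(c) (oldbit), "+m" (*addr)
	    : "Ir" (nr));
	return oldbit;
}

The same shape recurs in the bitops, percpu, rwsem, boot.h and archrandom conversions in this series.
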
--- arch/x86/include/asm/archrandom.h | 128 ++++++++++++++++++-------------------- arch/x86/kernel/cpu/rdrand.c | 4 +- 2 files changed, 62 insertions(+), 70 deletions(-) (limited to 'arch') diff --git a/arch/x86/include/asm/archrandom.h b/arch/x86/include/asm/archrandom.h index ab6f599ce2fd..5b0579abb398 100644 --- a/arch/x86/include/asm/archrandom.h +++ b/arch/x86/include/asm/archrandom.h @@ -25,8 +25,6 @@ #include #include -#include -#include #define RDRAND_RETRY_LOOPS 10 @@ -40,97 +38,91 @@ # define RDSEED_LONG RDSEED_INT #endif -#ifdef CONFIG_ARCH_RANDOM +/* Unconditional execution of RDRAND and RDSEED */ -/* Instead of arch_get_random_long() when alternatives haven't run. */ static inline bool rdrand_long(unsigned long *v) { - int ok; - asm volatile("1: " RDRAND_LONG "\n\t" - "jc 2f\n\t" - "decl %0\n\t" - "jnz 1b\n\t" - "2:" - : "=r" (ok), "=a" (*v) - : "0" (RDRAND_RETRY_LOOPS)); - return !!ok; + bool ok; + unsigned int retry = RDRAND_RETRY_LOOPS; + do { + asm volatile(RDRAND_LONG "\n\t" + CC_SET(c) + : CC_OUT(c) (ok), "=a" (*v)); + if (ok) + return true; + } while (--retry); + return false; +} + +static inline bool rdrand_int(unsigned int *v) +{ + bool ok; + unsigned int retry = RDRAND_RETRY_LOOPS; + do { + asm volatile(RDRAND_INT "\n\t" + CC_SET(c) + : CC_OUT(c) (ok), "=a" (*v)); + if (ok) + return true; + } while (--retry); + return false; } -/* A single attempt at RDSEED */ static inline bool rdseed_long(unsigned long *v) { bool ok; asm volatile(RDSEED_LONG "\n\t" - "setc %0" - : "=qm" (ok), "=a" (*v)); + CC_SET(c) + : CC_OUT(c) (ok), "=a" (*v)); return ok; } -#define GET_RANDOM(name, type, rdrand, nop) \ -static inline bool name(type *v) \ -{ \ - int ok; \ - alternative_io("movl $0, %0\n\t" \ - nop, \ - "\n1: " rdrand "\n\t" \ - "jc 2f\n\t" \ - "decl %0\n\t" \ - "jnz 1b\n\t" \ - "2:", \ - X86_FEATURE_RDRAND, \ - ASM_OUTPUT2("=r" (ok), "=a" (*v)), \ - "0" (RDRAND_RETRY_LOOPS)); \ - return !!ok; \ -} - -#define GET_SEED(name, type, rdseed, nop) \ -static inline bool name(type *v) \ -{ \ - bool ok; \ - alternative_io("movb $0, %0\n\t" \ - nop, \ - rdseed "\n\t" \ - "setc %0", \ - X86_FEATURE_RDSEED, \ - ASM_OUTPUT2("=q" (ok), "=a" (*v))); \ - return ok; \ +static inline bool rdseed_int(unsigned int *v) +{ + bool ok; + asm volatile(RDSEED_INT "\n\t" + CC_SET(c) + : CC_OUT(c) (ok), "=a" (*v)); + return ok; } -#ifdef CONFIG_X86_64 - -GET_RANDOM(arch_get_random_long, unsigned long, RDRAND_LONG, ASM_NOP5); -GET_RANDOM(arch_get_random_int, unsigned int, RDRAND_INT, ASM_NOP4); - -GET_SEED(arch_get_random_seed_long, unsigned long, RDSEED_LONG, ASM_NOP5); -GET_SEED(arch_get_random_seed_int, unsigned int, RDSEED_INT, ASM_NOP4); - -#else - -GET_RANDOM(arch_get_random_long, unsigned long, RDRAND_LONG, ASM_NOP3); -GET_RANDOM(arch_get_random_int, unsigned int, RDRAND_INT, ASM_NOP3); - -GET_SEED(arch_get_random_seed_long, unsigned long, RDSEED_LONG, ASM_NOP4); -GET_SEED(arch_get_random_seed_int, unsigned int, RDSEED_INT, ASM_NOP4); - -#endif /* CONFIG_X86_64 */ - +/* Conditional execution based on CPU type */ #define arch_has_random() static_cpu_has(X86_FEATURE_RDRAND) #define arch_has_random_seed() static_cpu_has(X86_FEATURE_RDSEED) -#else +/* + * These are the generic interfaces; they must not be declared if the + * stubs in are to be invoked, + * i.e. CONFIG_ARCH_RANDOM is not defined. + */ +#ifdef CONFIG_ARCH_RANDOM -static inline bool rdrand_long(unsigned long *v) +static inline bool arch_get_random_long(unsigned long *v) { - return 0; + return arch_has_random() ? 
rdrand_long(v) : false; } -static inline bool rdseed_long(unsigned long *v) +static inline bool arch_get_random_int(unsigned int *v) { - return 0; + return arch_has_random() ? rdrand_int(v) : false; } -#endif /* CONFIG_ARCH_RANDOM */ +static inline bool arch_get_random_seed_long(unsigned long *v) +{ + return arch_has_random_seed() ? rdseed_long(v) : false; +} + +static inline bool arch_get_random_seed_int(unsigned int *v) +{ + return arch_has_random_seed() ? rdseed_int(v) : false; +} extern void x86_init_rdrand(struct cpuinfo_x86 *c); +#else /* !CONFIG_ARCH_RANDOM */ + +static inline void x86_init_rdrand(struct cpuinfo_x86 *c) { } + +#endif /* !CONFIG_ARCH_RANDOM */ + #endif /* ASM_X86_ARCHRANDOM_H */ diff --git a/arch/x86/kernel/cpu/rdrand.c b/arch/x86/kernel/cpu/rdrand.c index f6f50c4ceaec..cfa97ff67bda 100644 --- a/arch/x86/kernel/cpu/rdrand.c +++ b/arch/x86/kernel/cpu/rdrand.c @@ -39,9 +39,9 @@ __setup("nordrand", x86_rdrand_setup); */ #define SANITY_CHECK_LOOPS 8 +#ifdef CONFIG_ARCH_RANDOM void x86_init_rdrand(struct cpuinfo_x86 *c) { -#ifdef CONFIG_ARCH_RANDOM unsigned long tmp; int i; @@ -55,5 +55,5 @@ void x86_init_rdrand(struct cpuinfo_x86 *c) return; } } -#endif } +#endif -- cgit v1.2.3 From 0145071b33142cbccffb51486b1ce921677b1d2d Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Thu, 2 Jun 2016 14:20:18 +0200 Subject: x86: Do away with ARCH_[WANT_OPTIONAL|REQUIRE]_GPIOLIB MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This replaces: - "select ARCH_REQUIRE_GPIOLIB" with "select GPIOLIB" as this can now be selected directly. - "select ARCH_WANT_OPTIONAL_GPIOLIB" with no dependency: GPIOLIB is now selectable by everyone, so we need not declare our intent to select it. When ordering the symbols the following rationale was used: if the selects were in alphabetical order, I moved select GPIOLIB to be in alphabetical order, but if the selects were not maintained in alphabetical order, I just replaced "select ARCH_REQUIRE_GPIOLIB" with "select GPIOLIB". Signed-off-by: Linus Walleij Cc: Michael Büsch Link: http://lkml.kernel.org/r/1464870018-8281-1-git-send-email-linus.walleij@linaro.org Signed-off-by: Thomas Gleixner --- arch/x86/Kconfig | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 0a7b885964ba..607382b95372 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -49,7 +49,6 @@ config X86 select ARCH_WANTS_DYNAMIC_TASK_STRUCT select ARCH_WANT_FRAME_POINTERS select ARCH_WANT_IPC_PARSE_VERSION if X86_32 - select ARCH_WANT_OPTIONAL_GPIOLIB select BUILDTIME_EXTABLE_SORT select CLKEVT_I8253 select CLKSRC_I8253 if X86_32 @@ -643,7 +642,7 @@ config STA2X11 select X86_DMA_REMAP select SWIOTLB select MFD_STA2X11 - select ARCH_REQUIRE_GPIOLIB + select GPIOLIB default n ---help--- This adds support for boards based on the STA2X11 IO-Hub, -- cgit v1.2.3 From 7faf90ef995ea470f32f43810266ece8ac8ba6c7 Mon Sep 17 00:00:00 2001 From: Paul Bolle Date: Thu, 3 Mar 2016 13:01:40 +0100 Subject: lguest: Read length of device_cap later Read the length of the capability with type VIRTIO_PCI_CAP_DEVICE_CFG only when we're sure we're going to need it. Which is just before the check whether the virtio console actually has an emerg_wr field. 
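As a reading aid for the hunks below (not part of the patch): the offsetof() config-space reads walk the vendor capability layout from the virtio 1.0 spec, which the kernel of that era declared in include/uapi/linux/virtio_pci.h roughly as follows.

struct virtio_pci_cap {
	__u8 cap_vndr;		/* PCI_CAP_ID_VNDR */
	__u8 cap_next;		/* link to the next capability */
	__u8 cap_len;		/* length of this capability structure */
	__u8 cfg_type;		/* e.g. VIRTIO_PCI_CAP_DEVICE_CFG */
	__u8 bar;		/* which BAR the window lives in */
	__u8 padding[3];
	__le32 offset;		/* offset of the window within that BAR */
	__le32 length;		/* length of the window */
};

The patch simply postpones the read of the 'length' member until the emerg_wr sanity check actually needs it.
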
Signed-off-by: Paul Bolle Cc: Rusty Russell Cc: lguest@lists.ozlabs.org Link: http://lkml.kernel.org/r/1457006501-5377-2-git-send-email-pebolle@tiscali.nl Signed-off-by: Thomas Gleixner --- arch/x86/lguest/boot.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) (limited to 'arch') diff --git a/arch/x86/lguest/boot.c b/arch/x86/lguest/boot.c index 3847e736702e..146d11c8cf78 100644 --- a/arch/x86/lguest/boot.c +++ b/arch/x86/lguest/boot.c @@ -1258,7 +1258,7 @@ static void probe_pci_console(void) u8 vndr = read_pci_config_byte(0, 1, 0, cap); if (vndr == PCI_CAP_ID_VNDR) { u8 type, bar; - u32 offset, length; + u32 offset; type = read_pci_config_byte(0, 1, 0, cap + offsetof(struct virtio_pci_cap, cfg_type)); @@ -1266,15 +1266,12 @@ static void probe_pci_console(void) cap + offsetof(struct virtio_pci_cap, bar)); offset = read_pci_config(0, 1, 0, cap + offsetof(struct virtio_pci_cap, offset)); - length = read_pci_config(0, 1, 0, - cap + offsetof(struct virtio_pci_cap, length)); switch (type) { case VIRTIO_PCI_CAP_DEVICE_CFG: if (bar == 0) { device_cap = cap; device_offset = offset; - device_len = length; } break; case VIRTIO_PCI_CAP_PCI_CFG: @@ -1297,6 +1294,8 @@ static void probe_pci_console(void) * emerg_wr. If it doesn't support VIRTIO_CONSOLE_F_EMERG_WRITE * it should ignore the access. */ + device_len = read_pci_config(0, 1, 0, + device_cap + offsetof(struct virtio_pci_cap, length)); if (device_len < (offsetof(struct virtio_console_config, emerg_wr) + sizeof(u32))) { printk(KERN_ERR "lguest: console missing emerg_wr field\n"); -- cgit v1.2.3 From cf2cf0f50c14e86e04cda2c684357eed77922666 Mon Sep 17 00:00:00 2001 From: Paul Bolle Date: Thu, 3 Mar 2016 13:01:41 +0100 Subject: lguest: Read offset of device_cap later Read the offset of the capability with type VIRTIO_PCI_CAP_DEVICE_CFG only when we're sure we're going to need it. Which is when all checks have passed and we know we have a virtio console with an emerg_wr field. Signed-off-by: Paul Bolle Cc: Rusty Russell Cc: lguest@lists.ozlabs.org Link: http://lkml.kernel.org/r/1457006501-5377-3-git-send-email-pebolle@tiscali.nl Signed-off-by: Thomas Gleixner --- arch/x86/lguest/boot.c | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) (limited to 'arch') diff --git a/arch/x86/lguest/boot.c b/arch/x86/lguest/boot.c index 146d11c8cf78..25da5bc8d83d 100644 --- a/arch/x86/lguest/boot.c +++ b/arch/x86/lguest/boot.c @@ -1233,8 +1233,6 @@ static void write_bar_via_cfg(u32 cfg_offset, u32 off, u32 val) static void probe_pci_console(void) { u8 cap, common_cap = 0, device_cap = 0; - /* Offset within BAR0 */ - u32 device_offset; u32 device_len; /* Avoid recursive printk into here. 
*/ @@ -1258,21 +1256,16 @@ static void probe_pci_console(void) u8 vndr = read_pci_config_byte(0, 1, 0, cap); if (vndr == PCI_CAP_ID_VNDR) { u8 type, bar; - u32 offset; type = read_pci_config_byte(0, 1, 0, cap + offsetof(struct virtio_pci_cap, cfg_type)); bar = read_pci_config_byte(0, 1, 0, cap + offsetof(struct virtio_pci_cap, bar)); - offset = read_pci_config(0, 1, 0, - cap + offsetof(struct virtio_pci_cap, offset)); switch (type) { case VIRTIO_PCI_CAP_DEVICE_CFG: - if (bar == 0) { + if (bar == 0) device_cap = cap; - device_offset = offset; - } break; case VIRTIO_PCI_CAP_PCI_CFG: console_access_cap = cap; @@ -1302,7 +1295,8 @@ static void probe_pci_console(void) return; } - console_cfg_offset = device_offset; + console_cfg_offset = read_pci_config(0, 1, 0, + device_cap + offsetof(struct virtio_pci_cap, offset)); printk(KERN_INFO "lguest: Console via virtio-pci emerg_wr\n"); } -- cgit v1.2.3 From 4855531eb8582a98cb905d2baf86021254d7a675 Mon Sep 17 00:00:00 2001 From: Rui Wang Date: Wed, 8 Jun 2016 14:59:53 +0800 Subject: x86/ioapic: Simplify ioapic_setup_resources() Optimize the function by removing the variable 'num'. Signed-off-by: Rui Wang Signed-off-by: Thomas Gleixner Cc: tony.luck@intel.com Cc: linux-pci@vger.kernel.org Cc: rjw@rjwysocki.net Cc: linux-acpi@vger.kernel.org Cc: bhelgaas@google.com Link: http://lkml.kernel.org/r/1465369193-4816-4-git-send-email-rui.y.wang@intel.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic/io_apic.c | 18 +++++++----------- 1 file changed, 7 insertions(+), 11 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index 446702ed99dc..e58729597a7a 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -2567,29 +2567,25 @@ static struct resource * __init ioapic_setup_resources(void) unsigned long n; struct resource *res; char *mem; - int i, num = 0; + int i; - for_each_ioapic(i) - num++; - if (num == 0) + if (nr_ioapics == 0) return NULL; n = IOAPIC_RESOURCE_NAME_SIZE + sizeof(struct resource); - n *= num; + n *= nr_ioapics; mem = alloc_bootmem(n); res = (void *)mem; - mem += sizeof(struct resource) * num; + mem += sizeof(struct resource) * nr_ioapics; - num = 0; for_each_ioapic(i) { - res[num].name = mem; - res[num].flags = IORESOURCE_MEM | IORESOURCE_BUSY; + res[i].name = mem; + res[i].flags = IORESOURCE_MEM | IORESOURCE_BUSY; snprintf(mem, IOAPIC_RESOURCE_NAME_SIZE, "IOAPIC %u", i); mem += IOAPIC_RESOURCE_NAME_SIZE; - ioapics[i].iomem_res = &res[num]; - num++; + ioapics[i].iomem_res = &res[i]; } ioapic_resources = res; -- cgit v1.2.3 From 3c8fad9183ab7b3b3471fd2bb3d604104dd447cb Mon Sep 17 00:00:00 2001 From: Claudio Fontana Date: Thu, 9 Jun 2016 12:31:58 +0200 Subject: x86/apic: Fix misspelled APIC Signed-off-by: Claudio Fontana Signed-off-by: Thomas Gleixner Cc: trivial@kernel.org Link: http://lkml.kernel.org/r/1465468318-19867-1-git-send-email-hw.claudio@gmail.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic/apic.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index 60078a67d7e3..f943d2f453a4 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c @@ -2045,7 +2045,7 @@ int generic_processor_info(int apicid, int version) int thiscpu = max + disabled_cpus - 1; pr_warning( - "ACPI: NR_CPUS/possible_cpus limit of %i almost" + "APIC: NR_CPUS/possible_cpus limit of %i almost" " reached. Keeping one slot for boot cpu." 
" Processor %d/0x%x ignored.\n", max, thiscpu, apicid); @@ -2057,7 +2057,7 @@ int generic_processor_info(int apicid, int version) int thiscpu = max + disabled_cpus; pr_warning( - "ACPI: NR_CPUS/possible_cpus limit of %i reached." + "APIC: NR_CPUS/possible_cpus limit of %i reached." " Processor %d/0x%x ignored.\n", max, thiscpu, apicid); disabled_cpus++; @@ -2085,7 +2085,7 @@ int generic_processor_info(int apicid, int version) if (topology_update_package_map(apicid, cpu) < 0) { int thiscpu = max + disabled_cpus; - pr_warning("ACPI: Package limit reached. Processor %d/0x%x ignored.\n", + pr_warning("APIC: Package limit reached. Processor %d/0x%x ignored.\n", thiscpu, apicid); disabled_cpus++; return -ENOSPC; -- cgit v1.2.3 From 99158f10e91768d34c5004c40c42f802b719bcae Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Tue, 24 May 2016 15:48:38 -0700 Subject: x86/xen: Simplify set_aliased_prot() A year ago, via the following commit: aa1acff356bb ("x86/xen: Probe target addresses in set_aliased_prot() before the hypercall") I added an explicit probe to work around a hypercall issue. The code can be simplified by using probe_kernel_read(). No change in functionality. Signed-off-by: Andy Lutomirski Reviewed-by: Andrew Cooper Acked-by: David Vrabel Cc: Andy Lutomirski Cc: Boris Ostrovsky Cc: Borislav Petkov Cc: Brian Gerst Cc: David Vrabel Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Jan Beulich Cc: Kees Cook Cc: Konrad Rzeszutek Wilk Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: xen-devel Link: http://lkml.kernel.org/r/0706f1a2538e481194514197298cca6b5e3f2638.1464129798.git.luto@kernel.org Signed-off-by: Ingo Molnar --- arch/x86/xen/enlighten.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index 760789ae8562..0f87db2cc6a8 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -521,9 +521,7 @@ static void set_aliased_prot(void *v, pgprot_t prot) preempt_disable(); - pagefault_disable(); /* Avoid warnings due to being atomic. */ - __get_user(dummy, (unsigned char __user __force *)v); - pagefault_enable(); + probe_kernel_read(&dummy, v, 1); if (HYPERVISOR_update_va_mapping((unsigned long)v, pte, 0)) BUG(); -- cgit v1.2.3 From b2de43605410d1970dc9e0f349e399f1d561be13 Mon Sep 17 00:00:00 2001 From: Tony Luck Date: Fri, 27 May 2016 14:11:06 -0700 Subject: x86/mce: Do not use bank 1 for APEI generated error logs BIOS can report a memory error to Linux using ACPI/APEI mechanism. When it does this, we create a fictitious machine check error record and feed it into the standard mce_log() function. The error record needs a machine check bank number, and for some reason we chose "1" for this. But "1" is a valid bank number, and this causes confusion and heartburn among h/w folks who are concerned that a memory error signature was somehow logged in bank 1. Change to use "-1" (field is a "u8" so will typically print as 255). This should make it clearer that this error did not originate in a machine check bank. Signed-off-by: Tony Luck Signed-off-by: Borislav Petkov Cc: Andy Lutomirski Cc: Aristeu Rozanski Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. 
Peter Anvin Cc: Linus Torvalds Cc: Mauro Carvalho Chehab Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-edac Link: http://lkml.kernel.org/r/b7fffb2b326bc1dd150ffceb9919a803f9496e0e.1464805958.git.tony.luck@intel.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/mcheck/mce-apei.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/kernel/cpu/mcheck/mce-apei.c b/arch/x86/kernel/cpu/mcheck/mce-apei.c index 34c89a3e8260..83f1a98d37db 100644 --- a/arch/x86/kernel/cpu/mcheck/mce-apei.c +++ b/arch/x86/kernel/cpu/mcheck/mce-apei.c @@ -46,7 +46,7 @@ void apei_mce_report_mem_error(int severity, struct cper_sec_mem_err *mem_err) return; mce_setup(&m); - m.bank = 1; + m.bank = -1; /* Fake a memory read error with unknown channel */ m.status = MCI_STATUS_VAL | MCI_STATUS_EN | MCI_STATUS_ADDRV | 0x9f; -- cgit v1.2.3 From 2348140d58f4f4245e9635ea8f1a77e940a4d877 Mon Sep 17 00:00:00 2001 From: Wanpeng Li Date: Mon, 13 Jun 2016 18:32:44 +0800 Subject: KVM: Fix steal clock warp during guest CPU hotplug MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Sometimes, after CPU hotplug you can observe a spike in stolen time (100%) followed by the CPU being marked as 100% idle when it's actually busy with a CPU hog task. The trace looks like the following: cpuhp/1-12 [001] d.h1 167.461657: account_process_tick: steal = 1291385514, prev_steal_time = 0 cpuhp/1-12 [001] d.h1 167.461659: account_process_tick: steal_jiffies = 1291 -0 [001] d.h1 167.462663: account_process_tick: steal = 18732255, prev_steal_time = 1291000000 -0 [001] d.h1 167.462664: account_process_tick: steal_jiffies = 18446744072437 The sudden decrease of "steal" causes steal_jiffies to underflow. The root cause is kvm_steal_time being reset to 0 after hot-plugging back in a CPU. Instead, the preexisting value can be used, which is what the core scheduler code expects. John Stultz also reported a similar issue after guest S3. Suggested-by: Paolo Bonzini Signed-off-by: Wanpeng Li Signed-off-by: Peter Zijlstra (Intel) Acked-by: Paolo Bonzini Cc: Frederic Weisbecker Cc: John Stultz Cc: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Radim Krčmář Cc: Rik van Riel Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1465813966-3116-2-git-send-email-wanpeng.li@hotmail.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/kvm.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c index eea2a6f72b31..1ef5e48b3a36 100644 --- a/arch/x86/kernel/kvm.c +++ b/arch/x86/kernel/kvm.c @@ -301,8 +301,6 @@ static void kvm_register_steal_time(void) if (!has_steal_clock) return; - memset(st, 0, sizeof(*st)); - wrmsrl(MSR_KVM_STEAL_TIME, (slow_virt_to_phys(st) | KVM_MSR_ENABLED)); pr_info("kvm-stealtime: cpu %d, msr %llx\n", cpu, (unsigned long long) slow_virt_to_phys(st)); -- cgit v1.2.3 From 281ee056e3f27d925350d65e5eb504b1320d7d5a Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Tue, 31 May 2016 16:25:27 -0500 Subject: perf/x86/intel/uncore: Remove redundant pci_get_drvdata() Remove redundant pci_get_drvdata() call. There's another call a few lines down, just before we test "box" for NULL. No functional change intended. Signed-off-by: Bjorn Helgaas Signed-off-by: Peter Zijlstra (Intel) Acked-by: Thomas Gleixner Cc: Alexander Shishkin Cc: Arnaldo Carvalho de Melo Cc: Borislav Petkov Cc: H. 
Peter Anvin Cc: Jiri Olsa Cc: Kan Liang Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Vince Weaver Link: http://lkml.kernel.org/r/20160531212527.28718.92371.stgit@bhelgaas-glaptop2.roam.corp.google.com Signed-off-by: Ingo Molnar --- arch/x86/events/intel/uncore.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/events/intel/uncore.c b/arch/x86/events/intel/uncore.c index 4e70d2721249..dc965d2cf076 100644 --- a/arch/x86/events/intel/uncore.c +++ b/arch/x86/events/intel/uncore.c @@ -974,7 +974,7 @@ static int uncore_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id static void uncore_pci_remove(struct pci_dev *pdev) { - struct intel_uncore_box *box = pci_get_drvdata(pdev); + struct intel_uncore_box *box; struct intel_uncore_pmu *pmu; int i, phys_id, pkg; -- cgit v1.2.3 From 2c95afc1e83d93fac3be6923465e1753c2c53b0a Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Thu, 9 Jun 2016 06:14:38 -0700 Subject: perf/x86/intel, watchdog: Switch NMI watchdog to ref cycles on x86 The NMI watchdog uses either the fixed cycles or a generic cycles counter. This causes a lot of conflicts with users of the PMU who want to run a full group including the cycles fixed counter, for example the --topdown support recently added to perf stat. The code needs to fall back to not use groups, which can cause measurement inaccuracy due to multiplexing errors. This patch switches the NMI watchdog to use reference cycles on Intel systems. This is actually more accurate than cycles, because cycles can tick faster than the measured CPU Frequency due to Turbo mode. The ref cycles always tick at their frequency, or slower when the system is idling. That means the NMI watchdog can never expire too early, unlike with cycles. The reference cycles tick roughly at the frequency of the TSC, so the same period computation can be used. 
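As a quick sanity check on that last point (illustrative numbers only, not from the changelog), plug a nominal part into the hw_nmi_get_sample_period() helper visible in the diff below:

/*
 * hw_nmi_get_sample_period() still returns
 *	(u64)cpu_khz * 1000 * watchdog_thresh
 * For a nominal 2.5 GHz CPU (cpu_khz == 2,500,000) and the usual
 * watchdog_thresh of 10:
 *
 *	2,500,000 * 1000 * 10 = 25,000,000,000 reference cycles
 *
 * Reference cycles tick at roughly the TSC rate, so the hardlockup
 * counter still overflows after about 10 seconds of uninterrupted
 * execution; the difference is that Turbo mode can no longer make
 * it overflow early, as it could with plain CPU cycles.
 */
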
Signed-off-by: Andi Kleen Signed-off-by: Peter Zijlstra (Intel) Cc: Alexander Shishkin Cc: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Thomas Gleixner Cc: Vince Weaver Cc: acme@kernel.org Cc: jolsa@kernel.org Link: http://lkml.kernel.org/r/1465478079-19993-1-git-send-email-andi@firstfloor.org Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic/hw_nmi.c | 8 ++++++++ include/linux/nmi.h | 1 + kernel/watchdog.c | 7 +++++++ 3 files changed, 16 insertions(+) (limited to 'arch') diff --git a/arch/x86/kernel/apic/hw_nmi.c b/arch/x86/kernel/apic/hw_nmi.c index 7788ce643bf4..016f4263fad4 100644 --- a/arch/x86/kernel/apic/hw_nmi.c +++ b/arch/x86/kernel/apic/hw_nmi.c @@ -18,8 +18,16 @@ #include #include #include +#include #ifdef CONFIG_HARDLOCKUP_DETECTOR +int hw_nmi_get_event(void) +{ + if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) + return PERF_COUNT_HW_REF_CPU_CYCLES; + return PERF_COUNT_HW_CPU_CYCLES; +} + u64 hw_nmi_get_sample_period(int watchdog_thresh) { return (u64)(cpu_khz) * 1000 * watchdog_thresh; diff --git a/include/linux/nmi.h b/include/linux/nmi.h index 4630eeae18e0..79858af27209 100644 --- a/include/linux/nmi.h +++ b/include/linux/nmi.h @@ -66,6 +66,7 @@ static inline bool trigger_allbutself_cpu_backtrace(void) #ifdef CONFIG_LOCKUP_DETECTOR u64 hw_nmi_get_sample_period(int watchdog_thresh); +int hw_nmi_get_event(void); extern int nmi_watchdog_enabled; extern int soft_watchdog_enabled; extern int watchdog_user_enabled; diff --git a/kernel/watchdog.c b/kernel/watchdog.c index 9acb29f280ec..8dd30fcd91be 100644 --- a/kernel/watchdog.c +++ b/kernel/watchdog.c @@ -315,6 +315,12 @@ static int is_softlockup(unsigned long touch_ts) #ifdef CONFIG_HARDLOCKUP_DETECTOR +/* Can be overriden by architecture */ +__weak int hw_nmi_get_event(void) +{ + return PERF_COUNT_HW_CPU_CYCLES; +} + static struct perf_event_attr wd_hw_attr = { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_CPU_CYCLES, @@ -604,6 +610,7 @@ static int watchdog_nmi_enable(unsigned int cpu) wd_attr = &wd_hw_attr; wd_attr->sample_period = hw_nmi_get_sample_period(watchdog_thresh); + wd_attr->config = hw_nmi_get_event(); /* Try to register using hardware perf events */ event = perf_event_create_kernel_counter(wd_attr, cpu, NULL, watchdog_overflow_callback, NULL); -- cgit v1.2.3 From b464d1270a8016edcf1fd20d77cefdecf9b0b73e Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 30 May 2016 14:32:04 +0200 Subject: locking/barriers, tile: Provide TILE specific smp_acquire__after_ctrl_dep() Since TILE doesn't do read speculation, its control dependencies also guarantee LOAD->LOAD order and we don't need the additional RMB otherwise required to provide ACQUIRE semantics. Signed-off-by: Peter Zijlstra (Intel) Acked-by: Chris Metcalf Cc: Andrew Morton Cc: Linus Torvalds Cc: Paul E. 
McKenney Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- arch/tile/include/asm/barrier.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'arch') diff --git a/arch/tile/include/asm/barrier.h b/arch/tile/include/asm/barrier.h index d55222806c2f..4c419ab95ab7 100644 --- a/arch/tile/include/asm/barrier.h +++ b/arch/tile/include/asm/barrier.h @@ -87,6 +87,13 @@ mb_incoherent(void) #define __smp_mb__after_atomic() __smp_mb() #endif +/* + * The TILE architecture does not do speculative reads; this ensures + * that a control dependency also orders against loads and already provides + * a LOAD->{LOAD,STORE} order and can forgo the additional RMB. + */ +#define smp_acquire__after_ctrl_dep() barrier() + #include #endif /* !__ASSEMBLY__ */ -- cgit v1.2.3 From 726328d92a42b6d4b76078e2659f43067f82c4e8 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 26 May 2016 10:35:03 +0200 Subject: locking/spinlock, arch: Update and fix spin_unlock_wait() implementations This patch updates/fixes all spin_unlock_wait() implementations. The update is in semantics; where it previously was only a control dependency, we now upgrade to a full load-acquire to match the store-release from the spin_unlock() we waited on. This ensures that when spin_unlock_wait() returns, we're guaranteed to observe the full critical section we waited on. This fixes a number of spin_unlock_wait() users that (not unreasonably) rely on this. I also fixed a number of ticket lock versions to only wait on the current lock holder, instead of for a full unlock, as this is sufficient. Furthermore; again for ticket locks; I added an smp_rmb() in between the initial ticket load and the spin loop testing the current value because I could not convince myself the address dependency is sufficient, esp. if the loads are of different sizes. I'm more than happy to remove this smp_rmb() again if people are certain the address dependency does indeed work as expected. Note: PPC32 will be fixed independently Signed-off-by: Peter Zijlstra (Intel) Cc: Andrew Morton Cc: Linus Torvalds Cc: Paul E. 
McKenney Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: chris@zankel.net Cc: cmetcalf@mellanox.com Cc: davem@davemloft.net Cc: dhowells@redhat.com Cc: james.hogan@imgtec.com Cc: jejb@parisc-linux.org Cc: linux@armlinux.org.uk Cc: mpe@ellerman.id.au Cc: ralf@linux-mips.org Cc: realmz6@gmail.com Cc: rkuo@codeaurora.org Cc: rth@twiddle.net Cc: schwidefsky@de.ibm.com Cc: tony.luck@intel.com Cc: vgupta@synopsys.com Cc: ysato@users.sourceforge.jp Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- arch/alpha/include/asm/spinlock.h | 9 +++++++-- arch/arc/include/asm/spinlock.h | 7 +++++-- arch/arm/include/asm/spinlock.h | 19 +++++++++++++++++-- arch/blackfin/include/asm/spinlock.h | 5 +++-- arch/hexagon/include/asm/spinlock.h | 10 ++++++++-- arch/ia64/include/asm/spinlock.h | 4 ++++ arch/m32r/include/asm/spinlock.h | 9 +++++++-- arch/metag/include/asm/spinlock.h | 14 ++++++++++++-- arch/mips/include/asm/spinlock.h | 19 +++++++++++++++++-- arch/mn10300/include/asm/spinlock.h | 8 +++++++- arch/parisc/include/asm/spinlock.h | 9 +++++++-- arch/s390/include/asm/spinlock.h | 3 +++ arch/sh/include/asm/spinlock.h | 10 ++++++++-- arch/sparc/include/asm/spinlock_32.h | 7 +++++-- arch/sparc/include/asm/spinlock_64.h | 10 +++++++--- arch/tile/lib/spinlock_32.c | 6 ++++++ arch/tile/lib/spinlock_64.c | 6 ++++++ arch/xtensa/include/asm/spinlock.h | 10 ++++++++-- include/asm-generic/barrier.h | 2 +- include/linux/spinlock_up.h | 10 +++++++--- 20 files changed, 145 insertions(+), 32 deletions(-) (limited to 'arch') diff --git a/arch/alpha/include/asm/spinlock.h b/arch/alpha/include/asm/spinlock.h index fed9c6f44c19..a40b9fc0c6c3 100644 --- a/arch/alpha/include/asm/spinlock.h +++ b/arch/alpha/include/asm/spinlock.h @@ -3,6 +3,8 @@ #include #include +#include +#include /* * Simple spin lock operations. There are two variants, one clears IRQ's @@ -13,8 +15,11 @@ #define arch_spin_lock_flags(lock, flags) arch_spin_lock(lock) #define arch_spin_is_locked(x) ((x)->lock != 0) -#define arch_spin_unlock_wait(x) \ - do { cpu_relax(); } while ((x)->lock) + +static inline void arch_spin_unlock_wait(arch_spinlock_t *lock) +{ + smp_cond_load_acquire(&lock->lock, !VAL); +} static inline int arch_spin_value_unlocked(arch_spinlock_t lock) { diff --git a/arch/arc/include/asm/spinlock.h b/arch/arc/include/asm/spinlock.h index cded4a9b5438..233d5ffe6ec7 100644 --- a/arch/arc/include/asm/spinlock.h +++ b/arch/arc/include/asm/spinlock.h @@ -15,8 +15,11 @@ #define arch_spin_is_locked(x) ((x)->slock != __ARCH_SPIN_LOCK_UNLOCKED__) #define arch_spin_lock_flags(lock, flags) arch_spin_lock(lock) -#define arch_spin_unlock_wait(x) \ - do { while (arch_spin_is_locked(x)) cpu_relax(); } while (0) + +static inline void arch_spin_unlock_wait(arch_spinlock_t *lock) +{ + smp_cond_load_acquire(&lock->slock, !VAL); +} #ifdef CONFIG_ARC_HAS_LLSC diff --git a/arch/arm/include/asm/spinlock.h b/arch/arm/include/asm/spinlock.h index 0fa418463f49..4bec45442072 100644 --- a/arch/arm/include/asm/spinlock.h +++ b/arch/arm/include/asm/spinlock.h @@ -6,6 +6,8 @@ #endif #include +#include +#include /* * sev and wfe are ARMv6K extensions. Uniprocessor ARMv6 may not have the K @@ -50,8 +52,21 @@ static inline void dsb_sev(void) * memory. 
*/ -#define arch_spin_unlock_wait(lock) \ - do { while (arch_spin_is_locked(lock)) cpu_relax(); } while (0) +static inline void arch_spin_unlock_wait(arch_spinlock_t *lock) +{ + u16 owner = READ_ONCE(lock->tickets.owner); + + for (;;) { + arch_spinlock_t tmp = READ_ONCE(*lock); + + if (tmp.tickets.owner == tmp.tickets.next || + tmp.tickets.owner != owner) + break; + + wfe(); + } + smp_acquire__after_ctrl_dep(); +} #define arch_spin_lock_flags(lock, flags) arch_spin_lock(lock) diff --git a/arch/blackfin/include/asm/spinlock.h b/arch/blackfin/include/asm/spinlock.h index 490c7caa02d9..c58f4a83ed6f 100644 --- a/arch/blackfin/include/asm/spinlock.h +++ b/arch/blackfin/include/asm/spinlock.h @@ -12,6 +12,8 @@ #else #include +#include +#include asmlinkage int __raw_spin_is_locked_asm(volatile int *ptr); asmlinkage void __raw_spin_lock_asm(volatile int *ptr); @@ -48,8 +50,7 @@ static inline void arch_spin_unlock(arch_spinlock_t *lock) static inline void arch_spin_unlock_wait(arch_spinlock_t *lock) { - while (arch_spin_is_locked(lock)) - cpu_relax(); + smp_cond_load_acquire(&lock->lock, !VAL); } static inline int arch_read_can_lock(arch_rwlock_t *rw) diff --git a/arch/hexagon/include/asm/spinlock.h b/arch/hexagon/include/asm/spinlock.h index 12ca4ebc0338..a1c55788c5d6 100644 --- a/arch/hexagon/include/asm/spinlock.h +++ b/arch/hexagon/include/asm/spinlock.h @@ -23,6 +23,8 @@ #define _ASM_SPINLOCK_H #include +#include +#include /* * This file is pulled in for SMP builds. @@ -176,8 +178,12 @@ static inline unsigned int arch_spin_trylock(arch_spinlock_t *lock) * SMP spinlocks are intended to allow only a single CPU at the lock */ #define arch_spin_lock_flags(lock, flags) arch_spin_lock(lock) -#define arch_spin_unlock_wait(lock) \ - do {while (arch_spin_is_locked(lock)) cpu_relax(); } while (0) + +static inline void arch_spin_unlock_wait(arch_spinlock_t *lock) +{ + smp_cond_load_acquire(&lock->lock, !VAL); +} + #define arch_spin_is_locked(x) ((x)->lock != 0) #define arch_read_lock_flags(lock, flags) arch_read_lock(lock) diff --git a/arch/ia64/include/asm/spinlock.h b/arch/ia64/include/asm/spinlock.h index 45698cd15b7b..ca9e76149a4a 100644 --- a/arch/ia64/include/asm/spinlock.h +++ b/arch/ia64/include/asm/spinlock.h @@ -15,6 +15,8 @@ #include #include +#include +#include #define arch_spin_lock_init(x) ((x)->lock = 0) @@ -86,6 +88,8 @@ static __always_inline void __ticket_spin_unlock_wait(arch_spinlock_t *lock) return; cpu_relax(); } + + smp_acquire__after_ctrl_dep(); } static inline int __ticket_spin_is_locked(arch_spinlock_t *lock) diff --git a/arch/m32r/include/asm/spinlock.h b/arch/m32r/include/asm/spinlock.h index fa13694eaae3..323c7fc953cd 100644 --- a/arch/m32r/include/asm/spinlock.h +++ b/arch/m32r/include/asm/spinlock.h @@ -13,6 +13,8 @@ #include #include #include +#include +#include /* * Your basic SMP spinlocks, allowing only a single CPU anywhere @@ -27,8 +29,11 @@ #define arch_spin_is_locked(x) (*(volatile int *)(&(x)->slock) <= 0) #define arch_spin_lock_flags(lock, flags) arch_spin_lock(lock) -#define arch_spin_unlock_wait(x) \ - do { cpu_relax(); } while (arch_spin_is_locked(x)) + +static inline void arch_spin_unlock_wait(arch_spinlock_t *lock) +{ + smp_cond_load_acquire(&lock->slock, VAL > 0); +} /** * arch_spin_trylock - Try spin lock and return a result diff --git a/arch/metag/include/asm/spinlock.h b/arch/metag/include/asm/spinlock.h index 86a7cf3d1386..c0c7a22be1ae 100644 --- a/arch/metag/include/asm/spinlock.h +++ b/arch/metag/include/asm/spinlock.h @@ -1,14 +1,24 @@ #ifndef 
__ASM_SPINLOCK_H #define __ASM_SPINLOCK_H +#include +#include + #ifdef CONFIG_METAG_ATOMICITY_LOCK1 #include #else #include #endif -#define arch_spin_unlock_wait(lock) \ - do { while (arch_spin_is_locked(lock)) cpu_relax(); } while (0) +/* + * both lock1 and lnkget are test-and-set spinlocks with 0 unlocked and 1 + * locked. + */ + +static inline void arch_spin_unlock_wait(arch_spinlock_t *lock) +{ + smp_cond_load_acquire(&lock->lock, !VAL); +} #define arch_spin_lock_flags(lock, flags) arch_spin_lock(lock) diff --git a/arch/mips/include/asm/spinlock.h b/arch/mips/include/asm/spinlock.h index 40196bebe849..f485afe51514 100644 --- a/arch/mips/include/asm/spinlock.h +++ b/arch/mips/include/asm/spinlock.h @@ -12,6 +12,7 @@ #include #include +#include #include #include @@ -48,8 +49,22 @@ static inline int arch_spin_value_unlocked(arch_spinlock_t lock) } #define arch_spin_lock_flags(lock, flags) arch_spin_lock(lock) -#define arch_spin_unlock_wait(x) \ - while (arch_spin_is_locked(x)) { cpu_relax(); } + +static inline void arch_spin_unlock_wait(arch_spinlock_t *lock) +{ + u16 owner = READ_ONCE(lock->h.serving_now); + smp_rmb(); + for (;;) { + arch_spinlock_t tmp = READ_ONCE(*lock); + + if (tmp.h.serving_now == tmp.h.ticket || + tmp.h.serving_now != owner) + break; + + cpu_relax(); + } + smp_acquire__after_ctrl_dep(); +} static inline int arch_spin_is_contended(arch_spinlock_t *lock) { diff --git a/arch/mn10300/include/asm/spinlock.h b/arch/mn10300/include/asm/spinlock.h index 1ae580f38933..9c7b8f7942d8 100644 --- a/arch/mn10300/include/asm/spinlock.h +++ b/arch/mn10300/include/asm/spinlock.h @@ -12,6 +12,8 @@ #define _ASM_SPINLOCK_H #include +#include +#include #include #include @@ -23,7 +25,11 @@ */ #define arch_spin_is_locked(x) (*(volatile signed char *)(&(x)->slock) != 0) -#define arch_spin_unlock_wait(x) do { barrier(); } while (arch_spin_is_locked(x)) + +static inline void arch_spin_unlock_wait(arch_spinlock_t *lock) +{ + smp_cond_load_acquire(&lock->slock, !VAL); +} static inline void arch_spin_unlock(arch_spinlock_t *lock) { diff --git a/arch/parisc/include/asm/spinlock.h b/arch/parisc/include/asm/spinlock.h index 64f2992e439f..e32936cd7f10 100644 --- a/arch/parisc/include/asm/spinlock.h +++ b/arch/parisc/include/asm/spinlock.h @@ -13,8 +13,13 @@ static inline int arch_spin_is_locked(arch_spinlock_t *x) } #define arch_spin_lock(lock) arch_spin_lock_flags(lock, 0) -#define arch_spin_unlock_wait(x) \ - do { cpu_relax(); } while (arch_spin_is_locked(x)) + +static inline void arch_spin_unlock_wait(arch_spinlock_t *x) +{ + volatile unsigned int *a = __ldcw_align(x); + + smp_cond_load_acquire(a, VAL); +} static inline void arch_spin_lock_flags(arch_spinlock_t *x, unsigned long flags) diff --git a/arch/s390/include/asm/spinlock.h b/arch/s390/include/asm/spinlock.h index 63ebf37d3143..7e9e09f600fa 100644 --- a/arch/s390/include/asm/spinlock.h +++ b/arch/s390/include/asm/spinlock.h @@ -10,6 +10,8 @@ #define __ASM_SPINLOCK_H #include +#include +#include #define SPINLOCK_LOCKVAL (S390_lowcore.spinlock_lockval) @@ -97,6 +99,7 @@ static inline void arch_spin_unlock_wait(arch_spinlock_t *lock) { while (arch_spin_is_locked(lock)) arch_spin_relax(lock); + smp_acquire__after_ctrl_dep(); } /* diff --git a/arch/sh/include/asm/spinlock.h b/arch/sh/include/asm/spinlock.h index bdc0f3b6c56a..416834b60ad0 100644 --- a/arch/sh/include/asm/spinlock.h +++ b/arch/sh/include/asm/spinlock.h @@ -19,14 +19,20 @@ #error "Need movli.l/movco.l for spinlocks" #endif +#include +#include + /* * Your basic SMP spinlocks, 
allowing only a single CPU anywhere */ #define arch_spin_is_locked(x) ((x)->lock <= 0) #define arch_spin_lock_flags(lock, flags) arch_spin_lock(lock) -#define arch_spin_unlock_wait(x) \ - do { while (arch_spin_is_locked(x)) cpu_relax(); } while (0) + +static inline void arch_spin_unlock_wait(arch_spinlock_t *lock) +{ + smp_cond_load_acquire(&lock->lock, VAL > 0); +} /* * Simple spin lock operations. There are two variants, one clears IRQ's diff --git a/arch/sparc/include/asm/spinlock_32.h b/arch/sparc/include/asm/spinlock_32.h index bcc98fc35281..d9c5876c6121 100644 --- a/arch/sparc/include/asm/spinlock_32.h +++ b/arch/sparc/include/asm/spinlock_32.h @@ -9,12 +9,15 @@ #ifndef __ASSEMBLY__ #include +#include #include /* for cpu_relax */ #define arch_spin_is_locked(lock) (*((volatile unsigned char *)(lock)) != 0) -#define arch_spin_unlock_wait(lock) \ - do { while (arch_spin_is_locked(lock)) cpu_relax(); } while (0) +static inline void arch_spin_unlock_wait(arch_spinlock_t *lock) +{ + smp_cond_load_acquire(&lock->lock, !VAL); +} static inline void arch_spin_lock(arch_spinlock_t *lock) { diff --git a/arch/sparc/include/asm/spinlock_64.h b/arch/sparc/include/asm/spinlock_64.h index 968917694978..87990b7c6b0d 100644 --- a/arch/sparc/include/asm/spinlock_64.h +++ b/arch/sparc/include/asm/spinlock_64.h @@ -8,6 +8,9 @@ #ifndef __ASSEMBLY__ +#include +#include + /* To get debugging spinlocks which detect and catch * deadlock situations, set CONFIG_DEBUG_SPINLOCK * and rebuild your kernel. @@ -23,9 +26,10 @@ #define arch_spin_is_locked(lp) ((lp)->lock != 0) -#define arch_spin_unlock_wait(lp) \ - do { rmb(); \ - } while((lp)->lock) +static inline void arch_spin_unlock_wait(arch_spinlock_t *lock) +{ + smp_cond_load_acquire(&lock->lock, !VAL); +} static inline void arch_spin_lock(arch_spinlock_t *lock) { diff --git a/arch/tile/lib/spinlock_32.c b/arch/tile/lib/spinlock_32.c index 88c2a53362e7..076c6cc43113 100644 --- a/arch/tile/lib/spinlock_32.c +++ b/arch/tile/lib/spinlock_32.c @@ -76,6 +76,12 @@ void arch_spin_unlock_wait(arch_spinlock_t *lock) do { delay_backoff(iterations++); } while (READ_ONCE(lock->current_ticket) == curr); + + /* + * The TILE architecture doesn't do read speculation; therefore + * a control dependency guarantees a LOAD->{LOAD,STORE} order. + */ + barrier(); } EXPORT_SYMBOL(arch_spin_unlock_wait); diff --git a/arch/tile/lib/spinlock_64.c b/arch/tile/lib/spinlock_64.c index c8d1f94ff1fe..a4b5b2cbce93 100644 --- a/arch/tile/lib/spinlock_64.c +++ b/arch/tile/lib/spinlock_64.c @@ -76,6 +76,12 @@ void arch_spin_unlock_wait(arch_spinlock_t *lock) do { delay_backoff(iterations++); } while (arch_spin_current(READ_ONCE(lock->lock)) == curr); + + /* + * The TILE architecture doesn't do read speculation; therefore + * a control dependency guarantees a LOAD->{LOAD,STORE} order. 
+ */ + barrier(); } EXPORT_SYMBOL(arch_spin_unlock_wait); diff --git a/arch/xtensa/include/asm/spinlock.h b/arch/xtensa/include/asm/spinlock.h index 1d95fa5dcd10..a36221cf6363 100644 --- a/arch/xtensa/include/asm/spinlock.h +++ b/arch/xtensa/include/asm/spinlock.h @@ -11,6 +11,9 @@ #ifndef _XTENSA_SPINLOCK_H #define _XTENSA_SPINLOCK_H +#include +#include + /* * spinlock * @@ -29,8 +32,11 @@ */ #define arch_spin_is_locked(x) ((x)->slock != 0) -#define arch_spin_unlock_wait(lock) \ - do { while (arch_spin_is_locked(lock)) cpu_relax(); } while (0) + +static inline void arch_spin_unlock_wait(arch_spinlock_t *lock) +{ + smp_cond_load_acquire(&lock->slock, !VAL); +} #define arch_spin_lock_flags(lock, flags) arch_spin_lock(lock) diff --git a/include/asm-generic/barrier.h b/include/asm-generic/barrier.h index ab7b0bd7d4dd..fe297b599b0a 100644 --- a/include/asm-generic/barrier.h +++ b/include/asm-generic/barrier.h @@ -194,7 +194,7 @@ do { \ }) #endif -#endif +#endif /* CONFIG_SMP */ /* Barriers for virtual machine guests when talking to an SMP host */ #define virt_mb() __smp_mb() diff --git a/include/linux/spinlock_up.h b/include/linux/spinlock_up.h index 8b3ac0d718eb..0d9848de677d 100644 --- a/include/linux/spinlock_up.h +++ b/include/linux/spinlock_up.h @@ -6,6 +6,7 @@ #endif #include /* for cpu_relax() */ +#include /* * include/linux/spinlock_up.h - UP-debug version of spinlocks. @@ -25,6 +26,11 @@ #ifdef CONFIG_DEBUG_SPINLOCK #define arch_spin_is_locked(x) ((x)->slock == 0) +static inline void arch_spin_unlock_wait(arch_spinlock_t *lock) +{ + smp_cond_load_acquire(&lock->slock, VAL); +} + static inline void arch_spin_lock(arch_spinlock_t *lock) { lock->slock = 0; @@ -67,6 +73,7 @@ static inline void arch_spin_unlock(arch_spinlock_t *lock) #else /* DEBUG_SPINLOCK */ #define arch_spin_is_locked(lock) ((void)(lock), 0) +#define arch_spin_unlock_wait(lock) do { barrier(); (void)(lock); } while (0) /* for sched/core.c and kernel_lock.c: */ # define arch_spin_lock(lock) do { barrier(); (void)(lock); } while (0) # define arch_spin_lock_flags(lock, flags) do { barrier(); (void)(lock); } while (0) @@ -79,7 +86,4 @@ static inline void arch_spin_unlock(arch_spinlock_t *lock) #define arch_read_can_lock(lock) (((void)(lock), 1)) #define arch_write_can_lock(lock) (((void)(lock), 1)) -#define arch_spin_unlock_wait(lock) \ - do { cpu_relax(); } while (arch_spin_is_locked(lock)) - #endif /* __LINUX_SPINLOCK_UP_H */ -- cgit v1.2.3 From f0702555b16d31d61dc758fac6efb994c3fe3ec6 Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Thu, 9 Jun 2016 13:57:04 -0700 Subject: x86/vdso/32: Assemble sigreturn.S separately sigreturn.S was historically included by the various __kernel_vsyscall implementations due to assumptions about all the 32-bit vDSO images having the sigreturn symbols at the same address. Those assumptions were removed in v3.16, and as of v4.4, there is only a single 32-bit vDSO left. Simplify the build process by assembling sigreturn.S into a normal object file. Signed-off-by: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. 
Peter Anvin Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/d7b6dfde3c7397aa26977320da90448363b5a7e9.1465505753.git.luto@kernel.org Signed-off-by: Ingo Molnar --- arch/x86/entry/vdso/Makefile | 5 +++-- arch/x86/entry/vdso/vdso32/sigreturn.S | 8 -------- arch/x86/entry/vdso/vdso32/system_call.S | 7 +------ 3 files changed, 4 insertions(+), 16 deletions(-) (limited to 'arch') diff --git a/arch/x86/entry/vdso/Makefile b/arch/x86/entry/vdso/Makefile index 253b72eaade6..68b63fddc209 100644 --- a/arch/x86/entry/vdso/Makefile +++ b/arch/x86/entry/vdso/Makefile @@ -134,7 +134,7 @@ VDSO_LDFLAGS_vdso32.lds = -m32 -Wl,-m,elf_i386 -Wl,-soname=linux-gate.so.1 override obj-dirs = $(dir $(obj)) $(obj)/vdso32/ targets += vdso32/vdso32.lds -targets += vdso32/note.o vdso32/vclock_gettime.o vdso32/system_call.o +targets += vdso32/note.o vdso32/system_call.o vdso32/sigreturn.o targets += vdso32/vclock_gettime.o KBUILD_AFLAGS_32 := $(filter-out -m64,$(KBUILD_AFLAGS)) -DBUILD_VDSO @@ -156,7 +156,8 @@ $(obj)/vdso32.so.dbg: FORCE \ $(obj)/vdso32/vdso32.lds \ $(obj)/vdso32/vclock_gettime.o \ $(obj)/vdso32/note.o \ - $(obj)/vdso32/system_call.o + $(obj)/vdso32/system_call.o \ + $(obj)/vdso32/sigreturn.o $(call if_changed,vdso) # diff --git a/arch/x86/entry/vdso/vdso32/sigreturn.S b/arch/x86/entry/vdso/vdso32/sigreturn.S index d7ec4e251c0a..20633e026e82 100644 --- a/arch/x86/entry/vdso/vdso32/sigreturn.S +++ b/arch/x86/entry/vdso/vdso32/sigreturn.S @@ -1,11 +1,3 @@ -/* - * Common code for the sigreturn entry points in vDSO images. - * So far this code is the same for both int80 and sysenter versions. - * This file is #include'd by int80.S et al to define them first thing. - * The kernel assumes that the addresses of these routines are constant - * for all vDSO implementations. - */ - #include #include #include diff --git a/arch/x86/entry/vdso/vdso32/system_call.S b/arch/x86/entry/vdso/vdso32/system_call.S index 0109ac6cb79c..ed4bc9731cbb 100644 --- a/arch/x86/entry/vdso/vdso32/system_call.S +++ b/arch/x86/entry/vdso/vdso32/system_call.S @@ -2,16 +2,11 @@ * AT_SYSINFO entry point */ +#include #include #include #include -/* - * First get the common code for the sigreturn entry points. - * This must come first. - */ -#include "sigreturn.S" - .text .globl __kernel_vsyscall .type __kernel_vsyscall,@function -- cgit v1.2.3 From a4455082dc6f0b5d51a23523f77600e8ede47c79 Mon Sep 17 00:00:00 2001 From: Dave Hansen Date: Wed, 8 Jun 2016 10:25:33 -0700 Subject: x86/signals: Add missing signal_compat code for x86 features The 32-bit siginfo is a different binary format than the 64-bit one. So, when running 32-bit binaries on 64-bit kernels, we have to convert the kernel's 64-bit version to a 32-bit version that userspace can grok. We've added a few features to siginfo over the past few years and neglected to add them to arch/x86/kernel/signal_compat.c: 1. The si_addr_lsb used in SIGBUS's sent for machine checks 2. The upper/lower bounds for MPX SIGSEGV faults 3. The protection key for pkey faults I caught this with some protection keys unit tests and realized it affected a few more features. This was tested only with my protection keys patch that looks for a proper value in si_pkey. I didn't actually test the machine check or MPX code. Signed-off-by: Dave Hansen Cc: Al Viro Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Dave Hansen Cc: Denys Vlasenko Cc: H. 
Peter Anvin Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Tony Luck Cc: linux-edac@vger.kernel.org Link: http://lkml.kernel.org/r/20160608172533.F8F05637@viggo.jf.intel.com Signed-off-by: Ingo Molnar --- arch/x86/include/asm/compat.h | 11 +++++++++++ arch/x86/kernel/signal_compat.c | 15 +++++++++++++++ 2 files changed, 26 insertions(+) (limited to 'arch') diff --git a/arch/x86/include/asm/compat.h b/arch/x86/include/asm/compat.h index 5a3b2c119ed0..a18806165fe4 100644 --- a/arch/x86/include/asm/compat.h +++ b/arch/x86/include/asm/compat.h @@ -40,6 +40,7 @@ typedef s32 compat_long_t; typedef s64 __attribute__((aligned(4))) compat_s64; typedef u32 compat_uint_t; typedef u32 compat_ulong_t; +typedef u32 compat_u32; typedef u64 __attribute__((aligned(4))) compat_u64; typedef u32 compat_uptr_t; @@ -181,6 +182,16 @@ typedef struct compat_siginfo { /* SIGILL, SIGFPE, SIGSEGV, SIGBUS */ struct { unsigned int _addr; /* faulting insn/memory ref. */ + short int _addr_lsb; /* Valid LSB of the reported address. */ + union { + /* used when si_code=SEGV_BNDERR */ + struct { + compat_uptr_t _lower; + compat_uptr_t _upper; + } _addr_bnd; + /* used when si_code=SEGV_PKUERR */ + compat_u32 _pkey; + }; } _sigfault; /* SIGPOLL */ diff --git a/arch/x86/kernel/signal_compat.c b/arch/x86/kernel/signal_compat.c index dc3c0b1c816f..5335ad96a290 100644 --- a/arch/x86/kernel/signal_compat.c +++ b/arch/x86/kernel/signal_compat.c @@ -32,6 +32,21 @@ int copy_siginfo_to_user32(compat_siginfo_t __user *to, const siginfo_t *from) &to->_sifields._pad[0]); switch (from->si_code >> 16) { case __SI_FAULT >> 16: + if (from->si_signo == SIGBUS && + (from->si_code == BUS_MCEERR_AR || + from->si_code == BUS_MCEERR_AO)) + put_user_ex(from->si_addr_lsb, &to->si_addr_lsb); + + if (from->si_signo == SIGSEGV) { + if (from->si_code == SEGV_BNDERR) { + compat_uptr_t lower = (unsigned long)&to->si_lower; + compat_uptr_t upper = (unsigned long)&to->si_upper; + put_user_ex(lower, &to->si_lower); + put_user_ex(upper, &to->si_upper); + } + if (from->si_code == SEGV_PKUERR) + put_user_ex(from->si_pkey, &to->si_pkey); + } break; case __SI_SYS >> 16: put_user_ex(from->si_syscall, &to->si_syscall); -- cgit v1.2.3 From 02e8fda2cc00419a11cf38199afea4c0d7172be8 Mon Sep 17 00:00:00 2001 From: Dave Hansen Date: Wed, 8 Jun 2016 10:25:34 -0700 Subject: x86/signals: Add build-time checks to the siginfo compat code There were at least 3 features added to the __SI_FAULT area of the siginfo struct that did not make it to the compat siginfo: 1. The si_addr_lsb used in SIGBUS's sent for machine checks 2. The upper/lower bounds for MPX SIGSEGV faults 3. The protection key for pkey faults There was also some turmoil when I was attempting to add the pkey field because it needs to be a fixed size on 32 and 64-bit and not have any alignment constraints. This patch adds some compile-time checks to the compat code to make it harder to screw this up. Basically, the checks are supposed to trip any time someone changes the siginfo structure. That sounds bad, but it's what we want. If someone changes siginfo, we want them to also be _forced_ to go look at the compat code. The details are in the comments. Signed-off-by: Dave Hansen Cc: Al Viro Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Dave Hansen Cc: Denys Vlasenko Cc: H. 
Peter Anvin Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Tony Luck Cc: linux-edac@vger.kernel.org Link: http://lkml.kernel.org/r/20160608172534.C73DAFC3@viggo.jf.intel.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/signal_compat.c | 93 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 93 insertions(+) (limited to 'arch') diff --git a/arch/x86/kernel/signal_compat.c b/arch/x86/kernel/signal_compat.c index 5335ad96a290..b44564bf86a8 100644 --- a/arch/x86/kernel/signal_compat.c +++ b/arch/x86/kernel/signal_compat.c @@ -1,11 +1,104 @@ #include #include +/* + * The compat_siginfo_t structure and handing code is very easy + * to break in several ways. It must always be updated when new + * updates are made to the main siginfo_t, and + * copy_siginfo_to_user32() must be updated when the + * (arch-independent) copy_siginfo_to_user() is updated. + * + * It is also easy to put a new member in the compat_siginfo_t + * which has implicit alignment which can move internal structure + * alignment around breaking the ABI. This can happen if you, + * for instance, put a plain 64-bit value in there. + */ +static inline void signal_compat_build_tests(void) +{ + int _sifields_offset = offsetof(compat_siginfo_t, _sifields); + + /* + * If adding a new si_code, there is probably new data in + * the siginfo. Make sure folks bumping the si_code + * limits also have to look at this code. Make sure any + * new fields are handled in copy_siginfo_to_user32()! + */ + BUILD_BUG_ON(NSIGILL != 8); + BUILD_BUG_ON(NSIGFPE != 8); + BUILD_BUG_ON(NSIGSEGV != 4); + BUILD_BUG_ON(NSIGBUS != 5); + BUILD_BUG_ON(NSIGTRAP != 4); + BUILD_BUG_ON(NSIGCHLD != 6); + BUILD_BUG_ON(NSIGSYS != 1); + + /* This is part of the ABI and can never change in size: */ + BUILD_BUG_ON(sizeof(compat_siginfo_t) != 128); + /* + * The offsets of all the (unioned) si_fields are fixed + * in the ABI, of course. Make sure none of them ever + * move and are always at the beginning: + */ + BUILD_BUG_ON(offsetof(compat_siginfo_t, _sifields) != 3 * sizeof(int)); +#define CHECK_CSI_OFFSET(name) BUILD_BUG_ON(_sifields_offset != offsetof(compat_siginfo_t, _sifields.name)) + + /* + * Ensure that the size of each si_field never changes. + * If it does, it is a sign that the + * copy_siginfo_to_user32() code below needs to updated + * along with the size in the CHECK_SI_SIZE(). + * + * We repeat this check for both the generic and compat + * siginfos. + * + * Note: it is OK for these to grow as long as the whole + * structure stays within the padding size (checked + * above). 
+ */ +#define CHECK_CSI_SIZE(name, size) BUILD_BUG_ON(size != sizeof(((compat_siginfo_t *)0)->_sifields.name)) +#define CHECK_SI_SIZE(name, size) BUILD_BUG_ON(size != sizeof(((siginfo_t *)0)->_sifields.name)) + + CHECK_CSI_OFFSET(_kill); + CHECK_CSI_SIZE (_kill, 2*sizeof(int)); + CHECK_SI_SIZE (_kill, 2*sizeof(int)); + + CHECK_CSI_OFFSET(_timer); + CHECK_CSI_SIZE (_timer, 5*sizeof(int)); + CHECK_SI_SIZE (_timer, 6*sizeof(int)); + + CHECK_CSI_OFFSET(_rt); + CHECK_CSI_SIZE (_rt, 3*sizeof(int)); + CHECK_SI_SIZE (_rt, 4*sizeof(int)); + + CHECK_CSI_OFFSET(_sigchld); + CHECK_CSI_SIZE (_sigchld, 5*sizeof(int)); + CHECK_SI_SIZE (_sigchld, 8*sizeof(int)); + + CHECK_CSI_OFFSET(_sigchld_x32); + CHECK_CSI_SIZE (_sigchld_x32, 7*sizeof(int)); + /* no _sigchld_x32 in the generic siginfo_t */ + + CHECK_CSI_OFFSET(_sigfault); + CHECK_CSI_SIZE (_sigfault, 4*sizeof(int)); + CHECK_SI_SIZE (_sigfault, 8*sizeof(int)); + + CHECK_CSI_OFFSET(_sigpoll); + CHECK_CSI_SIZE (_sigpoll, 2*sizeof(int)); + CHECK_SI_SIZE (_sigpoll, 4*sizeof(int)); + + CHECK_CSI_OFFSET(_sigsys); + CHECK_CSI_SIZE (_sigsys, 3*sizeof(int)); + CHECK_SI_SIZE (_sigsys, 4*sizeof(int)); + + /* any new si_fields should be added here */ +} + int copy_siginfo_to_user32(compat_siginfo_t __user *to, const siginfo_t *from) { int err = 0; bool ia32 = test_thread_flag(TIF_IA32); + signal_compat_build_tests(); + if (!access_ok(VERIFY_WRITE, to, sizeof(compat_siginfo_t))) return -EFAULT; -- cgit v1.2.3 From bb27570525a71f48347ed0e0c265063e7952bb61 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Mon, 13 Jun 2016 21:28:00 +0300 Subject: x86/platform/intel_mid_pci: Rework IRQ0 workaround On Intel Merrifield platform several PCI devices have a bogus configuration, i.e. the IRQ0 had been assigned to few of them. These are PCI root bridge, eMMC0, HS UART common registers, PWM, and HDMI. The actual interrupt line can be allocated to one device exclusively, in our case to eMMC0, the rest should cope without it and basically known drivers for them are not using interrupt line at all. Rework IRQ0 workaround, which was previously done to avoid conflict between eMMC0 and HS UART common registers, to behave differently based on the device in question, i.e. allocate interrupt line to eMMC0, but silently skip interrupt allocation for the rest except HS UART common registers which are not used anyway. With this rework IOSF MBI driver in particular would be used. Signed-off-by: Andy Shevchenko Acked-by: Thomas Gleixner Cc: Bjorn Helgaas Cc: Linus Torvalds Cc: Peter Zijlstra Fixes: 39d9b77b8deb ("x86/pci/intel_mid_pci: Work around for IRQ0 assignment") Link: http://lkml.kernel.org/r/1465842481-136852-1-git-send-email-andriy.shevchenko@linux.intel.com Signed-off-by: Ingo Molnar --- arch/x86/pci/intel_mid_pci.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/x86/pci/intel_mid_pci.c b/arch/x86/pci/intel_mid_pci.c index 8b93e634af84..ae97f24a4371 100644 --- a/arch/x86/pci/intel_mid_pci.c +++ b/arch/x86/pci/intel_mid_pci.c @@ -37,6 +37,7 @@ /* Quirks for the listed devices */ #define PCI_DEVICE_ID_INTEL_MRFL_MMC 0x1190 +#define PCI_DEVICE_ID_INTEL_MRFL_HSU 0x1191 /* Fixed BAR fields */ #define PCIE_VNDR_CAP_ID_FIXED_BAR 0x00 /* Fixed BAR (TBD) */ @@ -224,14 +225,21 @@ static int intel_mid_pci_irq_enable(struct pci_dev *dev) /* Special treatment for IRQ0 */ if (dev->irq == 0) { + /* + * Skip HS UART common registers device since it has + * IRQ0 assigned and not used by the kernel. 
+ */ + if (dev->device == PCI_DEVICE_ID_INTEL_MRFL_HSU) + return -EBUSY; /* * TNG has IRQ0 assigned to eMMC controller. But there * are also other devices with bogus PCI configuration * that have IRQ0 assigned. This check ensures that - * eMMC gets it. + * eMMC gets it. The rest of devices still could be + * enabled without interrupt line being allocated. */ if (dev->device != PCI_DEVICE_ID_INTEL_MRFL_MMC) - return -EBUSY; + return 0; } break; default: -- cgit v1.2.3 From 9485f8b6a75921e1b9e94b001cdb45872a598534 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Mon, 13 Jun 2016 21:28:01 +0300 Subject: x86/platform/atom/punit: Enable support for Merrifield Intel Merrifield platform has Punit generation that somehow compatible to what is already supported by punit_atom_debug driver. Add necessary bits to enable that support. Signed-off-by: Andy Shevchenko Acked-by: Thomas Gleixner Cc: Bjorn Helgaas Cc: Linus Torvalds Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1465842481-136852-2-git-send-email-andriy.shevchenko@linux.intel.com Signed-off-by: Ingo Molnar --- arch/x86/platform/atom/punit_atom_debug.c | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/x86/platform/atom/punit_atom_debug.c b/arch/x86/platform/atom/punit_atom_debug.c index 109782996867..8ff7b9355416 100644 --- a/arch/x86/platform/atom/punit_atom_debug.c +++ b/arch/x86/platform/atom/punit_atom_debug.c @@ -26,8 +26,6 @@ #include #include -/* Power gate status reg */ -#define PWRGT_STATUS 0x61 /* Subsystem config/status Video processor */ #define VED_SS_PM0 0x32 /* Subsystem config/status ISP (Image Signal Processor) */ @@ -36,12 +34,16 @@ #define MIO_SS_PM 0x3B /* Shift bits for getting status for video, isp and i/o */ #define SSS_SHIFT 24 + +/* Power gate status reg */ +#define PWRGT_STATUS 0x61 /* Shift bits for getting status for graphics rendering */ #define RENDER_POS 0 /* Shift bits for getting status for media control */ #define MEDIA_POS 2 /* Shift bits for getting status for Valley View/Baytrail display */ #define VLV_DISPLAY_POS 6 + /* Subsystem config/status display for Cherry Trail SOC */ #define CHT_DSP_SSS 0x36 /* Shift bits for getting status for display */ @@ -53,6 +55,14 @@ struct punit_device { int sss_pos; }; +static const struct punit_device punit_device_tng[] = { + { "DISPLAY", CHT_DSP_SSS, SSS_SHIFT }, + { "VED", VED_SS_PM0, SSS_SHIFT }, + { "ISP", ISP_SS_PM0, SSS_SHIFT }, + { "MIO", MIO_SS_PM, SSS_SHIFT }, + { NULL } +}; + static const struct punit_device punit_device_byt[] = { { "GFX RENDER", PWRGT_STATUS, RENDER_POS }, { "GFX MEDIA", PWRGT_STATUS, MEDIA_POS }, @@ -145,6 +155,7 @@ static void punit_dbgfs_unregister(void) static const struct x86_cpu_id intel_punit_cpu_ids[] = { ICPU(INTEL_FAM6_ATOM_SILVERMONT1, punit_device_byt), + ICPU(INTEL_FAM6_ATOM_MERRIFIELD1, punit_device_tng), ICPU(INTEL_FAM6_ATOM_AIRMONT, punit_device_cht), {} }; -- cgit v1.2.3 From 5823d0893ec284f37902e2ecd332dbb396a143d1 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Tue, 14 Jun 2016 21:29:45 +0300 Subject: x86/platform/intel-mid: Add Power Management Unit driver Add Power Management Unit driver to handle power states of South Complex devices on Intel Tangier. In the future it might be expanded to cover North Complex devices as well. With this driver the power state of the host controllers such as SPI, I2C, UART, eMMC, and DMA would be managed. 
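For a driver sitting on one of these South Complex devices nothing changes at the source level; a plain PCI power-state request is simply routed through the new platform hooks. A minimal sketch (the example_suspend() helper is hypothetical, the PCI calls are the stock ones):

/*
 * Hypothetical suspend path for an LSS-mapped device: the ordinary
 * pci_set_power_state() call is intercepted by the pci_platform_pm_ops
 * registered in drivers/pci/pci-mid.c, which forwards it to
 * intel_mid_pci_set_power_state() and lets the PWRMU gate the LSS.
 */
static int example_suspend(struct pci_dev *pdev)
{
	pci_save_state(pdev);
	return pci_set_power_state(pdev, PCI_D3hot);
}
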
Signed-off-by: Andy Shevchenko Cc: Bjorn Helgaas Cc: David Cohen Cc: Linus Torvalds Cc: Mika Westerberg Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-pci@vger.kernel.org Link: http://lkml.kernel.org/r/1465928985-12113-1-git-send-email-andriy.shevchenko@linux.intel.com [ Minor readability edits. ] Signed-off-by: Ingo Molnar --- arch/x86/include/asm/intel-mid.h | 8 + arch/x86/pci/intel_mid_pci.c | 40 +++- arch/x86/platform/intel-mid/Makefile | 2 +- arch/x86/platform/intel-mid/pwr.c | 416 +++++++++++++++++++++++++++++++++++ drivers/pci/Makefile | 3 + drivers/pci/pci-mid.c | 77 +++++++ 6 files changed, 540 insertions(+), 6 deletions(-) create mode 100644 arch/x86/platform/intel-mid/pwr.c create mode 100644 drivers/pci/pci-mid.c (limited to 'arch') diff --git a/arch/x86/include/asm/intel-mid.h b/arch/x86/include/asm/intel-mid.h index 7c5af123bdbd..38498a4fb44f 100644 --- a/arch/x86/include/asm/intel-mid.h +++ b/arch/x86/include/asm/intel-mid.h @@ -12,9 +12,17 @@ #define _ASM_X86_INTEL_MID_H #include +#include #include extern int intel_mid_pci_init(void); +extern int intel_mid_pci_set_power_state(struct pci_dev *pdev, pci_power_t state); + +#define INTEL_MID_PWR_LSS_OFFSET 4 +#define INTEL_MID_PWR_LSS_TYPE (1 << 7) + +extern int intel_mid_pwr_get_lss_id(struct pci_dev *pdev); + extern int get_gpio_by_name(const char *name); extern void intel_scu_device_register(struct platform_device *pdev); extern int __init sfi_parse_mrtc(struct sfi_table_header *table); diff --git a/arch/x86/pci/intel_mid_pci.c b/arch/x86/pci/intel_mid_pci.c index ae97f24a4371..a9710433be4d 100644 --- a/arch/x86/pci/intel_mid_pci.c +++ b/arch/x86/pci/intel_mid_pci.c @@ -316,14 +316,44 @@ static void pci_d3delay_fixup(struct pci_dev *dev) } DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, PCI_ANY_ID, pci_d3delay_fixup); -static void mrst_power_off_unused_dev(struct pci_dev *dev) +static void mid_power_off_dev(struct pci_dev *dev) { + u16 pmcsr; + + /* + * Update current state first, otherwise PCI core enforces PCI_D0 in + * pci_set_power_state() for devices which status was PCI_UNKNOWN. + */ + pci_read_config_word(dev, dev->pm_cap + PCI_PM_CTRL, &pmcsr); + dev->current_state = (pci_power_t __force)(pmcsr & PCI_PM_CTRL_STATE_MASK); + pci_set_power_state(dev, PCI_D3hot); } -DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x0801, mrst_power_off_unused_dev); -DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x0809, mrst_power_off_unused_dev); -DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x080C, mrst_power_off_unused_dev); -DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x0815, mrst_power_off_unused_dev); + +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x0801, mid_power_off_dev); +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x0809, mid_power_off_dev); +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x080C, mid_power_off_dev); +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x0815, mid_power_off_dev); + +static void mrfld_power_off_dev(struct pci_dev *dev) +{ + int id; + + if (!pci_soc_mode) + return; + + id = intel_mid_pwr_get_lss_id(dev); + if (id < 0) + return; + + /* + * This sets only PMCSR bits. The actual power off will happen in + * arch/x86/platform/intel-mid/pwr.c. + */ + mid_power_off_dev(dev); +} + +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, PCI_ANY_ID, mrfld_power_off_dev); /* * Langwell devices reside at fixed offsets, don't try to move them. 
diff --git a/arch/x86/platform/intel-mid/Makefile b/arch/x86/platform/intel-mid/Makefile index 0ce1b1913673..aebb5b9ea80a 100644 --- a/arch/x86/platform/intel-mid/Makefile +++ b/arch/x86/platform/intel-mid/Makefile @@ -1,4 +1,4 @@ -obj-$(CONFIG_X86_INTEL_MID) += intel-mid.o intel_mid_vrtc.o mfld.o mrfl.o +obj-$(CONFIG_X86_INTEL_MID) += intel-mid.o intel_mid_vrtc.o mfld.o mrfl.o pwr.o # SFI specific code ifdef CONFIG_X86_INTEL_MID diff --git a/arch/x86/platform/intel-mid/pwr.c b/arch/x86/platform/intel-mid/pwr.c new file mode 100644 index 000000000000..59faf05d23f5 --- /dev/null +++ b/arch/x86/platform/intel-mid/pwr.c @@ -0,0 +1,416 @@ +/* + * Intel MID Power Management Unit (PWRMU) device driver + * + * Copyright (C) 2016, Intel Corporation + * + * Author: Andy Shevchenko + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * Intel MID Power Management Unit device driver handles the South Complex PCI + * devices such as GPDMA, SPI, I2C, PWM, and so on. By default PCI core + * modifies bits in PMCSR register in the PCI configuration space. This is not + * enough on some SoCs like Intel Tangier. In such case PCI core sets a new + * power state of the device in question through a PM hook registered in struct + * pci_platform_pm_ops (see drivers/pci/pci-mid.c). + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +/* Registers */ +#define PM_STS 0x00 +#define PM_CMD 0x04 +#define PM_ICS 0x08 +#define PM_WKC(x) (0x10 + (x) * 4) +#define PM_WKS(x) (0x18 + (x) * 4) +#define PM_SSC(x) (0x20 + (x) * 4) +#define PM_SSS(x) (0x30 + (x) * 4) + +/* Bits in PM_STS */ +#define PM_STS_BUSY (1 << 8) + +/* Bits in PM_CMD */ +#define PM_CMD_CMD(x) ((x) << 0) +#define PM_CMD_IOC (1 << 8) +#define PM_CMD_D3cold (1 << 21) + +/* List of commands */ +#define CMD_SET_CFG 0x01 + +/* Bits in PM_ICS */ +#define PM_ICS_INT_STATUS(x) ((x) & 0xff) +#define PM_ICS_IE (1 << 8) +#define PM_ICS_IP (1 << 9) +#define PM_ICS_SW_INT_STS (1 << 10) + +/* List of interrupts */ +#define INT_INVALID 0 +#define INT_CMD_COMPLETE 1 +#define INT_CMD_ERR 2 +#define INT_WAKE_EVENT 3 +#define INT_LSS_POWER_ERR 4 +#define INT_S0iX_MSG_ERR 5 +#define INT_NO_C6 6 +#define INT_TRIGGER_ERR 7 +#define INT_INACTIVITY 8 + +/* South Complex devices */ +#define LSS_MAX_SHARED_DEVS 4 +#define LSS_MAX_DEVS 64 + +#define LSS_WS_BITS 1 /* wake state width */ +#define LSS_PWS_BITS 2 /* power state width */ + +/* Supported device IDs */ +#define PCI_DEVICE_ID_TANGIER 0x11a1 + +struct mid_pwr_dev { + struct pci_dev *pdev; + pci_power_t state; +}; + +struct mid_pwr { + struct device *dev; + void __iomem *regs; + int irq; + bool available; + + struct mutex lock; + struct mid_pwr_dev lss[LSS_MAX_DEVS][LSS_MAX_SHARED_DEVS]; +}; + +static struct mid_pwr *midpwr; + +static u32 mid_pwr_get_state(struct mid_pwr *pwr, int reg) +{ + return readl(pwr->regs + PM_SSS(reg)); +} + +static void mid_pwr_set_state(struct mid_pwr *pwr, int reg, u32 value) +{ + writel(value, pwr->regs + PM_SSC(reg)); +} + +static void mid_pwr_set_wake(struct mid_pwr *pwr, int reg, u32 value) +{ + writel(value, pwr->regs + PM_WKC(reg)); +} + +static void mid_pwr_interrupt_disable(struct mid_pwr *pwr) +{ + writel(~PM_ICS_IE, pwr->regs + PM_ICS); +} + +static bool mid_pwr_is_busy(struct mid_pwr *pwr) +{ + return !!(readl(pwr->regs + PM_STS) & 
PM_STS_BUSY); +} + +/* Wait 500ms that the latest PWRMU command finished */ +static int mid_pwr_wait(struct mid_pwr *pwr) +{ + unsigned int count = 500000; + bool busy; + + do { + busy = mid_pwr_is_busy(pwr); + if (!busy) + return 0; + udelay(1); + } while (--count); + + return -EBUSY; +} + +static int mid_pwr_wait_for_cmd(struct mid_pwr *pwr, u8 cmd) +{ + writel(PM_CMD_CMD(cmd), pwr->regs + PM_CMD); + return mid_pwr_wait(pwr); +} + +static int __update_power_state(struct mid_pwr *pwr, int reg, int bit, int new) +{ + int curstate; + u32 power; + int ret; + + /* Check if the device is already in desired state */ + power = mid_pwr_get_state(pwr, reg); + curstate = (power >> bit) & 3; + if (curstate == new) + return 0; + + /* Update the power state */ + mid_pwr_set_state(pwr, reg, (power & ~(3 << bit)) | (new << bit)); + + /* Send command to SCU */ + ret = mid_pwr_wait_for_cmd(pwr, CMD_SET_CFG); + if (ret) + return ret; + + /* Check if the device is already in desired state */ + power = mid_pwr_get_state(pwr, reg); + curstate = (power >> bit) & 3; + if (curstate != new) + return -EAGAIN; + + return 0; +} + +static pci_power_t __find_weakest_power_state(struct mid_pwr_dev *lss, + struct pci_dev *pdev, + pci_power_t state) +{ + pci_power_t weakest = PCI_D3hot; + unsigned int j; + + /* Find device in cache or first free cell */ + for (j = 0; j < LSS_MAX_SHARED_DEVS; j++) { + if (lss[j].pdev == pdev || !lss[j].pdev) + break; + } + + /* Store the desired state in cache */ + if (j < LSS_MAX_SHARED_DEVS) { + lss[j].pdev = pdev; + lss[j].state = state; + } else { + dev_WARN(&pdev->dev, "No room for device in PWRMU LSS cache\n"); + weakest = state; + } + + /* Find the power state we may use */ + for (j = 0; j < LSS_MAX_SHARED_DEVS; j++) { + if (lss[j].state < weakest) + weakest = lss[j].state; + } + + return weakest; +} + +static int __set_power_state(struct mid_pwr *pwr, struct pci_dev *pdev, + pci_power_t state, int id, int reg, int bit) +{ + const char *name; + int ret; + + state = __find_weakest_power_state(pwr->lss[id], pdev, state); + name = pci_power_name(state); + + ret = __update_power_state(pwr, reg, bit, (__force int)state); + if (ret) { + dev_warn(&pdev->dev, "Can't set power state %s: %d\n", name, ret); + return ret; + } + + dev_vdbg(&pdev->dev, "Set power state %s\n", name); + return 0; +} + +static int mid_pwr_set_power_state(struct mid_pwr *pwr, struct pci_dev *pdev, + pci_power_t state) +{ + int id, reg, bit; + int ret; + + id = intel_mid_pwr_get_lss_id(pdev); + if (id < 0) + return id; + + reg = (id * LSS_PWS_BITS) / 32; + bit = (id * LSS_PWS_BITS) % 32; + + /* We support states between PCI_D0 and PCI_D3hot */ + if (state < PCI_D0) + state = PCI_D0; + if (state > PCI_D3hot) + state = PCI_D3hot; + + mutex_lock(&pwr->lock); + ret = __set_power_state(pwr, pdev, state, id, reg, bit); + mutex_unlock(&pwr->lock); + return ret; +} + +int intel_mid_pci_set_power_state(struct pci_dev *pdev, pci_power_t state) +{ + struct mid_pwr *pwr = midpwr; + int ret = 0; + + might_sleep(); + + if (pwr && pwr->available) + ret = mid_pwr_set_power_state(pwr, pdev, state); + dev_vdbg(&pdev->dev, "set_power_state() returns %d\n", ret); + + return 0; +} +EXPORT_SYMBOL_GPL(intel_mid_pci_set_power_state); + +int intel_mid_pwr_get_lss_id(struct pci_dev *pdev) +{ + int vndr; + u8 id; + + /* + * Mapping to PWRMU index is kept in the Logical SubSystem ID byte of + * Vendor capability. 
+ */ + vndr = pci_find_capability(pdev, PCI_CAP_ID_VNDR); + if (!vndr) + return -EINVAL; + + /* Read the Logical SubSystem ID byte */ + pci_read_config_byte(pdev, vndr + INTEL_MID_PWR_LSS_OFFSET, &id); + if (!(id & INTEL_MID_PWR_LSS_TYPE)) + return -ENODEV; + + id &= ~INTEL_MID_PWR_LSS_TYPE; + if (id >= LSS_MAX_DEVS) + return -ERANGE; + + return id; +} + +static irqreturn_t mid_pwr_irq_handler(int irq, void *dev_id) +{ + struct mid_pwr *pwr = dev_id; + u32 ics; + + ics = readl(pwr->regs + PM_ICS); + if (!(ics & PM_ICS_IP)) + return IRQ_NONE; + + writel(ics | PM_ICS_IP, pwr->regs + PM_ICS); + + dev_warn(pwr->dev, "Unexpected IRQ: %#x\n", PM_ICS_INT_STATUS(ics)); + return IRQ_HANDLED; +} + +struct mid_pwr_device_info { + int (*set_initial_state)(struct mid_pwr *pwr); +}; + +static int mid_pwr_probe(struct pci_dev *pdev, const struct pci_device_id *id) +{ + struct mid_pwr_device_info *info = (void *)id->driver_data; + struct device *dev = &pdev->dev; + struct mid_pwr *pwr; + int ret; + + ret = pcim_enable_device(pdev); + if (ret < 0) { + dev_err(&pdev->dev, "error: could not enable device\n"); + return ret; + } + + ret = pcim_iomap_regions(pdev, 1 << 0, pci_name(pdev)); + if (ret) { + dev_err(&pdev->dev, "I/O memory remapping failed\n"); + return ret; + } + + pwr = devm_kzalloc(dev, sizeof(*pwr), GFP_KERNEL); + if (!pwr) + return -ENOMEM; + + pwr->dev = dev; + pwr->regs = pcim_iomap_table(pdev)[0]; + pwr->irq = pdev->irq; + + mutex_init(&pwr->lock); + + /* Disable interrupts */ + mid_pwr_interrupt_disable(pwr); + + if (info && info->set_initial_state) { + ret = info->set_initial_state(pwr); + if (ret) + dev_warn(dev, "Can't set initial state: %d\n", ret); + } + + ret = devm_request_irq(dev, pdev->irq, mid_pwr_irq_handler, + IRQF_NO_SUSPEND, pci_name(pdev), pwr); + if (ret) + return ret; + + pwr->available = true; + midpwr = pwr; + + pci_set_drvdata(pdev, pwr); + return 0; +} + +static int tng_set_initial_state(struct mid_pwr *pwr) +{ + unsigned int i, j; + int ret; + + /* + * Enable wake events. + * + * PWRMU supports up to 32 sources for wake up the system. Ungate them + * all here. + */ + mid_pwr_set_wake(pwr, 0, 0xffffffff); + mid_pwr_set_wake(pwr, 1, 0xffffffff); + + /* + * Power off South Complex devices. + * + * There is a map (see a note below) of 64 devices with 2 bits per each + * on 32-bit HW registers. The following calls set all devices to one + * known initial state, i.e. PCI_D3hot. This is done in conjunction + * with PMCSR setting in arch/x86/pci/intel_mid_pci.c. + * + * NOTE: The actual device mapping is provided by a platform at run + * time using vendor capability of PCI configuration space. 
+ */ + mid_pwr_set_state(pwr, 0, 0xffffffff); + mid_pwr_set_state(pwr, 1, 0xffffffff); + mid_pwr_set_state(pwr, 2, 0xffffffff); + mid_pwr_set_state(pwr, 3, 0xffffffff); + + /* Send command to SCU */ + ret = mid_pwr_wait_for_cmd(pwr, CMD_SET_CFG); + if (ret) + return ret; + + for (i = 0; i < LSS_MAX_DEVS; i++) { + for (j = 0; j < LSS_MAX_SHARED_DEVS; j++) + pwr->lss[i][j].state = PCI_D3hot; + } + + return 0; +} + +static const struct mid_pwr_device_info tng_info = { + .set_initial_state = tng_set_initial_state, +}; + +static const struct pci_device_id mid_pwr_pci_ids[] = { + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_TANGIER), (kernel_ulong_t)&tng_info }, + {} +}; +MODULE_DEVICE_TABLE(pci, mid_pwr_pci_ids); + +static struct pci_driver mid_pwr_pci_driver = { + .name = "intel_mid_pwr", + .probe = mid_pwr_probe, + .id_table = mid_pwr_pci_ids, +}; + +builtin_pci_driver(mid_pwr_pci_driver); diff --git a/drivers/pci/Makefile b/drivers/pci/Makefile index 1fa6925733d3..8db5079f09a7 100644 --- a/drivers/pci/Makefile +++ b/drivers/pci/Makefile @@ -51,6 +51,9 @@ obj-$(CONFIG_ACPI) += pci-acpi.o # SMBIOS provided firmware instance and labels obj-$(CONFIG_PCI_LABEL) += pci-label.o +# Intel MID platform PM support +obj-$(CONFIG_X86_INTEL_MID) += pci-mid.o + obj-$(CONFIG_PCI_SYSCALL) += syscall.o obj-$(CONFIG_PCI_STUB) += pci-stub.o diff --git a/drivers/pci/pci-mid.c b/drivers/pci/pci-mid.c new file mode 100644 index 000000000000..c878aa71173b --- /dev/null +++ b/drivers/pci/pci-mid.c @@ -0,0 +1,77 @@ +/* + * Intel MID platform PM support + * + * Copyright (C) 2016, Intel Corporation + * + * Author: Andy Shevchenko + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + */ + +#include +#include + +#include +#include +#include + +#include "pci.h" + +static bool mid_pci_power_manageable(struct pci_dev *dev) +{ + return true; +} + +static int mid_pci_set_power_state(struct pci_dev *pdev, pci_power_t state) +{ + return intel_mid_pci_set_power_state(pdev, state); +} + +static pci_power_t mid_pci_choose_state(struct pci_dev *pdev) +{ + return PCI_D3hot; +} + +static int mid_pci_sleep_wake(struct pci_dev *dev, bool enable) +{ + return 0; +} + +static int mid_pci_run_wake(struct pci_dev *dev, bool enable) +{ + return 0; +} + +static bool mid_pci_need_resume(struct pci_dev *dev) +{ + return false; +} + +static struct pci_platform_pm_ops mid_pci_platform_pm = { + .is_manageable = mid_pci_power_manageable, + .set_state = mid_pci_set_power_state, + .choose_state = mid_pci_choose_state, + .sleep_wake = mid_pci_sleep_wake, + .run_wake = mid_pci_run_wake, + .need_resume = mid_pci_need_resume, +}; + +#define ICPU(model) { X86_VENDOR_INTEL, 6, model, X86_FEATURE_ANY, } + +static const struct x86_cpu_id lpss_cpu_ids[] = { + ICPU(INTEL_FAM6_ATOM_MERRIFIELD1), + {} +}; + +static int __init mid_pci_init(void) +{ + const struct x86_cpu_id *id; + + id = x86_match_cpu(lpss_cpu_ids); + if (id) + pci_set_platform_pm(&mid_pci_platform_pm); + return 0; +} +arch_initcall(mid_pci_init); -- cgit v1.2.3 From 00688272157d83e48d1369d7d11c479571324e40 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Wed, 15 Jun 2016 12:48:53 +0300 Subject: x86/platform/intel-mid: Enable GPIO expanders on Edison Intel Edison board provides GPIO expanders connected to I2C bus. Add necessary file to get those enumerated. 
Signed-off-by: Andy Shevchenko Cc: Dan O'Donovan Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1465984133-41639-1-git-send-email-andriy.shevchenko@linux.intel.com Signed-off-by: Ingo Molnar --- arch/x86/platform/intel-mid/device_libs/Makefile | 6 +- .../intel-mid/device_libs/platform_pcal9555a.c | 99 ++++++++++++++++++++++ 2 files changed, 103 insertions(+), 2 deletions(-) create mode 100644 arch/x86/platform/intel-mid/device_libs/platform_pcal9555a.c (limited to 'arch') diff --git a/arch/x86/platform/intel-mid/device_libs/Makefile b/arch/x86/platform/intel-mid/device_libs/Makefile index 91ec9f8704bf..abe8ba87c970 100644 --- a/arch/x86/platform/intel-mid/device_libs/Makefile +++ b/arch/x86/platform/intel-mid/device_libs/Makefile @@ -11,11 +11,13 @@ obj-$(subst m,y,$(CONFIG_INTEL_MFLD_THERMAL)) += platform_msic_thermal.o # I2C Devices obj-$(subst m,y,$(CONFIG_SENSORS_EMC1403)) += platform_emc1403.o obj-$(subst m,y,$(CONFIG_SENSORS_LIS3LV02D)) += platform_lis331.o -obj-$(subst m,y,$(CONFIG_GPIO_PCA953X)) += platform_max7315.o obj-$(subst m,y,$(CONFIG_INPUT_MPU3050)) += platform_mpu3050.o obj-$(subst m,y,$(CONFIG_INPUT_BMA150)) += platform_bma023.o -obj-$(subst m,y,$(CONFIG_GPIO_PCA953X)) += platform_tca6416.o obj-$(subst m,y,$(CONFIG_DRM_MEDFIELD)) += platform_tc35876x.o +# I2C GPIO Expanders +obj-$(subst m,y,$(CONFIG_GPIO_PCA953X)) += platform_max7315.o +obj-$(subst m,y,$(CONFIG_GPIO_PCA953X)) += platform_pcal9555a.o +obj-$(subst m,y,$(CONFIG_GPIO_PCA953X)) += platform_tca6416.o # MISC Devices obj-$(subst m,y,$(CONFIG_KEYBOARD_GPIO)) += platform_gpio_keys.o obj-$(subst m,y,$(CONFIG_INTEL_MID_WATCHDOG)) += platform_wdt.o diff --git a/arch/x86/platform/intel-mid/device_libs/platform_pcal9555a.c b/arch/x86/platform/intel-mid/device_libs/platform_pcal9555a.c new file mode 100644 index 000000000000..429a94192671 --- /dev/null +++ b/arch/x86/platform/intel-mid/device_libs/platform_pcal9555a.c @@ -0,0 +1,99 @@ +/* + * PCAL9555a platform data initilization file + * + * Copyright (C) 2016, Intel Corporation + * + * Authors: Andy Shevchenko + * Dan O'Donovan + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; version 2 + * of the License. 
+ */ + +#include +#include +#include +#include +#include + +#include + +#define PCAL9555A_NUM 4 + +static struct pca953x_platform_data pcal9555a_pdata[PCAL9555A_NUM]; +static int nr; + +static void __init *pcal9555a_platform_data(void *info) +{ + struct i2c_board_info *i2c_info = info; + char *type = i2c_info->type; + struct pca953x_platform_data *pcal9555a; + char base_pin_name[SFI_NAME_LEN + 1]; + char intr_pin_name[SFI_NAME_LEN + 1]; + int gpio_base, intr; + + snprintf(base_pin_name, sizeof(base_pin_name), "%s_base", type); + snprintf(intr_pin_name, sizeof(intr_pin_name), "%s_int", type); + + gpio_base = get_gpio_by_name(base_pin_name); + intr = get_gpio_by_name(intr_pin_name); + + /* Check if the SFI record valid */ + if (gpio_base == -1) + return NULL; + + if (nr >= PCAL9555A_NUM) { + pr_err("%s: Too many instances, only %d supported\n", __func__, + PCAL9555A_NUM); + return NULL; + } + + pcal9555a = &pcal9555a_pdata[nr++]; + pcal9555a->gpio_base = gpio_base; + + if (intr >= 0) { + i2c_info->irq = intr + INTEL_MID_IRQ_OFFSET; + pcal9555a->irq_base = gpio_base + INTEL_MID_IRQ_OFFSET; + } else { + i2c_info->irq = -1; + pcal9555a->irq_base = -1; + } + + strcpy(type, "pcal9555a"); + return pcal9555a; +} + +static const struct devs_id pcal9555a_1_dev_id __initconst = { + .name = "pcal9555a-1", + .type = SFI_DEV_TYPE_I2C, + .delay = 1, + .get_platform_data = &pcal9555a_platform_data, +}; + +static const struct devs_id pcal9555a_2_dev_id __initconst = { + .name = "pcal9555a-2", + .type = SFI_DEV_TYPE_I2C, + .delay = 1, + .get_platform_data = &pcal9555a_platform_data, +}; + +static const struct devs_id pcal9555a_3_dev_id __initconst = { + .name = "pcal9555a-3", + .type = SFI_DEV_TYPE_I2C, + .delay = 1, + .get_platform_data = &pcal9555a_platform_data, +}; + +static const struct devs_id pcal9555a_4_dev_id __initconst = { + .name = "pcal9555a-4", + .type = SFI_DEV_TYPE_I2C, + .delay = 1, + .get_platform_data = &pcal9555a_platform_data, +}; + +sfi_device(pcal9555a_1_dev_id); +sfi_device(pcal9555a_2_dev_id); +sfi_device(pcal9555a_3_dev_id); +sfi_device(pcal9555a_4_dev_id); -- cgit v1.2.3 From 5fc39d347267bd029fcc9099c70e2fe2d53130e9 Mon Sep 17 00:00:00 2001 From: Boris Brezillon Date: Wed, 15 Jun 2016 13:20:19 +0200 Subject: ARM: sunxi/dt: make the CHIP inherit from allwinner,sun5i-a13 The sun4i-timer driver registers its sched_clock only if the machine is compatible with "allwinner,sun5i-a13", "allwinner,sun5i-a10s" or "allwinner,sun4i-a10". Add the missing "allwinner,sun5i-a13" string to the machine compatible. 
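A minimal sketch of the gate this addresses, assuming the sun4i-timer driver structure implied by the description above (the sun4i_timer_sched_read() name stands in for the driver's counter read callback and is not copied from that driver):

/*
 * Sketch only: without "allwinner,sun5i-a13" in the board compatible
 * list none of these strings match on the C.H.I.P., so the driver
 * never calls sched_clock_register().
 */
static void __init sun4i_sched_clock_init(unsigned long rate)
{
	if (of_machine_is_compatible("allwinner,sun5i-a13") ||
	    of_machine_is_compatible("allwinner,sun5i-a10s") ||
	    of_machine_is_compatible("allwinner,sun4i-a10"))
		sched_clock_register(sun4i_timer_sched_read, 32, rate);
}
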
Signed-off-by: Boris Brezillon Fixes: 465a225fb2af ("ARM: sun5i: Add C.H.I.P DTS") Cc: Signed-off-by: Maxime Ripard --- arch/arm/boot/dts/sun5i-r8-chip.dts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/arm/boot/dts/sun5i-r8-chip.dts b/arch/arm/boot/dts/sun5i-r8-chip.dts index a8d8b4582397..f694482bdeb6 100644 --- a/arch/arm/boot/dts/sun5i-r8-chip.dts +++ b/arch/arm/boot/dts/sun5i-r8-chip.dts @@ -52,7 +52,7 @@ / { model = "NextThing C.H.I.P."; - compatible = "nextthing,chip", "allwinner,sun5i-r8"; + compatible = "nextthing,chip", "allwinner,sun5i-r8", "allwinner,sun5i-a13"; aliases { i2c0 = &i2c0; -- cgit v1.2.3 From 1f51dee7ca7424be6f84067395166f878dbdd8be Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 18 Apr 2016 01:16:10 +0200 Subject: locking/atomic, arch/alpha: Implement atomic{,64}_fetch_{add,sub,and,andnot,or,xor}() Implement FETCH-OP atomic primitives, these are very similar to the existing OP-RETURN primitives we already have, except they return the value of the atomic variable _before_ modification. This is especially useful for irreversible operations -- such as bitops (because it becomes impossible to reconstruct the state prior to modification). Signed-off-by: Peter Zijlstra (Intel) Cc: Andrew Morton Cc: Ivan Kokshaysky Cc: Linus Torvalds Cc: Matt Turner Cc: Paul E. McKenney Cc: Peter Zijlstra Cc: Richard Henderson Cc: Thomas Gleixner Cc: linux-alpha@vger.kernel.org Cc: linux-arch@vger.kernel.org Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- arch/alpha/include/asm/atomic.h | 65 +++++++++++++++++++++++++++++++++++------ 1 file changed, 56 insertions(+), 9 deletions(-) (limited to 'arch') diff --git a/arch/alpha/include/asm/atomic.h b/arch/alpha/include/asm/atomic.h index 572b228c44c7..8243f17999e3 100644 --- a/arch/alpha/include/asm/atomic.h +++ b/arch/alpha/include/asm/atomic.h @@ -65,6 +65,25 @@ static inline int atomic_##op##_return(int i, atomic_t *v) \ return result; \ } +#define ATOMIC_FETCH_OP(op, asm_op) \ +static inline int atomic_fetch_##op(int i, atomic_t *v) \ +{ \ + long temp, result; \ + smp_mb(); \ + __asm__ __volatile__( \ + "1: ldl_l %2,%1\n" \ + " " #asm_op " %2,%3,%0\n" \ + " stl_c %0,%1\n" \ + " beq %0,2f\n" \ + ".subsection 2\n" \ + "2: br 1b\n" \ + ".previous" \ + :"=&r" (temp), "=m" (v->counter), "=&r" (result) \ + :"Ir" (i), "m" (v->counter) : "memory"); \ + smp_mb(); \ + return result; \ +} + #define ATOMIC64_OP(op, asm_op) \ static __inline__ void atomic64_##op(long i, atomic64_t * v) \ { \ @@ -101,11 +120,32 @@ static __inline__ long atomic64_##op##_return(long i, atomic64_t * v) \ return result; \ } +#define ATOMIC64_FETCH_OP(op, asm_op) \ +static __inline__ long atomic64_fetch_##op(long i, atomic64_t * v) \ +{ \ + long temp, result; \ + smp_mb(); \ + __asm__ __volatile__( \ + "1: ldq_l %2,%1\n" \ + " " #asm_op " %2,%3,%0\n" \ + " stq_c %0,%1\n" \ + " beq %0,2f\n" \ + ".subsection 2\n" \ + "2: br 1b\n" \ + ".previous" \ + :"=&r" (temp), "=m" (v->counter), "=&r" (result) \ + :"Ir" (i), "m" (v->counter) : "memory"); \ + smp_mb(); \ + return result; \ +} + #define ATOMIC_OPS(op) \ ATOMIC_OP(op, op##l) \ ATOMIC_OP_RETURN(op, op##l) \ + ATOMIC_FETCH_OP(op, op##l) \ ATOMIC64_OP(op, op##q) \ - ATOMIC64_OP_RETURN(op, op##q) + ATOMIC64_OP_RETURN(op, op##q) \ + ATOMIC64_FETCH_OP(op, op##q) ATOMIC_OPS(add) ATOMIC_OPS(sub) @@ -113,18 +153,25 @@ ATOMIC_OPS(sub) #define atomic_andnot atomic_andnot #define atomic64_andnot atomic64_andnot -ATOMIC_OP(and, and) -ATOMIC_OP(andnot, bic) 
-ATOMIC_OP(or, bis) -ATOMIC_OP(xor, xor) -ATOMIC64_OP(and, and) -ATOMIC64_OP(andnot, bic) -ATOMIC64_OP(or, bis) -ATOMIC64_OP(xor, xor) +#define atomic_fetch_or atomic_fetch_or + +#undef ATOMIC_OPS +#define ATOMIC_OPS(op, asm) \ + ATOMIC_OP(op, asm) \ + ATOMIC_FETCH_OP(op, asm) \ + ATOMIC64_OP(op, asm) \ + ATOMIC64_FETCH_OP(op, asm) + +ATOMIC_OPS(and, and) +ATOMIC_OPS(andnot, bic) +ATOMIC_OPS(or, bis) +ATOMIC_OPS(xor, xor) #undef ATOMIC_OPS +#undef ATOMIC64_FETCH_OP #undef ATOMIC64_OP_RETURN #undef ATOMIC64_OP +#undef ATOMIC_FETCH_OP #undef ATOMIC_OP_RETURN #undef ATOMIC_OP -- cgit v1.2.3 From fbffe892e5253dd02c016c59a9d792eafe9d53e1 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 18 Apr 2016 01:16:09 +0200 Subject: locking/atomic, arch/arc: Implement atomic_fetch_{add,sub,and,andnot,or,xor}() Implement FETCH-OP atomic primitives, these are very similar to the existing OP-RETURN primitives we already have, except they return the value of the atomic variable _before_ modification. This is especially useful for irreversible operations -- such as bitops (because it becomes impossible to reconstruct the state prior to modification). Signed-off-by: Peter Zijlstra (Intel) Acked-by: Vineet Gupta Cc: Andrew Morton Cc: Linus Torvalds Cc: Noam Camus Cc: Paul E. McKenney Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-arch@vger.kernel.org Cc: linux-kernel@vger.kernel.org Cc: linux-snps-arc@lists.infradead.org Signed-off-by: Ingo Molnar --- arch/arc/include/asm/atomic.h | 103 ++++++++++++++++++++++++++++++++++++++---- 1 file changed, 94 insertions(+), 9 deletions(-) (limited to 'arch') diff --git a/arch/arc/include/asm/atomic.h b/arch/arc/include/asm/atomic.h index dd683995bc9d..c066a21caaaf 100644 --- a/arch/arc/include/asm/atomic.h +++ b/arch/arc/include/asm/atomic.h @@ -67,6 +67,37 @@ static inline int atomic_##op##_return(int i, atomic_t *v) \ return val; \ } +#define ATOMIC_FETCH_OP(op, c_op, asm_op) \ +static inline int atomic_fetch_##op(int i, atomic_t *v) \ +{ \ + unsigned int val, orig; \ + SCOND_FAIL_RETRY_VAR_DEF \ + \ + /* \ + * Explicit full memory barrier needed before/after as \ + * LLOCK/SCOND thmeselves don't provide any such semantics \ + */ \ + smp_mb(); \ + \ + __asm__ __volatile__( \ + "1: llock %[orig], [%[ctr]] \n" \ + " " #asm_op " %[val], %[orig], %[i] \n" \ + " scond %[val], [%[ctr]] \n" \ + " \n" \ + SCOND_FAIL_RETRY_ASM \ + \ + : [val] "=&r" (val), \ + [orig] "=&r" (orig) \ + SCOND_FAIL_RETRY_VARS \ + : [ctr] "r" (&v->counter), \ + [i] "ir" (i) \ + : "cc"); \ + \ + smp_mb(); \ + \ + return orig; \ +} + #else /* !CONFIG_ARC_HAS_LLSC */ #ifndef CONFIG_SMP @@ -129,21 +160,46 @@ static inline int atomic_##op##_return(int i, atomic_t *v) \ return temp; \ } +#define ATOMIC_FETCH_OP(op, c_op, asm_op) \ +static inline int atomic_fetch_##op(int i, atomic_t *v) \ +{ \ + unsigned long flags; \ + unsigned long orig; \ + \ + /* \ + * spin lock/unlock provides the needed smp_mb() before/after \ + */ \ + atomic_ops_lock(flags); \ + orig = v->counter; \ + v->counter c_op i; \ + atomic_ops_unlock(flags); \ + \ + return orig; \ +} + #endif /* !CONFIG_ARC_HAS_LLSC */ #define ATOMIC_OPS(op, c_op, asm_op) \ ATOMIC_OP(op, c_op, asm_op) \ - ATOMIC_OP_RETURN(op, c_op, asm_op) + ATOMIC_OP_RETURN(op, c_op, asm_op) \ + ATOMIC_FETCH_OP(op, c_op, asm_op) ATOMIC_OPS(add, +=, add) ATOMIC_OPS(sub, -=, sub) #define atomic_andnot atomic_andnot -ATOMIC_OP(and, &=, and) -ATOMIC_OP(andnot, &= ~, bic) -ATOMIC_OP(or, |=, or) -ATOMIC_OP(xor, ^=, xor) +#define atomic_fetch_or atomic_fetch_or + +#undef 
ATOMIC_OPS +#define ATOMIC_OPS(op, c_op, asm_op) \ + ATOMIC_OP(op, c_op, asm_op) \ + ATOMIC_FETCH_OP(op, c_op, asm_op) + +ATOMIC_OPS(and, &=, and) +ATOMIC_OPS(andnot, &= ~, bic) +ATOMIC_OPS(or, |=, or) +ATOMIC_OPS(xor, ^=, xor) #undef SCOND_FAIL_RETRY_VAR_DEF #undef SCOND_FAIL_RETRY_ASM @@ -208,22 +264,51 @@ static inline int atomic_##op##_return(int i, atomic_t *v) \ return temp; \ } +#define ATOMIC_FETCH_OP(op, c_op, asm_op) \ +static inline int atomic_fetch_##op(int i, atomic_t *v) \ +{ \ + unsigned int temp = i; \ + \ + /* Explicit full memory barrier needed before/after */ \ + smp_mb(); \ + \ + __asm__ __volatile__( \ + " mov r2, %0\n" \ + " mov r3, %1\n" \ + " .word %2\n" \ + " mov %0, r2" \ + : "+r"(temp) \ + : "r"(&v->counter), "i"(asm_op) \ + : "r2", "r3", "memory"); \ + \ + smp_mb(); \ + \ + return temp; \ +} + #define ATOMIC_OPS(op, c_op, asm_op) \ ATOMIC_OP(op, c_op, asm_op) \ - ATOMIC_OP_RETURN(op, c_op, asm_op) + ATOMIC_OP_RETURN(op, c_op, asm_op) \ + ATOMIC_FETCH_OP(op, c_op, asm_op) ATOMIC_OPS(add, +=, CTOP_INST_AADD_DI_R2_R2_R3) #define atomic_sub(i, v) atomic_add(-(i), (v)) #define atomic_sub_return(i, v) atomic_add_return(-(i), (v)) -ATOMIC_OP(and, &=, CTOP_INST_AAND_DI_R2_R2_R3) +#undef ATOMIC_OPS +#define ATOMIC_OPS(op, c_op, asm_op) \ + ATOMIC_OP(op, c_op, asm_op) \ + ATOMIC_FETCH_OP(op, c_op, asm_op) + +ATOMIC_OPS(and, &=, CTOP_INST_AAND_DI_R2_R2_R3) #define atomic_andnot(mask, v) atomic_and(~(mask), (v)) -ATOMIC_OP(or, |=, CTOP_INST_AOR_DI_R2_R2_R3) -ATOMIC_OP(xor, ^=, CTOP_INST_AXOR_DI_R2_R2_R3) +ATOMIC_OPS(or, |=, CTOP_INST_AOR_DI_R2_R2_R3) +ATOMIC_OPS(xor, ^=, CTOP_INST_AXOR_DI_R2_R2_R3) #endif /* CONFIG_ARC_PLAT_EZNPS */ #undef ATOMIC_OPS +#undef ATOMIC_FETCH_OP #undef ATOMIC_OP_RETURN #undef ATOMIC_OP -- cgit v1.2.3 From 6da068c1beba684b2a0dbf43a07b0529edd9e959 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 18 Apr 2016 01:10:52 +0200 Subject: locking/atomic, arch/arm: Implement atomic{,64}_fetch_{add,sub,and,andnot,or,xor}{,_relaxed,_acquire,_release}() Implement FETCH-OP atomic primitives, these are very similar to the existing OP-RETURN primitives we already have, except they return the value of the atomic variable _before_ modification. This is especially useful for irreversible operations -- such as bitops (because it becomes impossible to reconstruct the state prior to modification). Signed-off-by: Peter Zijlstra (Intel) Acked-by: Will Deacon Cc: Andrew Morton Cc: Davidlohr Bueso Cc: Linus Torvalds Cc: Paul E. 
McKenney Cc: Peter Zijlstra Cc: Russell King Cc: Thomas Gleixner Cc: linux-arch@vger.kernel.org Cc: linux-arm-kernel@lists.infradead.org Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- arch/arm/include/asm/atomic.h | 108 ++++++++++++++++++++++++++++++++++++++---- 1 file changed, 98 insertions(+), 10 deletions(-) (limited to 'arch') diff --git a/arch/arm/include/asm/atomic.h b/arch/arm/include/asm/atomic.h index 9e10c4567eb4..0feb110ec542 100644 --- a/arch/arm/include/asm/atomic.h +++ b/arch/arm/include/asm/atomic.h @@ -77,8 +77,36 @@ static inline int atomic_##op##_return_relaxed(int i, atomic_t *v) \ return result; \ } +#define ATOMIC_FETCH_OP(op, c_op, asm_op) \ +static inline int atomic_fetch_##op##_relaxed(int i, atomic_t *v) \ +{ \ + unsigned long tmp; \ + int result, val; \ + \ + prefetchw(&v->counter); \ + \ + __asm__ __volatile__("@ atomic_fetch_" #op "\n" \ +"1: ldrex %0, [%4]\n" \ +" " #asm_op " %1, %0, %5\n" \ +" strex %2, %1, [%4]\n" \ +" teq %2, #0\n" \ +" bne 1b" \ + : "=&r" (result), "=&r" (val), "=&r" (tmp), "+Qo" (v->counter) \ + : "r" (&v->counter), "Ir" (i) \ + : "cc"); \ + \ + return result; \ +} + #define atomic_add_return_relaxed atomic_add_return_relaxed #define atomic_sub_return_relaxed atomic_sub_return_relaxed +#define atomic_fetch_add_relaxed atomic_fetch_add_relaxed +#define atomic_fetch_sub_relaxed atomic_fetch_sub_relaxed + +#define atomic_fetch_and_relaxed atomic_fetch_and_relaxed +#define atomic_fetch_andnot_relaxed atomic_fetch_andnot_relaxed +#define atomic_fetch_or_relaxed atomic_fetch_or_relaxed +#define atomic_fetch_xor_relaxed atomic_fetch_xor_relaxed static inline int atomic_cmpxchg_relaxed(atomic_t *ptr, int old, int new) { @@ -159,6 +187,22 @@ static inline int atomic_##op##_return(int i, atomic_t *v) \ return val; \ } +#define ATOMIC_FETCH_OP(op, c_op, asm_op) \ +static inline int atomic_fetch_##op(int i, atomic_t *v) \ +{ \ + unsigned long flags; \ + int val; \ + \ + raw_local_irq_save(flags); \ + val = v->counter; \ + v->counter c_op i; \ + raw_local_irq_restore(flags); \ + \ + return val; \ +} + +#define atomic_fetch_or atomic_fetch_or + static inline int atomic_cmpxchg(atomic_t *v, int old, int new) { int ret; @@ -187,19 +231,26 @@ static inline int __atomic_add_unless(atomic_t *v, int a, int u) #define ATOMIC_OPS(op, c_op, asm_op) \ ATOMIC_OP(op, c_op, asm_op) \ - ATOMIC_OP_RETURN(op, c_op, asm_op) + ATOMIC_OP_RETURN(op, c_op, asm_op) \ + ATOMIC_FETCH_OP(op, c_op, asm_op) ATOMIC_OPS(add, +=, add) ATOMIC_OPS(sub, -=, sub) #define atomic_andnot atomic_andnot -ATOMIC_OP(and, &=, and) -ATOMIC_OP(andnot, &= ~, bic) -ATOMIC_OP(or, |=, orr) -ATOMIC_OP(xor, ^=, eor) +#undef ATOMIC_OPS +#define ATOMIC_OPS(op, c_op, asm_op) \ + ATOMIC_OP(op, c_op, asm_op) \ + ATOMIC_FETCH_OP(op, c_op, asm_op) + +ATOMIC_OPS(and, &=, and) +ATOMIC_OPS(andnot, &= ~, bic) +ATOMIC_OPS(or, |=, orr) +ATOMIC_OPS(xor, ^=, eor) #undef ATOMIC_OPS +#undef ATOMIC_FETCH_OP #undef ATOMIC_OP_RETURN #undef ATOMIC_OP @@ -317,24 +368,61 @@ atomic64_##op##_return_relaxed(long long i, atomic64_t *v) \ return result; \ } +#define ATOMIC64_FETCH_OP(op, op1, op2) \ +static inline long long \ +atomic64_fetch_##op##_relaxed(long long i, atomic64_t *v) \ +{ \ + long long result, val; \ + unsigned long tmp; \ + \ + prefetchw(&v->counter); \ + \ + __asm__ __volatile__("@ atomic64_fetch_" #op "\n" \ +"1: ldrexd %0, %H0, [%4]\n" \ +" " #op1 " %Q1, %Q0, %Q5\n" \ +" " #op2 " %R1, %R0, %R5\n" \ +" strexd %2, %1, %H1, [%4]\n" \ +" teq %2, #0\n" \ +" bne 1b" \ + : "=&r" (result), "=&r" 
(val), "=&r" (tmp), "+Qo" (v->counter) \ + : "r" (&v->counter), "r" (i) \ + : "cc"); \ + \ + return result; \ +} + #define ATOMIC64_OPS(op, op1, op2) \ ATOMIC64_OP(op, op1, op2) \ - ATOMIC64_OP_RETURN(op, op1, op2) + ATOMIC64_OP_RETURN(op, op1, op2) \ + ATOMIC64_FETCH_OP(op, op1, op2) ATOMIC64_OPS(add, adds, adc) ATOMIC64_OPS(sub, subs, sbc) #define atomic64_add_return_relaxed atomic64_add_return_relaxed #define atomic64_sub_return_relaxed atomic64_sub_return_relaxed +#define atomic64_fetch_add_relaxed atomic64_fetch_add_relaxed +#define atomic64_fetch_sub_relaxed atomic64_fetch_sub_relaxed + +#undef ATOMIC64_OPS +#define ATOMIC64_OPS(op, op1, op2) \ + ATOMIC64_OP(op, op1, op2) \ + ATOMIC64_FETCH_OP(op, op1, op2) #define atomic64_andnot atomic64_andnot -ATOMIC64_OP(and, and, and) -ATOMIC64_OP(andnot, bic, bic) -ATOMIC64_OP(or, orr, orr) -ATOMIC64_OP(xor, eor, eor) +ATOMIC64_OPS(and, and, and) +ATOMIC64_OPS(andnot, bic, bic) +ATOMIC64_OPS(or, orr, orr) +ATOMIC64_OPS(xor, eor, eor) + +#define atomic64_fetch_and_relaxed atomic64_fetch_and_relaxed +#define atomic64_fetch_andnot_relaxed atomic64_fetch_andnot_relaxed +#define atomic64_fetch_or_relaxed atomic64_fetch_or_relaxed +#define atomic64_fetch_xor_relaxed atomic64_fetch_xor_relaxed #undef ATOMIC64_OPS +#undef ATOMIC64_FETCH_OP #undef ATOMIC64_OP_RETURN #undef ATOMIC64_OP -- cgit v1.2.3 From e490f9b1d3b40ba32ad07432b63b813ce3052d41 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 18 Apr 2016 01:16:09 +0200 Subject: locking/atomic, arch/arm64: Implement atomic{,64}_fetch_{add,sub,and,andnot,or,xor}{,_relaxed,_acquire,_release}() Implement FETCH-OP atomic primitives, these are very similar to the existing OP-RETURN primitives we already have, except they return the value of the atomic variable _before_ modification. This is especially useful for irreversible operations -- such as bitops (because it becomes impossible to reconstruct the state prior to modification). [wildea01: compile fixes for ll/sc] Signed-off-by: Peter Zijlstra (Intel) Cc: Andrew Morton Cc: Catalin Marinas Cc: Linus Torvalds Cc: Lorenzo Pieralisi Cc: Mark Rutland Cc: Paul E. 
McKenney Cc: Peter Zijlstra Cc: Steve Capper Cc: Thomas Gleixner Cc: Will Deacon Cc: linux-arch@vger.kernel.org Cc: linux-arm-kernel@lists.infradead.org Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- arch/arm64/include/asm/atomic.h | 62 +++++++++++++++++++ arch/arm64/include/asm/atomic_ll_sc.h | 110 ++++++++++++++++++++++++++-------- 2 files changed, 148 insertions(+), 24 deletions(-) (limited to 'arch') diff --git a/arch/arm64/include/asm/atomic.h b/arch/arm64/include/asm/atomic.h index f3a3586a421c..3128c3d7c1ff 100644 --- a/arch/arm64/include/asm/atomic.h +++ b/arch/arm64/include/asm/atomic.h @@ -76,6 +76,36 @@ #define atomic_dec_return_release(v) atomic_sub_return_release(1, (v)) #define atomic_dec_return(v) atomic_sub_return(1, (v)) +#define atomic_fetch_add_relaxed atomic_fetch_add_relaxed +#define atomic_fetch_add_acquire atomic_fetch_add_acquire +#define atomic_fetch_add_release atomic_fetch_add_release +#define atomic_fetch_add atomic_fetch_add + +#define atomic_fetch_sub_relaxed atomic_fetch_sub_relaxed +#define atomic_fetch_sub_acquire atomic_fetch_sub_acquire +#define atomic_fetch_sub_release atomic_fetch_sub_release +#define atomic_fetch_sub atomic_fetch_sub + +#define atomic_fetch_and_relaxed atomic_fetch_and_relaxed +#define atomic_fetch_and_acquire atomic_fetch_and_acquire +#define atomic_fetch_and_release atomic_fetch_and_release +#define atomic_fetch_and atomic_fetch_and + +#define atomic_fetch_andnot_relaxed atomic_fetch_andnot_relaxed +#define atomic_fetch_andnot_acquire atomic_fetch_andnot_acquire +#define atomic_fetch_andnot_release atomic_fetch_andnot_release +#define atomic_fetch_andnot atomic_fetch_andnot + +#define atomic_fetch_or_relaxed atomic_fetch_or_relaxed +#define atomic_fetch_or_acquire atomic_fetch_or_acquire +#define atomic_fetch_or_release atomic_fetch_or_release +#define atomic_fetch_or atomic_fetch_or + +#define atomic_fetch_xor_relaxed atomic_fetch_xor_relaxed +#define atomic_fetch_xor_acquire atomic_fetch_xor_acquire +#define atomic_fetch_xor_release atomic_fetch_xor_release +#define atomic_fetch_xor atomic_fetch_xor + #define atomic_xchg_relaxed(v, new) xchg_relaxed(&((v)->counter), (new)) #define atomic_xchg_acquire(v, new) xchg_acquire(&((v)->counter), (new)) #define atomic_xchg_release(v, new) xchg_release(&((v)->counter), (new)) @@ -98,6 +128,8 @@ #define __atomic_add_unless(v, a, u) ___atomic_add_unless(v, a, u,) #define atomic_andnot atomic_andnot +#define atomic_fetch_or atomic_fetch_or + /* * 64-bit atomic operations. 
*/ @@ -125,6 +157,36 @@ #define atomic64_dec_return_release(v) atomic64_sub_return_release(1, (v)) #define atomic64_dec_return(v) atomic64_sub_return(1, (v)) +#define atomic64_fetch_add_relaxed atomic64_fetch_add_relaxed +#define atomic64_fetch_add_acquire atomic64_fetch_add_acquire +#define atomic64_fetch_add_release atomic64_fetch_add_release +#define atomic64_fetch_add atomic64_fetch_add + +#define atomic64_fetch_sub_relaxed atomic64_fetch_sub_relaxed +#define atomic64_fetch_sub_acquire atomic64_fetch_sub_acquire +#define atomic64_fetch_sub_release atomic64_fetch_sub_release +#define atomic64_fetch_sub atomic64_fetch_sub + +#define atomic64_fetch_and_relaxed atomic64_fetch_and_relaxed +#define atomic64_fetch_and_acquire atomic64_fetch_and_acquire +#define atomic64_fetch_and_release atomic64_fetch_and_release +#define atomic64_fetch_and atomic64_fetch_and + +#define atomic64_fetch_andnot_relaxed atomic64_fetch_andnot_relaxed +#define atomic64_fetch_andnot_acquire atomic64_fetch_andnot_acquire +#define atomic64_fetch_andnot_release atomic64_fetch_andnot_release +#define atomic64_fetch_andnot atomic64_fetch_andnot + +#define atomic64_fetch_or_relaxed atomic64_fetch_or_relaxed +#define atomic64_fetch_or_acquire atomic64_fetch_or_acquire +#define atomic64_fetch_or_release atomic64_fetch_or_release +#define atomic64_fetch_or atomic64_fetch_or + +#define atomic64_fetch_xor_relaxed atomic64_fetch_xor_relaxed +#define atomic64_fetch_xor_acquire atomic64_fetch_xor_acquire +#define atomic64_fetch_xor_release atomic64_fetch_xor_release +#define atomic64_fetch_xor atomic64_fetch_xor + #define atomic64_xchg_relaxed atomic_xchg_relaxed #define atomic64_xchg_acquire atomic_xchg_acquire #define atomic64_xchg_release atomic_xchg_release diff --git a/arch/arm64/include/asm/atomic_ll_sc.h b/arch/arm64/include/asm/atomic_ll_sc.h index f61c84f6ba02..f819fdcff1ac 100644 --- a/arch/arm64/include/asm/atomic_ll_sc.h +++ b/arch/arm64/include/asm/atomic_ll_sc.h @@ -77,26 +77,57 @@ __LL_SC_PREFIX(atomic_##op##_return##name(int i, atomic_t *v)) \ } \ __LL_SC_EXPORT(atomic_##op##_return##name); +#define ATOMIC_FETCH_OP(name, mb, acq, rel, cl, op, asm_op) \ +__LL_SC_INLINE int \ +__LL_SC_PREFIX(atomic_fetch_##op##name(int i, atomic_t *v)) \ +{ \ + unsigned long tmp; \ + int val, result; \ + \ + asm volatile("// atomic_fetch_" #op #name "\n" \ +" prfm pstl1strm, %3\n" \ +"1: ld" #acq "xr %w0, %3\n" \ +" " #asm_op " %w1, %w0, %w4\n" \ +" st" #rel "xr %w2, %w1, %3\n" \ +" cbnz %w2, 1b\n" \ +" " #mb \ + : "=&r" (result), "=&r" (val), "=&r" (tmp), "+Q" (v->counter) \ + : "Ir" (i) \ + : cl); \ + \ + return result; \ +} \ +__LL_SC_EXPORT(atomic_fetch_##op##name); + #define ATOMIC_OPS(...) \ ATOMIC_OP(__VA_ARGS__) \ - ATOMIC_OP_RETURN( , dmb ish, , l, "memory", __VA_ARGS__) - -#define ATOMIC_OPS_RLX(...) \ - ATOMIC_OPS(__VA_ARGS__) \ + ATOMIC_OP_RETURN( , dmb ish, , l, "memory", __VA_ARGS__)\ ATOMIC_OP_RETURN(_relaxed, , , , , __VA_ARGS__)\ ATOMIC_OP_RETURN(_acquire, , a, , "memory", __VA_ARGS__)\ - ATOMIC_OP_RETURN(_release, , , l, "memory", __VA_ARGS__) + ATOMIC_OP_RETURN(_release, , , l, "memory", __VA_ARGS__)\ + ATOMIC_FETCH_OP ( , dmb ish, , l, "memory", __VA_ARGS__)\ + ATOMIC_FETCH_OP (_relaxed, , , , , __VA_ARGS__)\ + ATOMIC_FETCH_OP (_acquire, , a, , "memory", __VA_ARGS__)\ + ATOMIC_FETCH_OP (_release, , , l, "memory", __VA_ARGS__) -ATOMIC_OPS_RLX(add, add) -ATOMIC_OPS_RLX(sub, sub) +ATOMIC_OPS(add, add) +ATOMIC_OPS(sub, sub) + +#undef ATOMIC_OPS +#define ATOMIC_OPS(...) 
\ + ATOMIC_OP(__VA_ARGS__) \ + ATOMIC_FETCH_OP ( , dmb ish, , l, "memory", __VA_ARGS__)\ + ATOMIC_FETCH_OP (_relaxed, , , , , __VA_ARGS__)\ + ATOMIC_FETCH_OP (_acquire, , a, , "memory", __VA_ARGS__)\ + ATOMIC_FETCH_OP (_release, , , l, "memory", __VA_ARGS__) -ATOMIC_OP(and, and) -ATOMIC_OP(andnot, bic) -ATOMIC_OP(or, orr) -ATOMIC_OP(xor, eor) +ATOMIC_OPS(and, and) +ATOMIC_OPS(andnot, bic) +ATOMIC_OPS(or, orr) +ATOMIC_OPS(xor, eor) -#undef ATOMIC_OPS_RLX #undef ATOMIC_OPS +#undef ATOMIC_FETCH_OP #undef ATOMIC_OP_RETURN #undef ATOMIC_OP @@ -140,26 +171,57 @@ __LL_SC_PREFIX(atomic64_##op##_return##name(long i, atomic64_t *v)) \ } \ __LL_SC_EXPORT(atomic64_##op##_return##name); +#define ATOMIC64_FETCH_OP(name, mb, acq, rel, cl, op, asm_op) \ +__LL_SC_INLINE long \ +__LL_SC_PREFIX(atomic64_fetch_##op##name(long i, atomic64_t *v)) \ +{ \ + long result, val; \ + unsigned long tmp; \ + \ + asm volatile("// atomic64_fetch_" #op #name "\n" \ +" prfm pstl1strm, %3\n" \ +"1: ld" #acq "xr %0, %3\n" \ +" " #asm_op " %1, %0, %4\n" \ +" st" #rel "xr %w2, %1, %3\n" \ +" cbnz %w2, 1b\n" \ +" " #mb \ + : "=&r" (result), "=&r" (val), "=&r" (tmp), "+Q" (v->counter) \ + : "Ir" (i) \ + : cl); \ + \ + return result; \ +} \ +__LL_SC_EXPORT(atomic64_fetch_##op##name); + #define ATOMIC64_OPS(...) \ ATOMIC64_OP(__VA_ARGS__) \ - ATOMIC64_OP_RETURN(, dmb ish, , l, "memory", __VA_ARGS__) - -#define ATOMIC64_OPS_RLX(...) \ - ATOMIC64_OPS(__VA_ARGS__) \ + ATOMIC64_OP_RETURN(, dmb ish, , l, "memory", __VA_ARGS__) \ ATOMIC64_OP_RETURN(_relaxed,, , , , __VA_ARGS__) \ ATOMIC64_OP_RETURN(_acquire,, a, , "memory", __VA_ARGS__) \ - ATOMIC64_OP_RETURN(_release,, , l, "memory", __VA_ARGS__) + ATOMIC64_OP_RETURN(_release,, , l, "memory", __VA_ARGS__) \ + ATOMIC64_FETCH_OP (, dmb ish, , l, "memory", __VA_ARGS__) \ + ATOMIC64_FETCH_OP (_relaxed,, , , , __VA_ARGS__) \ + ATOMIC64_FETCH_OP (_acquire,, a, , "memory", __VA_ARGS__) \ + ATOMIC64_FETCH_OP (_release,, , l, "memory", __VA_ARGS__) -ATOMIC64_OPS_RLX(add, add) -ATOMIC64_OPS_RLX(sub, sub) +ATOMIC64_OPS(add, add) +ATOMIC64_OPS(sub, sub) + +#undef ATOMIC64_OPS +#define ATOMIC64_OPS(...) \ + ATOMIC64_OP(__VA_ARGS__) \ + ATOMIC64_FETCH_OP (, dmb ish, , l, "memory", __VA_ARGS__) \ + ATOMIC64_FETCH_OP (_relaxed,, , , , __VA_ARGS__) \ + ATOMIC64_FETCH_OP (_acquire,, a, , "memory", __VA_ARGS__) \ + ATOMIC64_FETCH_OP (_release,, , l, "memory", __VA_ARGS__) -ATOMIC64_OP(and, and) -ATOMIC64_OP(andnot, bic) -ATOMIC64_OP(or, orr) -ATOMIC64_OP(xor, eor) +ATOMIC64_OPS(and, and) +ATOMIC64_OPS(andnot, bic) +ATOMIC64_OPS(or, orr) +ATOMIC64_OPS(xor, eor) -#undef ATOMIC64_OPS_RLX #undef ATOMIC64_OPS +#undef ATOMIC64_FETCH_OP #undef ATOMIC64_OP_RETURN #undef ATOMIC64_OP -- cgit v1.2.3 From 6822a84dd4e35a1beb70028e46b5f60c14fc422d Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Fri, 22 Apr 2016 18:01:32 +0100 Subject: locking/atomic, arch/arm64: Generate LSE non-return cases using common macros atomic[64]_{add,and,andnot,or,xor} all follow the same patterns, so generate them using macros, like we do for the LL/SC case already. Signed-off-by: Will Deacon Signed-off-by: Peter Zijlstra (Intel) Cc: Andrew Morton Cc: Ard Biesheuvel Cc: Catalin Marinas Cc: Linus Torvalds Cc: Lorenzo Pieralisi Cc: Paul E. 
McKenney Cc: Peter Zijlstra Cc: Steve Capper Cc: Thomas Gleixner Cc: linux-arch@vger.kernel.org Cc: linux-arm-kernel@lists.infradead.org Cc: linux-kernel@vger.kernel.org Link: http://lkml.kernel.org/r/1461344493-8262-1-git-send-email-will.deacon@arm.com Signed-off-by: Ingo Molnar --- arch/arm64/include/asm/atomic_lse.h | 122 ++++++++++-------------------------- 1 file changed, 32 insertions(+), 90 deletions(-) (limited to 'arch') diff --git a/arch/arm64/include/asm/atomic_lse.h b/arch/arm64/include/asm/atomic_lse.h index 39c1d340fec5..37a0f03560f7 100644 --- a/arch/arm64/include/asm/atomic_lse.h +++ b/arch/arm64/include/asm/atomic_lse.h @@ -26,54 +26,25 @@ #endif #define __LL_SC_ATOMIC(op) __LL_SC_CALL(atomic_##op) - -static inline void atomic_andnot(int i, atomic_t *v) -{ - register int w0 asm ("w0") = i; - register atomic_t *x1 asm ("x1") = v; - - asm volatile(ARM64_LSE_ATOMIC_INSN(__LL_SC_ATOMIC(andnot), - " stclr %w[i], %[v]\n") - : [i] "+r" (w0), [v] "+Q" (v->counter) - : "r" (x1) - : __LL_SC_CLOBBERS); -} - -static inline void atomic_or(int i, atomic_t *v) -{ - register int w0 asm ("w0") = i; - register atomic_t *x1 asm ("x1") = v; - - asm volatile(ARM64_LSE_ATOMIC_INSN(__LL_SC_ATOMIC(or), - " stset %w[i], %[v]\n") - : [i] "+r" (w0), [v] "+Q" (v->counter) - : "r" (x1) - : __LL_SC_CLOBBERS); -} - -static inline void atomic_xor(int i, atomic_t *v) -{ - register int w0 asm ("w0") = i; - register atomic_t *x1 asm ("x1") = v; - - asm volatile(ARM64_LSE_ATOMIC_INSN(__LL_SC_ATOMIC(xor), - " steor %w[i], %[v]\n") - : [i] "+r" (w0), [v] "+Q" (v->counter) - : "r" (x1) - : __LL_SC_CLOBBERS); +#define ATOMIC_OP(op, asm_op) \ +static inline void atomic_##op(int i, atomic_t *v) \ +{ \ + register int w0 asm ("w0") = i; \ + register atomic_t *x1 asm ("x1") = v; \ + \ + asm volatile(ARM64_LSE_ATOMIC_INSN(__LL_SC_ATOMIC(op), \ +" " #asm_op " %w[i], %[v]\n") \ + : [i] "+r" (w0), [v] "+Q" (v->counter) \ + : "r" (x1) \ + : __LL_SC_CLOBBERS); \ } -static inline void atomic_add(int i, atomic_t *v) -{ - register int w0 asm ("w0") = i; - register atomic_t *x1 asm ("x1") = v; +ATOMIC_OP(andnot, stclr) +ATOMIC_OP(or, stset) +ATOMIC_OP(xor, steor) +ATOMIC_OP(add, stadd) - asm volatile(ARM64_LSE_ATOMIC_INSN(__LL_SC_ATOMIC(add), - " stadd %w[i], %[v]\n") - : [i] "+r" (w0), [v] "+Q" (v->counter) - : "r" (x1) - : __LL_SC_CLOBBERS); -} +#undef ATOMIC_OP #define ATOMIC_OP_ADD_RETURN(name, mb, cl...) 
\ static inline int atomic_add_return##name(int i, atomic_t *v) \ @@ -167,54 +138,25 @@ ATOMIC_OP_SUB_RETURN( , al, "memory") #undef __LL_SC_ATOMIC #define __LL_SC_ATOMIC64(op) __LL_SC_CALL(atomic64_##op) - -static inline void atomic64_andnot(long i, atomic64_t *v) -{ - register long x0 asm ("x0") = i; - register atomic64_t *x1 asm ("x1") = v; - - asm volatile(ARM64_LSE_ATOMIC_INSN(__LL_SC_ATOMIC64(andnot), - " stclr %[i], %[v]\n") - : [i] "+r" (x0), [v] "+Q" (v->counter) - : "r" (x1) - : __LL_SC_CLOBBERS); -} - -static inline void atomic64_or(long i, atomic64_t *v) -{ - register long x0 asm ("x0") = i; - register atomic64_t *x1 asm ("x1") = v; - - asm volatile(ARM64_LSE_ATOMIC_INSN(__LL_SC_ATOMIC64(or), - " stset %[i], %[v]\n") - : [i] "+r" (x0), [v] "+Q" (v->counter) - : "r" (x1) - : __LL_SC_CLOBBERS); -} - -static inline void atomic64_xor(long i, atomic64_t *v) -{ - register long x0 asm ("x0") = i; - register atomic64_t *x1 asm ("x1") = v; - - asm volatile(ARM64_LSE_ATOMIC_INSN(__LL_SC_ATOMIC64(xor), - " steor %[i], %[v]\n") - : [i] "+r" (x0), [v] "+Q" (v->counter) - : "r" (x1) - : __LL_SC_CLOBBERS); +#define ATOMIC64_OP(op, asm_op) \ +static inline void atomic64_##op(long i, atomic64_t *v) \ +{ \ + register long x0 asm ("x0") = i; \ + register atomic64_t *x1 asm ("x1") = v; \ + \ + asm volatile(ARM64_LSE_ATOMIC_INSN(__LL_SC_ATOMIC64(op), \ +" " #asm_op " %[i], %[v]\n") \ + : [i] "+r" (x0), [v] "+Q" (v->counter) \ + : "r" (x1) \ + : __LL_SC_CLOBBERS); \ } -static inline void atomic64_add(long i, atomic64_t *v) -{ - register long x0 asm ("x0") = i; - register atomic64_t *x1 asm ("x1") = v; +ATOMIC64_OP(andnot, stclr) +ATOMIC64_OP(or, stset) +ATOMIC64_OP(xor, steor) +ATOMIC64_OP(add, stadd) - asm volatile(ARM64_LSE_ATOMIC_INSN(__LL_SC_ATOMIC64(add), - " stadd %[i], %[v]\n") - : [i] "+r" (x0), [v] "+Q" (v->counter) - : "r" (x1) - : __LL_SC_CLOBBERS); -} +#undef ATOMIC64_OP #define ATOMIC64_OP_ADD_RETURN(name, mb, cl...) \ static inline long atomic64_add_return##name(long i, atomic64_t *v) \ -- cgit v1.2.3 From 2efe95fe695270ae1a225805f016303505972d86 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Fri, 22 Apr 2016 18:01:33 +0100 Subject: locking/atomic, arch/arm64: Implement atomic{,64}_fetch_{add,sub,and,andnot,or,xor}{,_relaxed,_acquire,_release}() for LSE instructions Implement FETCH-OP atomic primitives, these are very similar to the existing OP-RETURN primitives we already have, except they return the value of the atomic variable _before_ modification. This is especially useful for irreversible operations -- such as bitops (because it becomes impossible to reconstruct the state prior to modification). This patch implements the LSE variants. Signed-off-by: Will Deacon Signed-off-by: Peter Zijlstra (Intel) Cc: Andrew Morton Cc: Ard Biesheuvel Cc: Catalin Marinas Cc: Linus Torvalds Cc: Lorenzo Pieralisi Cc: Paul E. 
McKenney Cc: Peter Zijlstra Cc: Steve Capper Cc: Thomas Gleixner Cc: linux-arch@vger.kernel.org Cc: linux-arm-kernel@lists.infradead.org Cc: linux-kernel@vger.kernel.org Link: http://lkml.kernel.org/r/1461344493-8262-2-git-send-email-will.deacon@arm.com Signed-off-by: Ingo Molnar --- arch/arm64/include/asm/atomic_lse.h | 172 ++++++++++++++++++++++++++++++++++++ 1 file changed, 172 insertions(+) (limited to 'arch') diff --git a/arch/arm64/include/asm/atomic_lse.h b/arch/arm64/include/asm/atomic_lse.h index 37a0f03560f7..b5890be8f257 100644 --- a/arch/arm64/include/asm/atomic_lse.h +++ b/arch/arm64/include/asm/atomic_lse.h @@ -46,6 +46,38 @@ ATOMIC_OP(add, stadd) #undef ATOMIC_OP +#define ATOMIC_FETCH_OP(name, mb, op, asm_op, cl...) \ +static inline int atomic_fetch_##op##name(int i, atomic_t *v) \ +{ \ + register int w0 asm ("w0") = i; \ + register atomic_t *x1 asm ("x1") = v; \ + \ + asm volatile(ARM64_LSE_ATOMIC_INSN( \ + /* LL/SC */ \ + __LL_SC_ATOMIC(fetch_##op##name), \ + /* LSE atomics */ \ +" " #asm_op #mb " %w[i], %w[i], %[v]") \ + : [i] "+r" (w0), [v] "+Q" (v->counter) \ + : "r" (x1) \ + : __LL_SC_CLOBBERS, ##cl); \ + \ + return w0; \ +} + +#define ATOMIC_FETCH_OPS(op, asm_op) \ + ATOMIC_FETCH_OP(_relaxed, , op, asm_op) \ + ATOMIC_FETCH_OP(_acquire, a, op, asm_op, "memory") \ + ATOMIC_FETCH_OP(_release, l, op, asm_op, "memory") \ + ATOMIC_FETCH_OP( , al, op, asm_op, "memory") + +ATOMIC_FETCH_OPS(andnot, ldclr) +ATOMIC_FETCH_OPS(or, ldset) +ATOMIC_FETCH_OPS(xor, ldeor) +ATOMIC_FETCH_OPS(add, ldadd) + +#undef ATOMIC_FETCH_OP +#undef ATOMIC_FETCH_OPS + #define ATOMIC_OP_ADD_RETURN(name, mb, cl...) \ static inline int atomic_add_return##name(int i, atomic_t *v) \ { \ @@ -90,6 +122,33 @@ static inline void atomic_and(int i, atomic_t *v) : __LL_SC_CLOBBERS); } +#define ATOMIC_FETCH_OP_AND(name, mb, cl...) \ +static inline int atomic_fetch_and##name(int i, atomic_t *v) \ +{ \ + register int w0 asm ("w0") = i; \ + register atomic_t *x1 asm ("x1") = v; \ + \ + asm volatile(ARM64_LSE_ATOMIC_INSN( \ + /* LL/SC */ \ + " nop\n" \ + __LL_SC_ATOMIC(fetch_and##name), \ + /* LSE atomics */ \ + " mvn %w[i], %w[i]\n" \ + " ldclr" #mb " %w[i], %w[i], %[v]") \ + : [i] "+r" (w0), [v] "+Q" (v->counter) \ + : "r" (x1) \ + : __LL_SC_CLOBBERS, ##cl); \ + \ + return w0; \ +} + +ATOMIC_FETCH_OP_AND(_relaxed, ) +ATOMIC_FETCH_OP_AND(_acquire, a, "memory") +ATOMIC_FETCH_OP_AND(_release, l, "memory") +ATOMIC_FETCH_OP_AND( , al, "memory") + +#undef ATOMIC_FETCH_OP_AND + static inline void atomic_sub(int i, atomic_t *v) { register int w0 asm ("w0") = i; @@ -135,6 +194,33 @@ ATOMIC_OP_SUB_RETURN(_release, l, "memory") ATOMIC_OP_SUB_RETURN( , al, "memory") #undef ATOMIC_OP_SUB_RETURN + +#define ATOMIC_FETCH_OP_SUB(name, mb, cl...) 
\ +static inline int atomic_fetch_sub##name(int i, atomic_t *v) \ +{ \ + register int w0 asm ("w0") = i; \ + register atomic_t *x1 asm ("x1") = v; \ + \ + asm volatile(ARM64_LSE_ATOMIC_INSN( \ + /* LL/SC */ \ + " nop\n" \ + __LL_SC_ATOMIC(fetch_sub##name), \ + /* LSE atomics */ \ + " neg %w[i], %w[i]\n" \ + " ldadd" #mb " %w[i], %w[i], %[v]") \ + : [i] "+r" (w0), [v] "+Q" (v->counter) \ + : "r" (x1) \ + : __LL_SC_CLOBBERS, ##cl); \ + \ + return w0; \ +} + +ATOMIC_FETCH_OP_SUB(_relaxed, ) +ATOMIC_FETCH_OP_SUB(_acquire, a, "memory") +ATOMIC_FETCH_OP_SUB(_release, l, "memory") +ATOMIC_FETCH_OP_SUB( , al, "memory") + +#undef ATOMIC_FETCH_OP_SUB #undef __LL_SC_ATOMIC #define __LL_SC_ATOMIC64(op) __LL_SC_CALL(atomic64_##op) @@ -158,6 +244,38 @@ ATOMIC64_OP(add, stadd) #undef ATOMIC64_OP +#define ATOMIC64_FETCH_OP(name, mb, op, asm_op, cl...) \ +static inline long atomic64_fetch_##op##name(long i, atomic64_t *v) \ +{ \ + register long x0 asm ("x0") = i; \ + register atomic64_t *x1 asm ("x1") = v; \ + \ + asm volatile(ARM64_LSE_ATOMIC_INSN( \ + /* LL/SC */ \ + __LL_SC_ATOMIC64(fetch_##op##name), \ + /* LSE atomics */ \ +" " #asm_op #mb " %[i], %[i], %[v]") \ + : [i] "+r" (x0), [v] "+Q" (v->counter) \ + : "r" (x1) \ + : __LL_SC_CLOBBERS, ##cl); \ + \ + return x0; \ +} + +#define ATOMIC64_FETCH_OPS(op, asm_op) \ + ATOMIC64_FETCH_OP(_relaxed, , op, asm_op) \ + ATOMIC64_FETCH_OP(_acquire, a, op, asm_op, "memory") \ + ATOMIC64_FETCH_OP(_release, l, op, asm_op, "memory") \ + ATOMIC64_FETCH_OP( , al, op, asm_op, "memory") + +ATOMIC64_FETCH_OPS(andnot, ldclr) +ATOMIC64_FETCH_OPS(or, ldset) +ATOMIC64_FETCH_OPS(xor, ldeor) +ATOMIC64_FETCH_OPS(add, ldadd) + +#undef ATOMIC64_FETCH_OP +#undef ATOMIC64_FETCH_OPS + #define ATOMIC64_OP_ADD_RETURN(name, mb, cl...) \ static inline long atomic64_add_return##name(long i, atomic64_t *v) \ { \ @@ -202,6 +320,33 @@ static inline void atomic64_and(long i, atomic64_t *v) : __LL_SC_CLOBBERS); } +#define ATOMIC64_FETCH_OP_AND(name, mb, cl...) \ +static inline long atomic64_fetch_and##name(long i, atomic64_t *v) \ +{ \ + register long x0 asm ("w0") = i; \ + register atomic64_t *x1 asm ("x1") = v; \ + \ + asm volatile(ARM64_LSE_ATOMIC_INSN( \ + /* LL/SC */ \ + " nop\n" \ + __LL_SC_ATOMIC64(fetch_and##name), \ + /* LSE atomics */ \ + " mvn %[i], %[i]\n" \ + " ldclr" #mb " %[i], %[i], %[v]") \ + : [i] "+r" (x0), [v] "+Q" (v->counter) \ + : "r" (x1) \ + : __LL_SC_CLOBBERS, ##cl); \ + \ + return x0; \ +} + +ATOMIC64_FETCH_OP_AND(_relaxed, ) +ATOMIC64_FETCH_OP_AND(_acquire, a, "memory") +ATOMIC64_FETCH_OP_AND(_release, l, "memory") +ATOMIC64_FETCH_OP_AND( , al, "memory") + +#undef ATOMIC64_FETCH_OP_AND + static inline void atomic64_sub(long i, atomic64_t *v) { register long x0 asm ("x0") = i; @@ -248,6 +393,33 @@ ATOMIC64_OP_SUB_RETURN( , al, "memory") #undef ATOMIC64_OP_SUB_RETURN +#define ATOMIC64_FETCH_OP_SUB(name, mb, cl...) 
\ +static inline long atomic64_fetch_sub##name(long i, atomic64_t *v) \ +{ \ + register long x0 asm ("w0") = i; \ + register atomic64_t *x1 asm ("x1") = v; \ + \ + asm volatile(ARM64_LSE_ATOMIC_INSN( \ + /* LL/SC */ \ + " nop\n" \ + __LL_SC_ATOMIC64(fetch_sub##name), \ + /* LSE atomics */ \ + " neg %[i], %[i]\n" \ + " ldadd" #mb " %[i], %[i], %[v]") \ + : [i] "+r" (x0), [v] "+Q" (v->counter) \ + : "r" (x1) \ + : __LL_SC_CLOBBERS, ##cl); \ + \ + return x0; \ +} + +ATOMIC64_FETCH_OP_SUB(_relaxed, ) +ATOMIC64_FETCH_OP_SUB(_acquire, a, "memory") +ATOMIC64_FETCH_OP_SUB(_release, l, "memory") +ATOMIC64_FETCH_OP_SUB( , al, "memory") + +#undef ATOMIC64_FETCH_OP_SUB + static inline long atomic64_dec_if_positive(atomic64_t *v) { register long x0 asm ("x0") = (long)v; -- cgit v1.2.3 From 1a6eafacd4811cdc1b138faee858527658eee4e1 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 18 Apr 2016 01:16:08 +0200 Subject: locking/atomic, arch/avr32: Implement atomic_fetch_{add,sub,and,or,xor}() Implement FETCH-OP atomic primitives, these are very similar to the existing OP-RETURN primitives we already have, except they return the value of the atomic variable _before_ modification. This is especially useful for irreversible operations -- such as bitops (because it becomes impossible to reconstruct the state prior to modification). Signed-off-by: Peter Zijlstra (Intel) Acked-by: Hans-Christian Noren Egtvedt Cc: Andrew Morton Cc: Haavard Skinnemoen Cc: Hans-Christian Egtvedt Cc: Linus Torvalds Cc: Paul E. McKenney Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-arch@vger.kernel.org Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- arch/avr32/include/asm/atomic.h | 56 +++++++++++++++++++++++++++++++++++++---- 1 file changed, 51 insertions(+), 5 deletions(-) (limited to 'arch') diff --git a/arch/avr32/include/asm/atomic.h b/arch/avr32/include/asm/atomic.h index d74fd8ce980a..b8681fd495ef 100644 --- a/arch/avr32/include/asm/atomic.h +++ b/arch/avr32/include/asm/atomic.h @@ -41,21 +41,51 @@ static inline int __atomic_##op##_return(int i, atomic_t *v) \ return result; \ } +#define ATOMIC_FETCH_OP(op, asm_op, asm_con) \ +static inline int __atomic_fetch_##op(int i, atomic_t *v) \ +{ \ + int result, val; \ + \ + asm volatile( \ + "/* atomic_fetch_" #op " */\n" \ + "1: ssrf 5\n" \ + " ld.w %0, %3\n" \ + " mov %1, %0\n" \ + " " #asm_op " %1, %4\n" \ + " stcond %2, %1\n" \ + " brne 1b" \ + : "=&r" (result), "=&r" (val), "=o" (v->counter) \ + : "m" (v->counter), #asm_con (i) \ + : "cc"); \ + \ + return result; \ +} + ATOMIC_OP_RETURN(sub, sub, rKs21) ATOMIC_OP_RETURN(add, add, r) +ATOMIC_FETCH_OP (sub, sub, rKs21) +ATOMIC_FETCH_OP (add, add, r) + +#define atomic_fetch_or atomic_fetch_or -#define ATOMIC_OP(op, asm_op) \ +#define ATOMIC_OPS(op, asm_op) \ ATOMIC_OP_RETURN(op, asm_op, r) \ static inline void atomic_##op(int i, atomic_t *v) \ { \ (void)__atomic_##op##_return(i, v); \ +} \ +ATOMIC_FETCH_OP(op, asm_op, r) \ +static inline int atomic_fetch_##op(int i, atomic_t *v) \ +{ \ + return __atomic_fetch_##op(i, v); \ } -ATOMIC_OP(and, and) -ATOMIC_OP(or, or) -ATOMIC_OP(xor, eor) +ATOMIC_OPS(and, and) +ATOMIC_OPS(or, or) +ATOMIC_OPS(xor, eor) -#undef ATOMIC_OP +#undef ATOMIC_OPS +#undef ATOMIC_FETCH_OP #undef ATOMIC_OP_RETURN /* @@ -87,6 +117,14 @@ static inline int atomic_add_return(int i, atomic_t *v) return __atomic_add_return(i, v); } +static inline int atomic_fetch_add(int i, atomic_t *v) +{ + if (IS_21BIT_CONST(i)) + return __atomic_fetch_sub(-i, v); + + return __atomic_fetch_add(i, v); +} + /* * 
atomic_sub_return - subtract the atomic variable * @i: integer value to subtract @@ -102,6 +140,14 @@ static inline int atomic_sub_return(int i, atomic_t *v) return __atomic_add_return(-i, v); } +static inline int atomic_fetch_sub(int i, atomic_t *v) +{ + if (IS_21BIT_CONST(i)) + return __atomic_fetch_sub(i, v); + + return __atomic_fetch_add(-i, v); +} + /* * __atomic_add_unless - add unless the number is a given value * @v: pointer of type atomic_t -- cgit v1.2.3 From e87fc0ec070554e34812be68267a9450271868d6 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 18 Apr 2016 01:16:08 +0200 Subject: locking/atomic, arch/blackfin: Implement atomic_fetch_{add,sub,and,or,xor}() Implement FETCH-OP atomic primitives, these are very similar to the existing OP-RETURN primitives we already have, except they return the value of the atomic variable _before_ modification. This is especially useful for irreversible operations -- such as bitops (because it becomes impossible to reconstruct the state prior to modification). Signed-off-by: Peter Zijlstra (Intel) Cc: Andrew Morton Cc: Linus Torvalds Cc: Paul E. McKenney Cc: Peter Zijlstra Cc: Steven Miao Cc: Thomas Gleixner Cc: adi-buildroot-devel@lists.sourceforge.net Cc: linux-arch@vger.kernel.org Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- arch/blackfin/include/asm/atomic.h | 8 +++++++ arch/blackfin/kernel/bfin_ksyms.c | 1 + arch/blackfin/mach-bf561/atomic.S | 43 +++++++++++++++++++++++++++----------- 3 files changed, 40 insertions(+), 12 deletions(-) (limited to 'arch') diff --git a/arch/blackfin/include/asm/atomic.h b/arch/blackfin/include/asm/atomic.h index 1c1c42330c99..63c7deceeeb6 100644 --- a/arch/blackfin/include/asm/atomic.h +++ b/arch/blackfin/include/asm/atomic.h @@ -17,6 +17,7 @@ asmlinkage int __raw_uncached_fetch_asm(const volatile int *ptr); asmlinkage int __raw_atomic_add_asm(volatile int *ptr, int value); +asmlinkage int __raw_atomic_xadd_asm(volatile int *ptr, int value); asmlinkage int __raw_atomic_and_asm(volatile int *ptr, int value); asmlinkage int __raw_atomic_or_asm(volatile int *ptr, int value); @@ -28,10 +29,17 @@ asmlinkage int __raw_atomic_test_asm(const volatile int *ptr, int value); #define atomic_add_return(i, v) __raw_atomic_add_asm(&(v)->counter, i) #define atomic_sub_return(i, v) __raw_atomic_add_asm(&(v)->counter, -(i)) +#define atomic_fetch_add(i, v) __raw_atomic_xadd_asm(&(v)->counter, i) +#define atomic_fetch_sub(i, v) __raw_atomic_xadd_asm(&(v)->counter, -(i)) + #define atomic_or(i, v) (void)__raw_atomic_or_asm(&(v)->counter, i) #define atomic_and(i, v) (void)__raw_atomic_and_asm(&(v)->counter, i) #define atomic_xor(i, v) (void)__raw_atomic_xor_asm(&(v)->counter, i) +#define atomic_fetch_or(i, v) __raw_atomic_or_asm(&(v)->counter, i) +#define atomic_fetch_and(i, v) __raw_atomic_and_asm(&(v)->counter, i) +#define atomic_fetch_xor(i, v) __raw_atomic_xor_asm(&(v)->counter, i) + #endif #include diff --git a/arch/blackfin/kernel/bfin_ksyms.c b/arch/blackfin/kernel/bfin_ksyms.c index a401c27b69b4..68096e8f787f 100644 --- a/arch/blackfin/kernel/bfin_ksyms.c +++ b/arch/blackfin/kernel/bfin_ksyms.c @@ -84,6 +84,7 @@ EXPORT_SYMBOL(insl_16); #ifdef CONFIG_SMP EXPORT_SYMBOL(__raw_atomic_add_asm); +EXPORT_SYMBOL(__raw_atomic_xadd_asm); EXPORT_SYMBOL(__raw_atomic_and_asm); EXPORT_SYMBOL(__raw_atomic_or_asm); EXPORT_SYMBOL(__raw_atomic_xor_asm); diff --git a/arch/blackfin/mach-bf561/atomic.S b/arch/blackfin/mach-bf561/atomic.S index 26fccb5568b9..1e2989c5d6b2 100644 --- a/arch/blackfin/mach-bf561/atomic.S 
+++ b/arch/blackfin/mach-bf561/atomic.S @@ -605,6 +605,28 @@ ENTRY(___raw_atomic_add_asm) rts; ENDPROC(___raw_atomic_add_asm) +/* + * r0 = ptr + * r1 = value + * + * ADD a signed value to a 32bit word and return the old value atomically. + * Clobbers: r3:0, p1:0 + */ +ENTRY(___raw_atomic_xadd_asm) + p1 = r0; + r3 = r1; + [--sp] = rets; + call _get_core_lock; + r3 = [p1]; + r2 = r3 + r2; + [p1] = r2; + r1 = p1; + call _put_core_lock; + r0 = r3; + rets = [sp++]; + rts; +ENDPROC(___raw_atomic_add_asm) + /* * r0 = ptr * r1 = mask @@ -618,10 +640,9 @@ ENTRY(___raw_atomic_and_asm) r3 = r1; [--sp] = rets; call _get_core_lock; - r2 = [p1]; - r3 = r2 & r3; - [p1] = r3; - r3 = r2; + r3 = [p1]; + r2 = r2 & r3; + [p1] = r2; r1 = p1; call _put_core_lock; r0 = r3; @@ -642,10 +663,9 @@ ENTRY(___raw_atomic_or_asm) r3 = r1; [--sp] = rets; call _get_core_lock; - r2 = [p1]; - r3 = r2 | r3; - [p1] = r3; - r3 = r2; + r3 = [p1]; + r2 = r2 | r3; + [p1] = r2; r1 = p1; call _put_core_lock; r0 = r3; @@ -666,10 +686,9 @@ ENTRY(___raw_atomic_xor_asm) r3 = r1; [--sp] = rets; call _get_core_lock; - r2 = [p1]; - r3 = r2 ^ r3; - [p1] = r3; - r3 = r2; + r3 = [p1]; + r2 = r2 ^ r3; + [p1] = r2; r1 = p1; call _put_core_lock; r0 = r3; -- cgit v1.2.3 From d9c730281617e55ca470e66f8e9d7d3f5f420fec Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 18 Apr 2016 01:16:08 +0200 Subject: locking/atomic, arch/frv: Implement atomic{,64}_fetch_{add,sub,and,or,xor}() Implement FETCH-OP atomic primitives, these are very similar to the existing OP-RETURN primitives we already have, except they return the value of the atomic variable _before_ modification. This is especially useful for irreversible operations -- such as bitops (because it becomes impossible to reconstruct the state prior to modification). Signed-off-by: Peter Zijlstra (Intel) Cc: Andrew Morton Cc: Linus Torvalds Cc: Paul E. 
McKenney Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-arch@vger.kernel.org Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- arch/frv/include/asm/atomic.h | 32 ++++++++++++-------------------- arch/frv/include/asm/atomic_defs.h | 2 ++ 2 files changed, 14 insertions(+), 20 deletions(-) (limited to 'arch') diff --git a/arch/frv/include/asm/atomic.h b/arch/frv/include/asm/atomic.h index 64f02d451aa8..e3e06da0cd59 100644 --- a/arch/frv/include/asm/atomic.h +++ b/arch/frv/include/asm/atomic.h @@ -60,16 +60,6 @@ static inline int atomic_add_negative(int i, atomic_t *v) return atomic_add_return(i, v) < 0; } -static inline void atomic_add(int i, atomic_t *v) -{ - atomic_add_return(i, v); -} - -static inline void atomic_sub(int i, atomic_t *v) -{ - atomic_sub_return(i, v); -} - static inline void atomic_inc(atomic_t *v) { atomic_inc_return(v); @@ -84,6 +74,8 @@ static inline void atomic_dec(atomic_t *v) #define atomic_dec_and_test(v) (atomic_sub_return(1, (v)) == 0) #define atomic_inc_and_test(v) (atomic_add_return(1, (v)) == 0) +#define atomic_fetch_or atomic_fetch_or + /* * 64-bit atomic ops */ @@ -136,16 +128,6 @@ static inline long long atomic64_add_negative(long long i, atomic64_t *v) return atomic64_add_return(i, v) < 0; } -static inline void atomic64_add(long long i, atomic64_t *v) -{ - atomic64_add_return(i, v); -} - -static inline void atomic64_sub(long long i, atomic64_t *v) -{ - atomic64_sub_return(i, v); -} - static inline void atomic64_inc(atomic64_t *v) { atomic64_inc_return(v); @@ -182,11 +164,19 @@ static __inline__ int __atomic_add_unless(atomic_t *v, int a, int u) } #define ATOMIC_OP(op) \ +static inline int atomic_fetch_##op(int i, atomic_t *v) \ +{ \ + return __atomic32_fetch_##op(i, &v->counter); \ +} \ static inline void atomic_##op(int i, atomic_t *v) \ { \ (void)__atomic32_fetch_##op(i, &v->counter); \ } \ \ +static inline long long atomic64_fetch_##op(long long i, atomic64_t *v) \ +{ \ + return __atomic64_fetch_##op(i, &v->counter); \ +} \ static inline void atomic64_##op(long long i, atomic64_t *v) \ { \ (void)__atomic64_fetch_##op(i, &v->counter); \ @@ -195,6 +185,8 @@ static inline void atomic64_##op(long long i, atomic64_t *v) \ ATOMIC_OP(or) ATOMIC_OP(and) ATOMIC_OP(xor) +ATOMIC_OP(add) +ATOMIC_OP(sub) #undef ATOMIC_OP diff --git a/arch/frv/include/asm/atomic_defs.h b/arch/frv/include/asm/atomic_defs.h index 36e126d2f801..d4912c88b829 100644 --- a/arch/frv/include/asm/atomic_defs.h +++ b/arch/frv/include/asm/atomic_defs.h @@ -162,6 +162,8 @@ ATOMIC_EXPORT(__atomic64_fetch_##op); ATOMIC_FETCH_OP(or) ATOMIC_FETCH_OP(and) ATOMIC_FETCH_OP(xor) +ATOMIC_FETCH_OP(add) +ATOMIC_FETCH_OP(sub) ATOMIC_OP_RETURN(add) ATOMIC_OP_RETURN(sub) -- cgit v1.2.3 From 0c074cbc33091dd69fe70ec27474d228c3184860 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 18 Apr 2016 01:16:08 +0200 Subject: locking/atomic, arch/h8300: Implement atomic_fetch_{add,sub,and,or,xor}() Implement FETCH-OP atomic primitives, these are very similar to the existing OP-RETURN primitives we already have, except they return the value of the atomic variable _before_ modification. This is especially useful for irreversible operations -- such as bitops (because it becomes impossible to reconstruct the state prior to modification). Signed-off-by: Peter Zijlstra (Intel) Cc: Andrew Morton Cc: Linus Torvalds Cc: Paul E. 
McKenney Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Yoshinori Sato Cc: linux-arch@vger.kernel.org Cc: linux-kernel@vger.kernel.org Cc: uclinux-h8-devel@lists.sourceforge.jp Signed-off-by: Ingo Molnar --- arch/h8300/include/asm/atomic.h | 31 +++++++++++++++++++++++++------ 1 file changed, 25 insertions(+), 6 deletions(-) (limited to 'arch') diff --git a/arch/h8300/include/asm/atomic.h b/arch/h8300/include/asm/atomic.h index 4435a445ae7e..0961b618bdde 100644 --- a/arch/h8300/include/asm/atomic.h +++ b/arch/h8300/include/asm/atomic.h @@ -28,6 +28,19 @@ static inline int atomic_##op##_return(int i, atomic_t *v) \ return ret; \ } +#define ATOMIC_FETCH_OP(op, c_op) \ +static inline int atomic_fetch_##op(int i, atomic_t *v) \ +{ \ + h8300flags flags; \ + int ret; \ + \ + flags = arch_local_irq_save(); \ + ret = v->counter; \ + v->counter c_op i; \ + arch_local_irq_restore(flags); \ + return ret; \ +} + #define ATOMIC_OP(op, c_op) \ static inline void atomic_##op(int i, atomic_t *v) \ { \ @@ -41,17 +54,23 @@ static inline void atomic_##op(int i, atomic_t *v) \ ATOMIC_OP_RETURN(add, +=) ATOMIC_OP_RETURN(sub, -=) -ATOMIC_OP(and, &=) -ATOMIC_OP(or, |=) -ATOMIC_OP(xor, ^=) +#define atomic_fetch_or atomic_fetch_or +#define ATOMIC_OPS(op, c_op) \ + ATOMIC_OP(op, c_op) \ + ATOMIC_FETCH_OP(op, c_op) + +ATOMIC_OPS(and, &=) +ATOMIC_OPS(or, |=) +ATOMIC_OPS(xor, ^=) +ATOMIC_OPS(add, +=) +ATOMIC_OPS(sub, -=) + +#undef ATOMIC_OPS #undef ATOMIC_OP_RETURN #undef ATOMIC_OP -#define atomic_add(i, v) (void)atomic_add_return(i, v) #define atomic_add_negative(a, v) (atomic_add_return((a), (v)) < 0) - -#define atomic_sub(i, v) (void)atomic_sub_return(i, v) #define atomic_sub_and_test(i, v) (atomic_sub_return(i, v) == 0) #define atomic_inc_return(v) atomic_add_return(1, v) -- cgit v1.2.3 From 4be7dd393515615430a4d07ca1ffceaf2a331620 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 18 Apr 2016 01:16:07 +0200 Subject: locking/atomic, arch/hexagon: Implement atomic_fetch_{add,sub,and,or,xor}() Implement FETCH-OP atomic primitives, these are very similar to the existing OP-RETURN primitives we already have, except they return the value of the atomic variable _before_ modification. This is especially useful for irreversible operations -- such as bitops (because it becomes impossible to reconstruct the state prior to modification). Signed-off-by: Peter Zijlstra (Intel) Cc: Andrew Morton Cc: Linus Torvalds Cc: Paul E. 
McKenney Cc: Peter Zijlstra Cc: Richard Kuo Cc: Thomas Gleixner Cc: linux-arch@vger.kernel.org Cc: linux-hexagon@vger.kernel.org Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- arch/hexagon/include/asm/atomic.h | 33 ++++++++++++++++++++++++++++----- 1 file changed, 28 insertions(+), 5 deletions(-) (limited to 'arch') diff --git a/arch/hexagon/include/asm/atomic.h b/arch/hexagon/include/asm/atomic.h index 55696c4100d4..07dbb3332b4a 100644 --- a/arch/hexagon/include/asm/atomic.h +++ b/arch/hexagon/include/asm/atomic.h @@ -110,7 +110,7 @@ static inline void atomic_##op(int i, atomic_t *v) \ ); \ } \ -#define ATOMIC_OP_RETURN(op) \ +#define ATOMIC_OP_RETURN(op) \ static inline int atomic_##op##_return(int i, atomic_t *v) \ { \ int output; \ @@ -127,16 +127,39 @@ static inline int atomic_##op##_return(int i, atomic_t *v) \ return output; \ } -#define ATOMIC_OPS(op) ATOMIC_OP(op) ATOMIC_OP_RETURN(op) +#define ATOMIC_FETCH_OP(op) \ +static inline int atomic_fetch_##op(int i, atomic_t *v) \ +{ \ + int output, val; \ + \ + __asm__ __volatile__ ( \ + "1: %0 = memw_locked(%2);\n" \ + " %1 = "#op "(%0,%3);\n" \ + " memw_locked(%2,P3)=%1;\n" \ + " if !P3 jump 1b;\n" \ + : "=&r" (output), "=&r" (val) \ + : "r" (&v->counter), "r" (i) \ + : "memory", "p3" \ + ); \ + return output; \ +} + +#define ATOMIC_OPS(op) ATOMIC_OP(op) ATOMIC_OP_RETURN(op) ATOMIC_FETCH_OP(op) ATOMIC_OPS(add) ATOMIC_OPS(sub) -ATOMIC_OP(and) -ATOMIC_OP(or) -ATOMIC_OP(xor) +#undef ATOMIC_OPS +#define ATOMIC_OPS(op) ATOMIC_OP(op) ATOMIC_FETCH_OP(op) + +#define atomic_fetch_or atomic_fetch_or + +ATOMIC_OPS(and) +ATOMIC_OPS(or) +ATOMIC_OPS(xor) #undef ATOMIC_OPS +#undef ATOMIC_FETCH_OP #undef ATOMIC_OP_RETURN #undef ATOMIC_OP -- cgit v1.2.3 From cc102507fac75f9f4f37938f49d10c25e596a608 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 18 Apr 2016 01:16:07 +0200 Subject: locking/atomic, arch/ia64: Implement atomic{,64}_fetch_{add,sub,and,or,xor}() Implement FETCH-OP atomic primitives, these are very similar to the existing OP-RETURN primitives we already have, except they return the value of the atomic variable _before_ modification. This is especially useful for irreversible operations -- such as bitops (because it becomes impossible to reconstruct the state prior to modification). Signed-off-by: Peter Zijlstra (Intel) Cc: Andrew Morton Cc: Fenghua Yu Cc: Linus Torvalds Cc: Paul E. 
McKenney Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Tony Luck Cc: linux-arch@vger.kernel.org Cc: linux-ia64@vger.kernel.org Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- arch/ia64/include/asm/atomic.h | 130 ++++++++++++++++++++++++++++++++++++----- 1 file changed, 114 insertions(+), 16 deletions(-) (limited to 'arch') diff --git a/arch/ia64/include/asm/atomic.h b/arch/ia64/include/asm/atomic.h index 8dfb5f6f6c35..f565ad376142 100644 --- a/arch/ia64/include/asm/atomic.h +++ b/arch/ia64/include/asm/atomic.h @@ -42,8 +42,27 @@ ia64_atomic_##op (int i, atomic_t *v) \ return new; \ } -ATOMIC_OP(add, +) -ATOMIC_OP(sub, -) +#define ATOMIC_FETCH_OP(op, c_op) \ +static __inline__ int \ +ia64_atomic_fetch_##op (int i, atomic_t *v) \ +{ \ + __s32 old, new; \ + CMPXCHG_BUGCHECK_DECL \ + \ + do { \ + CMPXCHG_BUGCHECK(v); \ + old = atomic_read(v); \ + new = old c_op i; \ + } while (ia64_cmpxchg(acq, v, old, new, sizeof(atomic_t)) != old); \ + return old; \ +} + +#define ATOMIC_OPS(op, c_op) \ + ATOMIC_OP(op, c_op) \ + ATOMIC_FETCH_OP(op, c_op) + +ATOMIC_OPS(add, +) +ATOMIC_OPS(sub, -) #define atomic_add_return(i,v) \ ({ \ @@ -69,14 +88,44 @@ ATOMIC_OP(sub, -) : ia64_atomic_sub(__ia64_asr_i, v); \ }) -ATOMIC_OP(and, &) -ATOMIC_OP(or, |) -ATOMIC_OP(xor, ^) +#define atomic_fetch_add(i,v) \ +({ \ + int __ia64_aar_i = (i); \ + (__builtin_constant_p(i) \ + && ( (__ia64_aar_i == 1) || (__ia64_aar_i == 4) \ + || (__ia64_aar_i == 8) || (__ia64_aar_i == 16) \ + || (__ia64_aar_i == -1) || (__ia64_aar_i == -4) \ + || (__ia64_aar_i == -8) || (__ia64_aar_i == -16))) \ + ? ia64_fetchadd(__ia64_aar_i, &(v)->counter, acq) \ + : ia64_atomic_fetch_add(__ia64_aar_i, v); \ +}) + +#define atomic_fetch_sub(i,v) \ +({ \ + int __ia64_asr_i = (i); \ + (__builtin_constant_p(i) \ + && ( (__ia64_asr_i == 1) || (__ia64_asr_i == 4) \ + || (__ia64_asr_i == 8) || (__ia64_asr_i == 16) \ + || (__ia64_asr_i == -1) || (__ia64_asr_i == -4) \ + || (__ia64_asr_i == -8) || (__ia64_asr_i == -16))) \ + ? 
ia64_fetchadd(-__ia64_asr_i, &(v)->counter, acq) \ + : ia64_atomic_fetch_sub(__ia64_asr_i, v); \ +}) + +ATOMIC_FETCH_OP(and, &) +ATOMIC_FETCH_OP(or, |) +ATOMIC_FETCH_OP(xor, ^) + +#define atomic_and(i,v) (void)ia64_atomic_fetch_and(i,v) +#define atomic_or(i,v) (void)ia64_atomic_fetch_or(i,v) +#define atomic_xor(i,v) (void)ia64_atomic_fetch_xor(i,v) -#define atomic_and(i,v) (void)ia64_atomic_and(i,v) -#define atomic_or(i,v) (void)ia64_atomic_or(i,v) -#define atomic_xor(i,v) (void)ia64_atomic_xor(i,v) +#define atomic_fetch_and(i,v) ia64_atomic_fetch_and(i,v) +#define atomic_fetch_or(i,v) ia64_atomic_fetch_or(i,v) +#define atomic_fetch_xor(i,v) ia64_atomic_fetch_xor(i,v) +#undef ATOMIC_OPS +#undef ATOMIC_FETCH_OP #undef ATOMIC_OP #define ATOMIC64_OP(op, c_op) \ @@ -94,8 +143,27 @@ ia64_atomic64_##op (__s64 i, atomic64_t *v) \ return new; \ } -ATOMIC64_OP(add, +) -ATOMIC64_OP(sub, -) +#define ATOMIC64_FETCH_OP(op, c_op) \ +static __inline__ long \ +ia64_atomic64_fetch_##op (__s64 i, atomic64_t *v) \ +{ \ + __s64 old, new; \ + CMPXCHG_BUGCHECK_DECL \ + \ + do { \ + CMPXCHG_BUGCHECK(v); \ + old = atomic64_read(v); \ + new = old c_op i; \ + } while (ia64_cmpxchg(acq, v, old, new, sizeof(atomic64_t)) != old); \ + return old; \ +} + +#define ATOMIC64_OPS(op, c_op) \ + ATOMIC64_OP(op, c_op) \ + ATOMIC64_FETCH_OP(op, c_op) + +ATOMIC64_OPS(add, +) +ATOMIC64_OPS(sub, -) #define atomic64_add_return(i,v) \ ({ \ @@ -121,14 +189,44 @@ ATOMIC64_OP(sub, -) : ia64_atomic64_sub(__ia64_asr_i, v); \ }) -ATOMIC64_OP(and, &) -ATOMIC64_OP(or, |) -ATOMIC64_OP(xor, ^) +#define atomic64_fetch_add(i,v) \ +({ \ + long __ia64_aar_i = (i); \ + (__builtin_constant_p(i) \ + && ( (__ia64_aar_i == 1) || (__ia64_aar_i == 4) \ + || (__ia64_aar_i == 8) || (__ia64_aar_i == 16) \ + || (__ia64_aar_i == -1) || (__ia64_aar_i == -4) \ + || (__ia64_aar_i == -8) || (__ia64_aar_i == -16))) \ + ? ia64_fetchadd(__ia64_aar_i, &(v)->counter, acq) \ + : ia64_atomic64_fetch_add(__ia64_aar_i, v); \ +}) + +#define atomic64_fetch_sub(i,v) \ +({ \ + long __ia64_asr_i = (i); \ + (__builtin_constant_p(i) \ + && ( (__ia64_asr_i == 1) || (__ia64_asr_i == 4) \ + || (__ia64_asr_i == 8) || (__ia64_asr_i == 16) \ + || (__ia64_asr_i == -1) || (__ia64_asr_i == -4) \ + || (__ia64_asr_i == -8) || (__ia64_asr_i == -16))) \ + ? 
ia64_fetchadd(-__ia64_asr_i, &(v)->counter, acq) \ + : ia64_atomic64_fetch_sub(__ia64_asr_i, v); \ +}) + +ATOMIC64_FETCH_OP(and, &) +ATOMIC64_FETCH_OP(or, |) +ATOMIC64_FETCH_OP(xor, ^) + +#define atomic64_and(i,v) (void)ia64_atomic64_fetch_and(i,v) +#define atomic64_or(i,v) (void)ia64_atomic64_fetch_or(i,v) +#define atomic64_xor(i,v) (void)ia64_atomic64_fetch_xor(i,v) -#define atomic64_and(i,v) (void)ia64_atomic64_and(i,v) -#define atomic64_or(i,v) (void)ia64_atomic64_or(i,v) -#define atomic64_xor(i,v) (void)ia64_atomic64_xor(i,v) +#define atomic64_fetch_and(i,v) ia64_atomic64_fetch_and(i,v) +#define atomic64_fetch_or(i,v) ia64_atomic64_fetch_or(i,v) +#define atomic64_fetch_xor(i,v) ia64_atomic64_fetch_xor(i,v) +#undef ATOMIC64_OPS +#undef ATOMIC64_FETCH_OP #undef ATOMIC64_OP #define atomic_cmpxchg(v, old, new) (cmpxchg(&((v)->counter), old, new)) -- cgit v1.2.3 From f649370523033c7c2adf16a9d062438c8a7758b3 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 18 Apr 2016 01:16:07 +0200 Subject: locking/atomic, arch/m32r: Implement atomic_fetch_{add,sub,and,or,xor}() Implement FETCH-OP atomic primitives, these are very similar to the existing OP-RETURN primitives we already have, except they return the value of the atomic variable _before_ modification. This is especially useful for irreversible operations -- such as bitops (because it becomes impossible to reconstruct the state prior to modification). Signed-off-by: Peter Zijlstra (Intel) Cc: Andrew Morton Cc: Linus Torvalds Cc: Paul E. McKenney Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-arch@vger.kernel.org Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- arch/m32r/include/asm/atomic.h | 38 ++++++++++++++++++++++++++++++++++---- 1 file changed, 34 insertions(+), 4 deletions(-) (limited to 'arch') diff --git a/arch/m32r/include/asm/atomic.h b/arch/m32r/include/asm/atomic.h index ea35160d632b..8ba8a0ab5d5d 100644 --- a/arch/m32r/include/asm/atomic.h +++ b/arch/m32r/include/asm/atomic.h @@ -89,16 +89,46 @@ static __inline__ int atomic_##op##_return(int i, atomic_t *v) \ return result; \ } -#define ATOMIC_OPS(op) ATOMIC_OP(op) ATOMIC_OP_RETURN(op) +#define ATOMIC_FETCH_OP(op) \ +static __inline__ int atomic_fetch_##op(int i, atomic_t *v) \ +{ \ + unsigned long flags; \ + int result, val; \ + \ + local_irq_save(flags); \ + __asm__ __volatile__ ( \ + "# atomic_fetch_" #op " \n\t" \ + DCACHE_CLEAR("%0", "r4", "%2") \ + M32R_LOCK" %1, @%2; \n\t" \ + "mv %0, %1 \n\t" \ + #op " %1, %3; \n\t" \ + M32R_UNLOCK" %1, @%2; \n\t" \ + : "=&r" (result), "=&r" (val) \ + : "r" (&v->counter), "r" (i) \ + : "memory" \ + __ATOMIC_CLOBBER \ + ); \ + local_irq_restore(flags); \ + \ + return result; \ +} + +#define ATOMIC_OPS(op) ATOMIC_OP(op) ATOMIC_OP_RETURN(op) ATOMIC_FETCH_OP(op) ATOMIC_OPS(add) ATOMIC_OPS(sub) -ATOMIC_OP(and) -ATOMIC_OP(or) -ATOMIC_OP(xor) +#undef ATOMIC_OPS +#define ATOMIC_OPS(op) ATOMIC_OP(op) ATOMIC_FETCH_OP(op) + +#define atomic_fetch_or atomic_fetch_or + +ATOMIC_OPS(and) +ATOMIC_OPS(or) +ATOMIC_OPS(xor) #undef ATOMIC_OPS +#undef ATOMIC_FETCH_OP #undef ATOMIC_OP_RETURN #undef ATOMIC_OP -- cgit v1.2.3 From e39d88ea3ce4a471cd0202f4f2c8f5ee0f8d7f53 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 18 Apr 2016 01:16:06 +0200 Subject: locking/atomic, arch/m68k: Implement atomic_fetch_{add,sub,and,or,xor}() Implement FETCH-OP atomic primitives, these are very similar to the existing OP-RETURN primitives we already have, except they return the value of the atomic variable _before_ modification. 
This is especially useful for irreversible operations -- such as bitops (because it becomes impossible to reconstruct the state prior to modification). Signed-off-by: Peter Zijlstra (Intel) Cc: Andrew Morton Cc: Geert Uytterhoeven Cc: Linus Torvalds Cc: Paul E. McKenney Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-arch@vger.kernel.org Cc: linux-kernel@vger.kernel.org Cc: linux-m68k@lists.linux-m68k.org Signed-off-by: Ingo Molnar --- arch/m68k/include/asm/atomic.h | 53 ++++++++++++++++++++++++++++++++++++++---- 1 file changed, 49 insertions(+), 4 deletions(-) (limited to 'arch') diff --git a/arch/m68k/include/asm/atomic.h b/arch/m68k/include/asm/atomic.h index 4858178260f9..5cf9b3b1b6ac 100644 --- a/arch/m68k/include/asm/atomic.h +++ b/arch/m68k/include/asm/atomic.h @@ -38,6 +38,13 @@ static inline void atomic_##op(int i, atomic_t *v) \ #ifdef CONFIG_RMW_INSNS +/* + * Am I reading these CAS loops right in that %2 is the old value and the first + * iteration uses an uninitialized value? + * + * Would it not make sense to add: tmp = atomic_read(v); to avoid this? + */ + #define ATOMIC_OP_RETURN(op, c_op, asm_op) \ static inline int atomic_##op##_return(int i, atomic_t *v) \ { \ @@ -53,6 +60,21 @@ static inline int atomic_##op##_return(int i, atomic_t *v) \ return t; \ } +#define ATOMIC_FETCH_OP(op, c_op, asm_op) \ +static inline int atomic_fetch_##op(int i, atomic_t *v) \ +{ \ + int t, tmp; \ + \ + __asm__ __volatile__( \ + "1: movel %2,%1\n" \ + " " #asm_op "l %3,%1\n" \ + " casl %2,%1,%0\n" \ + " jne 1b" \ + : "+m" (*v), "=&d" (t), "=&d" (tmp) \ + : "g" (i), "2" (atomic_read(v))); \ + return tmp; \ +} + #else #define ATOMIC_OP_RETURN(op, c_op, asm_op) \ @@ -68,20 +90,43 @@ static inline int atomic_##op##_return(int i, atomic_t * v) \ return t; \ } +#define ATOMIC_FETCH_OP(op, c_op, asm_op) \ +static inline int atomic_fetch_##op(int i, atomic_t * v) \ +{ \ + unsigned long flags; \ + int t; \ + \ + local_irq_save(flags); \ + t = v->counter; \ + v->counter c_op i; \ + local_irq_restore(flags); \ + \ + return t; \ +} + #endif /* CONFIG_RMW_INSNS */ #define ATOMIC_OPS(op, c_op, asm_op) \ ATOMIC_OP(op, c_op, asm_op) \ - ATOMIC_OP_RETURN(op, c_op, asm_op) + ATOMIC_OP_RETURN(op, c_op, asm_op) \ + ATOMIC_FETCH_OP(op, c_op, asm_op) ATOMIC_OPS(add, +=, add) ATOMIC_OPS(sub, -=, sub) -ATOMIC_OP(and, &=, and) -ATOMIC_OP(or, |=, or) -ATOMIC_OP(xor, ^=, eor) +#undef ATOMIC_OPS +#define ATOMIC_OPS(op, c_op, asm_op) \ + ATOMIC_OP(op, c_op, asm_op) \ + ATOMIC_FETCH_OP(op, c_op, asm_op) + +#define atomic_fetch_or atomic_fetch_or + +ATOMIC_OPS(and, &=, and) +ATOMIC_OPS(or, |=, or) +ATOMIC_OPS(xor, ^=, eor) #undef ATOMIC_OPS +#undef ATOMIC_FETCH_OP #undef ATOMIC_OP_RETURN #undef ATOMIC_OP -- cgit v1.2.3 From e898eb27ffd8b0ad6f4fd0b631559bc877c85444 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 18 Apr 2016 01:16:06 +0200 Subject: locking/atomic, arch/metag: Implement atomic_fetch_{add,sub,and,or,xor}() Implement FETCH-OP atomic primitives, these are very similar to the existing OP-RETURN primitives we already have, except they return the value of the atomic variable _before_ modification. This is especially useful for irreversible operations -- such as bitops (because it becomes impossible to reconstruct the state prior to modification). Signed-off-by: Peter Zijlstra (Intel) Acked-by: James Hogan Cc: Andrew Morton Cc: Linus Torvalds Cc: Paul E. 
McKenney Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-arch@vger.kernel.org Cc: linux-kernel@vger.kernel.org Cc: linux-metag@vger.kernel.org Signed-off-by: Ingo Molnar --- arch/metag/include/asm/atomic.h | 2 ++ arch/metag/include/asm/atomic_lnkget.h | 36 ++++++++++++++++++++++++++++++---- arch/metag/include/asm/atomic_lock1.h | 33 +++++++++++++++++++++++++++---- 3 files changed, 63 insertions(+), 8 deletions(-) (limited to 'arch') diff --git a/arch/metag/include/asm/atomic.h b/arch/metag/include/asm/atomic.h index 470e365f04ea..6ca210de8a7d 100644 --- a/arch/metag/include/asm/atomic.h +++ b/arch/metag/include/asm/atomic.h @@ -17,6 +17,8 @@ #include #endif +#define atomic_fetch_or atomic_fetch_or + #define atomic_add_negative(a, v) (atomic_add_return((a), (v)) < 0) #define atomic_dec_return(v) atomic_sub_return(1, (v)) diff --git a/arch/metag/include/asm/atomic_lnkget.h b/arch/metag/include/asm/atomic_lnkget.h index 88fa25fae8bd..def2c642f053 100644 --- a/arch/metag/include/asm/atomic_lnkget.h +++ b/arch/metag/include/asm/atomic_lnkget.h @@ -69,16 +69,44 @@ static inline int atomic_##op##_return(int i, atomic_t *v) \ return result; \ } -#define ATOMIC_OPS(op) ATOMIC_OP(op) ATOMIC_OP_RETURN(op) +#define ATOMIC_FETCH_OP(op) \ +static inline int atomic_fetch_##op(int i, atomic_t *v) \ +{ \ + int result, temp; \ + \ + smp_mb(); \ + \ + asm volatile ( \ + "1: LNKGETD %1, [%2]\n" \ + " " #op " %0, %1, %3\n" \ + " LNKSETD [%2], %0\n" \ + " DEFR %0, TXSTAT\n" \ + " ANDT %0, %0, #HI(0x3f000000)\n" \ + " CMPT %0, #HI(0x02000000)\n" \ + " BNZ 1b\n" \ + : "=&d" (temp), "=&d" (result) \ + : "da" (&v->counter), "bd" (i) \ + : "cc"); \ + \ + smp_mb(); \ + \ + return result; \ +} + +#define ATOMIC_OPS(op) ATOMIC_OP(op) ATOMIC_OP_RETURN(op) ATOMIC_FETCH_OP(op) ATOMIC_OPS(add) ATOMIC_OPS(sub) -ATOMIC_OP(and) -ATOMIC_OP(or) -ATOMIC_OP(xor) +#undef ATOMIC_OPS +#define ATOMIC_OPS(op) ATOMIC_OP(op) ATOMIC_FETCH_OP(op) + +ATOMIC_OPS(and) +ATOMIC_OPS(or) +ATOMIC_OPS(xor) #undef ATOMIC_OPS +#undef ATOMIC_FETCH_OP #undef ATOMIC_OP_RETURN #undef ATOMIC_OP diff --git a/arch/metag/include/asm/atomic_lock1.h b/arch/metag/include/asm/atomic_lock1.h index 0295d9b8d5bf..6c1380a8a0d4 100644 --- a/arch/metag/include/asm/atomic_lock1.h +++ b/arch/metag/include/asm/atomic_lock1.h @@ -64,15 +64,40 @@ static inline int atomic_##op##_return(int i, atomic_t *v) \ return result; \ } -#define ATOMIC_OPS(op, c_op) ATOMIC_OP(op, c_op) ATOMIC_OP_RETURN(op, c_op) +#define ATOMIC_FETCH_OP(op, c_op) \ +static inline int atomic_fetch_##op(int i, atomic_t *v) \ +{ \ + unsigned long result; \ + unsigned long flags; \ + \ + __global_lock1(flags); \ + result = v->counter; \ + fence(); \ + v->counter c_op i; \ + __global_unlock1(flags); \ + \ + return result; \ +} + +#define ATOMIC_OPS(op, c_op) \ + ATOMIC_OP(op, c_op) \ + ATOMIC_OP_RETURN(op, c_op) \ + ATOMIC_FETCH_OP(op, c_op) ATOMIC_OPS(add, +=) ATOMIC_OPS(sub, -=) -ATOMIC_OP(and, &=) -ATOMIC_OP(or, |=) -ATOMIC_OP(xor, ^=) #undef ATOMIC_OPS +#define ATOMIC_OPS(op, c_op) \ + ATOMIC_OP(op, c_op) \ + ATOMIC_FETCH_OP(op, c_op) + +ATOMIC_OPS(and, &=) +ATOMIC_OPS(or, |=) +ATOMIC_OPS(xor, ^=) + +#undef ATOMIC_OPS +#undef ATOMIC_FETCH_OP #undef ATOMIC_OP_RETURN #undef ATOMIC_OP -- cgit v1.2.3 From 4edac529eb629ccd598e2236c61762537f16e883 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 18 Apr 2016 01:16:06 +0200 Subject: locking/atomic, arch/mips: Implement atomic{,64}_fetch_{add,sub,and,or,xor}() Implement FETCH-OP atomic primitives, these are very similar to the existing OP-RETURN 
primitives we already have, except they return the value of the atomic variable _before_ modification. This is especially useful for irreversible operations -- such as bitops (because it becomes impossible to reconstruct the state prior to modification). Signed-off-by: Peter Zijlstra (Intel) Cc: Andrew Morton Cc: Linus Torvalds Cc: Paul E. McKenney Cc: Peter Zijlstra Cc: Ralf Baechle Cc: Thomas Gleixner Cc: linux-arch@vger.kernel.org Cc: linux-kernel@vger.kernel.org Cc: linux-mips@linux-mips.org Signed-off-by: Ingo Molnar --- arch/mips/include/asm/atomic.h | 138 ++++++++++++++++++++++++++++++++++++++--- 1 file changed, 129 insertions(+), 9 deletions(-) (limited to 'arch') diff --git a/arch/mips/include/asm/atomic.h b/arch/mips/include/asm/atomic.h index 835b402e4574..431079f8e483 100644 --- a/arch/mips/include/asm/atomic.h +++ b/arch/mips/include/asm/atomic.h @@ -66,7 +66,7 @@ static __inline__ void atomic_##op(int i, atomic_t * v) \ " " #asm_op " %0, %2 \n" \ " sc %0, %1 \n" \ " .set mips0 \n" \ - : "=&r" (temp), "+" GCC_OFF_SMALL_ASM() (v->counter) \ + : "=&r" (temp), "+" GCC_OFF_SMALL_ASM() (v->counter) \ : "Ir" (i)); \ } while (unlikely(!temp)); \ } else { \ @@ -130,18 +130,78 @@ static __inline__ int atomic_##op##_return(int i, atomic_t * v) \ return result; \ } +#define ATOMIC_FETCH_OP(op, c_op, asm_op) \ +static __inline__ int atomic_fetch_##op(int i, atomic_t * v) \ +{ \ + int result; \ + \ + smp_mb__before_llsc(); \ + \ + if (kernel_uses_llsc && R10000_LLSC_WAR) { \ + int temp; \ + \ + __asm__ __volatile__( \ + " .set arch=r4000 \n" \ + "1: ll %1, %2 # atomic_fetch_" #op " \n" \ + " " #asm_op " %0, %1, %3 \n" \ + " sc %0, %2 \n" \ + " beqzl %0, 1b \n" \ + " move %0, %1 \n" \ + " .set mips0 \n" \ + : "=&r" (result), "=&r" (temp), \ + "+" GCC_OFF_SMALL_ASM() (v->counter) \ + : "Ir" (i)); \ + } else if (kernel_uses_llsc) { \ + int temp; \ + \ + do { \ + __asm__ __volatile__( \ + " .set "MIPS_ISA_LEVEL" \n" \ + " ll %1, %2 # atomic_fetch_" #op " \n" \ + " " #asm_op " %0, %1, %3 \n" \ + " sc %0, %2 \n" \ + " .set mips0 \n" \ + : "=&r" (result), "=&r" (temp), \ + "+" GCC_OFF_SMALL_ASM() (v->counter) \ + : "Ir" (i)); \ + } while (unlikely(!result)); \ + \ + result = temp; \ + } else { \ + unsigned long flags; \ + \ + raw_local_irq_save(flags); \ + result = v->counter; \ + v->counter c_op i; \ + raw_local_irq_restore(flags); \ + } \ + \ + smp_llsc_mb(); \ + \ + return result; \ +} + #define ATOMIC_OPS(op, c_op, asm_op) \ ATOMIC_OP(op, c_op, asm_op) \ - ATOMIC_OP_RETURN(op, c_op, asm_op) + ATOMIC_OP_RETURN(op, c_op, asm_op) \ + ATOMIC_FETCH_OP(op, c_op, asm_op) ATOMIC_OPS(add, +=, addu) ATOMIC_OPS(sub, -=, subu) -ATOMIC_OP(and, &=, and) -ATOMIC_OP(or, |=, or) -ATOMIC_OP(xor, ^=, xor) +#undef ATOMIC_OPS +#define ATOMIC_OPS(op, c_op, asm_op) \ + ATOMIC_OP(op, c_op, asm_op) \ + ATOMIC_FETCH_OP(op, c_op, asm_op) + +#define atomic_fetch_or atomic_fetch_or + +ATOMIC_OPS(and, &=, and) +ATOMIC_OPS(or, |=, or) +ATOMIC_OPS(xor, ^=, xor) #undef ATOMIC_OPS +#undef ATOMIC_FETCH_OP #undef ATOMIC_OP_RETURN #undef ATOMIC_OP @@ -414,17 +474,77 @@ static __inline__ long atomic64_##op##_return(long i, atomic64_t * v) \ return result; \ } +#define ATOMIC64_FETCH_OP(op, c_op, asm_op) \ +static __inline__ long atomic64_fetch_##op(long i, atomic64_t * v) \ +{ \ + long result; \ + \ + smp_mb__before_llsc(); \ + \ + if (kernel_uses_llsc && R10000_LLSC_WAR) { \ + long temp; \ + \ + __asm__ __volatile__( \ + " .set arch=r4000 \n" \ + "1: lld %1, %2 # atomic64_fetch_" #op "\n" \ + " " #asm_op " %0, %1, %3 \n" \ + " 
scd %0, %2 \n" \ + " beqzl %0, 1b \n" \ + " move %0, %1 \n" \ + " .set mips0 \n" \ + : "=&r" (result), "=&r" (temp), \ + "+" GCC_OFF_SMALL_ASM() (v->counter) \ + : "Ir" (i)); \ + } else if (kernel_uses_llsc) { \ + long temp; \ + \ + do { \ + __asm__ __volatile__( \ + " .set "MIPS_ISA_LEVEL" \n" \ + " lld %1, %2 # atomic64_fetch_" #op "\n" \ + " " #asm_op " %0, %1, %3 \n" \ + " scd %0, %2 \n" \ + " .set mips0 \n" \ + : "=&r" (result), "=&r" (temp), \ + "=" GCC_OFF_SMALL_ASM() (v->counter) \ + : "Ir" (i), GCC_OFF_SMALL_ASM() (v->counter) \ + : "memory"); \ + } while (unlikely(!result)); \ + \ + result = temp; \ + } else { \ + unsigned long flags; \ + \ + raw_local_irq_save(flags); \ + result = v->counter; \ + v->counter c_op i; \ + raw_local_irq_restore(flags); \ + } \ + \ + smp_llsc_mb(); \ + \ + return result; \ +} + #define ATOMIC64_OPS(op, c_op, asm_op) \ ATOMIC64_OP(op, c_op, asm_op) \ - ATOMIC64_OP_RETURN(op, c_op, asm_op) + ATOMIC64_OP_RETURN(op, c_op, asm_op) \ + ATOMIC64_FETCH_OP(op, c_op, asm_op) ATOMIC64_OPS(add, +=, daddu) ATOMIC64_OPS(sub, -=, dsubu) -ATOMIC64_OP(and, &=, and) -ATOMIC64_OP(or, |=, or) -ATOMIC64_OP(xor, ^=, xor) #undef ATOMIC64_OPS +#define ATOMIC64_OPS(op, c_op, asm_op) \ + ATOMIC64_OP(op, c_op, asm_op) \ + ATOMIC64_FETCH_OP(op, c_op, asm_op) + +ATOMIC64_OPS(and, &=, and) +ATOMIC64_OPS(or, |=, or) +ATOMIC64_OPS(xor, ^=, xor) + +#undef ATOMIC64_OPS +#undef ATOMIC64_FETCH_OP #undef ATOMIC64_OP_RETURN #undef ATOMIC64_OP -- cgit v1.2.3 From f8d638e28d7cc858066d2de484d9719dc181593a Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 18 Apr 2016 01:16:05 +0200 Subject: locking/atomic, arch/mn10300: Implement atomic_fetch_{add,sub,and,or,xor}() Implement FETCH-OP atomic primitives, these are very similar to the existing OP-RETURN primitives we already have, except they return the value of the atomic variable _before_ modification. This is especially useful for irreversible operations -- such as bitops (because it becomes impossible to reconstruct the state prior to modification). Signed-off-by: Peter Zijlstra (Intel) Cc: Andrew Morton Cc: David Howells Cc: Linus Torvalds Cc: Paul E. 
McKenney Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-am33-list@redhat.com Cc: linux-arch@vger.kernel.org Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- arch/mn10300/include/asm/atomic.h | 35 +++++++++++++++++++++++++++++++---- 1 file changed, 31 insertions(+), 4 deletions(-) (limited to 'arch') diff --git a/arch/mn10300/include/asm/atomic.h b/arch/mn10300/include/asm/atomic.h index ce318d5ab23b..3580f789f3a6 100644 --- a/arch/mn10300/include/asm/atomic.h +++ b/arch/mn10300/include/asm/atomic.h @@ -84,16 +84,43 @@ static inline int atomic_##op##_return(int i, atomic_t *v) \ return retval; \ } -#define ATOMIC_OPS(op) ATOMIC_OP(op) ATOMIC_OP_RETURN(op) +#define ATOMIC_FETCH_OP(op) \ +static inline int atomic_fetch_##op(int i, atomic_t *v) \ +{ \ + int retval, status; \ + \ + asm volatile( \ + "1: mov %4,(_AAR,%3) \n" \ + " mov (_ADR,%3),%1 \n" \ + " mov %1,%0 \n" \ + " " #op " %5,%0 \n" \ + " mov %0,(_ADR,%3) \n" \ + " mov (_ADR,%3),%0 \n" /* flush */ \ + " mov (_ASR,%3),%0 \n" \ + " or %0,%0 \n" \ + " bne 1b \n" \ + : "=&r"(status), "=&r"(retval), "=m"(v->counter) \ + : "a"(ATOMIC_OPS_BASE_ADDR), "r"(&v->counter), "r"(i) \ + : "memory", "cc"); \ + return retval; \ +} + +#define ATOMIC_OPS(op) ATOMIC_OP(op) ATOMIC_OP_RETURN(op) ATOMIC_FETCH_OP(op) ATOMIC_OPS(add) ATOMIC_OPS(sub) -ATOMIC_OP(and) -ATOMIC_OP(or) -ATOMIC_OP(xor) +#undef ATOMIC_OPS +#define ATOMIC_OPS(op) ATOMIC_OP(op) ATOMIC_FETCH_OP(op) + +#define atomic_fetch_or atomic_fetch_or + +ATOMIC_OPS(and) +ATOMIC_OPS(or) +ATOMIC_OPS(xor) #undef ATOMIC_OPS +#undef ATOMIC_FETCH_OP #undef ATOMIC_OP_RETURN #undef ATOMIC_OP -- cgit v1.2.3 From e5857a6ed6004cac5273b8cdc189ab4b6363cfaf Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 18 Apr 2016 01:16:05 +0200 Subject: locking/atomic, arch/parisc: Implement atomic{,64}_fetch_{add,sub,and,or,xor}() Implement FETCH-OP atomic primitives, these are very similar to the existing OP-RETURN primitives we already have, except they return the value of the atomic variable _before_ modification. This is especially useful for irreversible operations -- such as bitops (because it becomes impossible to reconstruct the state prior to modification). Signed-off-by: Peter Zijlstra (Intel) Cc: Andrew Morton Cc: Helge Deller Cc: James E.J. Bottomley Cc: Linus Torvalds Cc: Paul E. 
McKenney Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-arch@vger.kernel.org Cc: linux-kernel@vger.kernel.org Cc: linux-parisc@vger.kernel.org Signed-off-by: Ingo Molnar --- arch/parisc/include/asm/atomic.h | 65 +++++++++++++++++++++++++++++++++++----- 1 file changed, 57 insertions(+), 8 deletions(-) (limited to 'arch') diff --git a/arch/parisc/include/asm/atomic.h b/arch/parisc/include/asm/atomic.h index 1d109990a022..29df1f871910 100644 --- a/arch/parisc/include/asm/atomic.h +++ b/arch/parisc/include/asm/atomic.h @@ -121,16 +121,41 @@ static __inline__ int atomic_##op##_return(int i, atomic_t *v) \ return ret; \ } -#define ATOMIC_OPS(op, c_op) ATOMIC_OP(op, c_op) ATOMIC_OP_RETURN(op, c_op) +#define ATOMIC_FETCH_OP(op, c_op) \ +static __inline__ int atomic_fetch_##op(int i, atomic_t *v) \ +{ \ + unsigned long flags; \ + int ret; \ + \ + _atomic_spin_lock_irqsave(v, flags); \ + ret = v->counter; \ + v->counter c_op i; \ + _atomic_spin_unlock_irqrestore(v, flags); \ + \ + return ret; \ +} + +#define ATOMIC_OPS(op, c_op) \ + ATOMIC_OP(op, c_op) \ + ATOMIC_OP_RETURN(op, c_op) \ + ATOMIC_FETCH_OP(op, c_op) ATOMIC_OPS(add, +=) ATOMIC_OPS(sub, -=) -ATOMIC_OP(and, &=) -ATOMIC_OP(or, |=) -ATOMIC_OP(xor, ^=) +#undef ATOMIC_OPS +#define ATOMIC_OPS(op, c_op) \ + ATOMIC_OP(op, c_op) \ + ATOMIC_FETCH_OP(op, c_op) + +#define atomic_fetch_or atomic_fetch_or + +ATOMIC_OPS(and, &=) +ATOMIC_OPS(or, |=) +ATOMIC_OPS(xor, ^=) #undef ATOMIC_OPS +#undef ATOMIC_FETCH_OP #undef ATOMIC_OP_RETURN #undef ATOMIC_OP @@ -185,15 +210,39 @@ static __inline__ s64 atomic64_##op##_return(s64 i, atomic64_t *v) \ return ret; \ } -#define ATOMIC64_OPS(op, c_op) ATOMIC64_OP(op, c_op) ATOMIC64_OP_RETURN(op, c_op) +#define ATOMIC64_FETCH_OP(op, c_op) \ +static __inline__ s64 atomic64_fetch_##op(s64 i, atomic64_t *v) \ +{ \ + unsigned long flags; \ + s64 ret; \ + \ + _atomic_spin_lock_irqsave(v, flags); \ + ret = v->counter; \ + v->counter c_op i; \ + _atomic_spin_unlock_irqrestore(v, flags); \ + \ + return ret; \ +} + +#define ATOMIC64_OPS(op, c_op) \ + ATOMIC64_OP(op, c_op) \ + ATOMIC64_OP_RETURN(op, c_op) \ + ATOMIC64_FETCH_OP(op, c_op) ATOMIC64_OPS(add, +=) ATOMIC64_OPS(sub, -=) -ATOMIC64_OP(and, &=) -ATOMIC64_OP(or, |=) -ATOMIC64_OP(xor, ^=) #undef ATOMIC64_OPS +#define ATOMIC64_OPS(op, c_op) \ + ATOMIC64_OP(op, c_op) \ + ATOMIC64_FETCH_OP(op, c_op) + +ATOMIC64_OPS(and, &=) +ATOMIC64_OPS(or, |=) +ATOMIC64_OPS(xor, ^=) + +#undef ATOMIC64_OPS +#undef ATOMIC64_FETCH_OP #undef ATOMIC64_OP_RETURN #undef ATOMIC64_OP -- cgit v1.2.3 From a28cc7bbe8e30ee573e1a27e704558f0862d8c6d Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 18 Apr 2016 01:16:05 +0200 Subject: locking/atomic, arch/powerpc: Implement atomic{,64}_fetch_{add,sub,and,or,xor}{,_relaxed,_acquire,_release}() Implement FETCH-OP atomic primitives, these are very similar to the existing OP-RETURN primitives we already have, except they return the value of the atomic variable _before_ modification. This is especially useful for irreversible operations -- such as bitops (because it becomes impossible to reconstruct the state prior to modification). Tested-by: Boqun Feng Signed-off-by: Peter Zijlstra (Intel) Cc: Andrew Morton Cc: Benjamin Herrenschmidt Cc: Linus Torvalds Cc: Michael Ellerman Cc: Paul E. 
McKenney Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-arch@vger.kernel.org Cc: linux-kernel@vger.kernel.org Cc: linuxppc-dev@lists.ozlabs.org Signed-off-by: Ingo Molnar --- arch/powerpc/include/asm/atomic.h | 83 ++++++++++++++++++++++++++++++++++----- 1 file changed, 74 insertions(+), 9 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/include/asm/atomic.h b/arch/powerpc/include/asm/atomic.h index ae0751ef8788..f08d567e0ca4 100644 --- a/arch/powerpc/include/asm/atomic.h +++ b/arch/powerpc/include/asm/atomic.h @@ -78,21 +78,53 @@ static inline int atomic_##op##_return_relaxed(int a, atomic_t *v) \ return t; \ } +#define ATOMIC_FETCH_OP_RELAXED(op, asm_op) \ +static inline int atomic_fetch_##op##_relaxed(int a, atomic_t *v) \ +{ \ + int res, t; \ + \ + __asm__ __volatile__( \ +"1: lwarx %0,0,%4 # atomic_fetch_" #op "_relaxed\n" \ + #asm_op " %1,%3,%0\n" \ + PPC405_ERR77(0, %4) \ +" stwcx. %1,0,%4\n" \ +" bne- 1b\n" \ + : "=&r" (res), "=&r" (t), "+m" (v->counter) \ + : "r" (a), "r" (&v->counter) \ + : "cc"); \ + \ + return res; \ +} + #define ATOMIC_OPS(op, asm_op) \ ATOMIC_OP(op, asm_op) \ - ATOMIC_OP_RETURN_RELAXED(op, asm_op) + ATOMIC_OP_RETURN_RELAXED(op, asm_op) \ + ATOMIC_FETCH_OP_RELAXED(op, asm_op) ATOMIC_OPS(add, add) ATOMIC_OPS(sub, subf) -ATOMIC_OP(and, and) -ATOMIC_OP(or, or) -ATOMIC_OP(xor, xor) - #define atomic_add_return_relaxed atomic_add_return_relaxed #define atomic_sub_return_relaxed atomic_sub_return_relaxed +#define atomic_fetch_add_relaxed atomic_fetch_add_relaxed +#define atomic_fetch_sub_relaxed atomic_fetch_sub_relaxed + +#undef ATOMIC_OPS +#define ATOMIC_OPS(op, asm_op) \ + ATOMIC_OP(op, asm_op) \ + ATOMIC_FETCH_OP_RELAXED(op, asm_op) + +ATOMIC_OPS(and, and) +ATOMIC_OPS(or, or) +ATOMIC_OPS(xor, xor) + +#define atomic_fetch_and_relaxed atomic_fetch_and_relaxed +#define atomic_fetch_or_relaxed atomic_fetch_or_relaxed +#define atomic_fetch_xor_relaxed atomic_fetch_xor_relaxed + #undef ATOMIC_OPS +#undef ATOMIC_FETCH_OP_RELAXED #undef ATOMIC_OP_RETURN_RELAXED #undef ATOMIC_OP @@ -329,20 +361,53 @@ atomic64_##op##_return_relaxed(long a, atomic64_t *v) \ return t; \ } +#define ATOMIC64_FETCH_OP_RELAXED(op, asm_op) \ +static inline long \ +atomic64_fetch_##op##_relaxed(long a, atomic64_t *v) \ +{ \ + long res, t; \ + \ + __asm__ __volatile__( \ +"1: ldarx %0,0,%4 # atomic64_fetch_" #op "_relaxed\n" \ + #asm_op " %1,%3,%0\n" \ +" stdcx. 
%1,0,%4\n" \ +" bne- 1b\n" \ + : "=&r" (res), "=&r" (t), "+m" (v->counter) \ + : "r" (a), "r" (&v->counter) \ + : "cc"); \ + \ + return res; \ +} + #define ATOMIC64_OPS(op, asm_op) \ ATOMIC64_OP(op, asm_op) \ - ATOMIC64_OP_RETURN_RELAXED(op, asm_op) + ATOMIC64_OP_RETURN_RELAXED(op, asm_op) \ + ATOMIC64_FETCH_OP_RELAXED(op, asm_op) ATOMIC64_OPS(add, add) ATOMIC64_OPS(sub, subf) -ATOMIC64_OP(and, and) -ATOMIC64_OP(or, or) -ATOMIC64_OP(xor, xor) #define atomic64_add_return_relaxed atomic64_add_return_relaxed #define atomic64_sub_return_relaxed atomic64_sub_return_relaxed +#define atomic64_fetch_add_relaxed atomic64_fetch_add_relaxed +#define atomic64_fetch_sub_relaxed atomic64_fetch_sub_relaxed + +#undef ATOMIC64_OPS +#define ATOMIC64_OPS(op, asm_op) \ + ATOMIC64_OP(op, asm_op) \ + ATOMIC64_FETCH_OP_RELAXED(op, asm_op) + +ATOMIC64_OPS(and, and) +ATOMIC64_OPS(or, or) +ATOMIC64_OPS(xor, xor) + +#define atomic64_fetch_and_relaxed atomic64_fetch_and_relaxed +#define atomic64_fetch_or_relaxed atomic64_fetch_or_relaxed +#define atomic64_fetch_xor_relaxed atomic64_fetch_xor_relaxed + #undef ATOPIC64_OPS +#undef ATOMIC64_FETCH_OP_RELAXED #undef ATOMIC64_OP_RETURN_RELAXED #undef ATOMIC64_OP -- cgit v1.2.3 From 56fefbbc3f13ad8cc9f502dbc6b5c9ddc8c4395e Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 18 Apr 2016 01:16:05 +0200 Subject: locking/atomic, arch/s390: Implement atomic{,64}_fetch_{add,sub,and,or,xor}() Implement FETCH-OP atomic primitives, these are very similar to the existing OP-RETURN primitives we already have, except they return the value of the atomic variable _before_ modification. This is especially useful for irreversible operations -- such as bitops (because it becomes impossible to reconstruct the state prior to modification). Signed-off-by: Peter Zijlstra (Intel) Acked-by: Martin Schwidefsky Cc: Andrew Morton Cc: Heiko Carstens Cc: Linus Torvalds Cc: Paul E. 
McKenney Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-arch@vger.kernel.org Cc: linux-kernel@vger.kernel.org Cc: linux-s390@vger.kernel.org Signed-off-by: Ingo Molnar --- arch/s390/include/asm/atomic.h | 42 ++++++++++++++++++++++++++++++++---------- 1 file changed, 32 insertions(+), 10 deletions(-) (limited to 'arch') diff --git a/arch/s390/include/asm/atomic.h b/arch/s390/include/asm/atomic.h index 911064aa59b2..2324e759b544 100644 --- a/arch/s390/include/asm/atomic.h +++ b/arch/s390/include/asm/atomic.h @@ -93,6 +93,11 @@ static inline int atomic_add_return(int i, atomic_t *v) return __ATOMIC_LOOP(v, i, __ATOMIC_ADD, __ATOMIC_BARRIER) + i; } +static inline int atomic_fetch_add(int i, atomic_t *v) +{ + return __ATOMIC_LOOP(v, i, __ATOMIC_ADD, __ATOMIC_BARRIER); +} + static inline void atomic_add(int i, atomic_t *v) { #ifdef CONFIG_HAVE_MARCH_Z196_FEATURES @@ -114,22 +119,29 @@ static inline void atomic_add(int i, atomic_t *v) #define atomic_inc_and_test(_v) (atomic_add_return(1, _v) == 0) #define atomic_sub(_i, _v) atomic_add(-(int)(_i), _v) #define atomic_sub_return(_i, _v) atomic_add_return(-(int)(_i), _v) +#define atomic_fetch_sub(_i, _v) atomic_fetch_add(-(int)(_i), _v) #define atomic_sub_and_test(_i, _v) (atomic_sub_return(_i, _v) == 0) #define atomic_dec(_v) atomic_sub(1, _v) #define atomic_dec_return(_v) atomic_sub_return(1, _v) #define atomic_dec_and_test(_v) (atomic_sub_return(1, _v) == 0) -#define ATOMIC_OP(op, OP) \ +#define ATOMIC_OPS(op, OP) \ static inline void atomic_##op(int i, atomic_t *v) \ { \ __ATOMIC_LOOP(v, i, __ATOMIC_##OP, __ATOMIC_NO_BARRIER); \ +} \ +static inline int atomic_fetch_##op(int i, atomic_t *v) \ +{ \ + return __ATOMIC_LOOP(v, i, __ATOMIC_##OP, __ATOMIC_BARRIER); \ } -ATOMIC_OP(and, AND) -ATOMIC_OP(or, OR) -ATOMIC_OP(xor, XOR) +#define atomic_fetch_or atomic_fetch_or + +ATOMIC_OPS(and, AND) +ATOMIC_OPS(or, OR) +ATOMIC_OPS(xor, XOR) -#undef ATOMIC_OP +#undef ATOMIC_OPS #define atomic_xchg(v, new) (xchg(&((v)->counter), new)) @@ -236,6 +248,11 @@ static inline long long atomic64_add_return(long long i, atomic64_t *v) return __ATOMIC64_LOOP(v, i, __ATOMIC64_ADD, __ATOMIC64_BARRIER) + i; } +static inline long long atomic64_fetch_add(long long i, atomic64_t *v) +{ + return __ATOMIC64_LOOP(v, i, __ATOMIC64_ADD, __ATOMIC64_BARRIER); +} + static inline void atomic64_add(long long i, atomic64_t *v) { #ifdef CONFIG_HAVE_MARCH_Z196_FEATURES @@ -264,17 +281,21 @@ static inline long long atomic64_cmpxchg(atomic64_t *v, return old; } -#define ATOMIC64_OP(op, OP) \ +#define ATOMIC64_OPS(op, OP) \ static inline void atomic64_##op(long i, atomic64_t *v) \ { \ __ATOMIC64_LOOP(v, i, __ATOMIC64_##OP, __ATOMIC64_NO_BARRIER); \ +} \ +static inline long atomic64_fetch_##op(long i, atomic64_t *v) \ +{ \ + return __ATOMIC64_LOOP(v, i, __ATOMIC64_##OP, __ATOMIC64_BARRIER); \ } -ATOMIC64_OP(and, AND) -ATOMIC64_OP(or, OR) -ATOMIC64_OP(xor, XOR) +ATOMIC64_OPS(and, AND) +ATOMIC64_OPS(or, OR) +ATOMIC64_OPS(xor, XOR) -#undef ATOMIC64_OP +#undef ATOMIC64_OPS #undef __ATOMIC64_LOOP static inline int atomic64_add_unless(atomic64_t *v, long long i, long long u) @@ -315,6 +336,7 @@ static inline long long atomic64_dec_if_positive(atomic64_t *v) #define atomic64_inc_return(_v) atomic64_add_return(1, _v) #define atomic64_inc_and_test(_v) (atomic64_add_return(1, _v) == 0) #define atomic64_sub_return(_i, _v) atomic64_add_return(-(long long)(_i), _v) +#define atomic64_fetch_sub(_i, _v) atomic64_fetch_add(-(long long)(_i), _v) #define atomic64_sub(_i, _v) atomic64_add(-(long long)(_i), _v) 
#define atomic64_sub_and_test(_i, _v) (atomic64_sub_return(_i, _v) == 0) #define atomic64_dec(_v) atomic64_sub(1, _v) -- cgit v1.2.3 From 7d9794e7523798e1b9422ad9f4e4d808ae5d5932 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 18 Apr 2016 01:16:04 +0200 Subject: locking/atomic, arch/sh: Implement atomic_fetch_{add,sub,and,or,xor}() Implement FETCH-OP atomic primitives, these are very similar to the existing OP-RETURN primitives we already have, except they return the value of the atomic variable _before_ modification. This is especially useful for irreversible operations -- such as bitops (because it becomes impossible to reconstruct the state prior to modification). Signed-off-by: Peter Zijlstra (Intel) Cc: Andrew Morton Cc: Linus Torvalds Cc: Paul E. McKenney Cc: Peter Zijlstra Cc: Rich Felker Cc: Thomas Gleixner Cc: Yoshinori Sato Cc: linux-arch@vger.kernel.org Cc: linux-kernel@vger.kernel.org Cc: linux-sh@vger.kernel.org Signed-off-by: Ingo Molnar --- arch/sh/include/asm/atomic-grb.h | 34 ++++++++++++++++++++++++++++++---- arch/sh/include/asm/atomic-irq.h | 31 +++++++++++++++++++++++++++---- arch/sh/include/asm/atomic-llsc.h | 32 ++++++++++++++++++++++++++++---- arch/sh/include/asm/atomic.h | 2 ++ 4 files changed, 87 insertions(+), 12 deletions(-) (limited to 'arch') diff --git a/arch/sh/include/asm/atomic-grb.h b/arch/sh/include/asm/atomic-grb.h index b94df40e5f2d..d755e96c3064 100644 --- a/arch/sh/include/asm/atomic-grb.h +++ b/arch/sh/include/asm/atomic-grb.h @@ -43,16 +43,42 @@ static inline int atomic_##op##_return(int i, atomic_t *v) \ return tmp; \ } -#define ATOMIC_OPS(op) ATOMIC_OP(op) ATOMIC_OP_RETURN(op) +#define ATOMIC_FETCH_OP(op) \ +static inline int atomic_fetch_##op(int i, atomic_t *v) \ +{ \ + int res, tmp; \ + \ + __asm__ __volatile__ ( \ + " .align 2 \n\t" \ + " mova 1f, r0 \n\t" /* r0 = end point */ \ + " mov r15, r1 \n\t" /* r1 = saved sp */ \ + " mov #-6, r15 \n\t" /* LOGIN: r15 = size */ \ + " mov.l @%2, %0 \n\t" /* load old value */ \ + " mov %0, %1 \n\t" /* save old value */ \ + " " #op " %3, %0 \n\t" /* $op */ \ + " mov.l %0, @%2 \n\t" /* store new value */ \ + "1: mov r1, r15 \n\t" /* LOGOUT */ \ + : "=&r" (tmp), "=&r" (res), "+r" (v) \ + : "r" (i) \ + : "memory" , "r0", "r1"); \ + \ + return res; \ +} + +#define ATOMIC_OPS(op) ATOMIC_OP(op) ATOMIC_OP_RETURN(op) ATOMIC_FETCH_OP(op) ATOMIC_OPS(add) ATOMIC_OPS(sub) -ATOMIC_OP(and) -ATOMIC_OP(or) -ATOMIC_OP(xor) +#undef ATOMIC_OPS +#define ATOMIC_OPS(op) ATOMIC_OP(op) ATOMIC_FETCH_OP(op) + +ATOMIC_OPS(and) +ATOMIC_OPS(or) +ATOMIC_OPS(xor) #undef ATOMIC_OPS +#undef ATOMIC_FETCH_OP #undef ATOMIC_OP_RETURN #undef ATOMIC_OP diff --git a/arch/sh/include/asm/atomic-irq.h b/arch/sh/include/asm/atomic-irq.h index 23fcdad5773e..8e2da5fa0178 100644 --- a/arch/sh/include/asm/atomic-irq.h +++ b/arch/sh/include/asm/atomic-irq.h @@ -33,15 +33,38 @@ static inline int atomic_##op##_return(int i, atomic_t *v) \ return temp; \ } -#define ATOMIC_OPS(op, c_op) ATOMIC_OP(op, c_op) ATOMIC_OP_RETURN(op, c_op) +#define ATOMIC_FETCH_OP(op, c_op) \ +static inline int atomic_fetch_##op(int i, atomic_t *v) \ +{ \ + unsigned long temp, flags; \ + \ + raw_local_irq_save(flags); \ + temp = v->counter; \ + v->counter c_op i; \ + raw_local_irq_restore(flags); \ + \ + return temp; \ +} + +#define ATOMIC_OPS(op, c_op) \ + ATOMIC_OP(op, c_op) \ + ATOMIC_OP_RETURN(op, c_op) \ + ATOMIC_FETCH_OP(op, c_op) ATOMIC_OPS(add, +=) ATOMIC_OPS(sub, -=) -ATOMIC_OP(and, &=) -ATOMIC_OP(or, |=) -ATOMIC_OP(xor, ^=) #undef ATOMIC_OPS +#define 
ATOMIC_OPS(op, c_op) \ + ATOMIC_OP(op, c_op) \ + ATOMIC_FETCH_OP(op, c_op) + +ATOMIC_OPS(and, &=) +ATOMIC_OPS(or, |=) +ATOMIC_OPS(xor, ^=) + +#undef ATOMIC_OPS +#undef ATOMIC_FETCH_OP #undef ATOMIC_OP_RETURN #undef ATOMIC_OP diff --git a/arch/sh/include/asm/atomic-llsc.h b/arch/sh/include/asm/atomic-llsc.h index 33d34b16d4d6..caea2c45f6c2 100644 --- a/arch/sh/include/asm/atomic-llsc.h +++ b/arch/sh/include/asm/atomic-llsc.h @@ -48,15 +48,39 @@ static inline int atomic_##op##_return(int i, atomic_t *v) \ return temp; \ } -#define ATOMIC_OPS(op) ATOMIC_OP(op) ATOMIC_OP_RETURN(op) +#define ATOMIC_FETCH_OP(op) \ +static inline int atomic_fetch_##op(int i, atomic_t *v) \ +{ \ + unsigned long res, temp; \ + \ + __asm__ __volatile__ ( \ +"1: movli.l @%3, %0 ! atomic_fetch_" #op " \n" \ +" mov %0, %1 \n" \ +" " #op " %2, %0 \n" \ +" movco.l %0, @%3 \n" \ +" bf 1b \n" \ +" synco \n" \ + : "=&z" (temp), "=&z" (res) \ + : "r" (i), "r" (&v->counter) \ + : "t"); \ + \ + return res; \ +} + +#define ATOMIC_OPS(op) ATOMIC_OP(op) ATOMIC_OP_RETURN(op) ATOMIC_FETCH_OP(op) ATOMIC_OPS(add) ATOMIC_OPS(sub) -ATOMIC_OP(and) -ATOMIC_OP(or) -ATOMIC_OP(xor) #undef ATOMIC_OPS +#define ATOMIC_OPS(op) ATOMIC_OP(op) ATOMIC_FETCH_OP(op) + +ATOMIC_OPS(and) +ATOMIC_OPS(or) +ATOMIC_OPS(xor) + +#undef ATOMIC_OPS +#undef ATOMIC_FETCH_OP #undef ATOMIC_OP_RETURN #undef ATOMIC_OP diff --git a/arch/sh/include/asm/atomic.h b/arch/sh/include/asm/atomic.h index c399e1c55685..d93ed7ce1b2f 100644 --- a/arch/sh/include/asm/atomic.h +++ b/arch/sh/include/asm/atomic.h @@ -25,6 +25,8 @@ #include #endif +#define atomic_fetch_or atomic_fetch_or + #define atomic_add_negative(a, v) (atomic_add_return((a), (v)) < 0) #define atomic_dec_return(v) atomic_sub_return(1, (v)) #define atomic_inc_return(v) atomic_add_return(1, (v)) -- cgit v1.2.3 From 3a1adb23a52c920304239efff377d3bc967febc2 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 18 Apr 2016 01:16:04 +0200 Subject: locking/atomic, arch/sparc: Implement atomic{,64}_fetch_{add,sub,and,or,xor}() Implement FETCH-OP atomic primitives, these are very similar to the existing OP-RETURN primitives we already have, except they return the value of the atomic variable _before_ modification. This is especially useful for irreversible operations -- such as bitops (because it becomes impossible to reconstruct the state prior to modification). Signed-off-by: Peter Zijlstra (Intel) Acked-by: David S. Miller Cc: Andrew Morton Cc: James Y Knight Cc: Linus Torvalds Cc: Paul E. 
McKenney Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-arch@vger.kernel.org Cc: linux-kernel@vger.kernel.org Cc: sparclinux@vger.kernel.org Signed-off-by: Ingo Molnar --- arch/sparc/include/asm/atomic.h | 1 + arch/sparc/include/asm/atomic_32.h | 15 ++++++++-- arch/sparc/include/asm/atomic_64.h | 16 +++++++--- arch/sparc/lib/atomic32.c | 29 ++++++++++-------- arch/sparc/lib/atomic_64.S | 61 +++++++++++++++++++++++++++++++------- arch/sparc/lib/ksyms.c | 17 ++++++++--- 6 files changed, 105 insertions(+), 34 deletions(-) (limited to 'arch') diff --git a/arch/sparc/include/asm/atomic.h b/arch/sparc/include/asm/atomic.h index 8ff83d8cc33f..1f741bcc73b7 100644 --- a/arch/sparc/include/asm/atomic.h +++ b/arch/sparc/include/asm/atomic.h @@ -5,4 +5,5 @@ #else #include #endif +#define atomic_fetch_or atomic_fetch_or #endif diff --git a/arch/sparc/include/asm/atomic_32.h b/arch/sparc/include/asm/atomic_32.h index 7dcbebbcaec6..5cfb20a599d9 100644 --- a/arch/sparc/include/asm/atomic_32.h +++ b/arch/sparc/include/asm/atomic_32.h @@ -20,9 +20,10 @@ #define ATOMIC_INIT(i) { (i) } int atomic_add_return(int, atomic_t *); -void atomic_and(int, atomic_t *); -void atomic_or(int, atomic_t *); -void atomic_xor(int, atomic_t *); +int atomic_fetch_add(int, atomic_t *); +int atomic_fetch_and(int, atomic_t *); +int atomic_fetch_or(int, atomic_t *); +int atomic_fetch_xor(int, atomic_t *); int atomic_cmpxchg(atomic_t *, int, int); int atomic_xchg(atomic_t *, int); int __atomic_add_unless(atomic_t *, int, int); @@ -35,7 +36,15 @@ void atomic_set(atomic_t *, int); #define atomic_inc(v) ((void)atomic_add_return( 1, (v))) #define atomic_dec(v) ((void)atomic_add_return( -1, (v))) +#define atomic_fetch_or atomic_fetch_or + +#define atomic_and(i, v) ((void)atomic_fetch_and((i), (v))) +#define atomic_or(i, v) ((void)atomic_fetch_or((i), (v))) +#define atomic_xor(i, v) ((void)atomic_fetch_xor((i), (v))) + #define atomic_sub_return(i, v) (atomic_add_return(-(int)(i), (v))) +#define atomic_fetch_sub(i, v) (atomic_fetch_add (-(int)(i), (v))) + #define atomic_inc_return(v) (atomic_add_return( 1, (v))) #define atomic_dec_return(v) (atomic_add_return( -1, (v))) diff --git a/arch/sparc/include/asm/atomic_64.h b/arch/sparc/include/asm/atomic_64.h index f2fbf9e16faf..24827a3f733a 100644 --- a/arch/sparc/include/asm/atomic_64.h +++ b/arch/sparc/include/asm/atomic_64.h @@ -28,16 +28,24 @@ void atomic64_##op(long, atomic64_t *); int atomic_##op##_return(int, atomic_t *); \ long atomic64_##op##_return(long, atomic64_t *); -#define ATOMIC_OPS(op) ATOMIC_OP(op) ATOMIC_OP_RETURN(op) +#define ATOMIC_FETCH_OP(op) \ +int atomic_fetch_##op(int, atomic_t *); \ +long atomic64_fetch_##op(long, atomic64_t *); + +#define ATOMIC_OPS(op) ATOMIC_OP(op) ATOMIC_OP_RETURN(op) ATOMIC_FETCH_OP(op) ATOMIC_OPS(add) ATOMIC_OPS(sub) -ATOMIC_OP(and) -ATOMIC_OP(or) -ATOMIC_OP(xor) +#undef ATOMIC_OPS +#define ATOMIC_OPS(op) ATOMIC_OP(op) ATOMIC_FETCH_OP(op) + +ATOMIC_OPS(and) +ATOMIC_OPS(or) +ATOMIC_OPS(xor) #undef ATOMIC_OPS +#undef ATOMIC_FETCH_OP #undef ATOMIC_OP_RETURN #undef ATOMIC_OP diff --git a/arch/sparc/lib/atomic32.c b/arch/sparc/lib/atomic32.c index b9d63c0a7aab..2c373329d5cb 100644 --- a/arch/sparc/lib/atomic32.c +++ b/arch/sparc/lib/atomic32.c @@ -27,39 +27,44 @@ static DEFINE_SPINLOCK(dummy); #endif /* SMP */ -#define ATOMIC_OP_RETURN(op, c_op) \ -int atomic_##op##_return(int i, atomic_t *v) \ +#define ATOMIC_FETCH_OP(op, c_op) \ +int atomic_fetch_##op(int i, atomic_t *v) \ { \ int ret; \ unsigned long flags; \ spin_lock_irqsave(ATOMIC_HASH(v), 
flags); \ \ - ret = (v->counter c_op i); \ + ret = v->counter; \ + v->counter c_op i; \ \ spin_unlock_irqrestore(ATOMIC_HASH(v), flags); \ return ret; \ } \ -EXPORT_SYMBOL(atomic_##op##_return); +EXPORT_SYMBOL(atomic_fetch_##op); -#define ATOMIC_OP(op, c_op) \ -void atomic_##op(int i, atomic_t *v) \ +#define ATOMIC_OP_RETURN(op, c_op) \ +int atomic_##op##_return(int i, atomic_t *v) \ { \ + int ret; \ unsigned long flags; \ spin_lock_irqsave(ATOMIC_HASH(v), flags); \ \ - v->counter c_op i; \ + ret = (v->counter c_op i); \ \ spin_unlock_irqrestore(ATOMIC_HASH(v), flags); \ + return ret; \ } \ -EXPORT_SYMBOL(atomic_##op); +EXPORT_SYMBOL(atomic_##op##_return); ATOMIC_OP_RETURN(add, +=) -ATOMIC_OP(and, &=) -ATOMIC_OP(or, |=) -ATOMIC_OP(xor, ^=) +ATOMIC_FETCH_OP(add, +=) +ATOMIC_FETCH_OP(and, &=) +ATOMIC_FETCH_OP(or, |=) +ATOMIC_FETCH_OP(xor, ^=) + +#undef ATOMIC_FETCH_OP #undef ATOMIC_OP_RETURN -#undef ATOMIC_OP int atomic_xchg(atomic_t *v, int new) { diff --git a/arch/sparc/lib/atomic_64.S b/arch/sparc/lib/atomic_64.S index d6b0363f345b..a5c5a0279ccc 100644 --- a/arch/sparc/lib/atomic_64.S +++ b/arch/sparc/lib/atomic_64.S @@ -9,10 +9,11 @@ .text - /* Two versions of the atomic routines, one that + /* Three versions of the atomic routines, one that * does not return a value and does not perform - * memory barriers, and a second which returns - * a value and does the barriers. + * memory barriers, and a two which return + * a value, the new and old value resp. and does the + * barriers. */ #define ATOMIC_OP(op) \ @@ -43,15 +44,34 @@ ENTRY(atomic_##op##_return) /* %o0 = increment, %o1 = atomic_ptr */ \ 2: BACKOFF_SPIN(%o2, %o3, 1b); \ ENDPROC(atomic_##op##_return); -#define ATOMIC_OPS(op) ATOMIC_OP(op) ATOMIC_OP_RETURN(op) +#define ATOMIC_FETCH_OP(op) \ +ENTRY(atomic_fetch_##op) /* %o0 = increment, %o1 = atomic_ptr */ \ + BACKOFF_SETUP(%o2); \ +1: lduw [%o1], %g1; \ + op %g1, %o0, %g7; \ + cas [%o1], %g1, %g7; \ + cmp %g1, %g7; \ + bne,pn %icc, BACKOFF_LABEL(2f, 1b); \ + nop; \ + retl; \ + sra %g1, 0, %o0; \ +2: BACKOFF_SPIN(%o2, %o3, 1b); \ +ENDPROC(atomic_fetch_##op); + +#define ATOMIC_OPS(op) ATOMIC_OP(op) ATOMIC_OP_RETURN(op) ATOMIC_FETCH_OP(op) ATOMIC_OPS(add) ATOMIC_OPS(sub) -ATOMIC_OP(and) -ATOMIC_OP(or) -ATOMIC_OP(xor) #undef ATOMIC_OPS +#define ATOMIC_OPS(op) ATOMIC_OP(op) ATOMIC_FETCH_OP(op) + +ATOMIC_OPS(and) +ATOMIC_OPS(or) +ATOMIC_OPS(xor) + +#undef ATOMIC_OPS +#undef ATOMIC_FETCH_OP #undef ATOMIC_OP_RETURN #undef ATOMIC_OP @@ -83,15 +103,34 @@ ENTRY(atomic64_##op##_return) /* %o0 = increment, %o1 = atomic_ptr */ \ 2: BACKOFF_SPIN(%o2, %o3, 1b); \ ENDPROC(atomic64_##op##_return); -#define ATOMIC64_OPS(op) ATOMIC64_OP(op) ATOMIC64_OP_RETURN(op) +#define ATOMIC64_FETCH_OP(op) \ +ENTRY(atomic64_fetch_##op) /* %o0 = increment, %o1 = atomic_ptr */ \ + BACKOFF_SETUP(%o2); \ +1: ldx [%o1], %g1; \ + op %g1, %o0, %g7; \ + casx [%o1], %g1, %g7; \ + cmp %g1, %g7; \ + bne,pn %xcc, BACKOFF_LABEL(2f, 1b); \ + nop; \ + retl; \ + mov %g1, %o0; \ +2: BACKOFF_SPIN(%o2, %o3, 1b); \ +ENDPROC(atomic64_fetch_##op); + +#define ATOMIC64_OPS(op) ATOMIC64_OP(op) ATOMIC64_OP_RETURN(op) ATOMIC64_FETCH_OP(op) ATOMIC64_OPS(add) ATOMIC64_OPS(sub) -ATOMIC64_OP(and) -ATOMIC64_OP(or) -ATOMIC64_OP(xor) #undef ATOMIC64_OPS +#define ATOMIC64_OPS(op) ATOMIC64_OP(op) ATOMIC64_FETCH_OP(op) + +ATOMIC64_OPS(and) +ATOMIC64_OPS(or) +ATOMIC64_OPS(xor) + +#undef ATOMIC64_OPS +#undef ATOMIC64_FETCH_OP #undef ATOMIC64_OP_RETURN #undef ATOMIC64_OP diff --git a/arch/sparc/lib/ksyms.c b/arch/sparc/lib/ksyms.c index 
8eb454cfe05c..de5e97817bdb 100644 --- a/arch/sparc/lib/ksyms.c +++ b/arch/sparc/lib/ksyms.c @@ -107,15 +107,24 @@ EXPORT_SYMBOL(atomic64_##op); EXPORT_SYMBOL(atomic_##op##_return); \ EXPORT_SYMBOL(atomic64_##op##_return); -#define ATOMIC_OPS(op) ATOMIC_OP(op) ATOMIC_OP_RETURN(op) +#define ATOMIC_FETCH_OP(op) \ +EXPORT_SYMBOL(atomic_fetch_##op); \ +EXPORT_SYMBOL(atomic64_fetch_##op); + +#define ATOMIC_OPS(op) ATOMIC_OP(op) ATOMIC_OP_RETURN(op) ATOMIC_FETCH_OP(op) ATOMIC_OPS(add) ATOMIC_OPS(sub) -ATOMIC_OP(and) -ATOMIC_OP(or) -ATOMIC_OP(xor) #undef ATOMIC_OPS +#define ATOMIC_OPS(op) ATOMIC_OP(op) ATOMIC_FETCH_OP(op) + +ATOMIC_OPS(and) +ATOMIC_OPS(or) +ATOMIC_OPS(xor) + +#undef ATOMIC_OPS +#undef ATOMIC_FETCH_OP #undef ATOMIC_OP_RETURN #undef ATOMIC_OP -- cgit v1.2.3 From 1af5de9af138941fb8638cf126293b16f3387de4 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 18 Apr 2016 01:16:03 +0200 Subject: locking/atomic, arch/tile: Implement atomic{,64}_fetch_{add,sub,and,or,xor}() Implement FETCH-OP atomic primitives, these are very similar to the existing OP-RETURN primitives we already have, except they return the value of the atomic variable _before_ modification. This is especially useful for irreversible operations -- such as bitops (because it becomes impossible to reconstruct the state prior to modification). Signed-off-by: Peter Zijlstra (Intel) Acked-by: Chris Metcalf Cc: Andrew Morton Cc: Linus Torvalds Cc: Paul E. McKenney Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-arch@vger.kernel.org Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- arch/tile/include/asm/atomic.h | 4 ++ arch/tile/include/asm/atomic_32.h | 60 ++++++++++++++------ arch/tile/include/asm/atomic_64.h | 115 +++++++++++++++++++++++++------------- arch/tile/include/asm/bitops_32.h | 18 +++--- arch/tile/lib/atomic_32.c | 42 +++++++------- arch/tile/lib/atomic_asm_32.S | 14 ++--- 6 files changed, 159 insertions(+), 94 deletions(-) (limited to 'arch') diff --git a/arch/tile/include/asm/atomic.h b/arch/tile/include/asm/atomic.h index 9fc0107a9c5e..9807030557c4 100644 --- a/arch/tile/include/asm/atomic.h +++ b/arch/tile/include/asm/atomic.h @@ -46,6 +46,10 @@ static inline int atomic_read(const atomic_t *v) */ #define atomic_sub_return(i, v) atomic_add_return((int)(-(i)), (v)) +#define atomic_fetch_sub(i, v) atomic_fetch_add(-(int)(i), (v)) + +#define atomic_fetch_or atomic_fetch_or + /** * atomic_sub - subtract integer from atomic variable * @i: integer value to subtract diff --git a/arch/tile/include/asm/atomic_32.h b/arch/tile/include/asm/atomic_32.h index d320ce253d86..da8eb4ed3752 100644 --- a/arch/tile/include/asm/atomic_32.h +++ b/arch/tile/include/asm/atomic_32.h @@ -34,18 +34,29 @@ static inline void atomic_add(int i, atomic_t *v) _atomic_xchg_add(&v->counter, i); } -#define ATOMIC_OP(op) \ -unsigned long _atomic_##op(volatile unsigned long *p, unsigned long mask); \ +#define ATOMIC_OPS(op) \ +unsigned long _atomic_fetch_##op(volatile unsigned long *p, unsigned long mask); \ static inline void atomic_##op(int i, atomic_t *v) \ { \ - _atomic_##op((unsigned long *)&v->counter, i); \ + _atomic_fetch_##op((unsigned long *)&v->counter, i); \ +} \ +static inline int atomic_fetch_##op(int i, atomic_t *v) \ +{ \ + smp_mb(); \ + return _atomic_fetch_##op((unsigned long *)&v->counter, i); \ } -ATOMIC_OP(and) -ATOMIC_OP(or) -ATOMIC_OP(xor) +ATOMIC_OPS(and) +ATOMIC_OPS(or) +ATOMIC_OPS(xor) + +#undef ATOMIC_OPS -#undef ATOMIC_OP +static inline int atomic_fetch_add(int i, atomic_t *v) +{ + smp_mb(); + return 
_atomic_xchg_add(&v->counter, i); +} /** * atomic_add_return - add integer and return @@ -126,17 +137,30 @@ static inline void atomic64_add(long long i, atomic64_t *v) _atomic64_xchg_add(&v->counter, i); } -#define ATOMIC64_OP(op) \ -long long _atomic64_##op(long long *v, long long n); \ +#define ATOMIC64_OPS(op) \ +long long _atomic64_fetch_##op(long long *v, long long n); \ +static inline void atomic64_##op(long long i, atomic64_t *v) \ +{ \ + _atomic64_fetch_##op(&v->counter, i); \ +} \ static inline void atomic64_##op(long long i, atomic64_t *v) \ { \ - _atomic64_##op(&v->counter, i); \ + smp_mb(); \ + return _atomic64_fetch_##op(&v->counter, i); \ } ATOMIC64_OP(and) ATOMIC64_OP(or) ATOMIC64_OP(xor) +#undef ATOMIC64_OPS + +static inline long long atomic64_fetch_add(long long i, atomic64_t *v) +{ + smp_mb(); + return _atomic64_xchg_add(&v->counter, i); +} + /** * atomic64_add_return - add integer and return * @v: pointer of type atomic64_t @@ -186,6 +210,7 @@ static inline void atomic64_set(atomic64_t *v, long long n) #define atomic64_inc_return(v) atomic64_add_return(1LL, (v)) #define atomic64_inc_and_test(v) (atomic64_inc_return(v) == 0) #define atomic64_sub_return(i, v) atomic64_add_return(-(i), (v)) +#define atomic64_fetch_sub(i, v) atomic64_fetch_add(-(i), (v)) #define atomic64_sub_and_test(a, v) (atomic64_sub_return((a), (v)) == 0) #define atomic64_sub(i, v) atomic64_add(-(i), (v)) #define atomic64_dec(v) atomic64_sub(1LL, (v)) @@ -193,7 +218,6 @@ static inline void atomic64_set(atomic64_t *v, long long n) #define atomic64_dec_and_test(v) (atomic64_dec_return((v)) == 0) #define atomic64_inc_not_zero(v) atomic64_add_unless((v), 1LL, 0LL) - #endif /* !__ASSEMBLY__ */ /* @@ -248,10 +272,10 @@ extern struct __get_user __atomic_xchg(volatile int *p, int *lock, int n); extern struct __get_user __atomic_xchg_add(volatile int *p, int *lock, int n); extern struct __get_user __atomic_xchg_add_unless(volatile int *p, int *lock, int o, int n); -extern struct __get_user __atomic_or(volatile int *p, int *lock, int n); -extern struct __get_user __atomic_and(volatile int *p, int *lock, int n); -extern struct __get_user __atomic_andn(volatile int *p, int *lock, int n); -extern struct __get_user __atomic_xor(volatile int *p, int *lock, int n); +extern struct __get_user __atomic_fetch_or(volatile int *p, int *lock, int n); +extern struct __get_user __atomic_fetch_and(volatile int *p, int *lock, int n); +extern struct __get_user __atomic_fetch_andn(volatile int *p, int *lock, int n); +extern struct __get_user __atomic_fetch_xor(volatile int *p, int *lock, int n); extern long long __atomic64_cmpxchg(volatile long long *p, int *lock, long long o, long long n); extern long long __atomic64_xchg(volatile long long *p, int *lock, long long n); @@ -259,9 +283,9 @@ extern long long __atomic64_xchg_add(volatile long long *p, int *lock, long long n); extern long long __atomic64_xchg_add_unless(volatile long long *p, int *lock, long long o, long long n); -extern long long __atomic64_and(volatile long long *p, int *lock, long long n); -extern long long __atomic64_or(volatile long long *p, int *lock, long long n); -extern long long __atomic64_xor(volatile long long *p, int *lock, long long n); +extern long long __atomic64_fetch_and(volatile long long *p, int *lock, long long n); +extern long long __atomic64_fetch_or(volatile long long *p, int *lock, long long n); +extern long long __atomic64_fetch_xor(volatile long long *p, int *lock, long long n); /* Return failure from the atomic wrappers. 
*/ struct __get_user __atomic_bad_address(int __user *addr); diff --git a/arch/tile/include/asm/atomic_64.h b/arch/tile/include/asm/atomic_64.h index b0531a623653..4cefa0c9fd81 100644 --- a/arch/tile/include/asm/atomic_64.h +++ b/arch/tile/include/asm/atomic_64.h @@ -32,11 +32,6 @@ * on any routine which updates memory and returns a value. */ -static inline void atomic_add(int i, atomic_t *v) -{ - __insn_fetchadd4((void *)&v->counter, i); -} - /* * Note a subtlety of the locking here. We are required to provide a * full memory barrier before and after the operation. However, we @@ -59,28 +54,39 @@ static inline int atomic_add_return(int i, atomic_t *v) return val; } -static inline int __atomic_add_unless(atomic_t *v, int a, int u) +#define ATOMIC_OPS(op) \ +static inline int atomic_fetch_##op(int i, atomic_t *v) \ +{ \ + int val; \ + smp_mb(); \ + val = __insn_fetch##op##4((void *)&v->counter, i); \ + smp_mb(); \ + return val; \ +} \ +static inline void atomic_##op(int i, atomic_t *v) \ +{ \ + __insn_fetch##op##4((void *)&v->counter, i); \ +} + +ATOMIC_OPS(add) +ATOMIC_OPS(and) +ATOMIC_OPS(or) + +#undef ATOMIC_OPS + +static inline int atomic_fetch_xor(int i, atomic_t *v) { int guess, oldval = v->counter; + smp_mb(); do { - if (oldval == u) - break; guess = oldval; - oldval = cmpxchg(&v->counter, guess, guess + a); + __insn_mtspr(SPR_CMPEXCH_VALUE, guess); + oldval = __insn_cmpexch4(&v->counter, guess ^ i); } while (guess != oldval); + smp_mb(); return oldval; } -static inline void atomic_and(int i, atomic_t *v) -{ - __insn_fetchand4((void *)&v->counter, i); -} - -static inline void atomic_or(int i, atomic_t *v) -{ - __insn_fetchor4((void *)&v->counter, i); -} - static inline void atomic_xor(int i, atomic_t *v) { int guess, oldval = v->counter; @@ -91,6 +97,18 @@ static inline void atomic_xor(int i, atomic_t *v) } while (guess != oldval); } +static inline int __atomic_add_unless(atomic_t *v, int a, int u) +{ + int guess, oldval = v->counter; + do { + if (oldval == u) + break; + guess = oldval; + oldval = cmpxchg(&v->counter, guess, guess + a); + } while (guess != oldval); + return oldval; +} + /* Now the true 64-bit operations. 
*/ #define ATOMIC64_INIT(i) { (i) } @@ -98,11 +116,6 @@ static inline void atomic_xor(int i, atomic_t *v) #define atomic64_read(v) READ_ONCE((v)->counter) #define atomic64_set(v, i) WRITE_ONCE((v)->counter, (i)) -static inline void atomic64_add(long i, atomic64_t *v) -{ - __insn_fetchadd((void *)&v->counter, i); -} - static inline long atomic64_add_return(long i, atomic64_t *v) { int val; @@ -112,26 +125,37 @@ static inline long atomic64_add_return(long i, atomic64_t *v) return val; } -static inline long atomic64_add_unless(atomic64_t *v, long a, long u) +#define ATOMIC64_OPS(op) \ +static inline long atomic64_fetch_##op(long i, atomic64_t *v) \ +{ \ + long val; \ + smp_mb(); \ + val = __insn_fetch##op((void *)&v->counter, i); \ + smp_mb(); \ + return val; \ +} \ +static inline void atomic64_##op(long i, atomic64_t *v) \ +{ \ + __insn_fetch##op((void *)&v->counter, i); \ +} + +ATOMIC64_OPS(add) +ATOMIC64_OPS(and) +ATOMIC64_OPS(or) + +#undef ATOMIC64_OPS + +static inline long atomic64_fetch_xor(long i, atomic64_t *v) { long guess, oldval = v->counter; + smp_mb(); do { - if (oldval == u) - break; guess = oldval; - oldval = cmpxchg(&v->counter, guess, guess + a); + __insn_mtspr(SPR_CMPEXCH_VALUE, guess); + oldval = __insn_cmpexch(&v->counter, guess ^ i); } while (guess != oldval); - return oldval != u; -} - -static inline void atomic64_and(long i, atomic64_t *v) -{ - __insn_fetchand((void *)&v->counter, i); -} - -static inline void atomic64_or(long i, atomic64_t *v) -{ - __insn_fetchor((void *)&v->counter, i); + smp_mb(); + return oldval; } static inline void atomic64_xor(long i, atomic64_t *v) @@ -144,7 +168,20 @@ static inline void atomic64_xor(long i, atomic64_t *v) } while (guess != oldval); } +static inline long atomic64_add_unless(atomic64_t *v, long a, long u) +{ + long guess, oldval = v->counter; + do { + if (oldval == u) + break; + guess = oldval; + oldval = cmpxchg(&v->counter, guess, guess + a); + } while (guess != oldval); + return oldval != u; +} + #define atomic64_sub_return(i, v) atomic64_add_return(-(i), (v)) +#define atomic64_fetch_sub(i, v) atomic64_fetch_add(-(i), (v)) #define atomic64_sub(i, v) atomic64_add(-(i), (v)) #define atomic64_inc_return(v) atomic64_add_return(1, (v)) #define atomic64_dec_return(v) atomic64_sub_return(1, (v)) diff --git a/arch/tile/include/asm/bitops_32.h b/arch/tile/include/asm/bitops_32.h index bbf7b666f21d..d1406a95f6b7 100644 --- a/arch/tile/include/asm/bitops_32.h +++ b/arch/tile/include/asm/bitops_32.h @@ -19,9 +19,9 @@ #include /* Tile-specific routines to support . 
*/ -unsigned long _atomic_or(volatile unsigned long *p, unsigned long mask); -unsigned long _atomic_andn(volatile unsigned long *p, unsigned long mask); -unsigned long _atomic_xor(volatile unsigned long *p, unsigned long mask); +unsigned long _atomic_fetch_or(volatile unsigned long *p, unsigned long mask); +unsigned long _atomic_fetch_andn(volatile unsigned long *p, unsigned long mask); +unsigned long _atomic_fetch_xor(volatile unsigned long *p, unsigned long mask); /** * set_bit - Atomically set a bit in memory @@ -35,7 +35,7 @@ unsigned long _atomic_xor(volatile unsigned long *p, unsigned long mask); */ static inline void set_bit(unsigned nr, volatile unsigned long *addr) { - _atomic_or(addr + BIT_WORD(nr), BIT_MASK(nr)); + _atomic_fetch_or(addr + BIT_WORD(nr), BIT_MASK(nr)); } /** @@ -54,7 +54,7 @@ static inline void set_bit(unsigned nr, volatile unsigned long *addr) */ static inline void clear_bit(unsigned nr, volatile unsigned long *addr) { - _atomic_andn(addr + BIT_WORD(nr), BIT_MASK(nr)); + _atomic_fetch_andn(addr + BIT_WORD(nr), BIT_MASK(nr)); } /** @@ -69,7 +69,7 @@ static inline void clear_bit(unsigned nr, volatile unsigned long *addr) */ static inline void change_bit(unsigned nr, volatile unsigned long *addr) { - _atomic_xor(addr + BIT_WORD(nr), BIT_MASK(nr)); + _atomic_fetch_xor(addr + BIT_WORD(nr), BIT_MASK(nr)); } /** @@ -85,7 +85,7 @@ static inline int test_and_set_bit(unsigned nr, volatile unsigned long *addr) unsigned long mask = BIT_MASK(nr); addr += BIT_WORD(nr); smp_mb(); /* barrier for proper semantics */ - return (_atomic_or(addr, mask) & mask) != 0; + return (_atomic_fetch_or(addr, mask) & mask) != 0; } /** @@ -101,7 +101,7 @@ static inline int test_and_clear_bit(unsigned nr, volatile unsigned long *addr) unsigned long mask = BIT_MASK(nr); addr += BIT_WORD(nr); smp_mb(); /* barrier for proper semantics */ - return (_atomic_andn(addr, mask) & mask) != 0; + return (_atomic_fetch_andn(addr, mask) & mask) != 0; } /** @@ -118,7 +118,7 @@ static inline int test_and_change_bit(unsigned nr, unsigned long mask = BIT_MASK(nr); addr += BIT_WORD(nr); smp_mb(); /* barrier for proper semantics */ - return (_atomic_xor(addr, mask) & mask) != 0; + return (_atomic_fetch_xor(addr, mask) & mask) != 0; } #include diff --git a/arch/tile/lib/atomic_32.c b/arch/tile/lib/atomic_32.c index 298df1e9912a..5b6bd932c9c7 100644 --- a/arch/tile/lib/atomic_32.c +++ b/arch/tile/lib/atomic_32.c @@ -88,29 +88,29 @@ int _atomic_cmpxchg(int *v, int o, int n) } EXPORT_SYMBOL(_atomic_cmpxchg); -unsigned long _atomic_or(volatile unsigned long *p, unsigned long mask) +unsigned long _atomic_fetch_or(volatile unsigned long *p, unsigned long mask) { - return __atomic_or((int *)p, __atomic_setup(p), mask).val; + return __atomic_fetch_or((int *)p, __atomic_setup(p), mask).val; } -EXPORT_SYMBOL(_atomic_or); +EXPORT_SYMBOL(_atomic_fetch_or); -unsigned long _atomic_and(volatile unsigned long *p, unsigned long mask) +unsigned long _atomic_fetch_and(volatile unsigned long *p, unsigned long mask) { - return __atomic_and((int *)p, __atomic_setup(p), mask).val; + return __atomic_fetch_and((int *)p, __atomic_setup(p), mask).val; } -EXPORT_SYMBOL(_atomic_and); +EXPORT_SYMBOL(_atomic_fetch_and); -unsigned long _atomic_andn(volatile unsigned long *p, unsigned long mask) +unsigned long _atomic_fetch_andn(volatile unsigned long *p, unsigned long mask) { - return __atomic_andn((int *)p, __atomic_setup(p), mask).val; + return __atomic_fetch_andn((int *)p, __atomic_setup(p), mask).val; } -EXPORT_SYMBOL(_atomic_andn); 
+EXPORT_SYMBOL(_atomic_fetch_andn); -unsigned long _atomic_xor(volatile unsigned long *p, unsigned long mask) +unsigned long _atomic_fetch_xor(volatile unsigned long *p, unsigned long mask) { - return __atomic_xor((int *)p, __atomic_setup(p), mask).val; + return __atomic_fetch_xor((int *)p, __atomic_setup(p), mask).val; } -EXPORT_SYMBOL(_atomic_xor); +EXPORT_SYMBOL(_atomic_fetch_xor); long long _atomic64_xchg(long long *v, long long n) @@ -142,23 +142,23 @@ long long _atomic64_cmpxchg(long long *v, long long o, long long n) } EXPORT_SYMBOL(_atomic64_cmpxchg); -long long _atomic64_and(long long *v, long long n) +long long _atomic64_fetch_and(long long *v, long long n) { - return __atomic64_and(v, __atomic_setup(v), n); + return __atomic64_fetch_and(v, __atomic_setup(v), n); } -EXPORT_SYMBOL(_atomic64_and); +EXPORT_SYMBOL(_atomic64_fetch_and); -long long _atomic64_or(long long *v, long long n) +long long _atomic64_fetch_or(long long *v, long long n) { - return __atomic64_or(v, __atomic_setup(v), n); + return __atomic64_fetch_or(v, __atomic_setup(v), n); } -EXPORT_SYMBOL(_atomic64_or); +EXPORT_SYMBOL(_atomic64_fetch_or); -long long _atomic64_xor(long long *v, long long n) +long long _atomic64_fetch_xor(long long *v, long long n) { - return __atomic64_xor(v, __atomic_setup(v), n); + return __atomic64_fetch_xor(v, __atomic_setup(v), n); } -EXPORT_SYMBOL(_atomic64_xor); +EXPORT_SYMBOL(_atomic64_fetch_xor); /* * If any of the atomic or futex routines hit a bad address (not in diff --git a/arch/tile/lib/atomic_asm_32.S b/arch/tile/lib/atomic_asm_32.S index f611265633d6..507abdd2bf9a 100644 --- a/arch/tile/lib/atomic_asm_32.S +++ b/arch/tile/lib/atomic_asm_32.S @@ -177,10 +177,10 @@ atomic_op _xchg, 32, "move r24, r2" atomic_op _xchg_add, 32, "add r24, r22, r2" atomic_op _xchg_add_unless, 32, \ "sne r26, r22, r2; { bbns r26, 3f; add r24, r22, r3 }" -atomic_op _or, 32, "or r24, r22, r2" -atomic_op _and, 32, "and r24, r22, r2" -atomic_op _andn, 32, "nor r2, r2, zero; and r24, r22, r2" -atomic_op _xor, 32, "xor r24, r22, r2" +atomic_op _fetch_or, 32, "or r24, r22, r2" +atomic_op _fetch_and, 32, "and r24, r22, r2" +atomic_op _fetch_andn, 32, "nor r2, r2, zero; and r24, r22, r2" +atomic_op _fetch_xor, 32, "xor r24, r22, r2" atomic_op 64_cmpxchg, 64, "{ seq r26, r22, r2; seq r27, r23, r3 }; \ { bbns r26, 3f; move r24, r4 }; { bbns r27, 3f; move r25, r5 }" @@ -192,9 +192,9 @@ atomic_op 64_xchg_add_unless, 64, \ { bbns r26, 3f; add r24, r22, r4 }; \ { bbns r27, 3f; add r25, r23, r5 }; \ slt_u r26, r24, r22; add r25, r25, r26" -atomic_op 64_or, 64, "{ or r24, r22, r2; or r25, r23, r3 }" -atomic_op 64_and, 64, "{ and r24, r22, r2; and r25, r23, r3 }" -atomic_op 64_xor, 64, "{ xor r24, r22, r2; xor r25, r23, r3 }" +atomic_op 64_fetch_or, 64, "{ or r24, r22, r2; or r25, r23, r3 }" +atomic_op 64_fetch_and, 64, "{ and r24, r22, r2; and r25, r23, r3 }" +atomic_op 64_fetch_xor, 64, "{ xor r24, r22, r2; xor r25, r23, r3 }" jrp lr /* happy backtracer */ -- cgit v1.2.3 From a8bcccaba162632c3963259b8a442c6b490f4c68 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 18 Apr 2016 01:16:03 +0200 Subject: locking/atomic, arch/x86: Implement atomic{,64}_fetch_{add,sub,and,or,xor}() Implement FETCH-OP atomic primitives, these are very similar to the existing OP-RETURN primitives we already have, except they return the value of the atomic variable _before_ modification. This is especially useful for irreversible operations -- such as bitops (because it becomes impossible to reconstruct the state prior to modification). 
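The fetch-op semantics are easiest to see with a concrete value. Below is a minimal sketch, assuming a user-space C11 stdatomic stand-in rather than the kernel's atomic_t API; it only shows why the value held _before_ an irreversible OR is the useful one to return.

/* Minimal sketch, user-space C11 stdatomic (not the kernel API): the
 * old value returned by the fetch variant tells us whether the bit was
 * already set; the post-OR value cannot, since OR is irreversible.
 */
#include <stdatomic.h>
#include <stdio.h>

int main(void)
{
	atomic_uint flags = 0x1;	/* bit 0 set, bit 1 clear */
	unsigned int old;

	old = atomic_fetch_or(&flags, 0x2);	/* fetch-op: returns old value */
	printf("bit 0x2 was %s set before the OR\n",
	       (old & 0x2) ? "already" : "not");

	/* An op-return style primitive would hand back (old | 0x2), in
	 * which bit 0x2 is always set, so the prior state is lost. */
	return 0;
}

This is the same pattern the tile bitops above rely on: test_and_set_bit() and test_and_clear_bit() recover the prior bit state from _atomic_fetch_or() and _atomic_fetch_andn().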
Signed-off-by: Peter Zijlstra (Intel) Cc: Andrew Morton Cc: Linus Torvalds Cc: Paul E. McKenney Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-arch@vger.kernel.org Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- arch/x86/include/asm/atomic.h | 37 ++++++++++++++++++++++++++++++++++--- arch/x86/include/asm/atomic64_32.h | 25 ++++++++++++++++++++++--- arch/x86/include/asm/atomic64_64.h | 35 ++++++++++++++++++++++++++++++++--- 3 files changed, 88 insertions(+), 9 deletions(-) (limited to 'arch') diff --git a/arch/x86/include/asm/atomic.h b/arch/x86/include/asm/atomic.h index 3e8674288198..73b8463b89e9 100644 --- a/arch/x86/include/asm/atomic.h +++ b/arch/x86/include/asm/atomic.h @@ -171,6 +171,16 @@ static __always_inline int atomic_sub_return(int i, atomic_t *v) #define atomic_inc_return(v) (atomic_add_return(1, v)) #define atomic_dec_return(v) (atomic_sub_return(1, v)) +static __always_inline int atomic_fetch_add(int i, atomic_t *v) +{ + return xadd(&v->counter, i); +} + +static __always_inline int atomic_fetch_sub(int i, atomic_t *v) +{ + return xadd(&v->counter, -i); +} + static __always_inline int atomic_cmpxchg(atomic_t *v, int old, int new) { return cmpxchg(&v->counter, old, new); @@ -190,10 +200,31 @@ static inline void atomic_##op(int i, atomic_t *v) \ : "memory"); \ } -ATOMIC_OP(and) -ATOMIC_OP(or) -ATOMIC_OP(xor) +#define ATOMIC_FETCH_OP(op, c_op) \ +static inline int atomic_fetch_##op(int i, atomic_t *v) \ +{ \ + int old, val = atomic_read(v); \ + for (;;) { \ + old = atomic_cmpxchg(v, val, val c_op i); \ + if (old == val) \ + break; \ + val = old; \ + } \ + return old; \ +} + +#define ATOMIC_OPS(op, c_op) \ + ATOMIC_OP(op) \ + ATOMIC_FETCH_OP(op, c_op) + +#define atomic_fetch_or atomic_fetch_or + +ATOMIC_OPS(and, &) +ATOMIC_OPS(or , |) +ATOMIC_OPS(xor, ^) +#undef ATOMIC_OPS +#undef ATOMIC_FETCH_OP #undef ATOMIC_OP /** diff --git a/arch/x86/include/asm/atomic64_32.h b/arch/x86/include/asm/atomic64_32.h index a984111135b1..71d7705fb303 100644 --- a/arch/x86/include/asm/atomic64_32.h +++ b/arch/x86/include/asm/atomic64_32.h @@ -320,10 +320,29 @@ static inline void atomic64_##op(long long i, atomic64_t *v) \ c = old; \ } -ATOMIC64_OP(and, &) -ATOMIC64_OP(or, |) -ATOMIC64_OP(xor, ^) +#define ATOMIC64_FETCH_OP(op, c_op) \ +static inline long long atomic64_fetch_##op(long long i, atomic64_t *v) \ +{ \ + long long old, c = 0; \ + while ((old = atomic64_cmpxchg(v, c, c c_op i)) != c) \ + c = old; \ + return old; \ +} + +ATOMIC64_FETCH_OP(add, +) + +#define atomic64_fetch_sub(i, v) atomic64_fetch_add(-(i), (v)) + +#define ATOMIC64_OPS(op, c_op) \ + ATOMIC64_OP(op, c_op) \ + ATOMIC64_FETCH_OP(op, c_op) + +ATOMIC64_OPS(and, &) +ATOMIC64_OPS(or, |) +ATOMIC64_OPS(xor, ^) +#undef ATOMIC64_OPS +#undef ATOMIC64_FETCH_OP #undef ATOMIC64_OP #endif /* _ASM_X86_ATOMIC64_32_H */ diff --git a/arch/x86/include/asm/atomic64_64.h b/arch/x86/include/asm/atomic64_64.h index 037351022f54..70eed0e14553 100644 --- a/arch/x86/include/asm/atomic64_64.h +++ b/arch/x86/include/asm/atomic64_64.h @@ -158,6 +158,16 @@ static inline long atomic64_sub_return(long i, atomic64_t *v) return atomic64_add_return(-i, v); } +static inline long atomic64_fetch_add(long i, atomic64_t *v) +{ + return xadd(&v->counter, i); +} + +static inline long atomic64_fetch_sub(long i, atomic64_t *v) +{ + return xadd(&v->counter, -i); +} + #define atomic64_inc_return(v) (atomic64_add_return(1, (v))) #define atomic64_dec_return(v) (atomic64_sub_return(1, (v))) @@ -229,10 +239,29 @@ static inline void atomic64_##op(long i, 
atomic64_t *v) \ : "memory"); \ } -ATOMIC64_OP(and) -ATOMIC64_OP(or) -ATOMIC64_OP(xor) +#define ATOMIC64_FETCH_OP(op, c_op) \ +static inline long atomic64_fetch_##op(long i, atomic64_t *v) \ +{ \ + long old, val = atomic64_read(v); \ + for (;;) { \ + old = atomic64_cmpxchg(v, val, val c_op i); \ + if (old == val) \ + break; \ + val = old; \ + } \ + return old; \ +} + +#define ATOMIC64_OPS(op, c_op) \ + ATOMIC64_OP(op) \ + ATOMIC64_FETCH_OP(op, c_op) + +ATOMIC64_OPS(and, &) +ATOMIC64_OPS(or, |) +ATOMIC64_OPS(xor, ^) +#undef ATOMIC64_OPS +#undef ATOMIC64_FETCH_OP #undef ATOMIC64_OP #endif /* _ASM_X86_ATOMIC64_64_H */ -- cgit v1.2.3 From 6dc25876cdb17fd3906504dcabb9e537f8433000 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 18 Apr 2016 01:16:03 +0200 Subject: locking/atomic, arch/xtensa: Implement atomic_fetch_{add,sub,and,or,xor}() Implement FETCH-OP atomic primitives, these are very similar to the existing OP-RETURN primitives we already have, except they return the value of the atomic variable _before_ modification. This is especially useful for irreversible operations -- such as bitops (because it becomes impossible to reconstruct the state prior to modification). Signed-off-by: Peter Zijlstra (Intel) Cc: Andrew Morton Cc: Chris Zankel Cc: Linus Torvalds Cc: Max Filippov Cc: Paul E. McKenney Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-arch@vger.kernel.org Cc: linux-kernel@vger.kernel.org Cc: linux-xtensa@linux-xtensa.org Signed-off-by: Ingo Molnar --- arch/xtensa/include/asm/atomic.h | 54 +++++++++++++++++++++++++++++++++++++--- 1 file changed, 50 insertions(+), 4 deletions(-) (limited to 'arch') diff --git a/arch/xtensa/include/asm/atomic.h b/arch/xtensa/include/asm/atomic.h index fd8017ce298a..d95a8aa1a6d3 100644 --- a/arch/xtensa/include/asm/atomic.h +++ b/arch/xtensa/include/asm/atomic.h @@ -98,6 +98,26 @@ static inline int atomic_##op##_return(int i, atomic_t * v) \ return result; \ } +#define ATOMIC_FETCH_OP(op) \ +static inline int atomic_fetch_##op(int i, atomic_t * v) \ +{ \ + unsigned long tmp; \ + int result; \ + \ + __asm__ __volatile__( \ + "1: l32i %1, %3, 0\n" \ + " wsr %1, scompare1\n" \ + " " #op " %0, %1, %2\n" \ + " s32c1i %0, %3, 0\n" \ + " bne %0, %1, 1b\n" \ + : "=&a" (result), "=&a" (tmp) \ + : "a" (i), "a" (v) \ + : "memory" \ + ); \ + \ + return result; \ +} + #else /* XCHAL_HAVE_S32C1I */ #define ATOMIC_OP(op) \ @@ -138,18 +158,44 @@ static inline int atomic_##op##_return(int i, atomic_t * v) \ return vval; \ } +#define ATOMIC_FETCH_OP(op) \ +static inline int atomic_fetch_##op(int i, atomic_t * v) \ +{ \ + unsigned int tmp, vval; \ + \ + __asm__ __volatile__( \ + " rsil a15,"__stringify(TOPLEVEL)"\n" \ + " l32i %0, %3, 0\n" \ + " " #op " %1, %0, %2\n" \ + " s32i %1, %3, 0\n" \ + " wsr a15, ps\n" \ + " rsync\n" \ + : "=&a" (vval), "=&a" (tmp) \ + : "a" (i), "a" (v) \ + : "a15", "memory" \ + ); \ + \ + return vval; \ +} + #endif /* XCHAL_HAVE_S32C1I */ -#define ATOMIC_OPS(op) ATOMIC_OP(op) ATOMIC_OP_RETURN(op) +#define ATOMIC_OPS(op) ATOMIC_OP(op) ATOMIC_FETCH_OP(op) ATOMIC_OP_RETURN(op) ATOMIC_OPS(add) ATOMIC_OPS(sub) -ATOMIC_OP(and) -ATOMIC_OP(or) -ATOMIC_OP(xor) +#undef ATOMIC_OPS +#define ATOMIC_OPS(op) ATOMIC_OP(op) ATOMIC_FETCH_OP(op) + +#define atomic_fetch_or atomic_fetch_or + +ATOMIC_OPS(and) +ATOMIC_OPS(or) +ATOMIC_OPS(xor) #undef ATOMIC_OPS +#undef ATOMIC_FETCH_OP #undef ATOMIC_OP_RETURN #undef ATOMIC_OP -- cgit v1.2.3 From b53d6bedbe781974097fd8c38263f6cc78ff9ea7 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 18 Apr 2016 
00:58:25 +0200 Subject: locking/atomic: Remove linux/atomic.h:atomic_fetch_or() Since all architectures have this implemented now natively, remove this dead code. Signed-off-by: Peter Zijlstra (Intel) Cc: Andrew Morton Cc: Linus Torvalds Cc: Paul E. McKenney Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-arch@vger.kernel.org Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- arch/alpha/include/asm/atomic.h | 2 -- arch/arc/include/asm/atomic.h | 2 -- arch/arm/include/asm/atomic.h | 2 -- arch/arm64/include/asm/atomic.h | 2 -- arch/avr32/include/asm/atomic.h | 2 -- arch/frv/include/asm/atomic.h | 2 -- arch/h8300/include/asm/atomic.h | 2 -- arch/hexagon/include/asm/atomic.h | 2 -- arch/m32r/include/asm/atomic.h | 2 -- arch/m68k/include/asm/atomic.h | 2 -- arch/metag/include/asm/atomic.h | 2 -- arch/mips/include/asm/atomic.h | 2 -- arch/mn10300/include/asm/atomic.h | 2 -- arch/parisc/include/asm/atomic.h | 2 -- arch/s390/include/asm/atomic.h | 2 -- arch/sh/include/asm/atomic.h | 2 -- arch/sparc/include/asm/atomic.h | 1 - arch/sparc/include/asm/atomic_32.h | 2 -- arch/tile/include/asm/atomic.h | 2 -- arch/x86/include/asm/atomic.h | 2 -- arch/xtensa/include/asm/atomic.h | 2 -- include/asm-generic/atomic.h | 2 -- include/linux/atomic.h | 21 --------------------- 23 files changed, 64 deletions(-) (limited to 'arch') diff --git a/arch/alpha/include/asm/atomic.h b/arch/alpha/include/asm/atomic.h index 8243f17999e3..5377ca8bb503 100644 --- a/arch/alpha/include/asm/atomic.h +++ b/arch/alpha/include/asm/atomic.h @@ -153,8 +153,6 @@ ATOMIC_OPS(sub) #define atomic_andnot atomic_andnot #define atomic64_andnot atomic64_andnot -#define atomic_fetch_or atomic_fetch_or - #undef ATOMIC_OPS #define ATOMIC_OPS(op, asm) \ ATOMIC_OP(op, asm) \ diff --git a/arch/arc/include/asm/atomic.h b/arch/arc/include/asm/atomic.h index c066a21caaaf..bd9c51cb2bfd 100644 --- a/arch/arc/include/asm/atomic.h +++ b/arch/arc/include/asm/atomic.h @@ -189,8 +189,6 @@ ATOMIC_OPS(sub, -=, sub) #define atomic_andnot atomic_andnot -#define atomic_fetch_or atomic_fetch_or - #undef ATOMIC_OPS #define ATOMIC_OPS(op, c_op, asm_op) \ ATOMIC_OP(op, c_op, asm_op) \ diff --git a/arch/arm/include/asm/atomic.h b/arch/arm/include/asm/atomic.h index 0feb110ec542..66d0e215a773 100644 --- a/arch/arm/include/asm/atomic.h +++ b/arch/arm/include/asm/atomic.h @@ -201,8 +201,6 @@ static inline int atomic_fetch_##op(int i, atomic_t *v) \ return val; \ } -#define atomic_fetch_or atomic_fetch_or - static inline int atomic_cmpxchg(atomic_t *v, int old, int new) { int ret; diff --git a/arch/arm64/include/asm/atomic.h b/arch/arm64/include/asm/atomic.h index 3128c3d7c1ff..c0235e0ff849 100644 --- a/arch/arm64/include/asm/atomic.h +++ b/arch/arm64/include/asm/atomic.h @@ -128,8 +128,6 @@ #define __atomic_add_unless(v, a, u) ___atomic_add_unless(v, a, u,) #define atomic_andnot atomic_andnot -#define atomic_fetch_or atomic_fetch_or - /* * 64-bit atomic operations. 
*/ diff --git a/arch/avr32/include/asm/atomic.h b/arch/avr32/include/asm/atomic.h index b8681fd495ef..3d5ce38a6f0b 100644 --- a/arch/avr32/include/asm/atomic.h +++ b/arch/avr32/include/asm/atomic.h @@ -66,8 +66,6 @@ ATOMIC_OP_RETURN(add, add, r) ATOMIC_FETCH_OP (sub, sub, rKs21) ATOMIC_FETCH_OP (add, add, r) -#define atomic_fetch_or atomic_fetch_or - #define ATOMIC_OPS(op, asm_op) \ ATOMIC_OP_RETURN(op, asm_op, r) \ static inline void atomic_##op(int i, atomic_t *v) \ diff --git a/arch/frv/include/asm/atomic.h b/arch/frv/include/asm/atomic.h index e3e06da0cd59..1c2a5e264fc7 100644 --- a/arch/frv/include/asm/atomic.h +++ b/arch/frv/include/asm/atomic.h @@ -74,8 +74,6 @@ static inline void atomic_dec(atomic_t *v) #define atomic_dec_and_test(v) (atomic_sub_return(1, (v)) == 0) #define atomic_inc_and_test(v) (atomic_add_return(1, (v)) == 0) -#define atomic_fetch_or atomic_fetch_or - /* * 64-bit atomic ops */ diff --git a/arch/h8300/include/asm/atomic.h b/arch/h8300/include/asm/atomic.h index 0961b618bdde..349a47a918db 100644 --- a/arch/h8300/include/asm/atomic.h +++ b/arch/h8300/include/asm/atomic.h @@ -54,8 +54,6 @@ static inline void atomic_##op(int i, atomic_t *v) \ ATOMIC_OP_RETURN(add, +=) ATOMIC_OP_RETURN(sub, -=) -#define atomic_fetch_or atomic_fetch_or - #define ATOMIC_OPS(op, c_op) \ ATOMIC_OP(op, c_op) \ ATOMIC_FETCH_OP(op, c_op) diff --git a/arch/hexagon/include/asm/atomic.h b/arch/hexagon/include/asm/atomic.h index 07dbb3332b4a..a62ba368b27d 100644 --- a/arch/hexagon/include/asm/atomic.h +++ b/arch/hexagon/include/asm/atomic.h @@ -152,8 +152,6 @@ ATOMIC_OPS(sub) #undef ATOMIC_OPS #define ATOMIC_OPS(op) ATOMIC_OP(op) ATOMIC_FETCH_OP(op) -#define atomic_fetch_or atomic_fetch_or - ATOMIC_OPS(and) ATOMIC_OPS(or) ATOMIC_OPS(xor) diff --git a/arch/m32r/include/asm/atomic.h b/arch/m32r/include/asm/atomic.h index 8ba8a0ab5d5d..640cc1c7099f 100644 --- a/arch/m32r/include/asm/atomic.h +++ b/arch/m32r/include/asm/atomic.h @@ -121,8 +121,6 @@ ATOMIC_OPS(sub) #undef ATOMIC_OPS #define ATOMIC_OPS(op) ATOMIC_OP(op) ATOMIC_FETCH_OP(op) -#define atomic_fetch_or atomic_fetch_or - ATOMIC_OPS(and) ATOMIC_OPS(or) ATOMIC_OPS(xor) diff --git a/arch/m68k/include/asm/atomic.h b/arch/m68k/include/asm/atomic.h index 5cf9b3b1b6ac..3e03de7ae33b 100644 --- a/arch/m68k/include/asm/atomic.h +++ b/arch/m68k/include/asm/atomic.h @@ -119,8 +119,6 @@ ATOMIC_OPS(sub, -=, sub) ATOMIC_OP(op, c_op, asm_op) \ ATOMIC_FETCH_OP(op, c_op, asm_op) -#define atomic_fetch_or atomic_fetch_or - ATOMIC_OPS(and, &=, and) ATOMIC_OPS(or, |=, or) ATOMIC_OPS(xor, ^=, eor) diff --git a/arch/metag/include/asm/atomic.h b/arch/metag/include/asm/atomic.h index 6ca210de8a7d..470e365f04ea 100644 --- a/arch/metag/include/asm/atomic.h +++ b/arch/metag/include/asm/atomic.h @@ -17,8 +17,6 @@ #include #endif -#define atomic_fetch_or atomic_fetch_or - #define atomic_add_negative(a, v) (atomic_add_return((a), (v)) < 0) #define atomic_dec_return(v) atomic_sub_return(1, (v)) diff --git a/arch/mips/include/asm/atomic.h b/arch/mips/include/asm/atomic.h index 431079f8e483..387ce288334e 100644 --- a/arch/mips/include/asm/atomic.h +++ b/arch/mips/include/asm/atomic.h @@ -194,8 +194,6 @@ ATOMIC_OPS(sub, -=, subu) ATOMIC_OP(op, c_op, asm_op) \ ATOMIC_FETCH_OP(op, c_op, asm_op) -#define atomic_fetch_or atomic_fetch_or - ATOMIC_OPS(and, &=, and) ATOMIC_OPS(or, |=, or) ATOMIC_OPS(xor, ^=, xor) diff --git a/arch/mn10300/include/asm/atomic.h b/arch/mn10300/include/asm/atomic.h index 3580f789f3a6..36389efd45e8 100644 --- a/arch/mn10300/include/asm/atomic.h +++ 
b/arch/mn10300/include/asm/atomic.h @@ -113,8 +113,6 @@ ATOMIC_OPS(sub) #undef ATOMIC_OPS #define ATOMIC_OPS(op) ATOMIC_OP(op) ATOMIC_FETCH_OP(op) -#define atomic_fetch_or atomic_fetch_or - ATOMIC_OPS(and) ATOMIC_OPS(or) ATOMIC_OPS(xor) diff --git a/arch/parisc/include/asm/atomic.h b/arch/parisc/include/asm/atomic.h index 29df1f871910..5394b9c5f914 100644 --- a/arch/parisc/include/asm/atomic.h +++ b/arch/parisc/include/asm/atomic.h @@ -148,8 +148,6 @@ ATOMIC_OPS(sub, -=) ATOMIC_OP(op, c_op) \ ATOMIC_FETCH_OP(op, c_op) -#define atomic_fetch_or atomic_fetch_or - ATOMIC_OPS(and, &=) ATOMIC_OPS(or, |=) ATOMIC_OPS(xor, ^=) diff --git a/arch/s390/include/asm/atomic.h b/arch/s390/include/asm/atomic.h index 2324e759b544..d28cc2f5b7b2 100644 --- a/arch/s390/include/asm/atomic.h +++ b/arch/s390/include/asm/atomic.h @@ -135,8 +135,6 @@ static inline int atomic_fetch_##op(int i, atomic_t *v) \ return __ATOMIC_LOOP(v, i, __ATOMIC_##OP, __ATOMIC_BARRIER); \ } -#define atomic_fetch_or atomic_fetch_or - ATOMIC_OPS(and, AND) ATOMIC_OPS(or, OR) ATOMIC_OPS(xor, XOR) diff --git a/arch/sh/include/asm/atomic.h b/arch/sh/include/asm/atomic.h index d93ed7ce1b2f..c399e1c55685 100644 --- a/arch/sh/include/asm/atomic.h +++ b/arch/sh/include/asm/atomic.h @@ -25,8 +25,6 @@ #include #endif -#define atomic_fetch_or atomic_fetch_or - #define atomic_add_negative(a, v) (atomic_add_return((a), (v)) < 0) #define atomic_dec_return(v) atomic_sub_return(1, (v)) #define atomic_inc_return(v) atomic_add_return(1, (v)) diff --git a/arch/sparc/include/asm/atomic.h b/arch/sparc/include/asm/atomic.h index 1f741bcc73b7..8ff83d8cc33f 100644 --- a/arch/sparc/include/asm/atomic.h +++ b/arch/sparc/include/asm/atomic.h @@ -5,5 +5,4 @@ #else #include #endif -#define atomic_fetch_or atomic_fetch_or #endif diff --git a/arch/sparc/include/asm/atomic_32.h b/arch/sparc/include/asm/atomic_32.h index 5cfb20a599d9..ee3f11c43cda 100644 --- a/arch/sparc/include/asm/atomic_32.h +++ b/arch/sparc/include/asm/atomic_32.h @@ -36,8 +36,6 @@ void atomic_set(atomic_t *, int); #define atomic_inc(v) ((void)atomic_add_return( 1, (v))) #define atomic_dec(v) ((void)atomic_add_return( -1, (v))) -#define atomic_fetch_or atomic_fetch_or - #define atomic_and(i, v) ((void)atomic_fetch_and((i), (v))) #define atomic_or(i, v) ((void)atomic_fetch_or((i), (v))) #define atomic_xor(i, v) ((void)atomic_fetch_xor((i), (v))) diff --git a/arch/tile/include/asm/atomic.h b/arch/tile/include/asm/atomic.h index 9807030557c4..8dda3c8ff5ab 100644 --- a/arch/tile/include/asm/atomic.h +++ b/arch/tile/include/asm/atomic.h @@ -48,8 +48,6 @@ static inline int atomic_read(const atomic_t *v) #define atomic_fetch_sub(i, v) atomic_fetch_add(-(int)(i), (v)) -#define atomic_fetch_or atomic_fetch_or - /** * atomic_sub - subtract integer from atomic variable * @i: integer value to subtract diff --git a/arch/x86/include/asm/atomic.h b/arch/x86/include/asm/atomic.h index 73b8463b89e9..a58b99811105 100644 --- a/arch/x86/include/asm/atomic.h +++ b/arch/x86/include/asm/atomic.h @@ -217,8 +217,6 @@ static inline int atomic_fetch_##op(int i, atomic_t *v) \ ATOMIC_OP(op) \ ATOMIC_FETCH_OP(op, c_op) -#define atomic_fetch_or atomic_fetch_or - ATOMIC_OPS(and, &) ATOMIC_OPS(or , |) ATOMIC_OPS(xor, ^) diff --git a/arch/xtensa/include/asm/atomic.h b/arch/xtensa/include/asm/atomic.h index d95a8aa1a6d3..e7a23f2a519a 100644 --- a/arch/xtensa/include/asm/atomic.h +++ b/arch/xtensa/include/asm/atomic.h @@ -188,8 +188,6 @@ ATOMIC_OPS(sub) #undef ATOMIC_OPS #define ATOMIC_OPS(op) ATOMIC_OP(op) ATOMIC_FETCH_OP(op) 
-#define atomic_fetch_or atomic_fetch_or - ATOMIC_OPS(and) ATOMIC_OPS(or) ATOMIC_OPS(xor) diff --git a/include/asm-generic/atomic.h b/include/asm-generic/atomic.h index a2304ccf4ed0..9ed8b987185b 100644 --- a/include/asm-generic/atomic.h +++ b/include/asm-generic/atomic.h @@ -137,8 +137,6 @@ ATOMIC_FETCH_OP(and, &) #endif #ifndef atomic_fetch_or -#define atomic_fetch_or atomic_fetch_or - ATOMIC_FETCH_OP(or, |) #endif diff --git a/include/linux/atomic.h b/include/linux/atomic.h index 2e6c013ac5a4..0b3802d33125 100644 --- a/include/linux/atomic.h +++ b/include/linux/atomic.h @@ -573,27 +573,6 @@ static inline int atomic_dec_if_positive(atomic_t *v) } #endif -/** - * atomic_fetch_or - perform *p |= mask and return old value of *p - * @mask: mask to OR on the atomic_t - * @p: pointer to atomic_t - */ -#ifndef atomic_fetch_or -static inline int atomic_fetch_or(int mask, atomic_t *p) -{ - int old, val = atomic_read(p); - - for (;;) { - old = atomic_cmpxchg(p, val, val | mask); - if (old == val) - break; - val = old; - } - - return old; -} -#endif - #ifdef CONFIG_GENERIC_ATOMIC64 #include #endif -- cgit v1.2.3 From fe14d2f12d5e641f114e27c2ea1fb85843c58967 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 18 Apr 2016 01:09:20 +0200 Subject: locking/atomic, arch/alpha: Convert to _relaxed atomics Generic code will construct {,_acquire,_release} versions by adding the required smp_mb__{before,after}_atomic() calls. Signed-off-by: Peter Zijlstra (Intel) Cc: Andrew Morton Cc: Ivan Kokshaysky Cc: Linus Torvalds Cc: Matt Turner Cc: Paul E. McKenney Cc: Peter Zijlstra Cc: Richard Henderson Cc: Thomas Gleixner Cc: linux-alpha@vger.kernel.org Cc: linux-arch@vger.kernel.org Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- arch/alpha/include/asm/atomic.h | 36 ++++++++++++++++++++++++------------ 1 file changed, 24 insertions(+), 12 deletions(-) (limited to 'arch') diff --git a/arch/alpha/include/asm/atomic.h b/arch/alpha/include/asm/atomic.h index 5377ca8bb503..498933a7df97 100644 --- a/arch/alpha/include/asm/atomic.h +++ b/arch/alpha/include/asm/atomic.h @@ -46,10 +46,9 @@ static __inline__ void atomic_##op(int i, atomic_t * v) \ } \ #define ATOMIC_OP_RETURN(op, asm_op) \ -static inline int atomic_##op##_return(int i, atomic_t *v) \ +static inline int atomic_##op##_return_relaxed(int i, atomic_t *v) \ { \ long temp, result; \ - smp_mb(); \ __asm__ __volatile__( \ "1: ldl_l %0,%1\n" \ " " #asm_op " %0,%3,%2\n" \ @@ -61,15 +60,13 @@ static inline int atomic_##op##_return(int i, atomic_t *v) \ ".previous" \ :"=&r" (temp), "=m" (v->counter), "=&r" (result) \ :"Ir" (i), "m" (v->counter) : "memory"); \ - smp_mb(); \ return result; \ } #define ATOMIC_FETCH_OP(op, asm_op) \ -static inline int atomic_fetch_##op(int i, atomic_t *v) \ +static inline int atomic_fetch_##op##_relaxed(int i, atomic_t *v) \ { \ long temp, result; \ - smp_mb(); \ __asm__ __volatile__( \ "1: ldl_l %2,%1\n" \ " " #asm_op " %2,%3,%0\n" \ @@ -80,7 +77,6 @@ static inline int atomic_fetch_##op(int i, atomic_t *v) \ ".previous" \ :"=&r" (temp), "=m" (v->counter), "=&r" (result) \ :"Ir" (i), "m" (v->counter) : "memory"); \ - smp_mb(); \ return result; \ } @@ -101,10 +97,9 @@ static __inline__ void atomic64_##op(long i, atomic64_t * v) \ } \ #define ATOMIC64_OP_RETURN(op, asm_op) \ -static __inline__ long atomic64_##op##_return(long i, atomic64_t * v) \ +static __inline__ long atomic64_##op##_return_relaxed(long i, atomic64_t * v) \ { \ long temp, result; \ - smp_mb(); \ __asm__ __volatile__( \ "1: ldq_l %0,%1\n" \ " " #asm_op 
" %0,%3,%2\n" \ @@ -116,15 +111,13 @@ static __inline__ long atomic64_##op##_return(long i, atomic64_t * v) \ ".previous" \ :"=&r" (temp), "=m" (v->counter), "=&r" (result) \ :"Ir" (i), "m" (v->counter) : "memory"); \ - smp_mb(); \ return result; \ } #define ATOMIC64_FETCH_OP(op, asm_op) \ -static __inline__ long atomic64_fetch_##op(long i, atomic64_t * v) \ +static __inline__ long atomic64_fetch_##op##_relaxed(long i, atomic64_t * v) \ { \ long temp, result; \ - smp_mb(); \ __asm__ __volatile__( \ "1: ldq_l %2,%1\n" \ " " #asm_op " %2,%3,%0\n" \ @@ -135,7 +128,6 @@ static __inline__ long atomic64_fetch_##op(long i, atomic64_t * v) \ ".previous" \ :"=&r" (temp), "=m" (v->counter), "=&r" (result) \ :"Ir" (i), "m" (v->counter) : "memory"); \ - smp_mb(); \ return result; \ } @@ -150,6 +142,16 @@ static __inline__ long atomic64_fetch_##op(long i, atomic64_t * v) \ ATOMIC_OPS(add) ATOMIC_OPS(sub) +#define atomic_add_return_relaxed atomic_add_return_relaxed +#define atomic_sub_return_relaxed atomic_sub_return_relaxed +#define atomic_fetch_add_relaxed atomic_fetch_add_relaxed +#define atomic_fetch_sub_relaxed atomic_fetch_sub_relaxed + +#define atomic64_add_return_relaxed atomic64_add_return_relaxed +#define atomic64_sub_return_relaxed atomic64_sub_return_relaxed +#define atomic64_fetch_add_relaxed atomic64_fetch_add_relaxed +#define atomic64_fetch_sub_relaxed atomic64_fetch_sub_relaxed + #define atomic_andnot atomic_andnot #define atomic64_andnot atomic64_andnot @@ -165,6 +167,16 @@ ATOMIC_OPS(andnot, bic) ATOMIC_OPS(or, bis) ATOMIC_OPS(xor, xor) +#define atomic_fetch_and_relaxed atomic_fetch_and_relaxed +#define atomic_fetch_andnot_relaxed atomic_fetch_andnot_relaxed +#define atomic_fetch_or_relaxed atomic_fetch_or_relaxed +#define atomic_fetch_xor_relaxed atomic_fetch_xor_relaxed + +#define atomic64_fetch_and_relaxed atomic64_fetch_and_relaxed +#define atomic64_fetch_andnot_relaxed atomic64_fetch_andnot_relaxed +#define atomic64_fetch_or_relaxed atomic64_fetch_or_relaxed +#define atomic64_fetch_xor_relaxed atomic64_fetch_xor_relaxed + #undef ATOMIC_OPS #undef ATOMIC64_FETCH_OP #undef ATOMIC64_OP_RETURN -- cgit v1.2.3 From 4ec45856b698c37e73d973fb4b1a094dfb9d5732 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 18 Apr 2016 01:15:25 +0200 Subject: locking/atomic, arch/mips: Convert to _relaxed atomics Generic code will construct {,_acquire,_release} versions by adding the required smp_mb__{before,after}_atomic() calls. XXX if/when MIPS will start using their new SYNCxx instructions they can provide custom __atomic_op_{acquire,release}() macros as per the powerpc example. Signed-off-by: Peter Zijlstra (Intel) Cc: Andrew Morton Cc: Linus Torvalds Cc: Paul E. 
McKenney Cc: Peter Zijlstra Cc: Ralf Baechle Cc: Thomas Gleixner Cc: linux-arch@vger.kernel.org Cc: linux-kernel@vger.kernel.org Cc: linux-mips@linux-mips.org Signed-off-by: Ingo Molnar --- arch/mips/include/asm/atomic.h | 42 ++++++++++++++++++++++-------------------- 1 file changed, 22 insertions(+), 20 deletions(-) (limited to 'arch') diff --git a/arch/mips/include/asm/atomic.h b/arch/mips/include/asm/atomic.h index 387ce288334e..0ab176bdb8e8 100644 --- a/arch/mips/include/asm/atomic.h +++ b/arch/mips/include/asm/atomic.h @@ -79,12 +79,10 @@ static __inline__ void atomic_##op(int i, atomic_t * v) \ } #define ATOMIC_OP_RETURN(op, c_op, asm_op) \ -static __inline__ int atomic_##op##_return(int i, atomic_t * v) \ +static __inline__ int atomic_##op##_return_relaxed(int i, atomic_t * v) \ { \ int result; \ \ - smp_mb__before_llsc(); \ - \ if (kernel_uses_llsc && R10000_LLSC_WAR) { \ int temp; \ \ @@ -125,18 +123,14 @@ static __inline__ int atomic_##op##_return(int i, atomic_t * v) \ raw_local_irq_restore(flags); \ } \ \ - smp_llsc_mb(); \ - \ return result; \ } #define ATOMIC_FETCH_OP(op, c_op, asm_op) \ -static __inline__ int atomic_fetch_##op(int i, atomic_t * v) \ +static __inline__ int atomic_fetch_##op##_relaxed(int i, atomic_t * v) \ { \ int result; \ \ - smp_mb__before_llsc(); \ - \ if (kernel_uses_llsc && R10000_LLSC_WAR) { \ int temp; \ \ @@ -176,8 +170,6 @@ static __inline__ int atomic_fetch_##op(int i, atomic_t * v) \ raw_local_irq_restore(flags); \ } \ \ - smp_llsc_mb(); \ - \ return result; \ } @@ -189,6 +181,11 @@ static __inline__ int atomic_fetch_##op(int i, atomic_t * v) \ ATOMIC_OPS(add, +=, addu) ATOMIC_OPS(sub, -=, subu) +#define atomic_add_return_relaxed atomic_add_return_relaxed +#define atomic_sub_return_relaxed atomic_sub_return_relaxed +#define atomic_fetch_add_relaxed atomic_fetch_add_relaxed +#define atomic_fetch_sub_relaxed atomic_fetch_sub_relaxed + #undef ATOMIC_OPS #define ATOMIC_OPS(op, c_op, asm_op) \ ATOMIC_OP(op, c_op, asm_op) \ @@ -198,6 +195,10 @@ ATOMIC_OPS(and, &=, and) ATOMIC_OPS(or, |=, or) ATOMIC_OPS(xor, ^=, xor) +#define atomic_fetch_and_relaxed atomic_fetch_and_relaxed +#define atomic_fetch_or_relaxed atomic_fetch_or_relaxed +#define atomic_fetch_xor_relaxed atomic_fetch_xor_relaxed + #undef ATOMIC_OPS #undef ATOMIC_FETCH_OP #undef ATOMIC_OP_RETURN @@ -420,12 +421,10 @@ static __inline__ void atomic64_##op(long i, atomic64_t * v) \ } #define ATOMIC64_OP_RETURN(op, c_op, asm_op) \ -static __inline__ long atomic64_##op##_return(long i, atomic64_t * v) \ +static __inline__ long atomic64_##op##_return_relaxed(long i, atomic64_t * v) \ { \ long result; \ \ - smp_mb__before_llsc(); \ - \ if (kernel_uses_llsc && R10000_LLSC_WAR) { \ long temp; \ \ @@ -467,18 +466,14 @@ static __inline__ long atomic64_##op##_return(long i, atomic64_t * v) \ raw_local_irq_restore(flags); \ } \ \ - smp_llsc_mb(); \ - \ return result; \ } #define ATOMIC64_FETCH_OP(op, c_op, asm_op) \ -static __inline__ long atomic64_fetch_##op(long i, atomic64_t * v) \ +static __inline__ long atomic64_fetch_##op##_relaxed(long i, atomic64_t * v) \ { \ long result; \ \ - smp_mb__before_llsc(); \ - \ if (kernel_uses_llsc && R10000_LLSC_WAR) { \ long temp; \ \ @@ -519,8 +514,6 @@ static __inline__ long atomic64_fetch_##op(long i, atomic64_t * v) \ raw_local_irq_restore(flags); \ } \ \ - smp_llsc_mb(); \ - \ return result; \ } @@ -532,6 +525,11 @@ static __inline__ long atomic64_fetch_##op(long i, atomic64_t * v) \ ATOMIC64_OPS(add, +=, daddu) ATOMIC64_OPS(sub, -=, dsubu) +#define 
atomic64_add_return_relaxed atomic64_add_return_relaxed +#define atomic64_sub_return_relaxed atomic64_sub_return_relaxed +#define atomic64_fetch_add_relaxed atomic64_fetch_add_relaxed +#define atomic64_fetch_sub_relaxed atomic64_fetch_sub_relaxed + #undef ATOMIC64_OPS #define ATOMIC64_OPS(op, c_op, asm_op) \ ATOMIC64_OP(op, c_op, asm_op) \ @@ -541,6 +539,10 @@ ATOMIC64_OPS(and, &=, and) ATOMIC64_OPS(or, |=, or) ATOMIC64_OPS(xor, ^=, xor) +#define atomic64_fetch_and_relaxed atomic64_fetch_and_relaxed +#define atomic64_fetch_or_relaxed atomic64_fetch_or_relaxed +#define atomic64_fetch_xor_relaxed atomic64_fetch_xor_relaxed + #undef ATOMIC64_OPS #undef ATOMIC64_FETCH_OP #undef ATOMIC64_OP_RETURN -- cgit v1.2.3 From c5379ba8fccd99d5f99632c789f0393d84a57805 Mon Sep 17 00:00:00 2001 From: Thomas Petazzoni Date: Thu, 16 Jun 2016 15:42:25 +0200 Subject: ARM: mvebu: fix HW I/O coherency related deadlocks Until now, our understanding for HW I/O coherency to work on the Cortex-A9 based Marvell SoC was that only the PCIe regions should be mapped strongly-ordered. However, we were still encountering some deadlocks, especially when testing the CESA crypto engine. After checking with the HW designers, it was concluded that all the MMIO registers should be mapped as strongly ordered for the HW I/O coherency mechanism to work properly. This fixes some easy to reproduce deadlocks with the CESA crypto engine driver (dmcrypt on a sufficiently large disk partition). Tested-by: Terry Stockert Tested-by: Romain Perier Cc: Terry Stockert Cc: Romain Perier Cc: Signed-off-by: Thomas Petazzoni Signed-off-by: Gregory CLEMENT --- arch/arm/mach-mvebu/coherency.c | 22 ++++++++-------------- 1 file changed, 8 insertions(+), 14 deletions(-) (limited to 'arch') diff --git a/arch/arm/mach-mvebu/coherency.c b/arch/arm/mach-mvebu/coherency.c index 7e989d61159c..474abff7e855 100644 --- a/arch/arm/mach-mvebu/coherency.c +++ b/arch/arm/mach-mvebu/coherency.c @@ -162,22 +162,16 @@ exit: } /* - * This ioremap hook is used on Armada 375/38x to ensure that PCIe - * memory areas are mapped as MT_UNCACHED instead of MT_DEVICE. This - * is needed as a workaround for a deadlock issue between the PCIe - * interface and the cache controller. + * This ioremap hook is used on Armada 375/38x to ensure that all MMIO + * areas are mapped as MT_UNCACHED instead of MT_DEVICE. This is + * needed for the HW I/O coherency mechanism to work properly without + * deadlock. 
*/ static void __iomem * -armada_pcie_wa_ioremap_caller(phys_addr_t phys_addr, size_t size, - unsigned int mtype, void *caller) +armada_wa_ioremap_caller(phys_addr_t phys_addr, size_t size, + unsigned int mtype, void *caller) { - struct resource pcie_mem; - - mvebu_mbus_get_pcie_mem_aperture(&pcie_mem); - - if (pcie_mem.start <= phys_addr && (phys_addr + size) <= pcie_mem.end) - mtype = MT_UNCACHED; - + mtype = MT_UNCACHED; return __arm_ioremap_caller(phys_addr, size, mtype, caller); } @@ -186,7 +180,7 @@ static void __init armada_375_380_coherency_init(struct device_node *np) struct device_node *cache_dn; coherency_cpu_base = of_iomap(np, 0); - arch_ioremap_caller = armada_pcie_wa_ioremap_caller; + arch_ioremap_caller = armada_wa_ioremap_caller; /* * We should switch the PL310 to I/O coherency mode only if -- cgit v1.2.3 From 6a02734d420fca778554878d03017017537d92e1 Mon Sep 17 00:00:00 2001 From: Thomas Petazzoni Date: Thu, 16 Jun 2016 15:42:26 +0200 Subject: ARM: mvebu: map PCI I/O regions strongly ordered In order for HW I/O coherency to work on Cortex-A9 based Marvell SoCs, all MMIO registers must be mapped strongly ordered. In commit 1c8c3cf0b5239 ("ARM: 8060/1: mm: allow sub-architectures to override PCI I/O memory type") we implemented a new function, pci_ioremap_set_mem_type(), that allow sub-architecture code to override the memory type used to map PCI I/O regions. In the discussion around this patch series [1], Arnd Bergmann made the comment that maybe all PCI I/O regions should be mapped strongly-ordered, which would have made our proposal to add pci_ioremap_set_mem_type() irrelevant. So, we submitted a patch [2] that did what Arnd suggested. However, Russell in the end merged our initial proposal to add pci_ioremap_set_mem_type(), but it was never used anywhere. Further discussion with Arnd and other folks on IRC lead to the conclusion that in fact using strongly-ordered for all platforms was maybe not desirable, and therefore, using pci_ioremap_set_mem_type() was the most appropriate solution. As a consequence, this commit finally adds the pci_ioremap_set_mem_type() call in the mach-mvebu platform code, which was originally part of our initial patch series [3] and is necessary for the whole mechanism to work. 
[1] http://lists.infradead.org/pipermail/linux-arm-kernel/2014-May/256565.html [2] http://lists.infradead.org/pipermail/linux-arm-kernel/2014-May/256755.html [3] http://lists.infradead.org/pipermail/linux-arm-kernel/2014-May/256563.html Signed-off-by: Thomas Petazzoni Signed-off-by: Gregory CLEMENT --- arch/arm/mach-mvebu/coherency.c | 1 + 1 file changed, 1 insertion(+) (limited to 'arch') diff --git a/arch/arm/mach-mvebu/coherency.c b/arch/arm/mach-mvebu/coherency.c index 474abff7e855..e80f0dde2189 100644 --- a/arch/arm/mach-mvebu/coherency.c +++ b/arch/arm/mach-mvebu/coherency.c @@ -181,6 +181,7 @@ static void __init armada_375_380_coherency_init(struct device_node *np) coherency_cpu_base = of_iomap(np, 0); arch_ioremap_caller = armada_wa_ioremap_caller; + pci_ioremap_set_mem_type(MT_UNCACHED); /* * We should switch the PL310 to I/O coherency mode only if -- cgit v1.2.3 From 929e604efa3dc0522214e0dc18984be23993e9f0 Mon Sep 17 00:00:00 2001 From: Thomas Petazzoni Date: Thu, 16 Jun 2016 15:42:27 +0200 Subject: ARM: dts: armada-38x: fix MBUS_ID for crypto SRAM on Armada 385 Linksys When the support for the Marvell crypto engine was added in the Device Tree of the various Armada 385 Device Tree files in commit d716f2e837ac6 ("ARM: mvebu: define crypto SRAM ranges for all armada-38x boards"), a typo was made in the MBus window attributes for the Armada 385 Linksys board: 0x09/0x05 are used instead of 0x19/0x15. This commit fixes this typo, which makes the CESA engines operational on Armada 385 Linksys boards. Reported-by: Terry Stockert Cc: Terry Stockert Cc: Imre Kaloz Cc: Boris Brezillon Cc: Fixes: d716f2e837ac6 ("ARM: mvebu: define crypto SRAM ranges for all armada-38x boards") Signed-off-by: Thomas Petazzoni Signed-off-by: Gregory CLEMENT --- arch/arm/boot/dts/armada-385-linksys.dtsi | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/arm/boot/dts/armada-385-linksys.dtsi b/arch/arm/boot/dts/armada-385-linksys.dtsi index 8450944b28e6..22f7a13e20b4 100644 --- a/arch/arm/boot/dts/armada-385-linksys.dtsi +++ b/arch/arm/boot/dts/armada-385-linksys.dtsi @@ -58,8 +58,8 @@ soc { ranges = ; + MBUS_ID(0x09, 0x19) 0 0xf1100000 0x10000 + MBUS_ID(0x09, 0x15) 0 0xf1110000 0x10000>; internal-regs { -- cgit v1.2.3 From 30ce0350381351646ef86b64e6d3840b3869833b Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Fri, 17 Jun 2016 16:00:21 -0600 Subject: x86/PCI/VMD: Use untracked irq handler There is no way to know which device in a VMD triggered an interrupt without invoking every registered driver's actions. This uses the untracked irq handler so that a less used device does not trigger spurious interrupt. We have been previously recommending users to enable "noirqdebug", but do not want to force a system setting just to keep this domain functional. 
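The accounting problem described above comes from the fan-out: the VMD vector has to be demultiplexed by invoking every child handler, and for any given interrupt most of them report that it was not theirs. A minimal sketch of that per-interrupt miss count, assuming hypothetical names and plain C rather than the genirq API:

/* Sketch with hypothetical names (plain C, not the genirq API): one
 * hardware vector fans out to every registered child handler, and all
 * but one miss.  With a tracked flow handler those misses feed the
 * spurious-interrupt detector; handle_untracked_irq() skips that
 * accounting, which is what the patch above relies on.
 */
#include <stdio.h>

#define NCHILD 4

/* returns 1 if the interrupt belonged to this child device */
static int child_handler(int id, int active_id)
{
	return id == active_id;
}

int main(void)
{
	int active = 2;			/* only device 2 raised the interrupt */
	int handled = 0, missed = 0;

	for (int i = 0; i < NCHILD; i++) {
		if (child_handler(i, active))
			handled++;
		else
			missed++;
	}

	printf("per interrupt: handled=%d missed=%d\n", handled, missed);
	return 0;
}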
Signed-off-by: Keith Busch Acked-by: Bjorn Helgaas Cc: linux-pci@vger.kernel.org Cc: Jon Derrick Link: http://lkml.kernel.org/r/1466200821-29159-2-git-send-email-keith.busch@intel.com Signed-off-by: Thomas Gleixner --- arch/x86/pci/vmd.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/pci/vmd.c b/arch/x86/pci/vmd.c index 7792aba266df..613cac7395c4 100644 --- a/arch/x86/pci/vmd.c +++ b/arch/x86/pci/vmd.c @@ -195,7 +195,7 @@ static int vmd_msi_init(struct irq_domain *domain, struct msi_domain_info *info, vmdirq->virq = virq; irq_domain_set_info(domain, virq, vmdirq->irq->vmd_vector, info->chip, - vmdirq, handle_simple_irq, vmd, NULL); + vmdirq, handle_untracked_irq, vmd, NULL); return 0; } -- cgit v1.2.3 From a1141e0b5ca6ee3e5e35d5f1a310a5ecb9c96ce5 Mon Sep 17 00:00:00 2001 From: Fenghua Yu Date: Fri, 20 May 2016 10:47:05 -0700 Subject: x86/fpu/xstate: Define and use 'fpu_user_xstate_size' The kernel xstate area can be in standard or compacted format; it is always in standard format for user mode. When XSAVES is enabled, the kernel uses the compacted format and it is necessary to use a separate fpu_user_xstate_size for signal/ptrace frames. Signed-off-by: Fenghua Yu [ Rebased the patch and cleaned up the naming. ] Signed-off-by: Yu-cheng Yu Reviewed-by: Dave Hansen Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Dave Hansen Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Quentin Casasnovas Cc: Ravi V. Shankar Cc: Sai Praneeth Prakhya Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/8756ec34dabddfc727cda5743195eb81e8caf91c.1463760376.git.yu-cheng.yu@intel.com Signed-off-by: Ingo Molnar --- arch/x86/include/asm/fpu/xstate.h | 1 - arch/x86/include/asm/processor.h | 1 + arch/x86/kernel/fpu/init.c | 5 ++- arch/x86/kernel/fpu/signal.c | 27 ++++++++++---- arch/x86/kernel/fpu/xstate.c | 76 ++++++++++++++++++++++++--------------- 5 files changed, 73 insertions(+), 37 deletions(-) (limited to 'arch') diff --git a/arch/x86/include/asm/fpu/xstate.h b/arch/x86/include/asm/fpu/xstate.h index 38951b0fcc5a..16df2c44ac66 100644 --- a/arch/x86/include/asm/fpu/xstate.h +++ b/arch/x86/include/asm/fpu/xstate.h @@ -39,7 +39,6 @@ #define REX_PREFIX #endif -extern unsigned int xstate_size; extern u64 xfeatures_mask; extern u64 xstate_fx_sw_bytes[USER_XSTATE_FX_SW_WORDS]; diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index 62c6cc3cc5d3..0a16a16284f5 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -368,6 +368,7 @@ DECLARE_PER_CPU(struct irq_stack *, softirq_stack); #endif /* X86_64 */ extern unsigned int xstate_size; +extern unsigned int fpu_user_xstate_size; struct perf_event; diff --git a/arch/x86/kernel/fpu/init.c b/arch/x86/kernel/fpu/init.c index aacfd7a82cec..5b1928c0aad4 100644 --- a/arch/x86/kernel/fpu/init.c +++ b/arch/x86/kernel/fpu/init.c @@ -195,7 +195,7 @@ static void __init fpu__init_task_struct_size(void) } /* - * Set up the xstate_size based on the legacy FPU context size. + * Set up the user and kernel xstate_size based on the legacy FPU context size. * * We set this up first, and later it will be overwritten by * fpu__init_system_xstate() if the CPU knows about xstates. 
@@ -226,6 +226,9 @@ static void __init fpu__init_system_xstate_size_legacy(void) else xstate_size = sizeof(struct fregs_state); } + + fpu_user_xstate_size = xstate_size; + /* * Quirk: we don't yet handle the XSAVES* instructions * correctly, as we don't correctly convert between diff --git a/arch/x86/kernel/fpu/signal.c b/arch/x86/kernel/fpu/signal.c index c6f2a3cee2c2..0d29d4de4209 100644 --- a/arch/x86/kernel/fpu/signal.c +++ b/arch/x86/kernel/fpu/signal.c @@ -32,7 +32,7 @@ static inline int check_for_xstate(struct fxregs_state __user *buf, /* Check for the first magic field and other error scenarios. */ if (fx_sw->magic1 != FP_XSTATE_MAGIC1 || fx_sw->xstate_size < min_xstate_size || - fx_sw->xstate_size > xstate_size || + fx_sw->xstate_size > fpu_user_xstate_size || fx_sw->xstate_size > fx_sw->extended_size) return -1; @@ -89,7 +89,8 @@ static inline int save_xstate_epilog(void __user *buf, int ia32_frame) if (!use_xsave()) return err; - err |= __put_user(FP_XSTATE_MAGIC2, (__u32 *)(buf + xstate_size)); + err |= __put_user(FP_XSTATE_MAGIC2, + (__u32 *)(buf + fpu_user_xstate_size)); /* * Read the xfeatures which we copied (directly from the cpu or @@ -126,7 +127,7 @@ static inline int copy_fpregs_to_sigframe(struct xregs_state __user *buf) else err = copy_fregs_to_user((struct fregs_state __user *) buf); - if (unlikely(err) && __clear_user(buf, xstate_size)) + if (unlikely(err) && __clear_user(buf, fpu_user_xstate_size)) err = -EFAULT; return err; } @@ -176,8 +177,19 @@ int copy_fpstate_to_sigframe(void __user *buf, void __user *buf_fx, int size) if (ia32_fxstate) copy_fxregs_to_kernel(&tsk->thread.fpu); } else { + /* + * It is a *bug* if kernel uses compacted-format for xsave + * area and we copy it out directly to a signal frame. It + * should have been handled above by saving the registers + * directly. + */ + if (boot_cpu_has(X86_FEATURE_XSAVES)) { + WARN_ONCE(1, "x86/fpu: saving compacted-format xsave area to a signal frame!\n"); + return -1; + } + fpstate_sanitize_xstate(&tsk->thread.fpu); - if (__copy_to_user(buf_fx, xsave, xstate_size)) + if (__copy_to_user(buf_fx, xsave, fpu_user_xstate_size)) return -1; } @@ -344,7 +356,8 @@ static int __fpu__restore_sig(void __user *buf, void __user *buf_fx, int size) static inline int xstate_sigframe_size(void) { - return use_xsave() ? xstate_size + FP_XSTATE_MAGIC2_SIZE : xstate_size; + return use_xsave() ? fpu_user_xstate_size + FP_XSTATE_MAGIC2_SIZE : + fpu_user_xstate_size; } /* @@ -388,12 +401,12 @@ fpu__alloc_mathframe(unsigned long sp, int ia32_frame, */ void fpu__init_prepare_fx_sw_frame(void) { - int size = xstate_size + FP_XSTATE_MAGIC2_SIZE; + int size = fpu_user_xstate_size + FP_XSTATE_MAGIC2_SIZE; fx_sw_reserved.magic1 = FP_XSTATE_MAGIC1; fx_sw_reserved.extended_size = size; fx_sw_reserved.xfeatures = xfeatures_mask; - fx_sw_reserved.xstate_size = xstate_size; + fx_sw_reserved.xstate_size = fpu_user_xstate_size; if (config_enabled(CONFIG_IA32_EMULATION) || config_enabled(CONFIG_X86_32)) { diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c index 4ea2a59483c7..9c4da358ebb9 100644 --- a/arch/x86/kernel/fpu/xstate.c +++ b/arch/x86/kernel/fpu/xstate.c @@ -43,6 +43,13 @@ static unsigned int xstate_offsets[XFEATURE_MAX] = { [ 0 ... XFEATURE_MAX - 1] = static unsigned int xstate_sizes[XFEATURE_MAX] = { [ 0 ... 
XFEATURE_MAX - 1] = -1}; static unsigned int xstate_comp_offsets[sizeof(xfeatures_mask)*8]; +/* + * The XSAVE area of kernel can be in standard or compacted format; + * it is always in standard format for user mode. This is the user + * mode standard format size used for signal and ptrace frames. + */ +unsigned int fpu_user_xstate_size; + /* * Clear all of the X86_FEATURE_* bits that are unavailable * when the CPU has no XSAVE support. @@ -171,7 +178,7 @@ void fpstate_sanitize_xstate(struct fpu *fpu) */ while (xfeatures) { if (xfeatures & 0x1) { - int offset = xstate_offsets[feature_bit]; + int offset = xstate_comp_offsets[feature_bit]; int size = xstate_sizes[feature_bit]; memcpy((void *)fx + offset, @@ -533,8 +540,9 @@ static void do_extra_xstate_size_checks(void) XSTATE_WARN_ON(paranoid_xstate_size != xstate_size); } + /* - * Calculate total size of enabled xstates in XCR0/xfeatures_mask. + * Get total size of enabled xstates in XCR0/xfeatures_mask. * * Note the SDM's wording here. "sub-function 0" only enumerates * the size of the *user* states. If we use it to size a buffer @@ -544,34 +552,33 @@ static void do_extra_xstate_size_checks(void) * Note that we do not currently set any bits on IA32_XSS so * 'XCR0 | IA32_XSS == XCR0' for now. */ -static unsigned int __init calculate_xstate_size(void) +static unsigned int __init get_xsaves_size(void) { unsigned int eax, ebx, ecx, edx; - unsigned int calculated_xstate_size; + /* + * - CPUID function 0DH, sub-function 1: + * EBX enumerates the size (in bytes) required by + * the XSAVES instruction for an XSAVE area + * containing all the state components + * corresponding to bits currently set in + * XCR0 | IA32_XSS. + */ + cpuid_count(XSTATE_CPUID, 1, &eax, &ebx, &ecx, &edx); + return ebx; +} - if (!boot_cpu_has(X86_FEATURE_XSAVES)) { - /* - * - CPUID function 0DH, sub-function 0: - * EBX enumerates the size (in bytes) required by - * the XSAVE instruction for an XSAVE area - * containing all the *user* state components - * corresponding to bits currently set in XCR0. - */ - cpuid_count(XSTATE_CPUID, 0, &eax, &ebx, &ecx, &edx); - calculated_xstate_size = ebx; - } else { - /* - * - CPUID function 0DH, sub-function 1: - * EBX enumerates the size (in bytes) required by - * the XSAVES instruction for an XSAVE area - * containing all the state components - * corresponding to bits currently set in - * XCR0 | IA32_XSS. - */ - cpuid_count(XSTATE_CPUID, 1, &eax, &ebx, &ecx, &edx); - calculated_xstate_size = ebx; - } - return calculated_xstate_size; +static unsigned int __init get_xsave_size(void) +{ + unsigned int eax, ebx, ecx, edx; + /* + * - CPUID function 0DH, sub-function 0: + * EBX enumerates the size (in bytes) required by + * the XSAVE instruction for an XSAVE area + * containing all the *user* state components + * corresponding to bits currently set in XCR0. 
+ */ + cpuid_count(XSTATE_CPUID, 0, &eax, &ebx, &ecx, &edx); + return ebx; } /* @@ -591,7 +598,15 @@ static bool is_supported_xstate_size(unsigned int test_xstate_size) static int init_xstate_size(void) { /* Recompute the context size for enabled features: */ - unsigned int possible_xstate_size = calculate_xstate_size(); + unsigned int possible_xstate_size; + unsigned int xsave_size; + + xsave_size = get_xsave_size(); + + if (boot_cpu_has(X86_FEATURE_XSAVES)) + possible_xstate_size = get_xsaves_size(); + else + possible_xstate_size = xsave_size; /* Ensure we have the space to store all enabled: */ if (!is_supported_xstate_size(possible_xstate_size)) @@ -603,6 +618,11 @@ static int init_xstate_size(void) */ xstate_size = possible_xstate_size; do_extra_xstate_size_checks(); + + /* + * User space is always in standard format. + */ + fpu_user_xstate_size = xsave_size; return 0; } -- cgit v1.2.3 From bf15a8cf8d14879b785c548728415d36ccb6a33b Mon Sep 17 00:00:00 2001 From: Fenghua Yu Date: Fri, 20 May 2016 10:47:06 -0700 Subject: x86/fpu/xstate: Rename 'xstate_size' to 'fpu_kernel_xstate_size', to distinguish it from 'fpu_user_xstate_size' User space uses standard format xsave area. fpstate in signal frame should have standard format size. To explicitly distinguish between xstate size in kernel space and the one in user space, we rename 'xstate_size' to 'fpu_kernel_xstate_size'. Cleanup only, no change in functionality. Signed-off-by: Fenghua Yu [ Rebased the patch and cleaned up the naming. ] Signed-off-by: Yu-cheng Yu Reviewed-by: Dave Hansen Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Dave Hansen Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Quentin Casasnovas Cc: Ravi V. Shankar Cc: Sai Praneeth Prakhya Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/2ecbae347a5152d94be52adf7d0f3b7305d90d99.1463760376.git.yu-cheng.yu@intel.com Signed-off-by: Ingo Molnar --- arch/x86/include/asm/processor.h | 2 +- arch/x86/kernel/fpu/core.c | 7 ++++--- arch/x86/kernel/fpu/init.c | 20 +++++++++++--------- arch/x86/kernel/fpu/signal.c | 2 +- arch/x86/kernel/fpu/xstate.c | 8 ++++---- 5 files changed, 21 insertions(+), 18 deletions(-) (limited to 'arch') diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index 0a16a16284f5..965c5d212c31 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -367,7 +367,7 @@ DECLARE_PER_CPU(struct irq_stack *, hardirq_stack); DECLARE_PER_CPU(struct irq_stack *, softirq_stack); #endif /* X86_64 */ -extern unsigned int xstate_size; +extern unsigned int fpu_kernel_xstate_size; extern unsigned int fpu_user_xstate_size; struct perf_event; diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c index 7d564742e499..c759bd01ec99 100644 --- a/arch/x86/kernel/fpu/core.c +++ b/arch/x86/kernel/fpu/core.c @@ -227,7 +227,7 @@ void fpstate_init(union fpregs_state *state) return; } - memset(state, 0, xstate_size); + memset(state, 0, fpu_kernel_xstate_size); if (static_cpu_has(X86_FEATURE_FXSR)) fpstate_init_fxstate(&state->fxsave); @@ -252,7 +252,7 @@ int fpu__copy(struct fpu *dst_fpu, struct fpu *src_fpu) * leak into the child task: */ if (use_eager_fpu()) - memset(&dst_fpu->state.xsave, 0, xstate_size); + memset(&dst_fpu->state.xsave, 0, fpu_kernel_xstate_size); /* * Save current FPU registers directly into the child @@ -271,7 +271,8 @@ int fpu__copy(struct fpu *dst_fpu, struct fpu *src_fpu) */ preempt_disable(); if (!copy_fpregs_to_fpstate(dst_fpu)) 
{ - memcpy(&src_fpu->state, &dst_fpu->state, xstate_size); + memcpy(&src_fpu->state, &dst_fpu->state, + fpu_kernel_xstate_size); if (use_eager_fpu()) copy_kernel_to_fpregs(&src_fpu->state); diff --git a/arch/x86/kernel/fpu/init.c b/arch/x86/kernel/fpu/init.c index 5b1928c0aad4..60f3839c5bfa 100644 --- a/arch/x86/kernel/fpu/init.c +++ b/arch/x86/kernel/fpu/init.c @@ -145,8 +145,8 @@ static void __init fpu__init_system_generic(void) * This is inherent to the XSAVE architecture which puts all state * components into a single, continuous memory block: */ -unsigned int xstate_size; -EXPORT_SYMBOL_GPL(xstate_size); +unsigned int fpu_kernel_xstate_size; +EXPORT_SYMBOL_GPL(fpu_kernel_xstate_size); /* Get alignment of the TYPE. */ #define TYPE_ALIGN(TYPE) offsetof(struct { char x; TYPE test; }, test) @@ -178,7 +178,7 @@ static void __init fpu__init_task_struct_size(void) * Add back the dynamically-calculated register state * size. */ - task_size += xstate_size; + task_size += fpu_kernel_xstate_size; /* * We dynamically size 'struct fpu', so we require that @@ -195,7 +195,7 @@ static void __init fpu__init_task_struct_size(void) } /* - * Set up the user and kernel xstate_size based on the legacy FPU context size. + * Set up the user and kernel xstate sizes based on the legacy FPU context size. * * We set this up first, and later it will be overwritten by * fpu__init_system_xstate() if the CPU knows about xstates. @@ -208,7 +208,7 @@ static void __init fpu__init_system_xstate_size_legacy(void) on_boot_cpu = 0; /* - * Note that xstate_size might be overwriten later during + * Note that xstate sizes might be overwritten later during * fpu__init_system_xstate(). */ @@ -219,15 +219,17 @@ static void __init fpu__init_system_xstate_size_legacy(void) */ setup_clear_cpu_cap(X86_FEATURE_XSAVE); setup_clear_cpu_cap(X86_FEATURE_XSAVEOPT); - xstate_size = sizeof(struct swregs_state); + fpu_kernel_xstate_size = sizeof(struct swregs_state); } else { if (boot_cpu_has(X86_FEATURE_FXSR)) - xstate_size = sizeof(struct fxregs_state); + fpu_kernel_xstate_size = + sizeof(struct fxregs_state); else - xstate_size = sizeof(struct fregs_state); + fpu_kernel_xstate_size = + sizeof(struct fregs_state); } - fpu_user_xstate_size = xstate_size; + fpu_user_xstate_size = fpu_kernel_xstate_size; /* * Quirk: we don't yet handle the XSAVES* instructions diff --git a/arch/x86/kernel/fpu/signal.c b/arch/x86/kernel/fpu/signal.c index 0d29d4de4209..06d80f62c03f 100644 --- a/arch/x86/kernel/fpu/signal.c +++ b/arch/x86/kernel/fpu/signal.c @@ -263,7 +263,7 @@ static int __fpu__restore_sig(void __user *buf, void __user *buf_fx, int size) int ia32_fxstate = (buf != buf_fx); struct task_struct *tsk = current; struct fpu *fpu = &tsk->thread.fpu; - int state_size = xstate_size; + int state_size = fpu_kernel_xstate_size; u64 xfeatures = 0; int fx_only = 0; diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c index 9c4da358ebb9..46abfafe61c8 100644 --- a/arch/x86/kernel/fpu/xstate.c +++ b/arch/x86/kernel/fpu/xstate.c @@ -537,7 +537,7 @@ static void do_extra_xstate_size_checks(void) */ paranoid_xstate_size += xfeature_size(i); } - XSTATE_WARN_ON(paranoid_xstate_size != xstate_size); + XSTATE_WARN_ON(paranoid_xstate_size != fpu_kernel_xstate_size); } @@ -616,7 +616,7 @@ static int init_xstate_size(void) * The size is OK, we are definitely going to use xsave, * make it known to the world that we need more space. 
*/ - xstate_size = possible_xstate_size; + fpu_kernel_xstate_size = possible_xstate_size; do_extra_xstate_size_checks(); /* @@ -679,14 +679,14 @@ void __init fpu__init_system_xstate(void) return; } - update_regset_xstate_info(xstate_size, xfeatures_mask); + update_regset_xstate_info(fpu_kernel_xstate_size, xfeatures_mask); fpu__init_prepare_fx_sw_frame(); setup_init_fpu_buf(); setup_xstate_comp(); pr_info("x86/fpu: Enabled xstate features 0x%llx, context size is %d bytes, using '%s' format.\n", xfeatures_mask, - xstate_size, + fpu_kernel_xstate_size, boot_cpu_has(X86_FEATURE_XSAVES) ? "compacted" : "standard"); } -- cgit v1.2.3 From 7d9370607d28afd454775c623d5447603473a3c3 Mon Sep 17 00:00:00 2001 From: Fenghua Yu Date: Fri, 20 May 2016 10:47:07 -0700 Subject: x86/fpu/xstate: Keep init_fpstate.xsave.header.xfeatures as zero for init optimization Keep init_fpstate.xsave.header.xfeatures as zero for init optimization. This is important for init optimization that is implemented in processor. If a bit corresponding to an xstate in xstate_bv is 0, it means the xstate is in init status and will not be read from memory to the processor during XRSTOR/XRSTORS instruction. This largely impacts context switch performance. Signed-off-by: Fenghua Yu Signed-off-by: Yu-cheng Yu Reviewed-by: Dave Hansen Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Dave Hansen Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Quentin Casasnovas Cc: Ravi V. Shankar Cc: Sai Praneeth Prakhya Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/2fb4ec7f18b76e8cda057a8c0038def74a9b8044.1463760376.git.yu-cheng.yu@intel.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/fpu/xstate.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c index 46abfafe61c8..dbfef1b7be7c 100644 --- a/arch/x86/kernel/fpu/xstate.c +++ b/arch/x86/kernel/fpu/xstate.c @@ -329,13 +329,11 @@ static void __init setup_init_fpu_buf(void) setup_xstate_features(); print_xstate_features(); - if (boot_cpu_has(X86_FEATURE_XSAVES)) { + if (boot_cpu_has(X86_FEATURE_XSAVES)) init_fpstate.xsave.header.xcomp_bv = (u64)1 << 63 | xfeatures_mask; - init_fpstate.xsave.header.xfeatures = xfeatures_mask; - } /* - * Init all the features state with header_bv being 0x0 + * Init all the features state with header.xfeatures being 0x0 */ copy_kernel_to_xregs_booting(&init_fpstate.xsave); -- cgit v1.2.3 From 99aa22d0d8f70d9317727ab40c85b2ead740a6ca Mon Sep 17 00:00:00 2001 From: Yu-cheng Yu Date: Fri, 20 May 2016 10:47:08 -0700 Subject: x86/fpu/xstate: Copy xstate registers directly to the signal frame when compacted format is in use XSAVES is a kernel instruction and uses a compacted format. When working with user space, the kernel should provide standard-format, non-supervisor state data. We cannot do __copy_to_user() from a compacted-format kernel xstate area to a signal frame. Dave Hansen proposes this method to simplify copy xstate directly to user. This patch is based on an earlier patch from Fenghua Yu Originally-from: Fenghua Yu Signed-off-by: Yu-cheng Yu Reviewed-by: Dave Hansen Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Dave Hansen Cc: Denys Vlasenko Cc: Fenghua Yu Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Quentin Casasnovas Cc: Ravi V. 
Shankar Cc: Sai Praneeth Prakhya Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/c36f419d525517d04209a28dd8e1e5af9000036e.1463760376.git.yu-cheng.yu@intel.com Signed-off-by: Ingo Molnar --- arch/x86/include/asm/fpu/xstate.h | 1 + arch/x86/kernel/fpu/signal.c | 3 ++- arch/x86/kernel/fpu/xstate.c | 2 +- 3 files changed, 4 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/x86/include/asm/fpu/xstate.h b/arch/x86/include/asm/fpu/xstate.h index 16df2c44ac66..d812cf361282 100644 --- a/arch/x86/include/asm/fpu/xstate.h +++ b/arch/x86/include/asm/fpu/xstate.h @@ -47,5 +47,6 @@ extern void update_regset_xstate_info(unsigned int size, u64 xstate_mask); void fpu__xstate_clear_all_cpu_caps(void); void *get_xsave_addr(struct xregs_state *xsave, int xstate); const void *get_xsave_field_ptr(int xstate_field); +int using_compacted_format(void); #endif diff --git a/arch/x86/kernel/fpu/signal.c b/arch/x86/kernel/fpu/signal.c index 06d80f62c03f..8aa96cbb5dfb 100644 --- a/arch/x86/kernel/fpu/signal.c +++ b/arch/x86/kernel/fpu/signal.c @@ -8,6 +8,7 @@ #include #include #include +#include #include #include @@ -169,7 +170,7 @@ int copy_fpstate_to_sigframe(void __user *buf, void __user *buf_fx, int size) sizeof(struct user_i387_ia32_struct), NULL, (struct _fpstate_32 __user *) buf) ? -1 : 1; - if (fpregs_active()) { + if (fpregs_active() || using_compacted_format()) { /* Save the live register state to the user directly. */ if (copy_fpregs_to_sigframe(buf_fx)) return -1; diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c index dbfef1b7be7c..0b01f003df8b 100644 --- a/arch/x86/kernel/fpu/xstate.c +++ b/arch/x86/kernel/fpu/xstate.c @@ -420,7 +420,7 @@ static int xfeature_size(int xfeature_nr) * that it is obvious which aspect of 'XSAVES' is being handled * by the calling code. 
*/ -static int using_compacted_format(void) +int using_compacted_format(void) { return boot_cpu_has(X86_FEATURE_XSAVES); } -- cgit v1.2.3 From 4aef66c8ae91d00affeeb24cfb176b53354ac969 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 17 Jun 2016 17:02:01 +0200 Subject: locking/atomic, arch/arc: Fix build Resolve conflict between commits: fbffe892e525 ("locking/atomic, arch/arc: Implement atomic_fetch_{add,sub,and,andnot,or,xor}()") and: ed6aefed726a ("Revert "ARCv2: spinlock/rwlock/atomics: Delayed retry of failed SCOND with exponential backoff"") Reported-by: Guenter Roeck Signed-off-by: Peter Zijlstra (Intel) Cc: Linus Torvalds Cc: Nigel Topham Cc: Noam Camus Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Vineet Gupta Cc: linux-kernel@vger.kernel.org Cc: linux-snps-arc@lists.infradead.org Signed-off-by: Ingo Molnar --- arch/arc/include/asm/atomic.h | 8 -------- 1 file changed, 8 deletions(-) (limited to 'arch') diff --git a/arch/arc/include/asm/atomic.h b/arch/arc/include/asm/atomic.h index bd9c51cb2bfd..4e3c1b6b0806 100644 --- a/arch/arc/include/asm/atomic.h +++ b/arch/arc/include/asm/atomic.h @@ -71,7 +71,6 @@ static inline int atomic_##op##_return(int i, atomic_t *v) \ static inline int atomic_fetch_##op(int i, atomic_t *v) \ { \ unsigned int val, orig; \ - SCOND_FAIL_RETRY_VAR_DEF \ \ /* \ * Explicit full memory barrier needed before/after as \ @@ -84,11 +83,8 @@ static inline int atomic_fetch_##op(int i, atomic_t *v) \ " " #asm_op " %[val], %[orig], %[i] \n" \ " scond %[val], [%[ctr]] \n" \ " \n" \ - SCOND_FAIL_RETRY_ASM \ - \ : [val] "=&r" (val), \ [orig] "=&r" (orig) \ - SCOND_FAIL_RETRY_VARS \ : [ctr] "r" (&v->counter), \ [i] "ir" (i) \ : "cc"); \ @@ -199,10 +195,6 @@ ATOMIC_OPS(andnot, &= ~, bic) ATOMIC_OPS(or, |=, or) ATOMIC_OPS(xor, ^=, xor) -#undef SCOND_FAIL_RETRY_VAR_DEF -#undef SCOND_FAIL_RETRY_ASM -#undef SCOND_FAIL_RETRY_VARS - #else /* CONFIG_ARC_PLAT_EZNPS */ static inline int atomic_read(const atomic_t *v) -- cgit v1.2.3 From 86a664d58f3ba2398a378dc9da6d4cfa737d2281 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 17 Jun 2016 17:05:38 +0200 Subject: locking/atomic, arch/m68k: Remove comment I misread the inline asm. It uses a rare construct to provide an input to a previously declared output to do the atomic_read(). Reported-by: Geert Uytterhoeven Signed-off-by: Peter Zijlstra (Intel) Cc: Andreas Schwab Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Cc: linux-m68k@lists.linux-m68k.org Signed-off-by: Ingo Molnar --- arch/m68k/include/asm/atomic.h | 7 ------- 1 file changed, 7 deletions(-) (limited to 'arch') diff --git a/arch/m68k/include/asm/atomic.h b/arch/m68k/include/asm/atomic.h index 3e03de7ae33b..cf4c3a7b1a45 100644 --- a/arch/m68k/include/asm/atomic.h +++ b/arch/m68k/include/asm/atomic.h @@ -38,13 +38,6 @@ static inline void atomic_##op(int i, atomic_t *v) \ #ifdef CONFIG_RMW_INSNS -/* - * Am I reading these CAS loops right in that %2 is the old value and the first - * iteration uses an uninitialized value? - * - * Would it not make sense to add: tmp = atomic_read(v); to avoid this? 
- */ - #define ATOMIC_OP_RETURN(op, c_op, asm_op) \ static inline int atomic_##op##_return(int i, atomic_t *v) \ { \ -- cgit v1.2.3 From b3b630b26ae87a54e2f396b459aab0cd2286fc77 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Mon, 20 Jun 2016 22:57:22 +0200 Subject: ARM: dts: sunxi: Add pll3 to simplefb nodes clocks lists Now that we've a clock node describing pll3 we must add it to the simplefb nodes clocks lists to avoid it getting turned off when simplefb is used. This fixes the screen going black when using simplefb. Signed-off-by: Hans de Goede Signed-off-by: Maxime Ripard --- arch/arm/boot/dts/sun4i-a10.dtsi | 21 ++++++++++++--------- arch/arm/boot/dts/sun5i-a10s.dtsi | 11 ++++++----- arch/arm/boot/dts/sun7i-a20.dtsi | 11 ++++++----- 3 files changed, 24 insertions(+), 19 deletions(-) (limited to 'arch') diff --git a/arch/arm/boot/dts/sun4i-a10.dtsi b/arch/arm/boot/dts/sun4i-a10.dtsi index a03e56fb5dbc..ca58eb279d55 100644 --- a/arch/arm/boot/dts/sun4i-a10.dtsi +++ b/arch/arm/boot/dts/sun4i-a10.dtsi @@ -65,8 +65,9 @@ compatible = "allwinner,simple-framebuffer", "simple-framebuffer"; allwinner,pipeline = "de_be0-lcd0-hdmi"; - clocks = <&pll5 1>, <&ahb_gates 36>, <&ahb_gates 43>, - <&ahb_gates 44>, <&dram_gates 26>; + clocks = <&pll3>, <&pll5 1>, <&ahb_gates 36>, + <&ahb_gates 43>, <&ahb_gates 44>, + <&dram_gates 26>; status = "disabled"; }; @@ -74,8 +75,9 @@ compatible = "allwinner,simple-framebuffer", "simple-framebuffer"; allwinner,pipeline = "de_fe0-de_be0-lcd0-hdmi"; - clocks = <&pll5 1>, <&ahb_gates 36>, <&ahb_gates 43>, - <&ahb_gates 44>, <&ahb_gates 46>, + clocks = <&pll3>, <&pll5 1>, <&ahb_gates 36>, + <&ahb_gates 43>, <&ahb_gates 44>, + <&ahb_gates 46>, <&dram_gates 25>, <&dram_gates 26>; status = "disabled"; }; @@ -84,9 +86,9 @@ compatible = "allwinner,simple-framebuffer", "simple-framebuffer"; allwinner,pipeline = "de_fe0-de_be0-lcd0"; - clocks = <&pll5 1>, <&ahb_gates 36>, <&ahb_gates 44>, - <&ahb_gates 46>, <&dram_gates 25>, - <&dram_gates 26>; + clocks = <&pll3>, <&pll5 1>, <&ahb_gates 36>, + <&ahb_gates 44>, <&ahb_gates 46>, + <&dram_gates 25>, <&dram_gates 26>; status = "disabled"; }; @@ -94,8 +96,9 @@ compatible = "allwinner,simple-framebuffer", "simple-framebuffer"; allwinner,pipeline = "de_fe0-de_be0-lcd0-tve0"; - clocks = <&pll5 1>, <&ahb_gates 34>, <&ahb_gates 36>, - <&ahb_gates 44>, <&ahb_gates 46>, + clocks = <&pll3>, <&pll5 1>, <&ahb_gates 34>, + <&ahb_gates 36>, <&ahb_gates 44>, + <&ahb_gates 46>, <&dram_gates 5>, <&dram_gates 25>, <&dram_gates 26>; status = "disabled"; }; diff --git a/arch/arm/boot/dts/sun5i-a10s.dtsi b/arch/arm/boot/dts/sun5i-a10s.dtsi index bddd0de88af6..367f33012493 100644 --- a/arch/arm/boot/dts/sun5i-a10s.dtsi +++ b/arch/arm/boot/dts/sun5i-a10s.dtsi @@ -65,8 +65,8 @@ compatible = "allwinner,simple-framebuffer", "simple-framebuffer"; allwinner,pipeline = "de_be0-lcd0-hdmi"; - clocks = <&pll5 1>, <&ahb_gates 36>, <&ahb_gates 43>, - <&ahb_gates 44>; + clocks = <&pll3>, <&pll5 1>, <&ahb_gates 36>, + <&ahb_gates 43>, <&ahb_gates 44>; status = "disabled"; }; @@ -74,7 +74,8 @@ compatible = "allwinner,simple-framebuffer", "simple-framebuffer"; allwinner,pipeline = "de_be0-lcd0"; - clocks = <&pll5 1>, <&ahb_gates 36>, <&ahb_gates 44>; + clocks = <&pll3>, <&pll5 1>, <&ahb_gates 36>, + <&ahb_gates 44>; status = "disabled"; }; @@ -82,8 +83,8 @@ compatible = "allwinner,simple-framebuffer", "simple-framebuffer"; allwinner,pipeline = "de_be0-lcd0-tve0"; - clocks = <&pll5 1>, <&ahb_gates 34>, <&ahb_gates 36>, - <&ahb_gates 44>; + clocks = <&pll3>, 
<&pll5 1>, <&ahb_gates 34>, + <&ahb_gates 36>, <&ahb_gates 44>; status = "disabled"; }; }; diff --git a/arch/arm/boot/dts/sun7i-a20.dtsi b/arch/arm/boot/dts/sun7i-a20.dtsi index febdf4c72fb0..f480051c1f8a 100644 --- a/arch/arm/boot/dts/sun7i-a20.dtsi +++ b/arch/arm/boot/dts/sun7i-a20.dtsi @@ -67,8 +67,9 @@ compatible = "allwinner,simple-framebuffer", "simple-framebuffer"; allwinner,pipeline = "de_be0-lcd0-hdmi"; - clocks = <&pll5 1>, <&ahb_gates 36>, <&ahb_gates 43>, - <&ahb_gates 44>, <&dram_gates 26>; + clocks = <&pll3>, <&pll5 1>, <&ahb_gates 36>, + <&ahb_gates 43>, <&ahb_gates 44>, + <&dram_gates 26>; status = "disabled"; }; @@ -76,8 +77,8 @@ compatible = "allwinner,simple-framebuffer", "simple-framebuffer"; allwinner,pipeline = "de_be0-lcd0"; - clocks = <&pll5 1>, <&ahb_gates 36>, <&ahb_gates 44>, - <&dram_gates 26>; + clocks = <&pll3>, <&pll5 1>, <&ahb_gates 36>, + <&ahb_gates 44>, <&dram_gates 26>; status = "disabled"; }; @@ -85,7 +86,7 @@ compatible = "allwinner,simple-framebuffer", "simple-framebuffer"; allwinner,pipeline = "de_be0-lcd0-tve0"; - clocks = <&pll5 1>, + clocks = <&pll3>, <&pll5 1>, <&ahb_gates 34>, <&ahb_gates 36>, <&ahb_gates 44>, <&dram_gates 5>, <&dram_gates 26>; status = "disabled"; -- cgit v1.2.3 From b7271b9f3e18181559b96a610f4e42bdb04b07f5 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 22 Jun 2016 11:16:49 +0200 Subject: locking/atomic, arch/tile: Fix tilepro build The tilepro change wasn't ever compiled it seems (the 0day built bot also doesn't have a toolchain for it). Make it work. The thing that makes the patch bigger than desired is namespace collision with the C11 __atomic builtin functions. So rename the tilepro functions to __atomic32. Reported-by: Sudip Mukherjee Signed-off-by: Peter Zijlstra (Intel) Acked-by: Chris Metcalf Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Stephen Rothwell Cc: Thomas Gleixner Fixes: 1af5de9af138 ("locking/atomic, arch/tile: Implement atomic{,64}_fetch_{add,sub,and,or,xor}()") Link: http://lkml.kernel.org/r/20160622091649.GB30154@twins.programming.kicks-ass.net Signed-off-by: Ingo Molnar --- arch/tile/include/asm/atomic_32.h | 24 ++++++++++++------------ arch/tile/include/asm/futex.h | 14 +++++++------- arch/tile/lib/atomic_32.c | 16 ++++++++-------- arch/tile/lib/atomic_asm_32.S | 21 +++++++++++++-------- 4 files changed, 40 insertions(+), 35 deletions(-) (limited to 'arch') diff --git a/arch/tile/include/asm/atomic_32.h b/arch/tile/include/asm/atomic_32.h index da8eb4ed3752..a93774255136 100644 --- a/arch/tile/include/asm/atomic_32.h +++ b/arch/tile/include/asm/atomic_32.h @@ -143,15 +143,15 @@ static inline void atomic64_##op(long long i, atomic64_t *v) \ { \ _atomic64_fetch_##op(&v->counter, i); \ } \ -static inline void atomic64_##op(long long i, atomic64_t *v) \ +static inline long long atomic64_fetch_##op(long long i, atomic64_t *v) \ { \ smp_mb(); \ return _atomic64_fetch_##op(&v->counter, i); \ } -ATOMIC64_OP(and) -ATOMIC64_OP(or) -ATOMIC64_OP(xor) +ATOMIC64_OPS(and) +ATOMIC64_OPS(or) +ATOMIC64_OPS(xor) #undef ATOMIC64_OPS @@ -266,16 +266,16 @@ struct __get_user { unsigned long val; int err; }; -extern struct __get_user __atomic_cmpxchg(volatile int *p, +extern struct __get_user __atomic32_cmpxchg(volatile int *p, int *lock, int o, int n); -extern struct __get_user __atomic_xchg(volatile int *p, int *lock, int n); -extern struct __get_user __atomic_xchg_add(volatile int *p, int *lock, int n); -extern struct __get_user __atomic_xchg_add_unless(volatile int *p, +extern struct __get_user 
__atomic32_xchg(volatile int *p, int *lock, int n); +extern struct __get_user __atomic32_xchg_add(volatile int *p, int *lock, int n); +extern struct __get_user __atomic32_xchg_add_unless(volatile int *p, int *lock, int o, int n); -extern struct __get_user __atomic_fetch_or(volatile int *p, int *lock, int n); -extern struct __get_user __atomic_fetch_and(volatile int *p, int *lock, int n); -extern struct __get_user __atomic_fetch_andn(volatile int *p, int *lock, int n); -extern struct __get_user __atomic_fetch_xor(volatile int *p, int *lock, int n); +extern struct __get_user __atomic32_fetch_or(volatile int *p, int *lock, int n); +extern struct __get_user __atomic32_fetch_and(volatile int *p, int *lock, int n); +extern struct __get_user __atomic32_fetch_andn(volatile int *p, int *lock, int n); +extern struct __get_user __atomic32_fetch_xor(volatile int *p, int *lock, int n); extern long long __atomic64_cmpxchg(volatile long long *p, int *lock, long long o, long long n); extern long long __atomic64_xchg(volatile long long *p, int *lock, long long n); diff --git a/arch/tile/include/asm/futex.h b/arch/tile/include/asm/futex.h index 1a6ef1b69cb1..e64a1b75fc38 100644 --- a/arch/tile/include/asm/futex.h +++ b/arch/tile/include/asm/futex.h @@ -80,16 +80,16 @@ ret = gu.err; \ } -#define __futex_set() __futex_call(__atomic_xchg) -#define __futex_add() __futex_call(__atomic_xchg_add) -#define __futex_or() __futex_call(__atomic_or) -#define __futex_andn() __futex_call(__atomic_andn) -#define __futex_xor() __futex_call(__atomic_xor) +#define __futex_set() __futex_call(__atomic32_xchg) +#define __futex_add() __futex_call(__atomic32_xchg_add) +#define __futex_or() __futex_call(__atomic32_fetch_or) +#define __futex_andn() __futex_call(__atomic32_fetch_andn) +#define __futex_xor() __futex_call(__atomic32_fetch_xor) #define __futex_cmpxchg() \ { \ - struct __get_user gu = __atomic_cmpxchg((u32 __force *)uaddr, \ - lock, oldval, oparg); \ + struct __get_user gu = __atomic32_cmpxchg((u32 __force *)uaddr, \ + lock, oldval, oparg); \ val = gu.val; \ ret = gu.err; \ } diff --git a/arch/tile/lib/atomic_32.c b/arch/tile/lib/atomic_32.c index 5b6bd932c9c7..f8128800dbf5 100644 --- a/arch/tile/lib/atomic_32.c +++ b/arch/tile/lib/atomic_32.c @@ -61,13 +61,13 @@ static inline int *__atomic_setup(volatile void *v) int _atomic_xchg(int *v, int n) { - return __atomic_xchg(v, __atomic_setup(v), n).val; + return __atomic32_xchg(v, __atomic_setup(v), n).val; } EXPORT_SYMBOL(_atomic_xchg); int _atomic_xchg_add(int *v, int i) { - return __atomic_xchg_add(v, __atomic_setup(v), i).val; + return __atomic32_xchg_add(v, __atomic_setup(v), i).val; } EXPORT_SYMBOL(_atomic_xchg_add); @@ -78,37 +78,37 @@ int _atomic_xchg_add_unless(int *v, int a, int u) * to use the first argument consistently as the "old value" * in the assembly, as is done for _atomic_cmpxchg(). 
*/ - return __atomic_xchg_add_unless(v, __atomic_setup(v), u, a).val; + return __atomic32_xchg_add_unless(v, __atomic_setup(v), u, a).val; } EXPORT_SYMBOL(_atomic_xchg_add_unless); int _atomic_cmpxchg(int *v, int o, int n) { - return __atomic_cmpxchg(v, __atomic_setup(v), o, n).val; + return __atomic32_cmpxchg(v, __atomic_setup(v), o, n).val; } EXPORT_SYMBOL(_atomic_cmpxchg); unsigned long _atomic_fetch_or(volatile unsigned long *p, unsigned long mask) { - return __atomic_fetch_or((int *)p, __atomic_setup(p), mask).val; + return __atomic32_fetch_or((int *)p, __atomic_setup(p), mask).val; } EXPORT_SYMBOL(_atomic_fetch_or); unsigned long _atomic_fetch_and(volatile unsigned long *p, unsigned long mask) { - return __atomic_fetch_and((int *)p, __atomic_setup(p), mask).val; + return __atomic32_fetch_and((int *)p, __atomic_setup(p), mask).val; } EXPORT_SYMBOL(_atomic_fetch_and); unsigned long _atomic_fetch_andn(volatile unsigned long *p, unsigned long mask) { - return __atomic_fetch_andn((int *)p, __atomic_setup(p), mask).val; + return __atomic32_fetch_andn((int *)p, __atomic_setup(p), mask).val; } EXPORT_SYMBOL(_atomic_fetch_andn); unsigned long _atomic_fetch_xor(volatile unsigned long *p, unsigned long mask) { - return __atomic_fetch_xor((int *)p, __atomic_setup(p), mask).val; + return __atomic32_fetch_xor((int *)p, __atomic_setup(p), mask).val; } EXPORT_SYMBOL(_atomic_fetch_xor); diff --git a/arch/tile/lib/atomic_asm_32.S b/arch/tile/lib/atomic_asm_32.S index 507abdd2bf9a..1a70e6c0f259 100644 --- a/arch/tile/lib/atomic_asm_32.S +++ b/arch/tile/lib/atomic_asm_32.S @@ -172,15 +172,20 @@ STD_ENTRY_SECTION(__atomic\name, .text.atomic) .endif .endm -atomic_op _cmpxchg, 32, "seq r26, r22, r2; { bbns r26, 3f; move r24, r3 }" -atomic_op _xchg, 32, "move r24, r2" -atomic_op _xchg_add, 32, "add r24, r22, r2" -atomic_op _xchg_add_unless, 32, \ + +/* + * Use __atomic32 prefix to avoid collisions with GCC builtin __atomic functions. + */ + +atomic_op 32_cmpxchg, 32, "seq r26, r22, r2; { bbns r26, 3f; move r24, r3 }" +atomic_op 32_xchg, 32, "move r24, r2" +atomic_op 32_xchg_add, 32, "add r24, r22, r2" +atomic_op 32_xchg_add_unless, 32, \ "sne r26, r22, r2; { bbns r26, 3f; add r24, r22, r3 }" -atomic_op _fetch_or, 32, "or r24, r22, r2" -atomic_op _fetch_and, 32, "and r24, r22, r2" -atomic_op _fetch_andn, 32, "nor r2, r2, zero; and r24, r22, r2" -atomic_op _fetch_xor, 32, "xor r24, r22, r2" +atomic_op 32_fetch_or, 32, "or r24, r22, r2" +atomic_op 32_fetch_and, 32, "and r24, r22, r2" +atomic_op 32_fetch_andn, 32, "nor r2, r2, zero; and r24, r22, r2" +atomic_op 32_fetch_xor, 32, "xor r24, r22, r2" atomic_op 64_cmpxchg, 64, "{ seq r26, r22, r2; seq r27, r23, r3 }; \ { bbns r26, 3f; move r24, r4 }; { bbns r27, 3f; move r25, r5 }" -- cgit v1.2.3 From 5691e03593cb9f7abfee93f4581de8933f1d6630 Mon Sep 17 00:00:00 2001 From: Jiri Slaby Date: Mon, 9 May 2016 09:11:56 +0200 Subject: tty: frv, remove unused serial macros STD_COM_FLAGS needs not be defined as it is not used anywhere on frv. SERIAL_PORT_DFNS is defined to be empty. 8250 is aware of empty SERIAL_PORT_DFNS and does: #ifndef SERIAL_PORT_DFNS #define SERIAL_PORT_DFNS #endif So no need to define it on frv. 
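As an illustration (not taken from the patch itself): the fallback works because the 8250 core expands SERIAL_PORT_DFNS inside a port-table initializer, so an empty definition simply yields an empty table. A minimal stand-alone sketch of that pattern follows, using hypothetical EXAMPLE_* names in place of the real SERIAL_PORT_DFNS and 8250 table; the empty-table case relies on the same GCC zero-length-array extension the kernel build uses.

/*
 * Hypothetical sketch of the "arch may leave it undefined" pattern.
 * An arch header may or may not define EXAMPLE_PORT_DFNS; the driver
 * supplies an empty default, so an arch with no legacy ports (like frv
 * above) need not define it at all.
 */
#include <stdio.h>

struct example_port {
	unsigned int iobase;
	unsigned int irq;
};

/* Driver-side fallback, only used if no arch header defined it: */
#ifndef EXAMPLE_PORT_DFNS
#define EXAMPLE_PORT_DFNS	/* empty: no legacy ports */
#endif

/* With the empty default this is a zero-length array (GCC extension). */
static const struct example_port example_ports[] = {
	EXAMPLE_PORT_DFNS
};

int main(void)
{
	printf("%zu legacy ports\n",
	       sizeof(example_ports) / sizeof(example_ports[0]));
	return 0;
}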
Signed-off-by: Jiri Slaby Cc: David Howells Signed-off-by: Greg Kroah-Hartman --- arch/frv/include/asm/serial.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'arch') diff --git a/arch/frv/include/asm/serial.h b/arch/frv/include/asm/serial.h index bce0d0d07e60..614c6d76789a 100644 --- a/arch/frv/include/asm/serial.h +++ b/arch/frv/include/asm/serial.h @@ -12,7 +12,3 @@ * the base baud is derived from the clock speed and so is variable */ #define BASE_BAUD 0 - -#define STD_COM_FLAGS UPF_BOOT_AUTOCONF - -#define SERIAL_PORT_DFNS -- cgit v1.2.3 From 6137b7a62978915d76db69e17e25e8e0b1057254 Mon Sep 17 00:00:00 2001 From: Jiri Slaby Date: Mon, 9 May 2016 09:11:57 +0200 Subject: tty: stop defining STD_COM_FLAGS in drivers STD_COM_FLAGS is mostly a bad name for what the drivers thinks it is. Stop using it and pass the flags directly. cyclades defines it as 0, so we do not assign anything to freshly tty_port_init'ed structure. Signed-off-by: Jiri Slaby Signed-off-by: Greg Kroah-Hartman --- arch/xtensa/platforms/xt2000/setup.c | 1 - drivers/tty/cyclades.c | 3 --- drivers/tty/serial/m32r_sio.c | 5 +---- 3 files changed, 1 insertion(+), 8 deletions(-) (limited to 'arch') diff --git a/arch/xtensa/platforms/xt2000/setup.c b/arch/xtensa/platforms/xt2000/setup.c index 5f4bd71971d6..4904c5c16918 100644 --- a/arch/xtensa/platforms/xt2000/setup.c +++ b/arch/xtensa/platforms/xt2000/setup.c @@ -113,7 +113,6 @@ void platform_heartbeat(void) } //#define RS_TABLE_SIZE 2 -//#define STD_COM_FLAGS (UPF_BOOT_AUTOCONF|UPF_SKIP_TEST) #define _SERIAL_PORT(_base,_irq) \ { \ diff --git a/drivers/tty/cyclades.c b/drivers/tty/cyclades.c index fcc4962fa4c6..5e4fa9206861 100644 --- a/drivers/tty/cyclades.c +++ b/drivers/tty/cyclades.c @@ -93,8 +93,6 @@ static void cy_send_xchar(struct tty_struct *tty, char ch); #define SERIAL_XMIT_SIZE (min(PAGE_SIZE, 4096)) #endif -#define STD_COM_FLAGS (0) - /* firmware stuff */ #define ZL_MAX_BLOCKS 16 #define DRIVER_VERSION 0x02010203 @@ -3083,7 +3081,6 @@ static int cy_init_card(struct cyclades_card *cinfo) info->port.closing_wait = CLOSING_WAIT_DELAY; info->port.close_delay = 5 * HZ / 10; - info->port.flags = STD_COM_FLAGS; init_completion(&info->shutdown_wait); if (cy_is_Z(cinfo)) { diff --git a/drivers/tty/serial/m32r_sio.c b/drivers/tty/serial/m32r_sio.c index 68765f7c2645..1b01504cf306 100644 --- a/drivers/tty/serial/m32r_sio.c +++ b/drivers/tty/serial/m32r_sio.c @@ -51,9 +51,6 @@ #define PASS_LIMIT 256 -/* Standard COM flags */ -#define STD_COM_FLAGS (UPF_BOOT_AUTOCONF | UPF_SKIP_TEST) - static const struct { unsigned int port; unsigned int irq; @@ -892,7 +889,7 @@ static void __init m32r_sio_init_ports(void) up->port.iobase = old_serial_port[i].port; up->port.irq = irq_canonicalize(old_serial_port[i].irq); up->port.uartclk = BAUD_RATE * 16; - up->port.flags = STD_COM_FLAGS; + up->port.flags = UPF_BOOT_AUTOCONF | UPF_SKIP_TEST; up->port.membase = 0; up->port.iotype = 0; up->port.regshift = 0; -- cgit v1.2.3 From 65fe935dd2387a4faf15314c73f5e6d31ef0217e Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Mon, 13 Jun 2016 15:10:02 -0700 Subject: x86/KASLR, x86/power: Remove x86 hibernation restrictions With the following fix: 70595b479ce1 ("x86/power/64: Fix crash whan the hibernation code passes control to the image kernel") ... there is no longer a problem with hibernation resuming a KASLR-booted kernel image, so remove the restriction. Signed-off-by: Kees Cook Cc: Andy Lutomirski Cc: Baoquan He Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. 
Peter Anvin Cc: Jonathan Corbet Cc: Len Brown Cc: Linus Torvalds Cc: Linux PM list Cc: Logan Gunthorpe Cc: Pavel Machek Cc: Peter Zijlstra Cc: Stephen Smalley Cc: Thomas Gleixner Cc: Yinghai Lu Cc: linux-doc@vger.kernel.org Link: http://lkml.kernel.org/r/20160613221002.GA29719@www.outflux.net Signed-off-by: Ingo Molnar --- Documentation/kernel-parameters.txt | 10 ++++------ arch/x86/boot/compressed/kaslr.c | 7 ------- kernel/power/hibernate.c | 6 ------ 3 files changed, 4 insertions(+), 19 deletions(-) (limited to 'arch') diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index 82b42c958d1c..fa8c6d470ad2 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -1803,12 +1803,10 @@ bytes respectively. Such letter suffixes can also be entirely omitted. js= [HW,JOY] Analog joystick See Documentation/input/joystick.txt. - kaslr/nokaslr [X86] - Enable/disable kernel and module base offset ASLR - (Address Space Layout Randomization) if built into - the kernel. When CONFIG_HIBERNATION is selected, - kASLR is disabled by default. When kASLR is enabled, - hibernation will be disabled. + nokaslr [KNL] + When CONFIG_RANDOMIZE_BASE is set, this disables + kernel and module base offset ASLR (Address Space + Layout Randomization). keepinitrd [HW,ARM] diff --git a/arch/x86/boot/compressed/kaslr.c b/arch/x86/boot/compressed/kaslr.c index cfeb0259ed81..dff42177cb0c 100644 --- a/arch/x86/boot/compressed/kaslr.c +++ b/arch/x86/boot/compressed/kaslr.c @@ -471,17 +471,10 @@ unsigned char *choose_random_location(unsigned long input, unsigned long choice = output; unsigned long random_addr; -#ifdef CONFIG_HIBERNATION - if (!cmdline_find_option_bool("kaslr")) { - warn("KASLR disabled: 'kaslr' not on cmdline (hibernation selected)."); - goto out; - } -#else if (cmdline_find_option_bool("nokaslr")) { warn("KASLR disabled: 'nokaslr' on cmdline."); goto out; } -#endif boot_params->hdr.loadflags |= KASLR_FLAG; diff --git a/kernel/power/hibernate.c b/kernel/power/hibernate.c index fca9254280ee..9021387c6ff4 100644 --- a/kernel/power/hibernate.c +++ b/kernel/power/hibernate.c @@ -1154,11 +1154,6 @@ static int __init nohibernate_setup(char *str) return 1; } -static int __init kaslr_nohibernate_setup(char *str) -{ - return nohibernate_setup(str); -} - static int __init page_poison_nohibernate_setup(char *str) { #ifdef CONFIG_PAGE_POISONING_ZERO @@ -1182,5 +1177,4 @@ __setup("hibernate=", hibernate_setup); __setup("resumewait", resumewait_setup); __setup("resumedelay=", resumedelay_setup); __setup("nohibernate", nohibernate_setup); -__setup("kaslr", kaslr_nohibernate_setup); __setup("page_poison=", page_poison_nohibernate_setup); -- cgit v1.2.3 From 98f78525371b55ccd1c480207ce10296c72fa340 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Wed, 25 May 2016 15:45:30 -0700 Subject: x86/boot: Refuse to build with data relocations The compressed kernel is built with -fPIC/-fPIE so that it can run in any location a bootloader happens to put it. However, since ELF relocation processing is not happening (and all the relocation information has already been stripped at link time), none of the code can use data relocations (e.g. static assignments of pointers). This is already noted in a warning comment at the top of misc.c, but this adds an explicit check for the condition during the linking stage to block any such bugs from appearing. If this was in place with the earlier bug in pagetable.c, the build would fail like this: ... 
CC arch/x86/boot/compressed/pagetable.o DATAREL arch/x86/boot/compressed/vmlinux error: arch/x86/boot/compressed/pagetable.o has data relocations! make[2]: *** [arch/x86/boot/compressed/vmlinux] Error 1 ... A clean build shows: ... CC arch/x86/boot/compressed/pagetable.o DATAREL arch/x86/boot/compressed/vmlinux LD arch/x86/boot/compressed/vmlinux ... Suggested-by: Ingo Molnar Signed-off-by: Kees Cook Cc: Andrew Morton Cc: Andrey Ryabinin Cc: Andy Lutomirski Cc: Baoquan He Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: Dmitry Vyukov Cc: H. Peter Anvin Cc: H.J. Lu Cc: Josh Poimboeuf Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Yinghai Lu Link: http://lkml.kernel.org/r/1464216334-17200-2-git-send-email-keescook@chromium.org Signed-off-by: Ingo Molnar --- arch/x86/boot/compressed/Makefile | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) (limited to 'arch') diff --git a/arch/x86/boot/compressed/Makefile b/arch/x86/boot/compressed/Makefile index f1356889204e..536ccfcc01c6 100644 --- a/arch/x86/boot/compressed/Makefile +++ b/arch/x86/boot/compressed/Makefile @@ -85,7 +85,25 @@ vmlinux-objs-$(CONFIG_EFI_STUB) += $(obj)/eboot.o $(obj)/efi_stub_$(BITS).o \ $(objtree)/drivers/firmware/efi/libstub/lib.a vmlinux-objs-$(CONFIG_EFI_MIXED) += $(obj)/efi_thunk_$(BITS).o +# The compressed kernel is built with -fPIC/-fPIE so that a boot loader +# can place it anywhere in memory and it will still run. However, since +# it is executed as-is without any ELF relocation processing performed +# (and has already had all relocation sections stripped from the binary), +# none of the code can use data relocations (e.g. static assignments of +# pointer values), since they will be meaningless at runtime. This check +# will refuse to link the vmlinux if any of these relocations are found. +quiet_cmd_check_data_rel = DATAREL $@ +define cmd_check_data_rel + for obj in $(filter %.o,$^); do \ + readelf -S $$obj | grep -qF .rel.local && { \ + echo "error: $$obj has data relocations!" >&2; \ + exit 1; \ + } || true; \ + done +endef + $(obj)/vmlinux: $(vmlinux-objs-y) FORCE + $(call if_changed,check_data_rel) $(call if_changed,ld) OBJCOPYFLAGS_vmlinux.bin := -R .comment -S -- cgit v1.2.3 From 11fdf97a3cd1a5a27625f820ceb74e1caba4fd26 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Wed, 25 May 2016 15:45:31 -0700 Subject: x86/KASLR: Clarify identity map interface This extracts the call to prepare_level4() into a top-level function that the user of the pagetable.c interface must call to initialize the new page tables. For clarity and to match the "finalize" function, it has been renamed to initialize_identity_maps(). This function also gains the initialization of mapping_info so we don't have to do it each time in add_identity_map(). Additionally add copyright notice to the top, to make it clear that the bulk of the pagetable.c code was written by Yinghai, and that I just added bugs later. :) Signed-off-by: Kees Cook Cc: Andrew Morton Cc: Andrey Ryabinin Cc: Andy Lutomirski Cc: Baoquan He Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: Dmitry Vyukov Cc: H. Peter Anvin Cc: H.J. 
Lu Cc: Josh Poimboeuf Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Yinghai Lu Link: http://lkml.kernel.org/r/1464216334-17200-3-git-send-email-keescook@chromium.org Signed-off-by: Ingo Molnar --- arch/x86/boot/compressed/kaslr.c | 3 +++ arch/x86/boot/compressed/misc.h | 3 +++ arch/x86/boot/compressed/pagetable.c | 26 ++++++++++++++++---------- 3 files changed, 22 insertions(+), 10 deletions(-) (limited to 'arch') diff --git a/arch/x86/boot/compressed/kaslr.c b/arch/x86/boot/compressed/kaslr.c index dff42177cb0c..54037c9f2def 100644 --- a/arch/x86/boot/compressed/kaslr.c +++ b/arch/x86/boot/compressed/kaslr.c @@ -478,6 +478,9 @@ unsigned char *choose_random_location(unsigned long input, boot_params->hdr.loadflags |= KASLR_FLAG; + /* Prepare to add new identity pagetables on demand. */ + initialize_identity_maps(); + /* Record the various known unsafe memory ranges. */ mem_avoid_init(input, input_size, output); diff --git a/arch/x86/boot/compressed/misc.h b/arch/x86/boot/compressed/misc.h index b6fec1ff10e4..09c4ddd02ac6 100644 --- a/arch/x86/boot/compressed/misc.h +++ b/arch/x86/boot/compressed/misc.h @@ -85,10 +85,13 @@ unsigned char *choose_random_location(unsigned long input_ptr, #endif #ifdef CONFIG_X86_64 +void initialize_identity_maps(void); void add_identity_map(unsigned long start, unsigned long size); void finalize_identity_maps(void); extern unsigned char _pgtable[]; #else +static inline void initialize_identity_maps(void) +{ } static inline void add_identity_map(unsigned long start, unsigned long size) { } static inline void finalize_identity_maps(void) diff --git a/arch/x86/boot/compressed/pagetable.c b/arch/x86/boot/compressed/pagetable.c index 34b95df14e69..6e31a6aac4d3 100644 --- a/arch/x86/boot/compressed/pagetable.c +++ b/arch/x86/boot/compressed/pagetable.c @@ -2,6 +2,9 @@ * This code is used on x86_64 to create page table identity mappings on * demand by building up a new set of page tables (or appending to the * existing ones), and then switching over to them when ready. + * + * Copyright (C) 2015-2016 Yinghai Lu + * Copyright (C) 2016 Kees Cook */ /* @@ -59,9 +62,21 @@ static struct alloc_pgt_data pgt_data; /* The top level page table entry pointer. */ static unsigned long level4p; +/* + * Mapping information structure passed to kernel_ident_mapping_init(). + * Due to relocation, pointers must be assigned at run time not build time. + */ +static struct x86_mapping_info mapping_info = { + .pmd_flag = __PAGE_KERNEL_LARGE_EXEC, +}; + /* Locates and clears a region for a new top level page table. */ -static void prepare_level4(void) +void initialize_identity_maps(void) { + /* Init mapping_info with run-time function/buffer pointers. */ + mapping_info.alloc_pgt_page = alloc_pgt_page; + mapping_info.context = &pgt_data; + /* * It should be impossible for this not to already be true, * but since calling this a second time would rewind the other @@ -96,17 +111,8 @@ static void prepare_level4(void) */ void add_identity_map(unsigned long start, unsigned long size) { - struct x86_mapping_info mapping_info = { - .alloc_pgt_page = alloc_pgt_page, - .context = &pgt_data, - .pmd_flag = __PAGE_KERNEL_LARGE_EXEC, - }; unsigned long end = start + size; - /* Make sure we have a top level page table ready to use. */ - if (!level4p) - prepare_level4(); - /* Align boundary to 2M. 
*/ start = round_down(start, PMD_SIZE); end = round_up(end, PMD_SIZE); -- cgit v1.2.3 From 8391c73c96f28d4e8c40fd401fd0c9c04391b44a Mon Sep 17 00:00:00 2001 From: Baoquan He Date: Wed, 25 May 2016 15:45:32 -0700 Subject: x86/KASLR: Randomize virtual address separately The current KASLR implementation randomizes the physical and virtual addresses of the kernel together (both are offset by the same amount). It calculates the delta of the physical address where vmlinux was linked to load and where it is finally loaded. If the delta is not equal to 0 (i.e. the kernel was relocated), relocation handling needs be done. On 64-bit, this patch randomizes both the physical address where kernel is decompressed and the virtual address where kernel text is mapped and will execute from. We now have two values being chosen, so the function arguments are reorganized to pass by pointer so they can be directly updated. Since relocation handling only depends on the virtual address, we must check the virtual delta, not the physical delta for processing kernel relocations. This also populates the page table for the new virtual address range. 32-bit does not support a separate virtual address, so it continues to use the physical offset for its virtual offset. Additionally updates the sanity checks done on the resulting kernel addresses since they are potentially separate now. [kees: rewrote changelog, limited virtual split to 64-bit only, update checks] [kees: fix CONFIG_RANDOMIZE_BASE=n boot failure] Signed-off-by: Baoquan He Signed-off-by: Kees Cook Cc: Andrew Morton Cc: Andrey Ryabinin Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: Dmitry Vyukov Cc: H. Peter Anvin Cc: H.J. Lu Cc: Josh Poimboeuf Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Yinghai Lu Link: http://lkml.kernel.org/r/1464216334-17200-4-git-send-email-keescook@chromium.org Signed-off-by: Ingo Molnar --- arch/x86/boot/compressed/kaslr.c | 41 +++++++++++++++++---------------- arch/x86/boot/compressed/misc.c | 49 ++++++++++++++++++++++++---------------- arch/x86/boot/compressed/misc.h | 22 ++++++++++-------- 3 files changed, 64 insertions(+), 48 deletions(-) (limited to 'arch') diff --git a/arch/x86/boot/compressed/kaslr.c b/arch/x86/boot/compressed/kaslr.c index 54037c9f2def..5550546916be 100644 --- a/arch/x86/boot/compressed/kaslr.c +++ b/arch/x86/boot/compressed/kaslr.c @@ -463,17 +463,20 @@ static unsigned long find_random_virt_addr(unsigned long minimum, * Since this function examines addresses much more numerically, * it takes the input and output pointers as 'unsigned long'. */ -unsigned char *choose_random_location(unsigned long input, - unsigned long input_size, - unsigned long output, - unsigned long output_size) +void choose_random_location(unsigned long input, + unsigned long input_size, + unsigned long *output, + unsigned long output_size, + unsigned long *virt_addr) { - unsigned long choice = output; unsigned long random_addr; + /* By default, keep output position unchanged. */ + *virt_addr = *output; + if (cmdline_find_option_bool("nokaslr")) { warn("KASLR disabled: 'nokaslr' on cmdline."); - goto out; + return; } boot_params->hdr.loadflags |= KASLR_FLAG; @@ -482,25 +485,25 @@ unsigned char *choose_random_location(unsigned long input, initialize_identity_maps(); /* Record the various known unsafe memory ranges. */ - mem_avoid_init(input, input_size, output); + mem_avoid_init(input, input_size, *output); /* Walk e820 and find a random address. 
*/ - random_addr = find_random_phys_addr(output, output_size); + random_addr = find_random_phys_addr(*output, output_size); if (!random_addr) { warn("KASLR disabled: could not find suitable E820 region!"); - goto out; + } else { + /* Update the new physical address location. */ + if (*output != random_addr) { + add_identity_map(random_addr, output_size); + *output = random_addr; + } } - /* Always enforce the minimum. */ - if (random_addr < choice) - goto out; - - choice = random_addr; - - add_identity_map(choice, output_size); - /* This actually loads the identity pagetable on x86_64. */ finalize_identity_maps(); -out: - return (unsigned char *)choice; + + /* Pick random virtual address starting from LOAD_PHYSICAL_ADDR. */ + if (IS_ENABLED(CONFIG_X86_64)) + random_addr = find_random_virt_addr(LOAD_PHYSICAL_ADDR, output_size); + *virt_addr = random_addr; } diff --git a/arch/x86/boot/compressed/misc.c b/arch/x86/boot/compressed/misc.c index f14db4e21654..b3c5a5f030ce 100644 --- a/arch/x86/boot/compressed/misc.c +++ b/arch/x86/boot/compressed/misc.c @@ -170,7 +170,8 @@ void __puthex(unsigned long value) } #if CONFIG_X86_NEED_RELOCS -static void handle_relocations(void *output, unsigned long output_len) +static void handle_relocations(void *output, unsigned long output_len, + unsigned long virt_addr) { int *reloc; unsigned long delta, map, ptr; @@ -182,11 +183,6 @@ static void handle_relocations(void *output, unsigned long output_len) * and where it was actually loaded. */ delta = min_addr - LOAD_PHYSICAL_ADDR; - if (!delta) { - debug_putstr("No relocation needed... "); - return; - } - debug_putstr("Performing relocations... "); /* * The kernel contains a table of relocation addresses. Those @@ -197,6 +193,20 @@ static void handle_relocations(void *output, unsigned long output_len) */ map = delta - __START_KERNEL_map; + /* + * 32-bit always performs relocations. 64-bit relocations are only + * needed if KASLR has chosen a different starting address offset + * from __START_KERNEL_map. + */ + if (IS_ENABLED(CONFIG_X86_64)) + delta = virt_addr - LOAD_PHYSICAL_ADDR; + + if (!delta) { + debug_putstr("No relocation needed... "); + return; + } + debug_putstr("Performing relocations... "); + /* * Process relocations: 32 bit relocations first then 64 bit after. * Three sets of binary relocations are added to the end of the kernel @@ -250,7 +260,8 @@ static void handle_relocations(void *output, unsigned long output_len) #endif } #else -static inline void handle_relocations(void *output, unsigned long output_len) +static inline void handle_relocations(void *output, unsigned long output_len, + unsigned long virt_addr) { } #endif @@ -327,7 +338,7 @@ asmlinkage __visible void *extract_kernel(void *rmode, memptr heap, unsigned long output_len) { const unsigned long kernel_total_size = VO__end - VO__text; - unsigned char *output_orig = output; + unsigned long virt_addr = (unsigned long)output; /* Retain x86 boot parameters pointer passed from startup_32/64. */ boot_params = rmode; @@ -366,13 +377,16 @@ asmlinkage __visible void *extract_kernel(void *rmode, memptr heap, * the entire decompressed kernel plus relocation table, or the * entire decompressed kernel plus .bss and .brk sections. */ - output = choose_random_location((unsigned long)input_data, input_len, - (unsigned long)output, - max(output_len, kernel_total_size)); + choose_random_location((unsigned long)input_data, input_len, + (unsigned long *)&output, + max(output_len, kernel_total_size), + &virt_addr); /* Validate memory location choices. 
*/ if ((unsigned long)output & (MIN_KERNEL_ALIGN - 1)) - error("Destination address inappropriately aligned"); + error("Destination physical address inappropriately aligned"); + if (virt_addr & (MIN_KERNEL_ALIGN - 1)) + error("Destination virtual address inappropriately aligned"); #ifdef CONFIG_X86_64 if (heap > 0x3fffffffffffUL) error("Destination address too large"); @@ -382,19 +396,16 @@ asmlinkage __visible void *extract_kernel(void *rmode, memptr heap, #endif #ifndef CONFIG_RELOCATABLE if ((unsigned long)output != LOAD_PHYSICAL_ADDR) - error("Wrong destination address"); + error("Destination address does not match LOAD_PHYSICAL_ADDR"); + if ((unsigned long)output != virt_addr) + error("Destination virtual address changed when not relocatable"); #endif debug_putstr("\nDecompressing Linux... "); __decompress(input_data, input_len, NULL, NULL, output, output_len, NULL, error); parse_elf(output); - /* - * 32-bit always performs relocations. 64-bit relocations are only - * needed if kASLR has chosen a different load address. - */ - if (!IS_ENABLED(CONFIG_X86_64) || output != output_orig) - handle_relocations(output, output_len); + handle_relocations(output, output_len, virt_addr); debug_putstr("done.\nBooting the kernel.\n"); return output; } diff --git a/arch/x86/boot/compressed/misc.h b/arch/x86/boot/compressed/misc.h index 09c4ddd02ac6..1c8355eadbd1 100644 --- a/arch/x86/boot/compressed/misc.h +++ b/arch/x86/boot/compressed/misc.h @@ -67,20 +67,22 @@ int cmdline_find_option_bool(const char *option); #if CONFIG_RANDOMIZE_BASE /* kaslr.c */ -unsigned char *choose_random_location(unsigned long input_ptr, - unsigned long input_size, - unsigned long output_ptr, - unsigned long output_size); +void choose_random_location(unsigned long input, + unsigned long input_size, + unsigned long *output, + unsigned long output_size, + unsigned long *virt_addr); /* cpuflags.c */ bool has_cpuflag(int flag); #else -static inline -unsigned char *choose_random_location(unsigned long input_ptr, - unsigned long input_size, - unsigned long output_ptr, - unsigned long output_size) +static inline void choose_random_location(unsigned long input, + unsigned long input_size, + unsigned long *output, + unsigned long output_size, + unsigned long *virt_addr) { - return (unsigned char *)output_ptr; + /* No change from existing output location. */ + *virt_addr = *output; } #endif -- cgit v1.2.3 From ed9f007ee68478f6a50ec9971ade25a0129a5c0e Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Wed, 25 May 2016 15:45:33 -0700 Subject: x86/KASLR: Extend kernel image physical address randomization to addresses larger than 4G We want the physical address to be randomized anywhere between 16MB and the top of physical memory (up to 64TB). This patch exchanges the prior slots[] array for the new slot_areas[] array, and lifts the limitation of KERNEL_IMAGE_SIZE on the physical address offset for 64-bit. As before, process_e820_entry() walks memory and populates slot_areas[], splitting on any detected mem_avoid collisions. Finally, since the slots[] array and its associated functions are not needed any more, so they are removed. Based on earlier patches by Baoquan He. Originally-from: Baoquan He Signed-off-by: Kees Cook Cc: Andrew Morton Cc: Andrey Ryabinin Cc: Andy Lutomirski Cc: Baoquan He Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: Dmitry Vyukov Cc: H. Peter Anvin Cc: H.J. 
Lu Cc: Josh Poimboeuf Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Yinghai Lu Link: http://lkml.kernel.org/r/1464216334-17200-5-git-send-email-keescook@chromium.org Signed-off-by: Ingo Molnar --- arch/x86/Kconfig | 27 +++++---- arch/x86/boot/compressed/kaslr.c | 115 +++++++++++++++++++++++---------------- 2 files changed, 85 insertions(+), 57 deletions(-) (limited to 'arch') diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 0a7b885964ba..770ae5259dff 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -1934,21 +1934,26 @@ config RANDOMIZE_BASE attempts relying on knowledge of the location of kernel code internals. - The kernel physical and virtual address can be randomized - from 16MB up to 1GB on 64-bit and 512MB on 32-bit. (Note that - using RANDOMIZE_BASE reduces the memory space available to - kernel modules from 1.5GB to 1GB.) + On 64-bit, the kernel physical and virtual addresses are + randomized separately. The physical address will be anywhere + between 16MB and the top of physical memory (up to 64TB). The + virtual address will be randomized from 16MB up to 1GB (9 bits + of entropy). Note that this also reduces the memory space + available to kernel modules from 1.5GB to 1GB. + + On 32-bit, the kernel physical and virtual addresses are + randomized together. They will be randomized from 16MB up to + 512MB (8 bits of entropy). Entropy is generated using the RDRAND instruction if it is supported. If RDTSC is supported, its value is mixed into the entropy pool as well. If neither RDRAND nor RDTSC are - supported, then entropy is read from the i8254 timer. - - Since the kernel is built using 2GB addressing, and - PHYSICAL_ALIGN must be at a minimum of 2MB, only 10 bits of - entropy is theoretically possible. Currently, with the - default value for PHYSICAL_ALIGN and due to page table - layouts, 64-bit uses 9 bits of entropy and 32-bit uses 8 bits. + supported, then entropy is read from the i8254 timer. The + usable entropy is limited by the kernel being built using + 2GB addressing, and that PHYSICAL_ALIGN must be at a + minimum of 2MB. As a result, only 10 bits of entropy are + theoretically possible, but the implementations are further + limited due to memory layouts. If CONFIG_HIBERNATE is also enabled, KASLR is disabled at boot time. To enable it, boot with "kaslr" on the kernel command diff --git a/arch/x86/boot/compressed/kaslr.c b/arch/x86/boot/compressed/kaslr.c index 5550546916be..36e28112523a 100644 --- a/arch/x86/boot/compressed/kaslr.c +++ b/arch/x86/boot/compressed/kaslr.c @@ -132,17 +132,6 @@ enum mem_avoid_index { static struct mem_vector mem_avoid[MEM_AVOID_MAX]; -static bool mem_contains(struct mem_vector *region, struct mem_vector *item) -{ - /* Item at least partially before region. */ - if (item->start < region->start) - return false; - /* Item at least partially after region. */ - if (item->start + item->size > region->start + region->size) - return false; - return true; -} - static bool mem_overlaps(struct mem_vector *one, struct mem_vector *two) { /* Item one is entirely before item two. */ @@ -319,8 +308,6 @@ static bool mem_avoid_overlap(struct mem_vector *img, return is_overlapping; } -static unsigned long slots[KERNEL_IMAGE_SIZE / CONFIG_PHYSICAL_ALIGN]; - struct slot_area { unsigned long addr; int num; @@ -351,36 +338,44 @@ static void store_slot_info(struct mem_vector *region, unsigned long image_size) } } -static void slots_append(unsigned long addr) -{ - /* Overflowing the slots list should be impossible. 
*/ - if (slot_max >= KERNEL_IMAGE_SIZE / CONFIG_PHYSICAL_ALIGN) - return; - - slots[slot_max++] = addr; -} - static unsigned long slots_fetch_random(void) { + unsigned long slot; + int i; + /* Handle case of no slots stored. */ if (slot_max == 0) return 0; - return slots[get_random_long("Physical") % slot_max]; + slot = get_random_long("Physical") % slot_max; + + for (i = 0; i < slot_area_index; i++) { + if (slot >= slot_areas[i].num) { + slot -= slot_areas[i].num; + continue; + } + return slot_areas[i].addr + slot * CONFIG_PHYSICAL_ALIGN; + } + + if (i == slot_area_index) + debug_putstr("slots_fetch_random() failed!?\n"); + return 0; } static void process_e820_entry(struct e820entry *entry, unsigned long minimum, unsigned long image_size) { - struct mem_vector region, img, overlap; + struct mem_vector region, overlap; + struct slot_area slot_area; + unsigned long start_orig; /* Skip non-RAM entries. */ if (entry->type != E820_RAM) return; - /* Ignore entries entirely above our maximum. */ - if (entry->addr >= KERNEL_IMAGE_SIZE) + /* On 32-bit, ignore entries entirely above our maximum. */ + if (IS_ENABLED(CONFIG_X86_32) && entry->addr >= KERNEL_IMAGE_SIZE) return; /* Ignore entries entirely below our minimum. */ @@ -390,31 +385,55 @@ static void process_e820_entry(struct e820entry *entry, region.start = entry->addr; region.size = entry->size; - /* Potentially raise address to minimum location. */ - if (region.start < minimum) - region.start = minimum; + /* Give up if slot area array is full. */ + while (slot_area_index < MAX_SLOT_AREA) { + start_orig = region.start; - /* Potentially raise address to meet alignment requirements. */ - region.start = ALIGN(region.start, CONFIG_PHYSICAL_ALIGN); + /* Potentially raise address to minimum location. */ + if (region.start < minimum) + region.start = minimum; - /* Did we raise the address above the bounds of this e820 region? */ - if (region.start > entry->addr + entry->size) - return; + /* Potentially raise address to meet alignment needs. */ + region.start = ALIGN(region.start, CONFIG_PHYSICAL_ALIGN); - /* Reduce size by any delta from the original address. */ - region.size -= region.start - entry->addr; + /* Did we raise the address above this e820 region? */ + if (region.start > entry->addr + entry->size) + return; - /* Reduce maximum size to fit end of image within maximum limit. */ - if (region.start + region.size > KERNEL_IMAGE_SIZE) - region.size = KERNEL_IMAGE_SIZE - region.start; + /* Reduce size by any delta from the original address. */ + region.size -= region.start - start_orig; - /* Walk each aligned slot and check for avoided areas. */ - for (img.start = region.start, img.size = image_size ; - mem_contains(®ion, &img) ; - img.start += CONFIG_PHYSICAL_ALIGN) { - if (mem_avoid_overlap(&img, &overlap)) - continue; - slots_append(img.start); + /* On 32-bit, reduce region size to fit within max size. */ + if (IS_ENABLED(CONFIG_X86_32) && + region.start + region.size > KERNEL_IMAGE_SIZE) + region.size = KERNEL_IMAGE_SIZE - region.start; + + /* Return if region can't contain decompressed kernel */ + if (region.size < image_size) + return; + + /* If nothing overlaps, store the region and return. */ + if (!mem_avoid_overlap(®ion, &overlap)) { + store_slot_info(®ion, image_size); + return; + } + + /* Store beginning of region if holds at least image_size. 
*/ + if (overlap.start > region.start + image_size) { + struct mem_vector beginning; + + beginning.start = region.start; + beginning.size = overlap.start - region.start; + store_slot_info(&beginning, image_size); + } + + /* Return if overlap extends to or past end of region. */ + if (overlap.start + overlap.size >= region.start + region.size) + return; + + /* Clip off the overlapping region and start over. */ + region.size -= overlap.start - region.start + overlap.size; + region.start = overlap.start + overlap.size; } } @@ -431,6 +450,10 @@ static unsigned long find_random_phys_addr(unsigned long minimum, for (i = 0; i < boot_params->e820_entries; i++) { process_e820_entry(&boot_params->e820_map[i], minimum, image_size); + if (slot_area_index == MAX_SLOT_AREA) { + debug_putstr("Aborted e820 scan (slot_areas full)!\n"); + break; + } } return slots_fetch_random(); -- cgit v1.2.3 From e066cc47776a89bbdaf4184c0e75f7d389f9ab48 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Wed, 25 May 2016 15:45:34 -0700 Subject: x86/KASLR: Allow randomization below the load address Currently the kernel image physical address randomization's lower boundary is the original kernel load address. For bootloaders that load kernels into very high memory (e.g. kexec), this means randomization takes place in a very small window at the top of memory, ignoring the large region of physical memory below the load address. Since mem_avoid[] is already correctly tracking the regions that must be avoided, this patch changes the minimum address to whatever is less: 512M (to conservatively avoid unknown things in lower memory) or the load address. Now, for example, if the kernel is loaded at 8G, [512M, 8G) will be added to the list of possible physical memory positions. Signed-off-by: Yinghai Lu [ Rewrote the changelog, refactored the code to use min(). ] Signed-off-by: Kees Cook Cc: Andrew Morton Cc: Andrey Ryabinin Cc: Andy Lutomirski Cc: Baoquan He Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: Dmitry Vyukov Cc: H. Peter Anvin Cc: H.J. Lu Cc: Josh Poimboeuf Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1464216334-17200-6-git-send-email-keescook@chromium.org [ Edited the changelog some more, plus the code comment as well. ] Signed-off-by: Ingo Molnar --- arch/x86/boot/compressed/kaslr.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/x86/boot/compressed/kaslr.c b/arch/x86/boot/compressed/kaslr.c index 36e28112523a..749c9e00c674 100644 --- a/arch/x86/boot/compressed/kaslr.c +++ b/arch/x86/boot/compressed/kaslr.c @@ -492,7 +492,7 @@ void choose_random_location(unsigned long input, unsigned long output_size, unsigned long *virt_addr) { - unsigned long random_addr; + unsigned long random_addr, min_addr; /* By default, keep output position unchanged. */ *virt_addr = *output; @@ -510,8 +510,15 @@ void choose_random_location(unsigned long input, /* Record the various known unsafe memory ranges. */ mem_avoid_init(input, input_size, *output); + /* + * Low end of the randomization range should be the + * smaller of 512M or the initial kernel image + * location: + */ + min_addr = min(*output, 512UL << 20); + /* Walk e820 and find a random address. 
*/ - random_addr = find_random_phys_addr(*output, output_size); + random_addr = find_random_phys_addr(min_addr, output_size); if (!random_addr) { warn("KASLR disabled: could not find suitable E820 region!"); } else { -- cgit v1.2.3 From f09509b9398b23ca53360ca57106555698ec2e93 Mon Sep 17 00:00:00 2001 From: David Carrillo-Cisneros Date: Tue, 21 Jun 2016 11:31:10 -0700 Subject: perf/x86/intel: Print LBR support statement after validation The following commit: 338b522ca43c ("perf/x86/intel: Protect LBR and extra_regs against KVM lying") added an additional test to LBR support detection that is performed after printing the LBR support statement to dmesg. Move the LBR support output after the very last test, to make sure we print the true status of LBR support. Signed-off-by: David Carrillo-Cisneros Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Stephane Eranian Reviewed-by: Andi Kleen Cc: Alexander Shishkin Cc: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Kan Liang Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Vince Weaver Link: http://lkml.kernel.org/r/1466533874-52003-2-git-send-email-davidcc@google.com Signed-off-by: Ingo Molnar --- arch/x86/events/intel/core.c | 2 ++ arch/x86/events/intel/lbr.c | 9 --------- 2 files changed, 2 insertions(+), 9 deletions(-) (limited to 'arch') diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c index 3ed528c2370c..61a027b694a3 100644 --- a/arch/x86/events/intel/core.c +++ b/arch/x86/events/intel/core.c @@ -3958,6 +3958,8 @@ __init int intel_pmu_init(void) x86_pmu.lbr_nr = 0; } + if (x86_pmu.lbr_nr) + pr_cont("%d-deep LBR, ", x86_pmu.lbr_nr); /* * Access extra MSR may cause #GP under certain circumstances. * E.g. KVM doesn't support offcore event diff --git a/arch/x86/events/intel/lbr.c b/arch/x86/events/intel/lbr.c index 9e2b40cdb05f..2dca66cec617 100644 --- a/arch/x86/events/intel/lbr.c +++ b/arch/x86/events/intel/lbr.c @@ -956,7 +956,6 @@ void __init intel_pmu_lbr_init_core(void) * SW branch filter usage: * - compensate for lack of HW filter */ - pr_cont("4-deep LBR, "); } /* nehalem/westmere */ @@ -977,7 +976,6 @@ void __init intel_pmu_lbr_init_nhm(void) * That requires LBR_FAR but that means far * jmp need to be filtered out */ - pr_cont("16-deep LBR, "); } /* sandy bridge */ @@ -997,7 +995,6 @@ void __init intel_pmu_lbr_init_snb(void) * That requires LBR_FAR but that means far * jmp need to be filtered out */ - pr_cont("16-deep LBR, "); } /* haswell */ @@ -1010,8 +1007,6 @@ void intel_pmu_lbr_init_hsw(void) x86_pmu.lbr_sel_mask = LBR_SEL_MASK; x86_pmu.lbr_sel_map = hsw_lbr_sel_map; - - pr_cont("16-deep LBR, "); } /* skylake */ @@ -1031,7 +1026,6 @@ __init void intel_pmu_lbr_init_skl(void) * That requires LBR_FAR but that means far * jmp need to be filtered out */ - pr_cont("32-deep LBR, "); } /* atom */ @@ -1057,7 +1051,6 @@ void __init intel_pmu_lbr_init_atom(void) * SW branch filter usage: * - compensate for lack of HW filter */ - pr_cont("8-deep LBR, "); } /* slm */ @@ -1088,6 +1081,4 @@ void intel_pmu_lbr_init_knl(void) x86_pmu.lbr_sel_mask = LBR_SEL_MASK; x86_pmu.lbr_sel_map = snb_lbr_sel_map; - - pr_cont("8-deep LBR, "); } -- cgit v1.2.3 From 19fc9ddd61e059cc45464bdf6e8fa304bb94080f Mon Sep 17 00:00:00 2001 From: David Carrillo-Cisneros Date: Tue, 21 Jun 2016 11:31:11 -0700 Subject: perf/x86/intel: Fix MSR_LAST_BRANCH_FROM_x bug when no TSX Intel's SDM states that bits 61:62 in MSR_LAST_BRANCH_FROM_x are the TSX flags for formats with LBR_TSX flags (i.e. LBR_FORMAT_EIP_EFLAGS2). 
However, when the CPU has TSX support deactivated, bits 61:62 actually behave as follows: - For wrmsr(), bits 61:62 are considered part of the sign extension. - When capturing branches, the LBR hw will always clear bits 61:62. regardless of the sign extension. Therefore, if: 1) LBR has TSX format. 2) CPU has no TSX support enabled. ... then any value passed to wrmsr() must be sign extended to 63 bits and any value from rdmsr() must be converted to have a sign extension of 61 bits, ignoring the values at TSX flags. This bug was masked by the work-around to the Intel's CPU bug: BJ94. "LBR May Contain Incorrect Information When Using FREEZE_LBRS_ON_PMI" in Document Number: 324643-037US. The aforementioned work-around uses hw flags to filter out all kernel branches, limiting LBR callstack to user level execution only. Since user addresses are not sign extended, they do not trigger the wrmsr() bug in MSR_LAST_BRANCH_FROM_x when saved/restored at context switch. To verify the hw bug: $ perf record -b -e cycles sleep 1 $ rdmsr -p 0 0x680 0x1fffffffb0b9b0cc $ wrmsr -p 0 0x680 0x1fffffffb0b9b0cc write(): Input/output error The quirk for LBR_FROM_ MSRs is required before calls to wrmsrl() and after rdmsrl(). This patch introduces it for wrmsrl()'s done for testing LBR support. Future patch in series adds the quirk for context switch, that would be required if LBR callstack is to be enabled for ring 0. Signed-off-by: David Carrillo-Cisneros Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Stephane Eranian Reviewed-by: Andi Kleen Cc: Alexander Shishkin Cc: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Kan Liang Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Vince Weaver Link: http://lkml.kernel.org/r/1466533874-52003-3-git-send-email-davidcc@google.com Signed-off-by: Ingo Molnar --- arch/x86/events/intel/core.c | 18 +++++++++++++++ arch/x86/events/intel/lbr.c | 52 ++++++++++++++++++++++++++++++++++++++++++++ arch/x86/events/perf_event.h | 2 ++ 3 files changed, 72 insertions(+) (limited to 'arch') diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c index 61a027b694a3..3eccc42e2d88 100644 --- a/arch/x86/events/intel/core.c +++ b/arch/x86/events/intel/core.c @@ -3361,6 +3361,13 @@ static void intel_snb_check_microcode(void) } } +static bool is_lbr_from(unsigned long msr) +{ + unsigned long lbr_from_nr = x86_pmu.lbr_from + x86_pmu.lbr_nr; + + return x86_pmu.lbr_from <= msr && msr < lbr_from_nr; +} + /* * Under certain circumstances, access certain MSR may cause #GP. * The function tests if the input MSR can be safely accessed. @@ -3381,13 +3388,24 @@ static bool check_msr(unsigned long msr, u64 mask) * Only change the bits which can be updated by wrmsrl. */ val_tmp = val_old ^ mask; + + if (is_lbr_from(msr)) + val_tmp = lbr_from_signext_quirk_wr(val_tmp); + if (wrmsrl_safe(msr, val_tmp) || rdmsrl_safe(msr, &val_new)) return false; + /* + * Quirk only affects validation in wrmsr(), so wrmsrl()'s value + * should equal rdmsrl()'s even with the quirk. + */ if (val_new != val_tmp) return false; + if (is_lbr_from(msr)) + val_old = lbr_from_signext_quirk_wr(val_old); + /* Here it's sure that the MSR can be safely accessed. * Restore the old value and return. 
*/ diff --git a/arch/x86/events/intel/lbr.c b/arch/x86/events/intel/lbr.c index 2dca66cec617..88093e0915a9 100644 --- a/arch/x86/events/intel/lbr.c +++ b/arch/x86/events/intel/lbr.c @@ -81,6 +81,8 @@ static enum { #define LBR_FROM_FLAG_IN_TX (1ULL << 62) #define LBR_FROM_FLAG_ABORT (1ULL << 61) +#define LBR_FROM_SIGNEXT_2MSB (BIT_ULL(60) | BIT_ULL(59)) + /* * x86control flow change classification * x86control flow changes include branches, interrupts, traps, faults @@ -235,6 +237,53 @@ enum { LBR_VALID, }; +/* + * For formats with LBR_TSX flags (e.g. LBR_FORMAT_EIP_FLAGS2), bits 61:62 in + * MSR_LAST_BRANCH_FROM_x are the TSX flags when TSX is supported, but when + * TSX is not supported they have no consistent behavior: + * + * - For wrmsr(), bits 61:62 are considered part of the sign extension. + * - For HW updates (branch captures) bits 61:62 are always OFF and are not + * part of the sign extension. + * + * Therefore, if: + * + * 1) LBR has TSX format + * 2) CPU has no TSX support enabled + * + * ... then any value passed to wrmsr() must be sign extended to 63 bits and any + * value from rdmsr() must be converted to have a 61 bits sign extension, + * ignoring the TSX flags. + */ +static inline bool lbr_from_signext_quirk_needed(void) +{ + int lbr_format = x86_pmu.intel_cap.lbr_format; + bool tsx_support = boot_cpu_has(X86_FEATURE_HLE) || + boot_cpu_has(X86_FEATURE_RTM); + + return !tsx_support && (lbr_desc[lbr_format] & LBR_TSX); +} + +DEFINE_STATIC_KEY_FALSE(lbr_from_quirk_key); + +/* If quirk is enabled, ensure sign extension is 63 bits: */ +inline u64 lbr_from_signext_quirk_wr(u64 val) +{ + if (static_branch_unlikely(&lbr_from_quirk_key)) { + /* + * Sign extend into bits 61:62 while preserving bit 63. + * + * Quirk is enabled when TSX is disabled. Therefore TSX bits + * in val are always OFF and must be changed to be sign + * extension bits. Since bits 59:60 are guaranteed to be + * part of the sign extension bits, we can just copy them + * to 61:62. + */ + val |= (LBR_FROM_SIGNEXT_2MSB & val) << 2; + } + return val; +} + static void __intel_pmu_lbr_restore(struct x86_perf_task_context *task_ctx) { int i; @@ -1007,6 +1056,9 @@ void intel_pmu_lbr_init_hsw(void) x86_pmu.lbr_sel_mask = LBR_SEL_MASK; x86_pmu.lbr_sel_map = hsw_lbr_sel_map; + + if (lbr_from_signext_quirk_needed()) + static_branch_enable(&lbr_from_quirk_key); } /* skylake */ diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h index e2d7285a2dac..8c4a47706296 100644 --- a/arch/x86/events/perf_event.h +++ b/arch/x86/events/perf_event.h @@ -902,6 +902,8 @@ void intel_ds_init(void); void intel_pmu_lbr_sched_task(struct perf_event_context *ctx, bool sched_in); +u64 lbr_from_signext_quirk_wr(u64 val); + void intel_pmu_lbr_reset(void); void intel_pmu_lbr_enable(struct perf_event *event); -- cgit v1.2.3 From 3812bba84f3d721ff7dc3bb90360bc5ed6771994 Mon Sep 17 00:00:00 2001 From: David Carrillo-Cisneros Date: Tue, 21 Jun 2016 11:31:12 -0700 Subject: perf/x86/intel: Fix trivial formatting and style bug Replace spaces by tabs in LBR_FROM_* constants to align with newly defined constant. Use BIT_ULL. 
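The rewrite is purely cosmetic: BIT_ULL(n) is the unsigned long long form of BIT(), i.e. a one shifted left by n bits, so every constant keeps its previous value. A tiny standalone check of that equivalence, where MY_BIT_ULL is a local stand-in for the kernel macro:

#include <assert.h>

#define MY_BIT_ULL(n)	(1ULL << (n))	/* stand-in for the kernel's BIT_ULL() */

int main(void)
{
	assert(MY_BIT_ULL(63) == (1ULL << 63));	/* LBR_FROM_FLAG_MISPRED */
	assert(MY_BIT_ULL(62) == (1ULL << 62));	/* LBR_FROM_FLAG_IN_TX   */
	assert(MY_BIT_ULL(61) == (1ULL << 61));	/* LBR_FROM_FLAG_ABORT   */
	return 0;
}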
Signed-off-by: David Carrillo-Cisneros Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Stephane Eranian Reviewed-by: Andi Kleen Cc: Alexander Shishkin Cc: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Kan Liang Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Vince Weaver Link: http://lkml.kernel.org/r/1466533874-52003-4-git-send-email-davidcc@google.com Signed-off-by: Ingo Molnar --- arch/x86/events/intel/lbr.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/x86/events/intel/lbr.c b/arch/x86/events/intel/lbr.c index 88093e0915a9..0da0eb0d875d 100644 --- a/arch/x86/events/intel/lbr.c +++ b/arch/x86/events/intel/lbr.c @@ -77,9 +77,9 @@ static enum { LBR_IND_JMP |\ LBR_FAR) -#define LBR_FROM_FLAG_MISPRED (1ULL << 63) -#define LBR_FROM_FLAG_IN_TX (1ULL << 62) -#define LBR_FROM_FLAG_ABORT (1ULL << 61) +#define LBR_FROM_FLAG_MISPRED BIT_ULL(63) +#define LBR_FROM_FLAG_IN_TX BIT_ULL(62) +#define LBR_FROM_FLAG_ABORT BIT_ULL(61) #define LBR_FROM_SIGNEXT_2MSB (BIT_ULL(60) | BIT_ULL(59)) -- cgit v1.2.3 From 71adae99ed187de9fcf988cc8873ee2c3af3385f Mon Sep 17 00:00:00 2001 From: David Carrillo-Cisneros Date: Tue, 21 Jun 2016 11:31:13 -0700 Subject: perf/x86/intel: Add MSR_LAST_BRANCH_FROM_x quirk for ctx switch Add quirk for context switch to save/restore the value of MSR_LAST_BRANCH_FROM_x when LBR is enabled and there is potential for kernel addresses to be in the lbr_from register. To test this patch, use a perf tool and kernel with the patch next in this series. That patch removes the work around that masked the hw bug: $ ./lbr_perf record --call-graph lbr -e cycles:k sleep 1 where lbr_perf is the patched perf tool, that allows to specify :k on lbr mode. The above command will trigger a #GPF : WARNING: CPU: 28 PID: 14096 at arch/x86/mm/extable.c:65 ex_handler_wrmsr_unsafe+0x70/0x80 unchecked MSR access error: WRMSR to 0x681 (tried to write 0x1fffffff81010794) ... Call Trace: [] dump_stack+0x4d/0x63 [] __warn+0xe5/0x100 [] warn_slowpath_fmt+0x49/0x50 [] ex_handler_wrmsr_unsafe+0x70/0x80 [] fixup_exception+0x42/0x50 [] do_general_protection+0x8a/0x160 [] general_protection+0x22/0x30 [] ? intel_pmu_lbr_sched_task+0xc9/0x380 [] intel_pmu_sched_task+0x3c/0x60 [] x86_pmu_sched_task+0x1b/0x20 [] perf_pmu_sched_task+0x6b/0xb0 [] __perf_event_task_sched_in+0x7d/0x150 [] finish_task_switch+0x15c/0x200 [] __schedule+0x274/0x6cc [] schedule+0x39/0x90 [] exit_to_usermode_loop+0x39/0x89 [] prepare_exit_to_usermode+0x2e/0x30 [] retint_user+0x8/0x10 Signed-off-by: David Carrillo-Cisneros Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Stephane Eranian Reviewed-by: Andi Kleen Cc: Alexander Shishkin Cc: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Kan Liang Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Vince Weaver Link: http://lkml.kernel.org/r/1466533874-52003-5-git-send-email-davidcc@google.com Signed-off-by: Ingo Molnar --- arch/x86/events/intel/lbr.c | 24 +++++++++++++++++++++--- 1 file changed, 21 insertions(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/x86/events/intel/lbr.c b/arch/x86/events/intel/lbr.c index 0da0eb0d875d..52bef15c7615 100644 --- a/arch/x86/events/intel/lbr.c +++ b/arch/x86/events/intel/lbr.c @@ -284,6 +284,20 @@ inline u64 lbr_from_signext_quirk_wr(u64 val) return val; } +/* + * If quirk is needed, ensure sign extension is 61 bits: + */ +u64 lbr_from_signext_quirk_rd(u64 val) +{ + if (static_branch_unlikely(&lbr_from_quirk_key)) + /* + * Quirk is on when TSX is not enabled. 
Therefore TSX + * flags must be read as OFF. + */ + val &= ~(LBR_FROM_FLAG_IN_TX | LBR_FROM_FLAG_ABORT); + return val; +} + static void __intel_pmu_lbr_restore(struct x86_perf_task_context *task_ctx) { int i; @@ -300,7 +314,8 @@ static void __intel_pmu_lbr_restore(struct x86_perf_task_context *task_ctx) tos = task_ctx->tos; for (i = 0; i < tos; i++) { lbr_idx = (tos - i) & mask; - wrmsrl(x86_pmu.lbr_from + lbr_idx, task_ctx->lbr_from[i]); + wrmsrl(x86_pmu.lbr_from + lbr_idx, + lbr_from_signext_quirk_wr(task_ctx->lbr_from[i])); wrmsrl(x86_pmu.lbr_to + lbr_idx, task_ctx->lbr_to[i]); if (x86_pmu.intel_cap.lbr_format == LBR_FORMAT_INFO) wrmsrl(MSR_LBR_INFO_0 + lbr_idx, task_ctx->lbr_info[i]); @@ -313,7 +328,7 @@ static void __intel_pmu_lbr_save(struct x86_perf_task_context *task_ctx) { int i; unsigned lbr_idx, mask; - u64 tos; + u64 tos, val; if (task_ctx->lbr_callstack_users == 0) { task_ctx->lbr_stack_state = LBR_NONE; @@ -324,7 +339,8 @@ static void __intel_pmu_lbr_save(struct x86_perf_task_context *task_ctx) tos = intel_pmu_lbr_tos(); for (i = 0; i < tos; i++) { lbr_idx = (tos - i) & mask; - rdmsrl(x86_pmu.lbr_from + lbr_idx, task_ctx->lbr_from[i]); + rdmsrl(x86_pmu.lbr_from + lbr_idx, val); + task_ctx->lbr_from[i] = lbr_from_signext_quirk_rd(val); rdmsrl(x86_pmu.lbr_to + lbr_idx, task_ctx->lbr_to[i]); if (x86_pmu.intel_cap.lbr_format == LBR_FORMAT_INFO) rdmsrl(MSR_LBR_INFO_0 + lbr_idx, task_ctx->lbr_info[i]); @@ -502,6 +518,8 @@ static void intel_pmu_lbr_read_64(struct cpu_hw_events *cpuc) int lbr_flags = lbr_desc[lbr_format]; rdmsrl(x86_pmu.lbr_from + lbr_idx, from); + from = lbr_from_signext_quirk_rd(from); + rdmsrl(x86_pmu.lbr_to + lbr_idx, to); if (lbr_format == LBR_FORMAT_INFO && need_info) { -- cgit v1.2.3 From d4cf1949f9689314aef962eea95df84a8288d097 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 23 Jun 2016 10:44:49 +0200 Subject: perf/x86/intel: Add {rd,wr}lbr_{to,from} wrappers The whole rdmsr()/wrmsr() for lbr_from got a little unweildy with the sign extension quirk, provide a few simple wrappers to clean things up. Signed-off-by: Peter Zijlstra (Intel) Cc: Alexander Shishkin Cc: Arnaldo Carvalho de Melo Cc: David Carrillo-Cisneros Cc: Jiri Olsa Cc: Kan Liang Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Thomas Gleixner Cc: Vince Weaver Signed-off-by: Ingo Molnar --- arch/x86/events/intel/lbr.c | 53 ++++++++++++++++++++++++++++++++++----------- 1 file changed, 40 insertions(+), 13 deletions(-) (limited to 'arch') diff --git a/arch/x86/events/intel/lbr.c b/arch/x86/events/intel/lbr.c index 52bef15c7615..cc4555a9e876 100644 --- a/arch/x86/events/intel/lbr.c +++ b/arch/x86/events/intel/lbr.c @@ -289,12 +289,42 @@ inline u64 lbr_from_signext_quirk_wr(u64 val) */ u64 lbr_from_signext_quirk_rd(u64 val) { - if (static_branch_unlikely(&lbr_from_quirk_key)) + if (static_branch_unlikely(&lbr_from_quirk_key)) { /* * Quirk is on when TSX is not enabled. Therefore TSX * flags must be read as OFF. 
*/ val &= ~(LBR_FROM_FLAG_IN_TX | LBR_FROM_FLAG_ABORT); + } + return val; +} + +static inline void wrlbr_from(unsigned int idx, u64 val) +{ + val = lbr_from_signext_quirk_wr(val); + wrmsrl(x86_pmu.lbr_from + idx, val); +} + +static inline void wrlbr_to(unsigned int idx, u64 val) +{ + wrmsrl(x86_pmu.lbr_to + idx, val); +} + +static inline u64 rdlbr_from(unsigned int idx) +{ + u64 val; + + rdmsrl(x86_pmu.lbr_from + idx, val); + + return lbr_from_signext_quirk_rd(val); +} + +static inline u64 rdlbr_to(unsigned int idx) +{ + u64 val; + + rdmsrl(x86_pmu.lbr_from + idx, val); + return val; } @@ -314,9 +344,9 @@ static void __intel_pmu_lbr_restore(struct x86_perf_task_context *task_ctx) tos = task_ctx->tos; for (i = 0; i < tos; i++) { lbr_idx = (tos - i) & mask; - wrmsrl(x86_pmu.lbr_from + lbr_idx, - lbr_from_signext_quirk_wr(task_ctx->lbr_from[i])); - wrmsrl(x86_pmu.lbr_to + lbr_idx, task_ctx->lbr_to[i]); + wrlbr_from(lbr_idx, task_ctx->lbr_from[i]); + wrlbr_to (lbr_idx, task_ctx->lbr_to[i]); + if (x86_pmu.intel_cap.lbr_format == LBR_FORMAT_INFO) wrmsrl(MSR_LBR_INFO_0 + lbr_idx, task_ctx->lbr_info[i]); } @@ -326,9 +356,9 @@ static void __intel_pmu_lbr_restore(struct x86_perf_task_context *task_ctx) static void __intel_pmu_lbr_save(struct x86_perf_task_context *task_ctx) { - int i; unsigned lbr_idx, mask; - u64 tos, val; + u64 tos; + int i; if (task_ctx->lbr_callstack_users == 0) { task_ctx->lbr_stack_state = LBR_NONE; @@ -339,9 +369,8 @@ static void __intel_pmu_lbr_save(struct x86_perf_task_context *task_ctx) tos = intel_pmu_lbr_tos(); for (i = 0; i < tos; i++) { lbr_idx = (tos - i) & mask; - rdmsrl(x86_pmu.lbr_from + lbr_idx, val); - task_ctx->lbr_from[i] = lbr_from_signext_quirk_rd(val); - rdmsrl(x86_pmu.lbr_to + lbr_idx, task_ctx->lbr_to[i]); + task_ctx->lbr_from[i] = rdlbr_from(lbr_idx); + task_ctx->lbr_to[i] = rdlbr_to(lbr_idx); if (x86_pmu.intel_cap.lbr_format == LBR_FORMAT_INFO) rdmsrl(MSR_LBR_INFO_0 + lbr_idx, task_ctx->lbr_info[i]); } @@ -517,10 +546,8 @@ static void intel_pmu_lbr_read_64(struct cpu_hw_events *cpuc) u16 cycles = 0; int lbr_flags = lbr_desc[lbr_format]; - rdmsrl(x86_pmu.lbr_from + lbr_idx, from); - from = lbr_from_signext_quirk_rd(from); - - rdmsrl(x86_pmu.lbr_to + lbr_idx, to); + from = rdlbr_from(lbr_idx); + to = rdlbr_to(lbr_idx); if (lbr_format == LBR_FORMAT_INFO && need_info) { u64 info; -- cgit v1.2.3 From dbf984d825935f61965bcfacfd8e8dfdaf3e8051 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Sat, 25 Jun 2016 13:24:57 +0200 Subject: x86/boot/64: Add forgotten end of function marker Add secondary_startup_64()'s ENDPROC() marker. No functionality change. Signed-off-by: Borislav Petkov Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. 
Peter Anvin Cc: Josh Poimboeuf Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Link: http://lkml.kernel.org/r/20160625112457.16930-1-bp@alien8.de Signed-off-by: Ingo Molnar --- arch/x86/kernel/head_64.S | 1 + 1 file changed, 1 insertion(+) (limited to 'arch') diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S index 5df831ef1442..c7920ba69563 100644 --- a/arch/x86/kernel/head_64.S +++ b/arch/x86/kernel/head_64.S @@ -299,6 +299,7 @@ ENTRY(secondary_startup_64) pushq $__KERNEL_CS # set correct cs pushq %rax # target address in negative space lretq +ENDPROC(secondary_startup_64) #include "verify_cpu.S" -- cgit v1.2.3 From f6d1747f898cfe1fe52e3d18f5c77e5bd21fed9a Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Sat, 25 Jun 2016 08:20:26 +0100 Subject: x86/efi: Remove unused variable 'efi' Remove unused variable 'efi', it is never used. This fixes the following clang build warning: arch/x86/boot/compressed/eboot.c:803:2: warning: Value stored to 'efi' is never read Signed-off-by: Colin Ian King Signed-off-by: Matt Fleming Cc: Ard Biesheuvel Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-efi@vger.kernel.org Link: http://lkml.kernel.org/r/1466839230-12781-4-git-send-email-matt@codeblueprint.co.uk Signed-off-by: Ingo Molnar --- arch/x86/boot/compressed/eboot.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'arch') diff --git a/arch/x86/boot/compressed/eboot.c b/arch/x86/boot/compressed/eboot.c index 52fef606bc54..ff574dad95cc 100644 --- a/arch/x86/boot/compressed/eboot.c +++ b/arch/x86/boot/compressed/eboot.c @@ -757,7 +757,6 @@ struct boot_params *make_boot_params(struct efi_config *c) struct boot_params *boot_params; struct apm_bios_info *bi; struct setup_header *hdr; - struct efi_info *efi; efi_loaded_image_t *image; void *options, *handle; efi_guid_t proto = LOADED_IMAGE_PROTOCOL_GUID; @@ -800,7 +799,6 @@ struct boot_params *make_boot_params(struct efi_config *c) memset(boot_params, 0x0, 0x4000); hdr = &boot_params->hdr; - efi = &boot_params->efi_info; bi = &boot_params->apm_bios_info; /* Copy the second sector to boot_params */ -- cgit v1.2.3 From 80e75596079f0a41f905836ad0ccaac68ba33612 Mon Sep 17 00:00:00 2001 From: Alex Thorlton Date: Sat, 25 Jun 2016 08:20:27 +0100 Subject: efi: Convert efi_call_virt() to efi_call_virt_pointer() This commit makes a few slight modifications to the efi_call_virt() macro to get it to work with function pointers that are stored in locations other than efi.systab->runtime, and renames the macro to efi_call_virt_pointer(). The majority of the changes here are to pull these macros up into header files so that they can be accessed from outside of drivers/firmware/efi/runtime-wrappers.c. The most significant change not directly related to the code move is to add an extra "p" argument into the appropriate efi_call macros, and use that new argument in place of the, formerly hard-coded, efi.systab->runtime pointer. The last piece of the puzzle was to add an efi_call_virt() macro back into drivers/firmware/efi/runtime-wrappers.c to wrap around the new efi_call_virt_pointer() macro - this was mainly to keep the code from looking too cluttered by adding a bunch of extra references to efi.systab->runtime everywhere. Note that I also broke up the code in the efi_call_virt_pointer() macro a bit in the process of moving it. 
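For callers inside runtime-wrappers.c nothing changes in spelling, since efi_call_virt()/__efi_call_virt() are re-created there as thin wrappers around the new macros, while code elsewhere can now pass whichever table holds the function pointers. A hedged usage sketch, where the wrapper function is hypothetical but get_time and efi.systab->runtime are the usual EFI runtime service and table:

/* Sketch only: with efi.systab->runtime as the table pointer this is
 * equivalent to what efi_call_virt(get_time, tm, tc) expanded to before.
 */
static efi_status_t example_get_time(efi_time_t *tm, efi_time_cap_t *tc)
{
	return efi_call_virt_pointer(efi.systab->runtime, get_time, tm, tc);
}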
Signed-off-by: Alex Thorlton Signed-off-by: Matt Fleming Cc: Ard Biesheuvel Cc: Catalin Marinas Cc: Dimitri Sivanich Cc: Linus Torvalds Cc: Mark Rutland Cc: Peter Zijlstra Cc: Roy Franz Cc: Russ Anderson Cc: Russell King Cc: Thomas Gleixner Cc: Will Deacon Cc: linux-arm-kernel@lists.infradead.org Cc: linux-efi@vger.kernel.org Link: http://lkml.kernel.org/r/1466839230-12781-5-git-send-email-matt@codeblueprint.co.uk Signed-off-by: Ingo Molnar --- arch/arm/include/asm/efi.h | 4 +-- arch/arm64/include/asm/efi.h | 4 +-- arch/x86/include/asm/efi.h | 9 +++--- drivers/firmware/efi/runtime-wrappers.c | 53 +++++++-------------------------- include/linux/efi.h | 51 +++++++++++++++++++++++++++++++ 5 files changed, 69 insertions(+), 52 deletions(-) (limited to 'arch') diff --git a/arch/arm/include/asm/efi.h b/arch/arm/include/asm/efi.h index a708fa1f0905..766bf9b78160 100644 --- a/arch/arm/include/asm/efi.h +++ b/arch/arm/include/asm/efi.h @@ -28,10 +28,10 @@ int efi_set_mapping_permissions(struct mm_struct *mm, efi_memory_desc_t *md); #define arch_efi_call_virt_setup() efi_virtmap_load() #define arch_efi_call_virt_teardown() efi_virtmap_unload() -#define arch_efi_call_virt(f, args...) \ +#define arch_efi_call_virt(p, f, args...) \ ({ \ efi_##f##_t *__f; \ - __f = efi.systab->runtime->f; \ + __f = p->f; \ __f(args); \ }) diff --git a/arch/arm64/include/asm/efi.h b/arch/arm64/include/asm/efi.h index 622db3c6474e..bd887663689b 100644 --- a/arch/arm64/include/asm/efi.h +++ b/arch/arm64/include/asm/efi.h @@ -23,10 +23,10 @@ int efi_create_mapping(struct mm_struct *mm, efi_memory_desc_t *md); efi_virtmap_load(); \ }) -#define arch_efi_call_virt(f, args...) \ +#define arch_efi_call_virt(p, f, args...) \ ({ \ efi_##f##_t *__f; \ - __f = efi.systab->runtime->f; \ + __f = p->f; \ __f(args); \ }) diff --git a/arch/x86/include/asm/efi.h b/arch/x86/include/asm/efi.h index 78d1e7467eae..55b4596ef688 100644 --- a/arch/x86/include/asm/efi.h +++ b/arch/x86/include/asm/efi.h @@ -41,10 +41,9 @@ extern unsigned long asmlinkage efi_call_phys(void *, ...); /* * Wrap all the virtual calls in a way that forces the parameters on the stack. */ -#define arch_efi_call_virt(f, args...) \ +#define arch_efi_call_virt(p, f, args...) \ ({ \ - ((efi_##f##_t __attribute__((regparm(0)))*) \ - efi.systab->runtime->f)(args); \ + ((efi_##f##_t __attribute__((regparm(0)))*) p->f)(args); \ }) #define efi_ioremap(addr, size, type, attr) ioremap_cache(addr, size) @@ -81,8 +80,8 @@ struct efi_scratch { } \ }) -#define arch_efi_call_virt(f, args...) \ - efi_call((void *)efi.systab->runtime->f, args) \ +#define arch_efi_call_virt(p, f, args...) \ + efi_call((void *)p->f, args) \ #define arch_efi_call_virt_teardown() \ ({ \ diff --git a/drivers/firmware/efi/runtime-wrappers.c b/drivers/firmware/efi/runtime-wrappers.c index 23bef6bb73ee..41958774cde3 100644 --- a/drivers/firmware/efi/runtime-wrappers.c +++ b/drivers/firmware/efi/runtime-wrappers.c @@ -22,7 +22,16 @@ #include #include -static void efi_call_virt_check_flags(unsigned long flags, const char *call) +/* + * Wrap around the new efi_call_virt_generic() macros so that the + * code doesn't get too cluttered: + */ +#define efi_call_virt(f, args...) \ + efi_call_virt_pointer(efi.systab->runtime, f, args) +#define __efi_call_virt(f, args...) 
\ + __efi_call_virt_pointer(efi.systab->runtime, f, args) + +void efi_call_virt_check_flags(unsigned long flags, const char *call) { unsigned long cur_flags, mismatch; @@ -38,48 +47,6 @@ static void efi_call_virt_check_flags(unsigned long flags, const char *call) local_irq_restore(flags); } -/* - * Arch code can implement the following three template macros, avoiding - * reptition for the void/non-void return cases of {__,}efi_call_virt: - * - * * arch_efi_call_virt_setup - * - * Sets up the environment for the call (e.g. switching page tables, - * allowing kernel-mode use of floating point, if required). - * - * * arch_efi_call_virt - * - * Performs the call. The last expression in the macro must be the call - * itself, allowing the logic to be shared by the void and non-void - * cases. - * - * * arch_efi_call_virt_teardown - * - * Restores the usual kernel environment once the call has returned. - */ - -#define efi_call_virt(f, args...) \ -({ \ - efi_status_t __s; \ - unsigned long flags; \ - arch_efi_call_virt_setup(); \ - local_save_flags(flags); \ - __s = arch_efi_call_virt(f, args); \ - efi_call_virt_check_flags(flags, __stringify(f)); \ - arch_efi_call_virt_teardown(); \ - __s; \ -}) - -#define __efi_call_virt(f, args...) \ -({ \ - unsigned long flags; \ - arch_efi_call_virt_setup(); \ - local_save_flags(flags); \ - arch_efi_call_virt(f, args); \ - efi_call_virt_check_flags(flags, __stringify(f)); \ - arch_efi_call_virt_teardown(); \ -}) - /* * According to section 7.1 of the UEFI spec, Runtime Services are not fully * reentrant, and there are particular combinations of calls that need to be diff --git a/include/linux/efi.h b/include/linux/efi.h index 03009695760d..75d148dc9c3f 100644 --- a/include/linux/efi.h +++ b/include/linux/efi.h @@ -1480,4 +1480,55 @@ efi_status_t efi_setup_gop(efi_system_table_t *sys_table_arg, unsigned long size); bool efi_runtime_disabled(void); +extern void efi_call_virt_check_flags(unsigned long flags, const char *call); + +/* + * Arch code can implement the following three template macros, avoiding + * reptition for the void/non-void return cases of {__,}efi_call_virt(): + * + * * arch_efi_call_virt_setup() + * + * Sets up the environment for the call (e.g. switching page tables, + * allowing kernel-mode use of floating point, if required). + * + * * arch_efi_call_virt() + * + * Performs the call. The last expression in the macro must be the call + * itself, allowing the logic to be shared by the void and non-void + * cases. + * + * * arch_efi_call_virt_teardown() + * + * Restores the usual kernel environment once the call has returned. + */ + +#define efi_call_virt_pointer(p, f, args...) \ +({ \ + efi_status_t __s; \ + unsigned long __flags; \ + \ + arch_efi_call_virt_setup(); \ + \ + local_save_flags(__flags); \ + __s = arch_efi_call_virt(p, f, args); \ + efi_call_virt_check_flags(__flags, __stringify(f)); \ + \ + arch_efi_call_virt_teardown(); \ + \ + __s; \ +}) + +#define __efi_call_virt_pointer(p, f, args...) 
\ +({ \ + unsigned long __flags; \ + \ + arch_efi_call_virt_setup(); \ + \ + local_save_flags(__flags); \ + arch_efi_call_virt(p, f, args); \ + efi_call_virt_check_flags(__flags, __stringify(f)); \ + \ + arch_efi_call_virt_teardown(); \ +}) + #endif /* _LINUX_EFI_H */ -- cgit v1.2.3 From d1be84a232e359ca9456c63e72cb0082d68311b6 Mon Sep 17 00:00:00 2001 From: Alex Thorlton Date: Sat, 25 Jun 2016 08:20:28 +0100 Subject: x86/uv: Update uv_bios_call() to use efi_call_virt_pointer() Now that the efi_call_virt() macro has been generalized to be able to use EFI system tables besides efi.systab, we are able to convert our uv_bios_call() wrapper to use this standard EFI callback mechanism. This simple change is part of a much larger effort to recover from some issues with the way we were mapping in some of our MMRs, and the way that we were doing our BIOS callbacks, which were uncovered by commit 67a9108ed431 ("x86/efi: Build our own page table structures"). The first issue that this uncovered was that we were relying on the EFI memory mapping mechanism to map in our MMR space for us, which, while reliable, was technically a bug, as it relied on "undefined" behavior in the mapping code. The reason we were able to piggyback on the EFI memory mapping code to map in our MMRs was because, previously, EFI code used the trampoline_pgd, which shares a few entries with the main kernel pgd. It just so happened, that the memory range containing our MMRs was inside one of those shared regions, which kept our code working without issue for quite a while. Anyways, once we discovered this problem, we brought back our original code to map in the MMRs with commit: 08914f436bdd ("x86/platform/UV: Bring back the call to map_low_mmrs in uv_system_init") This got our systems a little further along, but we were still running into trouble with our EFI callbacks, which prevented us from booting all the way up. Our first step towards fixing the BIOS callbacks was to get our uv_bios_call() wrapper updated to use efi_call_virt() instead of the plain efi_call(). The previous patch took care of the effort needed to make that possible. Along the way, we hit a major issue with some confusion about how to properly pull arguments higher than number 6 off the stack in the efi_call() code, which resulted in the following commit from Linus: 683ad8092cd2 ("x86/efi: Fix 7-parameter efi_call()s") Now that all of those issues are out of the way, we're able to make this simple change to use the new efi_call_virt_pointer() in uv_bios_call() which gets our machines booting, running properly, and able to execute our callbacks with 6+ arguments. Note that, since we are now using the EFI page table when we make our function call, we are no longer able to make the call using the __va() of our function pointer, since the memory range containing that address isn't mapped into the EFI page table. For now, we will use the physical address of the function directly, since that is mapped into the EFI page table. In the near future, we're going to get some code added in to properly update our function pointer to its virtual address during SetVirtualAddressMap. 
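Stripped of context, the uv_bios_call() change in the diff below is a one-liner; the comments here spell out why it matters: the call has to go through an address that the EFI page table actually maps.

/* Before: jumps through a direct-map (__va) alias of the BIOS entry point,
 * which is not present in the dedicated EFI page table, so the call faults.
 */
ret = efi_call((void *)__va(tab->function), (u64)which, a1, a2, a3, a4, a5);

/* After: the generic wrapper switches to the EFI page table around the call
 * and uses the physical address stored in the UV system table, which that
 * page table does map.
 */
ret = efi_call_virt_pointer(tab, function, (u64)which, a1, a2, a3, a4, a5);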
Signed-off-by: Alex Thorlton Signed-off-by: Matt Fleming Cc: Ard Biesheuvel Cc: Catalin Marinas Cc: Dimitri Sivanich Cc: Linus Torvalds Cc: Mark Rutland Cc: Peter Zijlstra Cc: Roy Franz Cc: Russ Anderson Cc: Russell King Cc: Thomas Gleixner Cc: Will Deacon Cc: linux-arm-kernel@lists.infradead.org Cc: linux-efi@vger.kernel.org Link: http://lkml.kernel.org/r/1466839230-12781-6-git-send-email-matt@codeblueprint.co.uk Signed-off-by: Ingo Molnar --- arch/x86/platform/uv/bios_uv.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/x86/platform/uv/bios_uv.c b/arch/x86/platform/uv/bios_uv.c index 815fec6e05e2..66b2166ea4a1 100644 --- a/arch/x86/platform/uv/bios_uv.c +++ b/arch/x86/platform/uv/bios_uv.c @@ -40,8 +40,7 @@ s64 uv_bios_call(enum uv_bios_cmd which, u64 a1, u64 a2, u64 a3, u64 a4, u64 a5) */ return BIOS_STATUS_UNIMPLEMENTED; - ret = efi_call((void *)__va(tab->function), (u64)which, - a1, a2, a3, a4, a5); + ret = efi_call_virt_pointer(tab, function, (u64)which, a1, a2, a3, a4, a5); return ret; } EXPORT_SYMBOL_GPL(uv_bios_call); -- cgit v1.2.3 From 21f866257c7027f8f49bfde83f559f9e58f9ee93 Mon Sep 17 00:00:00 2001 From: Alex Thorlton Date: Sat, 25 Jun 2016 08:20:29 +0100 Subject: x86/efi: Update efi_thunk() to use the the arch_efi_call_virt*() macros Currently, the efi_thunk macro has some semi-duplicated code in it that can be replaced with the arch_efi_call_virt_setup/teardown macros. This commit simply replaces the duplicated code with those macros. Suggested-by: Matt Fleming Signed-off-by: Alex Thorlton Signed-off-by: Matt Fleming Cc: Ard Biesheuvel Cc: Catalin Marinas Cc: Dimitri Sivanich Cc: Linus Torvalds Cc: Mark Rutland Cc: Peter Zijlstra Cc: Roy Franz Cc: Russ Anderson Cc: Russell King Cc: Thomas Gleixner Cc: Will Deacon Cc: linux-arm-kernel@lists.infradead.org Cc: linux-efi@vger.kernel.org Link: http://lkml.kernel.org/r/1466839230-12781-7-git-send-email-matt@codeblueprint.co.uk [ Renamed variables to the standard __ prefix. ] Signed-off-by: Ingo Molnar --- arch/x86/platform/efi/efi_64.c | 21 ++++++++------------- 1 file changed, 8 insertions(+), 13 deletions(-) (limited to 'arch') diff --git a/arch/x86/platform/efi/efi_64.c b/arch/x86/platform/efi/efi_64.c index b226b3f497f1..5cb4301c4dcf 100644 --- a/arch/x86/platform/efi/efi_64.c +++ b/arch/x86/platform/efi/efi_64.c @@ -466,22 +466,17 @@ extern efi_status_t efi64_thunk(u32, ...); #define efi_thunk(f, ...) \ ({ \ efi_status_t __s; \ - unsigned long flags; \ - u32 func; \ + unsigned long __flags; \ + u32 __func; \ \ - efi_sync_low_kernel_mappings(); \ - local_irq_save(flags); \ + local_irq_save(__flags); \ + arch_efi_call_virt_setup(); \ \ - efi_scratch.prev_cr3 = read_cr3(); \ - write_cr3((unsigned long)efi_scratch.efi_pgt); \ - __flush_tlb_all(); \ + __func = runtime_service32(f); \ + __s = efi64_thunk(__func, __VA_ARGS__); \ \ - func = runtime_service32(f); \ - __s = efi64_thunk(func, __VA_ARGS__); \ - \ - write_cr3(efi_scratch.prev_cr3); \ - __flush_tlb_all(); \ - local_irq_restore(flags); \ + arch_efi_call_virt_teardown(); \ + local_irq_restore(__flags); \ \ __s; \ }) -- cgit v1.2.3 From b684e9bc750b6349ff59f1b1ab4397cae255765f Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Sat, 25 Jun 2016 08:20:30 +0100 Subject: x86/efi: Remove the unused efi_get_time() function Nothing calls the efi_get_time() function on x86, but it does suffer from the 32-bit time_t overflow in 2038. This removes the function, we can always put it back in case we need it later. 
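For reference, the 2038 limit comes from keeping seconds since 1970-01-01 in a signed 32-bit quantity, which tops out at 2^31 - 1 = 2147483647. A small userspace illustration, independent of the kernel code being removed:

#include <stdio.h>
#include <time.h>

int main(void)
{
	/* Largest count of seconds a signed 32-bit time_t can represent. */
	time_t last = 2147483647;

	/* Prints "Tue Jan 19 03:14:07 2038" (UTC); one second later a
	 * 32-bit counter wraps and jumps back to December 1901.
	 */
	printf("%s", asctime(gmtime(&last)));
	return 0;
}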
Signed-off-by: Arnd Bergmann Signed-off-by: Matt Fleming Acked-by: Ard Biesheuvel Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-efi@vger.kernel.org Link: http://lkml.kernel.org/r/1466839230-12781-8-git-send-email-matt@codeblueprint.co.uk Signed-off-by: Ingo Molnar --- arch/x86/platform/efi/efi.c | 15 --------------- include/linux/efi.h | 1 - 2 files changed, 16 deletions(-) (limited to 'arch') diff --git a/arch/x86/platform/efi/efi.c b/arch/x86/platform/efi/efi.c index f93545e7dc54..d898b334ff46 100644 --- a/arch/x86/platform/efi/efi.c +++ b/arch/x86/platform/efi/efi.c @@ -98,21 +98,6 @@ static efi_status_t __init phys_efi_set_virtual_address_map( return status; } -void efi_get_time(struct timespec *now) -{ - efi_status_t status; - efi_time_t eft; - efi_time_cap_t cap; - - status = efi.get_time(&eft, &cap); - if (status != EFI_SUCCESS) - pr_err("Oops: efitime: can't read time!\n"); - - now->tv_sec = mktime(eft.year, eft.month, eft.day, eft.hour, - eft.minute, eft.second); - now->tv_nsec = 0; -} - void __init efi_find_mirror(void) { efi_memory_desc_t *md; diff --git a/include/linux/efi.h b/include/linux/efi.h index 75d148dc9c3f..0174f28ce9b3 100644 --- a/include/linux/efi.h +++ b/include/linux/efi.h @@ -990,7 +990,6 @@ extern u64 efi_mem_desc_end(efi_memory_desc_t *md); extern int efi_mem_desc_lookup(u64 phys_addr, efi_memory_desc_t *out_md); extern void efi_initialize_iomem_resources(struct resource *code_resource, struct resource *data_resource, struct resource *bss_resource); -extern void efi_get_time(struct timespec *now); extern void efi_reserve_boot_services(void); extern int efi_get_fdt_params(struct efi_fdt_params *params); extern struct kobject *efi_kobj; -- cgit v1.2.3 From 1e8567d53d3bbefe3f6e0a27685da1b842f4c1fa Mon Sep 17 00:00:00 2001 From: Huang Tao Date: Thu, 16 Jun 2016 16:18:58 +0200 Subject: arm64: dts: rockchip: Add rktimer device node for rk3399 Add a 'rktimer' node in the device treee for the ARM64 rk3399 SoC. Signed-off-by: Huang Tao Cc: Daniel Lezcano Cc: Thomas Gleixner Cc: Heiko Stuebner Tested-by: Jianqun Xu Signed-off-by: Caesar Wang Signed-off-by: Daniel Lezcano --- arch/arm64/boot/dts/rockchip/rk3399.dtsi | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'arch') diff --git a/arch/arm64/boot/dts/rockchip/rk3399.dtsi b/arch/arm64/boot/dts/rockchip/rk3399.dtsi index 46f325a143b0..f0c0d7665102 100644 --- a/arch/arm64/boot/dts/rockchip/rk3399.dtsi +++ b/arch/arm64/boot/dts/rockchip/rk3399.dtsi @@ -492,6 +492,14 @@ interrupts = ; }; + rktimer: rktimer@ff850000 { + compatible = "rockchip,rk3399-timer"; + reg = <0x0 0xff850000 0x0 0x1000>; + interrupts = ; + clocks = <&cru PCLK_TIMER0>, <&cru SCLK_TIMER00>; + clock-names = "pclk", "timer"; + }; + spdif: spdif@ff870000 { compatible = "rockchip,rk3399-spdif"; reg = <0x0 0xff870000 0x0 0x1000>; -- cgit v1.2.3 From 0586421746ef2bc33898d2d7f3dbb0eec6b234c3 Mon Sep 17 00:00:00 2001 From: Daniel Lezcano Date: Tue, 7 Jun 2016 00:03:34 +0200 Subject: clocksource/drivers/microblaze: Convert init function to return error The init functions do not return any error. They behave as the following: - panic, thus leading to a kernel crash while another timer may work and make the system boot up correctly or - print an error and let the caller unaware if the state of the system Change that by converting the init functions to return an error conforming to the CLOCKSOURCE_OF_RET prototype. 
Proper error handling (rollback, errno value) will be changed later case by case, thus this change just return back an error or success in the init function. Signed-off-by: Daniel Lezcano --- arch/microblaze/kernel/timer.c | 51 +++++++++++++++++++++++++++++++----------- 1 file changed, 38 insertions(+), 13 deletions(-) (limited to 'arch') diff --git a/arch/microblaze/kernel/timer.c b/arch/microblaze/kernel/timer.c index 67e2ef48d2d0..7f35e7b50f1b 100644 --- a/arch/microblaze/kernel/timer.c +++ b/arch/microblaze/kernel/timer.c @@ -170,7 +170,7 @@ static struct irqaction timer_irqaction = { .dev_id = &clockevent_xilinx_timer, }; -static __init void xilinx_clockevent_init(void) +static __init int xilinx_clockevent_init(void) { clockevent_xilinx_timer.mult = div_sc(timer_clock_freq, NSEC_PER_SEC, @@ -181,6 +181,8 @@ static __init void xilinx_clockevent_init(void) clockevent_delta2ns(1, &clockevent_xilinx_timer); clockevent_xilinx_timer.cpumask = cpumask_of(0); clockevents_register_device(&clockevent_xilinx_timer); + + return 0; } static u64 xilinx_clock_read(void) @@ -229,8 +231,14 @@ static struct clocksource clocksource_microblaze = { static int __init xilinx_clocksource_init(void) { - if (clocksource_register_hz(&clocksource_microblaze, timer_clock_freq)) - panic("failed to register clocksource"); + int ret; + + ret = clocksource_register_hz(&clocksource_microblaze, + timer_clock_freq); + if (ret) { + pr_err("failed to register clocksource"); + return ret; + } /* stop timer1 */ write_fn(read_fn(timer_baseaddr + TCSR1) & ~TCSR_ENT, @@ -239,16 +247,16 @@ static int __init xilinx_clocksource_init(void) write_fn(TCSR_TINT|TCSR_ENT|TCSR_ARHT, timer_baseaddr + TCSR1); /* register timecounter - for ftrace support */ - init_xilinx_timecounter(); - return 0; + return init_xilinx_timecounter(); } -static void __init xilinx_timer_init(struct device_node *timer) +static int __init xilinx_timer_init(struct device_node *timer) { struct clk *clk; static int initialized; u32 irq; u32 timer_num = 1; + int ret; if (initialized) return; @@ -258,7 +266,7 @@ static void __init xilinx_timer_init(struct device_node *timer) timer_baseaddr = of_iomap(timer, 0); if (!timer_baseaddr) { pr_err("ERROR: invalid timer base address\n"); - BUG(); + return -ENXIO; } write_fn = timer_write32; @@ -271,11 +279,15 @@ static void __init xilinx_timer_init(struct device_node *timer) } irq = irq_of_parse_and_map(timer, 0); + if (irq <= 0) { + pr_err("Failed to parse and map irq"); + return -EINVAL; + } of_property_read_u32(timer, "xlnx,one-timer-only", &timer_num); if (timer_num) { - pr_emerg("Please enable two timers in HW\n"); - BUG(); + pr_err("Please enable two timers in HW\n"); + return -EINVAL; } pr_info("%s: irq=%d\n", timer->full_name, irq); @@ -297,15 +309,28 @@ static void __init xilinx_timer_init(struct device_node *timer) freq_div_hz = timer_clock_freq / HZ; - setup_irq(irq, &timer_irqaction); + ret = setup_irq(irq, &timer_irqaction); + if (ret) { + pr_err("Failed to setup IRQ"); + return ret; + } + #ifdef CONFIG_HEART_BEAT microblaze_setup_heartbeat(); #endif - xilinx_clocksource_init(); - xilinx_clockevent_init(); + + ret = xilinx_clocksource_init(); + if (ret) + return ret; + + ret = xilinx_clockevent_init(); + if (ret) + return ret; sched_clock_register(xilinx_clock_read, 32, timer_clock_freq); + + return 0; } -CLOCKSOURCE_OF_DECLARE(xilinx_timer, "xlnx,xps-timer-1.00.a", +CLOCKSOURCE_OF_DECLARE_RET(xilinx_timer, "xlnx,xps-timer-1.00.a", xilinx_timer_init); -- cgit v1.2.3 From 
2712616fed84ddf60c788269d39bb26eb6779945 Mon Sep 17 00:00:00 2001 From: Daniel Lezcano Date: Tue, 7 Jun 2016 00:03:49 +0200 Subject: clocksource/drivers/ralink: Convert init function to return error The init functions do not return any error. They behave as the following: - panic, thus leading to a kernel crash while another timer may work and make the system boot up correctly or - print an error and let the caller unaware if the state of the system Change that by converting the init functions to return an error conforming to the CLOCKSOURCE_OF_RET prototype. Proper error handling (rollback, errno value) will be changed later case by case, thus this change just return back an error or success in the init function. Signed-off-by: Daniel Lezcano --- arch/mips/ralink/cevt-rt3352.c | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) (limited to 'arch') diff --git a/arch/mips/ralink/cevt-rt3352.c b/arch/mips/ralink/cevt-rt3352.c index 3ad0b0794f7d..f2d3c7908626 100644 --- a/arch/mips/ralink/cevt-rt3352.c +++ b/arch/mips/ralink/cevt-rt3352.c @@ -117,11 +117,13 @@ static int systick_set_oneshot(struct clock_event_device *evt) return 0; } -static void __init ralink_systick_init(struct device_node *np) +static int __init ralink_systick_init(struct device_node *np) { + int ret; + systick.membase = of_iomap(np, 0); if (!systick.membase) - return; + return -ENXIO; systick_irqaction.name = np->name; systick.dev.name = np->name; @@ -131,16 +133,21 @@ static void __init ralink_systick_init(struct device_node *np) systick.dev.irq = irq_of_parse_and_map(np, 0); if (!systick.dev.irq) { pr_err("%s: request_irq failed", np->name); - return; + return -EINVAL; } - clocksource_mmio_init(systick.membase + SYSTICK_COUNT, np->name, - SYSTICK_FREQ, 301, 16, clocksource_mmio_readl_up); + ret = clocksource_mmio_init(systick.membase + SYSTICK_COUNT, np->name, + SYSTICK_FREQ, 301, 16, + clocksource_mmio_readl_up); + if (ret) + return ret; clockevents_register_device(&systick.dev); pr_info("%s: running - mult: %d, shift: %d\n", np->name, systick.dev.mult, systick.dev.shift); + + return 0; } -CLOCKSOURCE_OF_DECLARE(systick, "ralink,cevt-systick", ralink_systick_init); +CLOCKSOURCE_OF_DECLARE_RET(systick, "ralink,cevt-systick", ralink_systick_init); -- cgit v1.2.3 From dd1364a7439be4d20f87637a72eb7bd4553827f0 Mon Sep 17 00:00:00 2001 From: Daniel Lezcano Date: Tue, 7 Jun 2016 00:04:02 +0200 Subject: clocksource/drivers/nios2: Convert init function to return error The init functions do not return any error. They behave as the following: - panic, thus leading to a kernel crash while another timer may work and make the system boot up correctly or - print an error and let the caller unaware if the state of the system Change that by converting the init functions to return an error conforming to the CLOCKSOURCE_OF_RET prototype. Proper error handling (rollback, errno value) will be changed later case by case, thus this change just return back an error or success in the init function. 
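The conversion pattern is the same in each of these drivers: every early-exit path that used to panic() or silently return now reports an errno to the probing code. A condensed skeleton of a converted init function is shown below; the compatible string, register offset and clock rate are placeholders rather than values from any real driver, and the _RET macro name is the transitional one in use at this point in the series (a later patch renames it back).

#include <linux/clocksource.h>
#include <linux/of.h>
#include <linux/of_address.h>
#include <linux/of_irq.h>

#define EXAMPLE_COUNT_REG	0x00		/* placeholder register offset */
#define EXAMPLE_FREQ		1000000		/* placeholder clock rate (Hz) */

static int __init example_timer_init(struct device_node *np)
{
	void __iomem *base;
	int irq;

	base = of_iomap(np, 0);
	if (!base) {
		pr_err("%s: unable to map timer registers\n", np->name);
		return -ENXIO;
	}

	irq = irq_of_parse_and_map(np, 0);
	if (!irq) {
		pr_err("%s: failed to parse and map irq\n", np->name);
		return -EINVAL;
	}
	/* A real driver would go on to register a clockevent on this irq. */

	return clocksource_mmio_init(base + EXAMPLE_COUNT_REG, np->name,
				     EXAMPLE_FREQ, 300, 32,
				     clocksource_mmio_readl_up);
}
CLOCKSOURCE_OF_DECLARE_RET(example_timer, "vendor,example-timer",
			   example_timer_init);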
Signed-off-by: Daniel Lezcano --- arch/nios2/kernel/time.c | 65 +++++++++++++++++++++++++++++++++--------------- 1 file changed, 45 insertions(+), 20 deletions(-) (limited to 'arch') diff --git a/arch/nios2/kernel/time.c b/arch/nios2/kernel/time.c index e835dda2bfe2..b75e40e17963 100644 --- a/arch/nios2/kernel/time.c +++ b/arch/nios2/kernel/time.c @@ -206,15 +206,21 @@ irqreturn_t timer_interrupt(int irq, void *dev_id) return IRQ_HANDLED; } -static void __init nios2_timer_get_base_and_freq(struct device_node *np, +static int __init nios2_timer_get_base_and_freq(struct device_node *np, void __iomem **base, u32 *freq) { *base = of_iomap(np, 0); - if (!*base) - panic("Unable to map reg for %s\n", np->name); + if (!*base) { + pr_crit("Unable to map reg for %s\n", np->name); + return -ENXIO; + } + + if (of_property_read_u32(np, "clock-frequency", freq)) { + pr_crit("Unable to get %s clock frequency\n", np->name); + return -EINVAL; + } - if (of_property_read_u32(np, "clock-frequency", freq)) - panic("Unable to get %s clock frequency\n", np->name); + return 0; } static struct nios2_clockevent_dev nios2_ce = { @@ -231,17 +237,21 @@ static struct nios2_clockevent_dev nios2_ce = { }, }; -static __init void nios2_clockevent_init(struct device_node *timer) +static __init int nios2_clockevent_init(struct device_node *timer) { void __iomem *iobase; u32 freq; - int irq; + int irq, ret; - nios2_timer_get_base_and_freq(timer, &iobase, &freq); + ret = nios2_timer_get_base_and_freq(timer, &iobase, &freq); + if (ret) + return ret; irq = irq_of_parse_and_map(timer, 0); - if (!irq) - panic("Unable to parse timer irq\n"); + if (!irq) { + pr_crit("Unable to parse timer irq\n"); + return -EINVAL; + } nios2_ce.timer.base = iobase; nios2_ce.timer.freq = freq; @@ -253,25 +263,35 @@ static __init void nios2_clockevent_init(struct device_node *timer) /* clear pending interrupt */ timer_writew(&nios2_ce.timer, 0, ALTERA_TIMER_STATUS_REG); - if (request_irq(irq, timer_interrupt, IRQF_TIMER, timer->name, - &nios2_ce.ced)) - panic("Unable to setup timer irq\n"); + ret = request_irq(irq, timer_interrupt, IRQF_TIMER, timer->name, + &nios2_ce.ced); + if (ret) { + pr_crit("Unable to setup timer irq\n"); + return ret; + } clockevents_config_and_register(&nios2_ce.ced, freq, 1, ULONG_MAX); + + return 0; } -static __init void nios2_clocksource_init(struct device_node *timer) +static __init int nios2_clocksource_init(struct device_node *timer) { unsigned int ctrl; void __iomem *iobase; u32 freq; + int ret; - nios2_timer_get_base_and_freq(timer, &iobase, &freq); + ret = nios2_timer_get_base_and_freq(timer, &iobase, &freq); + if (ret) + return ret; nios2_cs.timer.base = iobase; nios2_cs.timer.freq = freq; - clocksource_register_hz(&nios2_cs.cs, freq); + ret = clocksource_register_hz(&nios2_cs.cs, freq); + if (ret) + return ret; timer_writew(&nios2_cs.timer, USHRT_MAX, ALTERA_TIMER_PERIODL_REG); timer_writew(&nios2_cs.timer, USHRT_MAX, ALTERA_TIMER_PERIODH_REG); @@ -282,6 +302,8 @@ static __init void nios2_clocksource_init(struct device_node *timer) /* Calibrate the delay loop directly */ lpj_fine = freq / HZ; + + return 0; } /* @@ -289,22 +311,25 @@ static __init void nios2_clocksource_init(struct device_node *timer) * more instances, the second one gets used as clocksource and all * others are unused. 
*/ -static void __init nios2_time_init(struct device_node *timer) +static int __init nios2_time_init(struct device_node *timer) { static int num_called; + int ret; switch (num_called) { case 0: - nios2_clockevent_init(timer); + ret = nios2_clockevent_init(timer); break; case 1: - nios2_clocksource_init(timer); + ret = nios2_clocksource_init(timer); break; default: break; } num_called++; + + return ret; } void read_persistent_clock(struct timespec *ts) @@ -327,4 +352,4 @@ void __init time_init(void) clocksource_probe(); } -CLOCKSOURCE_OF_DECLARE(nios2_timer, ALTR_TIMER_COMPATIBLE, nios2_time_init); +CLOCKSOURCE_OF_DECLARE_RET(nios2_timer, ALTR_TIMER_COMPATIBLE, nios2_time_init); -- cgit v1.2.3 From dcbc0eddcbbf441fdcf0eb4c2e9c1716ac235972 Mon Sep 17 00:00:00 2001 From: Daniel Lezcano Date: Tue, 7 Jun 2016 00:03:11 +0200 Subject: clocksource/drivers/smp_twd: Convert init function to return error The init functions do not return any error. They behave as the following: - panic, thus leading to a kernel crash while another timer may work and make the system boot up correctly or - print an error and let the caller unaware if the state of the system Change that by converting the init functions to return an error conforming to the CLOCKSOURCE_OF_RET prototype. Proper error handling (rollback, errno value) will be changed later case by case, thus this change just return back an error or success in the init function. Signed-off-by: Daniel Lezcano --- arch/arm/kernel/smp_twd.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) (limited to 'arch') diff --git a/arch/arm/kernel/smp_twd.c b/arch/arm/kernel/smp_twd.c index 1bfa7a7f5533..2b24be41d9cc 100644 --- a/arch/arm/kernel/smp_twd.c +++ b/arch/arm/kernel/smp_twd.c @@ -390,7 +390,7 @@ int __init twd_local_timer_register(struct twd_local_timer *tlt) } #ifdef CONFIG_OF -static void __init twd_local_timer_of_register(struct device_node *np) +static int __init twd_local_timer_of_register(struct device_node *np) { int err; @@ -410,8 +410,9 @@ static void __init twd_local_timer_of_register(struct device_node *np) out: WARN(err, "twd_local_timer_of_register failed (%d)\n", err); + return err; } -CLOCKSOURCE_OF_DECLARE(arm_twd_a9, "arm,cortex-a9-twd-timer", twd_local_timer_of_register); -CLOCKSOURCE_OF_DECLARE(arm_twd_a5, "arm,cortex-a5-twd-timer", twd_local_timer_of_register); -CLOCKSOURCE_OF_DECLARE(arm_twd_11mp, "arm,arm11mp-twd-timer", twd_local_timer_of_register); +CLOCKSOURCE_OF_DECLARE_RET(arm_twd_a9, "arm,cortex-a9-twd-timer", twd_local_timer_of_register); +CLOCKSOURCE_OF_DECLARE_RET(arm_twd_a5, "arm,cortex-a5-twd-timer", twd_local_timer_of_register); +CLOCKSOURCE_OF_DECLARE_RET(arm_twd_11mp, "arm,arm11mp-twd-timer", twd_local_timer_of_register); #endif -- cgit v1.2.3 From 43d7560494a264a34e8bb5257ef43b0be6134dac Mon Sep 17 00:00:00 2001 From: Daniel Lezcano Date: Wed, 15 Jun 2016 14:50:12 +0200 Subject: clocksource/drivers/arc: Convert init function to return error The init functions do not return any error. They behave as the following: - panic, thus leading to a kernel crash while another timer may work and make the system boot up correctly or - print an error and let the caller unaware if the state of the system Change that by converting the init functions to return an error conforming to the CLOCKSOURCE_OF_RET prototype. Proper error handling (rollback, errno value) will be changed later case by case, thus this change just return back an error or success in the init function. 
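On the consuming side, clocksource_probe() is what gives these return values meaning: once every init function reports status, the probe loop can log a failure and skip that timer instead of continuing blindly. Roughly, condensed from the shape visible in the clksrc-probe.c hunks further down (error message text approximate), the loop ends up as:

void __init clocksource_probe(void)
{
	struct device_node *np;
	const struct of_device_id *match;
	of_init_fn_1_ret init_func_ret;
	int ret;

	for_each_matching_node_and_match(np, __clksrc_of_table, &match) {
		if (!of_device_is_available(np))
			continue;

		init_func_ret = match->data;

		ret = init_func_ret(np);
		if (ret) {
			pr_err("Failed to initialize '%s': %d\n",
			       of_node_full_name(np), ret);
			continue;
		}
	}
}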
Signed-off-by: Daniel Lezcano --- arch/arc/kernel/time.c | 69 ++++++++++++++++++++++++++++++-------------------- 1 file changed, 42 insertions(+), 27 deletions(-) (limited to 'arch') diff --git a/arch/arc/kernel/time.c b/arch/arc/kernel/time.c index 4549ab255dd1..09de669f4ff0 100644 --- a/arch/arc/kernel/time.c +++ b/arch/arc/kernel/time.c @@ -116,21 +116,21 @@ static struct clocksource arc_counter_gfrc = { .flags = CLOCK_SOURCE_IS_CONTINUOUS, }; -static void __init arc_cs_setup_gfrc(struct device_node *node) +static int __init arc_cs_setup_gfrc(struct device_node *node) { int exists = cpuinfo_arc700[0].extn.gfrc; int ret; if (WARN(!exists, "Global-64-bit-Ctr clocksource not detected")) - return; + return -ENXIO; ret = arc_get_timer_clk(node); if (ret) - return; + return ret; - clocksource_register_hz(&arc_counter_gfrc, arc_timer_freq); + return clocksource_register_hz(&arc_counter_gfrc, arc_timer_freq); } -CLOCKSOURCE_OF_DECLARE(arc_gfrc, "snps,archs-timer-gfrc", arc_cs_setup_gfrc); +CLOCKSOURCE_OF_DECLARE_RET(arc_gfrc, "snps,archs-timer-gfrc", arc_cs_setup_gfrc); #endif @@ -172,27 +172,27 @@ static struct clocksource arc_counter_rtc = { .flags = CLOCK_SOURCE_IS_CONTINUOUS, }; -static void __init arc_cs_setup_rtc(struct device_node *node) +static int __init arc_cs_setup_rtc(struct device_node *node) { int exists = cpuinfo_arc700[smp_processor_id()].extn.rtc; int ret; if (WARN(!exists, "Local-64-bit-Ctr clocksource not detected")) - return; + return -ENXIO; /* Local to CPU hence not usable in SMP */ if (WARN(IS_ENABLED(CONFIG_SMP), "Local-64-bit-Ctr not usable in SMP")) - return; + return -EINVAL; ret = arc_get_timer_clk(node); if (ret) - return; + return ret; write_aux_reg(AUX_RTC_CTRL, 1); - clocksource_register_hz(&arc_counter_rtc, arc_timer_freq); + return clocksource_register_hz(&arc_counter_rtc, arc_timer_freq); } -CLOCKSOURCE_OF_DECLARE(arc_rtc, "snps,archs-timer-rtc", arc_cs_setup_rtc); +CLOCKSOURCE_OF_DECLARE_RET(arc_rtc, "snps,archs-timer-rtc", arc_cs_setup_rtc); #endif @@ -213,23 +213,23 @@ static struct clocksource arc_counter_timer1 = { .flags = CLOCK_SOURCE_IS_CONTINUOUS, }; -static void __init arc_cs_setup_timer1(struct device_node *node) +static int __init arc_cs_setup_timer1(struct device_node *node) { int ret; /* Local to CPU hence not usable in SMP */ if (IS_ENABLED(CONFIG_SMP)) - return; + return -EINVAL; ret = arc_get_timer_clk(node); if (ret) - return; + return ret; write_aux_reg(ARC_REG_TIMER1_LIMIT, ARC_TIMER_MAX); write_aux_reg(ARC_REG_TIMER1_CNT, 0); write_aux_reg(ARC_REG_TIMER1_CTRL, TIMER_CTRL_NH); - clocksource_register_hz(&arc_counter_timer1, arc_timer_freq); + return clocksource_register_hz(&arc_counter_timer1, arc_timer_freq); } /********** Clock Event Device *********/ @@ -324,20 +324,28 @@ static struct notifier_block arc_timer_cpu_nb = { /* * clockevent setup for boot CPU */ -static void __init arc_clockevent_setup(struct device_node *node) +static int __init arc_clockevent_setup(struct device_node *node) { struct clock_event_device *evt = this_cpu_ptr(&arc_clockevent_device); int ret; - register_cpu_notifier(&arc_timer_cpu_nb); + ret = register_cpu_notifier(&arc_timer_cpu_nb); + if (ret) { + pr_err("Failed to register cpu notifier"); + return ret; + } arc_timer_irq = irq_of_parse_and_map(node, 0); - if (arc_timer_irq <= 0) - panic("clockevent: missing irq"); + if (arc_timer_irq <= 0) { + pr_err("clockevent: missing irq"); + return -EINVAL; + } ret = arc_get_timer_clk(node); - if (ret) - panic("clockevent: missing clk"); + if (ret) { + pr_err("clockevent: 
missing clk"); + return ret; + } evt->irq = arc_timer_irq; evt->cpumask = cpumask_of(smp_processor_id()); @@ -347,24 +355,31 @@ static void __init arc_clockevent_setup(struct device_node *node) /* Needs apriori irq_set_percpu_devid() done in intc map function */ ret = request_percpu_irq(arc_timer_irq, timer_irq_handler, "Timer0 (per-cpu-tick)", evt); - if (ret) - panic("clockevent: unable to request irq\n"); + if (ret) { + pr_err("clockevent: unable to request irq\n"); + return ret; + } enable_percpu_irq(arc_timer_irq, 0); + + return 0; } -static void __init arc_of_timer_init(struct device_node *np) +static int __init arc_of_timer_init(struct device_node *np) { static int init_count = 0; + int ret; if (!init_count) { init_count = 1; - arc_clockevent_setup(np); + ret = arc_clockevent_setup(np); } else { - arc_cs_setup_timer1(np); + ret = arc_cs_setup_timer1(np); } + + return ret; } -CLOCKSOURCE_OF_DECLARE(arc_clkevt, "snps,arc-timer", arc_of_timer_init); +CLOCKSOURCE_OF_DECLARE_RET(arc_clkevt, "snps,arc-timer", arc_of_timer_init); /* * Called from start_kernel() - boot CPU only -- cgit v1.2.3 From 177cf6e52b0a1a382b9892d3cc9aafd6e7c5943f Mon Sep 17 00:00:00 2001 From: Daniel Lezcano Date: Tue, 7 Jun 2016 00:27:44 +0200 Subject: clocksources: Switch back to the clksrc table All the clocksource drivers's init function are now converted to return an error code. CLOCKSOURCE_OF_DECLARE is no longer used as well as the clksrc-of table. Let's convert back the names: - CLOCKSOURCE_OF_DECLARE_RET => CLOCKSOURCE_OF_DECLARE - clksrc-of-ret => clksrc-of Signed-off-by: Daniel Lezcano For exynos_mct and samsung_pwm_timer: Acked-by: Krzysztof Kozlowski For arch/arc: Acked-by: Vineet Gupta For mediatek driver: Acked-by: Matthias Brugger For the Rockchip-part Acked-by: Heiko Stuebner For STi : Acked-by: Patrice Chotard For the mps2-timer.c and versatile.c changes: Acked-by: Liviu Dudau For the OXNAS part : Acked-by: Neil Armstrong For LPC32xx driver: Acked-by: Sylvain Lemieux For Broadcom Kona timer change: Acked-by: Ray Jui For Sun4i and Sun5i: Acked-by: Chen-Yu Tsai For Meson6: Acked-by: Carlo Caione For Keystone: Acked-by: Santosh Shilimkar For NPS: Acked-by: Noam Camus For bcm2835: Acked-by: Eric Anholt --- arch/arc/kernel/time.c | 6 +++--- arch/arm/kernel/smp_twd.c | 6 +++--- arch/microblaze/kernel/timer.c | 2 +- arch/mips/ralink/cevt-rt3352.c | 2 +- arch/nios2/kernel/time.c | 2 +- drivers/clocksource/arm_arch_timer.c | 6 +++--- drivers/clocksource/arm_global_timer.c | 2 +- drivers/clocksource/armv7m_systick.c | 2 +- drivers/clocksource/asm9260_timer.c | 2 +- drivers/clocksource/bcm2835_timer.c | 2 +- drivers/clocksource/bcm_kona_timer.c | 4 ++-- drivers/clocksource/cadence_ttc_timer.c | 2 +- drivers/clocksource/clksrc-dbx500-prcmu.c | 2 +- drivers/clocksource/clksrc-probe.c | 14 -------------- drivers/clocksource/clksrc_st_lpc.c | 2 +- drivers/clocksource/clps711x-timer.c | 2 +- drivers/clocksource/dw_apb_timer_of.c | 8 ++++---- drivers/clocksource/exynos_mct.c | 4 ++-- drivers/clocksource/fsl_ftm_timer.c | 2 +- drivers/clocksource/h8300_timer16.c | 2 +- drivers/clocksource/h8300_timer8.c | 2 +- drivers/clocksource/h8300_tpu.c | 2 +- drivers/clocksource/meson6_timer.c | 2 +- drivers/clocksource/mips-gic-timer.c | 2 +- drivers/clocksource/moxart_timer.c | 2 +- drivers/clocksource/mps2-timer.c | 2 +- drivers/clocksource/mtk_timer.c | 2 +- drivers/clocksource/mxs_timer.c | 2 +- drivers/clocksource/nomadik-mtu.c | 2 +- drivers/clocksource/pxa_timer.c | 2 +- drivers/clocksource/qcom-timer.c | 4 ++-- 
drivers/clocksource/rockchip_timer.c | 8 ++++---- drivers/clocksource/samsung_pwm_timer.c | 8 ++++---- drivers/clocksource/sun4i_timer.c | 2 +- drivers/clocksource/tango_xtal.c | 2 +- drivers/clocksource/tegra20_timer.c | 4 ++-- drivers/clocksource/time-armada-370-xp.c | 6 +++--- drivers/clocksource/time-efm32.c | 4 ++-- drivers/clocksource/time-lpc32xx.c | 2 +- drivers/clocksource/time-orion.c | 2 +- drivers/clocksource/time-pistachio.c | 2 +- drivers/clocksource/timer-atlas7.c | 2 +- drivers/clocksource/timer-atmel-pit.c | 2 +- drivers/clocksource/timer-atmel-st.c | 2 +- drivers/clocksource/timer-digicolor.c | 2 +- drivers/clocksource/timer-imx-gpt.c | 24 ++++++++++++------------ drivers/clocksource/timer-integrator-ap.c | 2 +- drivers/clocksource/timer-keystone.c | 2 +- drivers/clocksource/timer-nps.c | 4 ++-- drivers/clocksource/timer-oxnas-rps.c | 4 ++-- drivers/clocksource/timer-prima2.c | 2 +- drivers/clocksource/timer-sp804.c | 4 ++-- drivers/clocksource/timer-stm32.c | 2 +- drivers/clocksource/timer-sun5i.c | 4 ++-- drivers/clocksource/timer-ti-32k.c | 2 +- drivers/clocksource/timer-u300.c | 2 +- drivers/clocksource/versatile.c | 4 ++-- drivers/clocksource/vf_pit_timer.c | 2 +- drivers/clocksource/vt8500_timer.c | 2 +- drivers/clocksource/zevio-timer.c | 2 +- include/asm-generic/vmlinux.lds.h | 2 -- include/linux/clocksource.h | 5 +---- 62 files changed, 98 insertions(+), 117 deletions(-) (limited to 'arch') diff --git a/arch/arc/kernel/time.c b/arch/arc/kernel/time.c index 09de669f4ff0..98f22d2eb563 100644 --- a/arch/arc/kernel/time.c +++ b/arch/arc/kernel/time.c @@ -130,7 +130,7 @@ static int __init arc_cs_setup_gfrc(struct device_node *node) return clocksource_register_hz(&arc_counter_gfrc, arc_timer_freq); } -CLOCKSOURCE_OF_DECLARE_RET(arc_gfrc, "snps,archs-timer-gfrc", arc_cs_setup_gfrc); +CLOCKSOURCE_OF_DECLARE(arc_gfrc, "snps,archs-timer-gfrc", arc_cs_setup_gfrc); #endif @@ -192,7 +192,7 @@ static int __init arc_cs_setup_rtc(struct device_node *node) return clocksource_register_hz(&arc_counter_rtc, arc_timer_freq); } -CLOCKSOURCE_OF_DECLARE_RET(arc_rtc, "snps,archs-timer-rtc", arc_cs_setup_rtc); +CLOCKSOURCE_OF_DECLARE(arc_rtc, "snps,archs-timer-rtc", arc_cs_setup_rtc); #endif @@ -379,7 +379,7 @@ static int __init arc_of_timer_init(struct device_node *np) return ret; } -CLOCKSOURCE_OF_DECLARE_RET(arc_clkevt, "snps,arc-timer", arc_of_timer_init); +CLOCKSOURCE_OF_DECLARE(arc_clkevt, "snps,arc-timer", arc_of_timer_init); /* * Called from start_kernel() - boot CPU only diff --git a/arch/arm/kernel/smp_twd.c b/arch/arm/kernel/smp_twd.c index 2b24be41d9cc..b6ec65e68009 100644 --- a/arch/arm/kernel/smp_twd.c +++ b/arch/arm/kernel/smp_twd.c @@ -412,7 +412,7 @@ out: WARN(err, "twd_local_timer_of_register failed (%d)\n", err); return err; } -CLOCKSOURCE_OF_DECLARE_RET(arm_twd_a9, "arm,cortex-a9-twd-timer", twd_local_timer_of_register); -CLOCKSOURCE_OF_DECLARE_RET(arm_twd_a5, "arm,cortex-a5-twd-timer", twd_local_timer_of_register); -CLOCKSOURCE_OF_DECLARE_RET(arm_twd_11mp, "arm,arm11mp-twd-timer", twd_local_timer_of_register); +CLOCKSOURCE_OF_DECLARE(arm_twd_a9, "arm,cortex-a9-twd-timer", twd_local_timer_of_register); +CLOCKSOURCE_OF_DECLARE(arm_twd_a5, "arm,cortex-a5-twd-timer", twd_local_timer_of_register); +CLOCKSOURCE_OF_DECLARE(arm_twd_11mp, "arm,arm11mp-twd-timer", twd_local_timer_of_register); #endif diff --git a/arch/microblaze/kernel/timer.c b/arch/microblaze/kernel/timer.c index 7f35e7b50f1b..5bbf38b916ef 100644 --- a/arch/microblaze/kernel/timer.c +++ 
b/arch/microblaze/kernel/timer.c @@ -332,5 +332,5 @@ static int __init xilinx_timer_init(struct device_node *timer) return 0; } -CLOCKSOURCE_OF_DECLARE_RET(xilinx_timer, "xlnx,xps-timer-1.00.a", +CLOCKSOURCE_OF_DECLARE(xilinx_timer, "xlnx,xps-timer-1.00.a", xilinx_timer_init); diff --git a/arch/mips/ralink/cevt-rt3352.c b/arch/mips/ralink/cevt-rt3352.c index f2d3c7908626..f24eee04e16a 100644 --- a/arch/mips/ralink/cevt-rt3352.c +++ b/arch/mips/ralink/cevt-rt3352.c @@ -150,4 +150,4 @@ static int __init ralink_systick_init(struct device_node *np) return 0; } -CLOCKSOURCE_OF_DECLARE_RET(systick, "ralink,cevt-systick", ralink_systick_init); +CLOCKSOURCE_OF_DECLARE(systick, "ralink,cevt-systick", ralink_systick_init); diff --git a/arch/nios2/kernel/time.c b/arch/nios2/kernel/time.c index b75e40e17963..d9563ddb337e 100644 --- a/arch/nios2/kernel/time.c +++ b/arch/nios2/kernel/time.c @@ -352,4 +352,4 @@ void __init time_init(void) clocksource_probe(); } -CLOCKSOURCE_OF_DECLARE_RET(nios2_timer, ALTR_TIMER_COMPATIBLE, nios2_time_init); +CLOCKSOURCE_OF_DECLARE(nios2_timer, ALTR_TIMER_COMPATIBLE, nios2_time_init); diff --git a/drivers/clocksource/arm_arch_timer.c b/drivers/clocksource/arm_arch_timer.c index d0cda68e2c41..9e33309ad2ea 100644 --- a/drivers/clocksource/arm_arch_timer.c +++ b/drivers/clocksource/arm_arch_timer.c @@ -784,8 +784,8 @@ static int __init arch_timer_of_init(struct device_node *np) return arch_timer_init(); } -CLOCKSOURCE_OF_DECLARE_RET(armv7_arch_timer, "arm,armv7-timer", arch_timer_of_init); -CLOCKSOURCE_OF_DECLARE_RET(armv8_arch_timer, "arm,armv8-timer", arch_timer_of_init); +CLOCKSOURCE_OF_DECLARE(armv7_arch_timer, "arm,armv7-timer", arch_timer_of_init); +CLOCKSOURCE_OF_DECLARE(armv8_arch_timer, "arm,armv8-timer", arch_timer_of_init); static int __init arch_timer_mem_init(struct device_node *np) { @@ -868,7 +868,7 @@ out: of_node_put(best_frame); return ret; } -CLOCKSOURCE_OF_DECLARE_RET(armv7_arch_timer_mem, "arm,armv7-timer-mem", +CLOCKSOURCE_OF_DECLARE(armv7_arch_timer_mem, "arm,armv7-timer-mem", arch_timer_mem_init); #ifdef CONFIG_ACPI diff --git a/drivers/clocksource/arm_global_timer.c b/drivers/clocksource/arm_global_timer.c index 40104fc93f24..2a9ceb6e93f9 100644 --- a/drivers/clocksource/arm_global_timer.c +++ b/drivers/clocksource/arm_global_timer.c @@ -358,5 +358,5 @@ out_unmap: } /* Only tested on r2p2 and r3p0 */ -CLOCKSOURCE_OF_DECLARE_RET(arm_gt, "arm,cortex-a9-global-timer", +CLOCKSOURCE_OF_DECLARE(arm_gt, "arm,cortex-a9-global-timer", global_timer_of_register); diff --git a/drivers/clocksource/armv7m_systick.c b/drivers/clocksource/armv7m_systick.c index 2b55410edaf6..e93af1f6a36c 100644 --- a/drivers/clocksource/armv7m_systick.c +++ b/drivers/clocksource/armv7m_systick.c @@ -81,5 +81,5 @@ out_unmap: return ret; } -CLOCKSOURCE_OF_DECLARE_RET(arm_systick, "arm,armv7m-systick", +CLOCKSOURCE_OF_DECLARE(arm_systick, "arm,armv7m-systick", system_timer_of_register); diff --git a/drivers/clocksource/asm9260_timer.c b/drivers/clocksource/asm9260_timer.c index d113c0275d06..1ba871b7fe11 100644 --- a/drivers/clocksource/asm9260_timer.c +++ b/drivers/clocksource/asm9260_timer.c @@ -238,5 +238,5 @@ static int __init asm9260_timer_init(struct device_node *np) return 0; } -CLOCKSOURCE_OF_DECLARE_RET(asm9260_timer, "alphascale,asm9260-timer", +CLOCKSOURCE_OF_DECLARE(asm9260_timer, "alphascale,asm9260-timer", asm9260_timer_init); diff --git a/drivers/clocksource/bcm2835_timer.c b/drivers/clocksource/bcm2835_timer.c index 2dcf896b5381..e71acf231c89 100644 --- 
a/drivers/clocksource/bcm2835_timer.c +++ b/drivers/clocksource/bcm2835_timer.c @@ -142,5 +142,5 @@ static int __init bcm2835_timer_init(struct device_node *node) return 0; } -CLOCKSOURCE_OF_DECLARE_RET(bcm2835, "brcm,bcm2835-system-timer", +CLOCKSOURCE_OF_DECLARE(bcm2835, "brcm,bcm2835-system-timer", bcm2835_timer_init); diff --git a/drivers/clocksource/bcm_kona_timer.c b/drivers/clocksource/bcm_kona_timer.c index c251aa68993f..86f87be3d80f 100644 --- a/drivers/clocksource/bcm_kona_timer.c +++ b/drivers/clocksource/bcm_kona_timer.c @@ -195,9 +195,9 @@ static int __init kona_timer_init(struct device_node *node) return 0; } -CLOCKSOURCE_OF_DECLARE_RET(brcm_kona, "brcm,kona-timer", kona_timer_init); +CLOCKSOURCE_OF_DECLARE(brcm_kona, "brcm,kona-timer", kona_timer_init); /* * bcm,kona-timer is deprecated by brcm,kona-timer * being kept here for driver compatibility */ -CLOCKSOURCE_OF_DECLARE_RET(bcm_kona, "bcm,kona-timer", kona_timer_init); +CLOCKSOURCE_OF_DECLARE(bcm_kona, "bcm,kona-timer", kona_timer_init); diff --git a/drivers/clocksource/cadence_ttc_timer.c b/drivers/clocksource/cadence_ttc_timer.c index e2e76311dde2..388a77bdc39a 100644 --- a/drivers/clocksource/cadence_ttc_timer.c +++ b/drivers/clocksource/cadence_ttc_timer.c @@ -539,4 +539,4 @@ static int __init ttc_timer_init(struct device_node *timer) return 0; } -CLOCKSOURCE_OF_DECLARE_RET(ttc, "cdns,ttc", ttc_timer_init); +CLOCKSOURCE_OF_DECLARE(ttc, "cdns,ttc", ttc_timer_init); diff --git a/drivers/clocksource/clksrc-dbx500-prcmu.c b/drivers/clocksource/clksrc-dbx500-prcmu.c index 5a59d2953388..77a365f573d7 100644 --- a/drivers/clocksource/clksrc-dbx500-prcmu.c +++ b/drivers/clocksource/clksrc-dbx500-prcmu.c @@ -86,5 +86,5 @@ static int __init clksrc_dbx500_prcmu_init(struct device_node *node) #endif return clocksource_register_hz(&clocksource_dbx500_prcmu, RATE_32K); } -CLOCKSOURCE_OF_DECLARE_RET(dbx500_prcmu, "stericsson,db8500-prcmu-timer-4", +CLOCKSOURCE_OF_DECLARE(dbx500_prcmu, "stericsson,db8500-prcmu-timer-4", clksrc_dbx500_prcmu_init); diff --git a/drivers/clocksource/clksrc-probe.c b/drivers/clocksource/clksrc-probe.c index 5fa6a555b35c..bc62be97f0a8 100644 --- a/drivers/clocksource/clksrc-probe.c +++ b/drivers/clocksource/clksrc-probe.c @@ -20,19 +20,14 @@ #include extern struct of_device_id __clksrc_of_table[]; -extern struct of_device_id __clksrc_ret_of_table[]; static const struct of_device_id __clksrc_of_table_sentinel __used __section(__clksrc_of_table_end); -static const struct of_device_id __clksrc_ret_of_table_sentinel - __used __section(__clksrc_ret_of_table_end); - void __init clocksource_probe(void) { struct device_node *np; const struct of_device_id *match; - of_init_fn_1 init_func; of_init_fn_1_ret init_func_ret; unsigned clocksources = 0; int ret; @@ -41,15 +36,6 @@ void __init clocksource_probe(void) if (!of_device_is_available(np)) continue; - init_func = match->data; - init_func(np); - clocksources++; - } - - for_each_matching_node_and_match(np, __clksrc_ret_of_table, &match) { - if (!of_device_is_available(np)) - continue; - init_func_ret = match->data; ret = init_func_ret(np); diff --git a/drivers/clocksource/clksrc_st_lpc.c b/drivers/clocksource/clksrc_st_lpc.c index c9022a9eb593..03cc49217bb4 100644 --- a/drivers/clocksource/clksrc_st_lpc.c +++ b/drivers/clocksource/clksrc_st_lpc.c @@ -132,4 +132,4 @@ static int __init st_clksrc_of_register(struct device_node *np) return ret; } -CLOCKSOURCE_OF_DECLARE_RET(ddata, "st,stih407-lpc", st_clksrc_of_register); +CLOCKSOURCE_OF_DECLARE(ddata, 
"st,stih407-lpc", st_clksrc_of_register); diff --git a/drivers/clocksource/clps711x-timer.c b/drivers/clocksource/clps711x-timer.c index 3b6619808442..84aed78261e4 100644 --- a/drivers/clocksource/clps711x-timer.c +++ b/drivers/clocksource/clps711x-timer.c @@ -119,5 +119,5 @@ static int __init clps711x_timer_init(struct device_node *np) return -EINVAL; } } -CLOCKSOURCE_OF_DECLARE_RET(clps711x, "cirrus,clps711x-timer", clps711x_timer_init); +CLOCKSOURCE_OF_DECLARE(clps711x, "cirrus,clps711x-timer", clps711x_timer_init); #endif diff --git a/drivers/clocksource/dw_apb_timer_of.c b/drivers/clocksource/dw_apb_timer_of.c index 4985a2cadad9..aee6c0d39a7c 100644 --- a/drivers/clocksource/dw_apb_timer_of.c +++ b/drivers/clocksource/dw_apb_timer_of.c @@ -167,7 +167,7 @@ static int __init dw_apb_timer_init(struct device_node *timer) return 0; } -CLOCKSOURCE_OF_DECLARE_RET(pc3x2_timer, "picochip,pc3x2-timer", dw_apb_timer_init); -CLOCKSOURCE_OF_DECLARE_RET(apb_timer_osc, "snps,dw-apb-timer-osc", dw_apb_timer_init); -CLOCKSOURCE_OF_DECLARE_RET(apb_timer_sp, "snps,dw-apb-timer-sp", dw_apb_timer_init); -CLOCKSOURCE_OF_DECLARE_RET(apb_timer, "snps,dw-apb-timer", dw_apb_timer_init); +CLOCKSOURCE_OF_DECLARE(pc3x2_timer, "picochip,pc3x2-timer", dw_apb_timer_init); +CLOCKSOURCE_OF_DECLARE(apb_timer_osc, "snps,dw-apb-timer-osc", dw_apb_timer_init); +CLOCKSOURCE_OF_DECLARE(apb_timer_sp, "snps,dw-apb-timer-sp", dw_apb_timer_init); +CLOCKSOURCE_OF_DECLARE(apb_timer, "snps,dw-apb-timer", dw_apb_timer_init); diff --git a/drivers/clocksource/exynos_mct.c b/drivers/clocksource/exynos_mct.c index f6caed0c36ae..0d18dd4b3bd2 100644 --- a/drivers/clocksource/exynos_mct.c +++ b/drivers/clocksource/exynos_mct.c @@ -627,5 +627,5 @@ static int __init mct_init_ppi(struct device_node *np) { return mct_init_dt(np, MCT_INT_PPI); } -CLOCKSOURCE_OF_DECLARE_RET(exynos4210, "samsung,exynos4210-mct", mct_init_spi); -CLOCKSOURCE_OF_DECLARE_RET(exynos4412, "samsung,exynos4412-mct", mct_init_ppi); +CLOCKSOURCE_OF_DECLARE(exynos4210, "samsung,exynos4210-mct", mct_init_spi); +CLOCKSOURCE_OF_DECLARE(exynos4412, "samsung,exynos4412-mct", mct_init_ppi); diff --git a/drivers/clocksource/fsl_ftm_timer.c b/drivers/clocksource/fsl_ftm_timer.c index 9ad4ca32fb2d..738515b89073 100644 --- a/drivers/clocksource/fsl_ftm_timer.c +++ b/drivers/clocksource/fsl_ftm_timer.c @@ -369,4 +369,4 @@ err: kfree(priv); return ret; } -CLOCKSOURCE_OF_DECLARE_RET(flextimer, "fsl,ftm-timer", ftm_timer_init); +CLOCKSOURCE_OF_DECLARE(flextimer, "fsl,ftm-timer", ftm_timer_init); diff --git a/drivers/clocksource/h8300_timer16.c b/drivers/clocksource/h8300_timer16.c index 9d99fc85ffad..07d9d5be9054 100644 --- a/drivers/clocksource/h8300_timer16.c +++ b/drivers/clocksource/h8300_timer16.c @@ -187,5 +187,5 @@ free_clk: return ret; } -CLOCKSOURCE_OF_DECLARE_RET(h8300_16bit, "renesas,16bit-timer", +CLOCKSOURCE_OF_DECLARE(h8300_16bit, "renesas,16bit-timer", h8300_16timer_init); diff --git a/drivers/clocksource/h8300_timer8.c b/drivers/clocksource/h8300_timer8.c index 0292a1907619..546bb180f5a4 100644 --- a/drivers/clocksource/h8300_timer8.c +++ b/drivers/clocksource/h8300_timer8.c @@ -215,4 +215,4 @@ free_clk: return ret; } -CLOCKSOURCE_OF_DECLARE_RET(h8300_8bit, "renesas,8bit-timer", h8300_8timer_init); +CLOCKSOURCE_OF_DECLARE(h8300_8bit, "renesas,8bit-timer", h8300_8timer_init); diff --git a/drivers/clocksource/h8300_tpu.c b/drivers/clocksource/h8300_tpu.c index 4faf718b30f3..7bdf1991c847 100644 --- a/drivers/clocksource/h8300_tpu.c +++ b/drivers/clocksource/h8300_tpu.c @@ 
-154,4 +154,4 @@ free_clk: return ret; } -CLOCKSOURCE_OF_DECLARE_RET(h8300_tpu, "renesas,tpu", h8300_tpu_init); +CLOCKSOURCE_OF_DECLARE(h8300_tpu, "renesas,tpu", h8300_tpu_init); diff --git a/drivers/clocksource/meson6_timer.c b/drivers/clocksource/meson6_timer.c index 3a6e78f62b19..52af591a9fc7 100644 --- a/drivers/clocksource/meson6_timer.c +++ b/drivers/clocksource/meson6_timer.c @@ -174,5 +174,5 @@ static int __init meson6_timer_init(struct device_node *node) 1, 0xfffe); return 0; } -CLOCKSOURCE_OF_DECLARE_RET(meson6, "amlogic,meson6-timer", +CLOCKSOURCE_OF_DECLARE(meson6, "amlogic,meson6-timer", meson6_timer_init); diff --git a/drivers/clocksource/mips-gic-timer.c b/drivers/clocksource/mips-gic-timer.c index b164b8712f39..1572c7a778ab 100644 --- a/drivers/clocksource/mips-gic-timer.c +++ b/drivers/clocksource/mips-gic-timer.c @@ -222,5 +222,5 @@ static void __init gic_clocksource_of_init(struct device_node *node) return 0; } -CLOCKSOURCE_OF_DECLARE_RET(mips_gic_timer, "mti,gic-timer", +CLOCKSOURCE_OF_DECLARE(mips_gic_timer, "mti,gic-timer", gic_clocksource_of_init); diff --git a/drivers/clocksource/moxart_timer.c b/drivers/clocksource/moxart_timer.c index b9c30cd035bf..841454417acd 100644 --- a/drivers/clocksource/moxart_timer.c +++ b/drivers/clocksource/moxart_timer.c @@ -178,4 +178,4 @@ static int __init moxart_timer_init(struct device_node *node) return 0; } -CLOCKSOURCE_OF_DECLARE_RET(moxart, "moxa,moxart-timer", moxart_timer_init); +CLOCKSOURCE_OF_DECLARE(moxart, "moxa,moxart-timer", moxart_timer_init); diff --git a/drivers/clocksource/mps2-timer.c b/drivers/clocksource/mps2-timer.c index c303fa9ba87f..3e4431ed9aa9 100644 --- a/drivers/clocksource/mps2-timer.c +++ b/drivers/clocksource/mps2-timer.c @@ -274,4 +274,4 @@ static int __init mps2_timer_init(struct device_node *np) return 0; } -CLOCKSOURCE_OF_DECLARE_RET(mps2_timer, "arm,mps2-timer", mps2_timer_init); +CLOCKSOURCE_OF_DECLARE(mps2_timer, "arm,mps2-timer", mps2_timer_init); diff --git a/drivers/clocksource/mtk_timer.c b/drivers/clocksource/mtk_timer.c index 432a2c0884a9..90659493c59c 100644 --- a/drivers/clocksource/mtk_timer.c +++ b/drivers/clocksource/mtk_timer.c @@ -265,4 +265,4 @@ err_kzalloc: return -EINVAL; } -CLOCKSOURCE_OF_DECLARE_RET(mtk_mt6577, "mediatek,mt6577-timer", mtk_timer_init); +CLOCKSOURCE_OF_DECLARE(mtk_mt6577, "mediatek,mt6577-timer", mtk_timer_init); diff --git a/drivers/clocksource/mxs_timer.c b/drivers/clocksource/mxs_timer.c index 17b9d1991178..630a8d3904bb 100644 --- a/drivers/clocksource/mxs_timer.c +++ b/drivers/clocksource/mxs_timer.c @@ -295,4 +295,4 @@ static int __init mxs_timer_init(struct device_node *np) return setup_irq(irq, &mxs_timer_irq); } -CLOCKSOURCE_OF_DECLARE_RET(mxs, "fsl,timrot", mxs_timer_init); +CLOCKSOURCE_OF_DECLARE(mxs, "fsl,timrot", mxs_timer_init); diff --git a/drivers/clocksource/nomadik-mtu.c b/drivers/clocksource/nomadik-mtu.c index d2be5b3e0318..3c124d1ca600 100644 --- a/drivers/clocksource/nomadik-mtu.c +++ b/drivers/clocksource/nomadik-mtu.c @@ -284,5 +284,5 @@ static int __init nmdk_timer_of_init(struct device_node *node) return nmdk_timer_init(base, irq, pclk, clk); } -CLOCKSOURCE_OF_DECLARE_RET(nomadik_mtu, "st,nomadik-mtu", +CLOCKSOURCE_OF_DECLARE(nomadik_mtu, "st,nomadik-mtu", nmdk_timer_of_init); diff --git a/drivers/clocksource/pxa_timer.c b/drivers/clocksource/pxa_timer.c index 59af75cc4c74..937e10b84d58 100644 --- a/drivers/clocksource/pxa_timer.c +++ b/drivers/clocksource/pxa_timer.c @@ -213,7 +213,7 @@ static int __init pxa_timer_dt_init(struct 
device_node *np) return pxa_timer_common_init(irq, clk_get_rate(clk)); } -CLOCKSOURCE_OF_DECLARE_RET(pxa_timer, "marvell,pxa-timer", pxa_timer_dt_init); +CLOCKSOURCE_OF_DECLARE(pxa_timer, "marvell,pxa-timer", pxa_timer_dt_init); /* * Legacy timer init for non device-tree boards. diff --git a/drivers/clocksource/qcom-timer.c b/drivers/clocksource/qcom-timer.c index 79f73bddc5f4..662576339049 100644 --- a/drivers/clocksource/qcom-timer.c +++ b/drivers/clocksource/qcom-timer.c @@ -273,5 +273,5 @@ static int __init msm_dt_timer_init(struct device_node *np) return msm_timer_init(freq, 32, irq, !!percpu_offset); } -CLOCKSOURCE_OF_DECLARE_RET(kpss_timer, "qcom,kpss-timer", msm_dt_timer_init); -CLOCKSOURCE_OF_DECLARE_RET(scss_timer, "qcom,scss-timer", msm_dt_timer_init); +CLOCKSOURCE_OF_DECLARE(kpss_timer, "qcom,kpss-timer", msm_dt_timer_init); +CLOCKSOURCE_OF_DECLARE(scss_timer, "qcom,scss-timer", msm_dt_timer_init); diff --git a/drivers/clocksource/rockchip_timer.c b/drivers/clocksource/rockchip_timer.c index 85aee6953944..23e267acba25 100644 --- a/drivers/clocksource/rockchip_timer.c +++ b/drivers/clocksource/rockchip_timer.c @@ -205,7 +205,7 @@ static int __init rk3399_timer_init(struct device_node *np) return rk_timer_init(np, TIMER_CONTROL_REG3399); } -CLOCKSOURCE_OF_DECLARE_RET(rk3288_timer, "rockchip,rk3288-timer", - rk3288_timer_init); -CLOCKSOURCE_OF_DECLARE_RET(rk3399_timer, "rockchip,rk3399-timer", - rk3399_timer_init); +CLOCKSOURCE_OF_DECLARE(rk3288_timer, "rockchip,rk3288-timer", + rk3288_timer_init); +CLOCKSOURCE_OF_DECLARE(rk3399_timer, "rockchip,rk3399-timer", + rk3399_timer_init); diff --git a/drivers/clocksource/samsung_pwm_timer.c b/drivers/clocksource/samsung_pwm_timer.c index 27a9797e8187..54565bd0093b 100644 --- a/drivers/clocksource/samsung_pwm_timer.c +++ b/drivers/clocksource/samsung_pwm_timer.c @@ -466,7 +466,7 @@ static int __init s3c2410_pwm_clocksource_init(struct device_node *np) { return samsung_pwm_alloc(np, &s3c24xx_variant); } -CLOCKSOURCE_OF_DECLARE_RET(s3c2410_pwm, "samsung,s3c2410-pwm", s3c2410_pwm_clocksource_init); +CLOCKSOURCE_OF_DECLARE(s3c2410_pwm, "samsung,s3c2410-pwm", s3c2410_pwm_clocksource_init); static const struct samsung_pwm_variant s3c64xx_variant = { .bits = 32, @@ -479,7 +479,7 @@ static int __init s3c64xx_pwm_clocksource_init(struct device_node *np) { return samsung_pwm_alloc(np, &s3c64xx_variant); } -CLOCKSOURCE_OF_DECLARE_RET(s3c6400_pwm, "samsung,s3c6400-pwm", s3c64xx_pwm_clocksource_init); +CLOCKSOURCE_OF_DECLARE(s3c6400_pwm, "samsung,s3c6400-pwm", s3c64xx_pwm_clocksource_init); static const struct samsung_pwm_variant s5p64x0_variant = { .bits = 32, @@ -492,7 +492,7 @@ static int __init s5p64x0_pwm_clocksource_init(struct device_node *np) { return samsung_pwm_alloc(np, &s5p64x0_variant); } -CLOCKSOURCE_OF_DECLARE_RET(s5p6440_pwm, "samsung,s5p6440-pwm", s5p64x0_pwm_clocksource_init); +CLOCKSOURCE_OF_DECLARE(s5p6440_pwm, "samsung,s5p6440-pwm", s5p64x0_pwm_clocksource_init); static const struct samsung_pwm_variant s5p_variant = { .bits = 32, @@ -505,5 +505,5 @@ static int __init s5p_pwm_clocksource_init(struct device_node *np) { return samsung_pwm_alloc(np, &s5p_variant); } -CLOCKSOURCE_OF_DECLARE_RET(s5pc100_pwm, "samsung,s5pc100-pwm", s5p_pwm_clocksource_init); +CLOCKSOURCE_OF_DECLARE(s5pc100_pwm, "samsung,s5pc100-pwm", s5p_pwm_clocksource_init); #endif diff --git a/drivers/clocksource/sun4i_timer.c b/drivers/clocksource/sun4i_timer.c index 445373091fe8..97669ee4df2a 100644 --- a/drivers/clocksource/sun4i_timer.c +++ 
b/drivers/clocksource/sun4i_timer.c @@ -226,5 +226,5 @@ static int __init sun4i_timer_init(struct device_node *node) return ret; } -CLOCKSOURCE_OF_DECLARE_RET(sun4i, "allwinner,sun4i-a10-timer", +CLOCKSOURCE_OF_DECLARE(sun4i, "allwinner,sun4i-a10-timer", sun4i_timer_init); diff --git a/drivers/clocksource/tango_xtal.c b/drivers/clocksource/tango_xtal.c index 7dc716cc976f..12fcef8cf2d3 100644 --- a/drivers/clocksource/tango_xtal.c +++ b/drivers/clocksource/tango_xtal.c @@ -53,4 +53,4 @@ static int __init tango_clocksource_init(struct device_node *np) return 0; } -CLOCKSOURCE_OF_DECLARE_RET(tango, "sigma,tick-counter", tango_clocksource_init); +CLOCKSOURCE_OF_DECLARE(tango, "sigma,tick-counter", tango_clocksource_init); diff --git a/drivers/clocksource/tegra20_timer.c b/drivers/clocksource/tegra20_timer.c index 543c37e3a62c..f960891aa04e 100644 --- a/drivers/clocksource/tegra20_timer.c +++ b/drivers/clocksource/tegra20_timer.c @@ -237,7 +237,7 @@ static int __init tegra20_init_timer(struct device_node *np) return 0; } -CLOCKSOURCE_OF_DECLARE_RET(tegra20_timer, "nvidia,tegra20-timer", tegra20_init_timer); +CLOCKSOURCE_OF_DECLARE(tegra20_timer, "nvidia,tegra20-timer", tegra20_init_timer); static int __init tegra20_init_rtc(struct device_node *np) { @@ -261,4 +261,4 @@ static int __init tegra20_init_rtc(struct device_node *np) return register_persistent_clock(NULL, tegra_read_persistent_clock64); } -CLOCKSOURCE_OF_DECLARE_RET(tegra20_rtc, "nvidia,tegra20-rtc", tegra20_init_rtc); +CLOCKSOURCE_OF_DECLARE(tegra20_rtc, "nvidia,tegra20-rtc", tegra20_init_rtc); diff --git a/drivers/clocksource/time-armada-370-xp.c b/drivers/clocksource/time-armada-370-xp.c index bc4ab48d4048..a4e59239f28e 100644 --- a/drivers/clocksource/time-armada-370-xp.c +++ b/drivers/clocksource/time-armada-370-xp.c @@ -371,7 +371,7 @@ static int __init armada_xp_timer_init(struct device_node *np) return armada_370_xp_timer_common_init(np); } -CLOCKSOURCE_OF_DECLARE_RET(armada_xp, "marvell,armada-xp-timer", +CLOCKSOURCE_OF_DECLARE(armada_xp, "marvell,armada-xp-timer", armada_xp_timer_init); static int __init armada_375_timer_init(struct device_node *np) @@ -409,7 +409,7 @@ static int __init armada_375_timer_init(struct device_node *np) return armada_370_xp_timer_common_init(np); } -CLOCKSOURCE_OF_DECLARE_RET(armada_375, "marvell,armada-375-timer", +CLOCKSOURCE_OF_DECLARE(armada_375, "marvell,armada-375-timer", armada_375_timer_init); static int __init armada_370_timer_init(struct device_node *np) @@ -432,5 +432,5 @@ static int __init armada_370_timer_init(struct device_node *np) return armada_370_xp_timer_common_init(np); } -CLOCKSOURCE_OF_DECLARE_RET(armada_370, "marvell,armada-370-timer", +CLOCKSOURCE_OF_DECLARE(armada_370, "marvell,armada-370-timer", armada_370_timer_init); diff --git a/drivers/clocksource/time-efm32.c b/drivers/clocksource/time-efm32.c index 6e2f79fd6ad0..5ac344b383e1 100644 --- a/drivers/clocksource/time-efm32.c +++ b/drivers/clocksource/time-efm32.c @@ -283,5 +283,5 @@ static int __init efm32_timer_init(struct device_node *np) return ret; } -CLOCKSOURCE_OF_DECLARE_RET(efm32compat, "efm32,timer", efm32_timer_init); -CLOCKSOURCE_OF_DECLARE_RET(efm32, "energymicro,efm32-timer", efm32_timer_init); +CLOCKSOURCE_OF_DECLARE(efm32compat, "efm32,timer", efm32_timer_init); +CLOCKSOURCE_OF_DECLARE(efm32, "energymicro,efm32-timer", efm32_timer_init); diff --git a/drivers/clocksource/time-lpc32xx.c b/drivers/clocksource/time-lpc32xx.c index cb5b8665ff82..9649cfdb9213 100644 --- a/drivers/clocksource/time-lpc32xx.c +++ 
b/drivers/clocksource/time-lpc32xx.c @@ -311,4 +311,4 @@ static int __init lpc32xx_timer_init(struct device_node *np) return ret; } -CLOCKSOURCE_OF_DECLARE_RET(lpc32xx_timer, "nxp,lpc3220-timer", lpc32xx_timer_init); +CLOCKSOURCE_OF_DECLARE(lpc32xx_timer, "nxp,lpc3220-timer", lpc32xx_timer_init); diff --git a/drivers/clocksource/time-orion.c b/drivers/clocksource/time-orion.c index 5fdeb5dcc4cf..a28f496e97cf 100644 --- a/drivers/clocksource/time-orion.c +++ b/drivers/clocksource/time-orion.c @@ -167,4 +167,4 @@ static int __init orion_timer_init(struct device_node *np) return 0; } -CLOCKSOURCE_OF_DECLARE_RET(orion_timer, "marvell,orion-timer", orion_timer_init); +CLOCKSOURCE_OF_DECLARE(orion_timer, "marvell,orion-timer", orion_timer_init); diff --git a/drivers/clocksource/time-pistachio.c b/drivers/clocksource/time-pistachio.c index adaaec5e9abd..a7d9a08e4b0e 100644 --- a/drivers/clocksource/time-pistachio.c +++ b/drivers/clocksource/time-pistachio.c @@ -214,5 +214,5 @@ static int __init pistachio_clksrc_of_init(struct device_node *node) sched_clock_register(pistachio_read_sched_clock, 32, rate); return clocksource_register_hz(&pcs_gpt.cs, rate); } -CLOCKSOURCE_OF_DECLARE_RET(pistachio_gptimer, "img,pistachio-gptimer", +CLOCKSOURCE_OF_DECLARE(pistachio_gptimer, "img,pistachio-gptimer", pistachio_clksrc_of_init); diff --git a/drivers/clocksource/timer-atlas7.c b/drivers/clocksource/timer-atlas7.c index 7b1a0071a211..90f8fbc154a4 100644 --- a/drivers/clocksource/timer-atlas7.c +++ b/drivers/clocksource/timer-atlas7.c @@ -304,4 +304,4 @@ static int __init sirfsoc_of_timer_init(struct device_node *np) return sirfsoc_atlas7_timer_init(np); } -CLOCKSOURCE_OF_DECLARE_RET(sirfsoc_atlas7_timer, "sirf,atlas7-tick", sirfsoc_of_timer_init); +CLOCKSOURCE_OF_DECLARE(sirfsoc_atlas7_timer, "sirf,atlas7-tick", sirfsoc_of_timer_init); diff --git a/drivers/clocksource/timer-atmel-pit.c b/drivers/clocksource/timer-atmel-pit.c index ffaca7c2c996..1ffac0cb0cb7 100644 --- a/drivers/clocksource/timer-atmel-pit.c +++ b/drivers/clocksource/timer-atmel-pit.c @@ -270,5 +270,5 @@ static int __init at91sam926x_pit_dt_init(struct device_node *node) return at91sam926x_pit_common_init(data); } -CLOCKSOURCE_OF_DECLARE_RET(at91sam926x_pit, "atmel,at91sam9260-pit", +CLOCKSOURCE_OF_DECLARE(at91sam926x_pit, "atmel,at91sam9260-pit", at91sam926x_pit_dt_init); diff --git a/drivers/clocksource/timer-atmel-st.c b/drivers/clocksource/timer-atmel-st.c index e9331d36965b..e90ab5b63a90 100644 --- a/drivers/clocksource/timer-atmel-st.c +++ b/drivers/clocksource/timer-atmel-st.c @@ -260,5 +260,5 @@ static int __init atmel_st_timer_init(struct device_node *node) /* register clocksource */ return clocksource_register_hz(&clk32k, sclk_rate); } -CLOCKSOURCE_OF_DECLARE_RET(atmel_st_timer, "atmel,at91rm9200-st", +CLOCKSOURCE_OF_DECLARE(atmel_st_timer, "atmel,at91rm9200-st", atmel_st_timer_init); diff --git a/drivers/clocksource/timer-digicolor.c b/drivers/clocksource/timer-digicolor.c index b929061ebe56..10318cc99c0e 100644 --- a/drivers/clocksource/timer-digicolor.c +++ b/drivers/clocksource/timer-digicolor.c @@ -202,5 +202,5 @@ static int __init digicolor_timer_init(struct device_node *node) return 0; } -CLOCKSOURCE_OF_DECLARE_RET(conexant_digicolor, "cnxt,cx92755-timer", +CLOCKSOURCE_OF_DECLARE(conexant_digicolor, "cnxt,cx92755-timer", digicolor_timer_init); diff --git a/drivers/clocksource/timer-imx-gpt.c b/drivers/clocksource/timer-imx-gpt.c index d5640a747078..f595460bfc58 100644 --- a/drivers/clocksource/timer-imx-gpt.c +++ 
b/drivers/clocksource/timer-imx-gpt.c @@ -545,15 +545,15 @@ static int __init imx6dl_timer_init_dt(struct device_node *np) return mxc_timer_init_dt(np, GPT_TYPE_IMX6DL); } -CLOCKSOURCE_OF_DECLARE_RET(imx1_timer, "fsl,imx1-gpt", imx1_timer_init_dt); -CLOCKSOURCE_OF_DECLARE_RET(imx21_timer, "fsl,imx21-gpt", imx21_timer_init_dt); -CLOCKSOURCE_OF_DECLARE_RET(imx27_timer, "fsl,imx27-gpt", imx21_timer_init_dt); -CLOCKSOURCE_OF_DECLARE_RET(imx31_timer, "fsl,imx31-gpt", imx31_timer_init_dt); -CLOCKSOURCE_OF_DECLARE_RET(imx25_timer, "fsl,imx25-gpt", imx31_timer_init_dt); -CLOCKSOURCE_OF_DECLARE_RET(imx50_timer, "fsl,imx50-gpt", imx31_timer_init_dt); -CLOCKSOURCE_OF_DECLARE_RET(imx51_timer, "fsl,imx51-gpt", imx31_timer_init_dt); -CLOCKSOURCE_OF_DECLARE_RET(imx53_timer, "fsl,imx53-gpt", imx31_timer_init_dt); -CLOCKSOURCE_OF_DECLARE_RET(imx6q_timer, "fsl,imx6q-gpt", imx31_timer_init_dt); -CLOCKSOURCE_OF_DECLARE_RET(imx6dl_timer, "fsl,imx6dl-gpt", imx6dl_timer_init_dt); -CLOCKSOURCE_OF_DECLARE_RET(imx6sl_timer, "fsl,imx6sl-gpt", imx6dl_timer_init_dt); -CLOCKSOURCE_OF_DECLARE_RET(imx6sx_timer, "fsl,imx6sx-gpt", imx6dl_timer_init_dt); +CLOCKSOURCE_OF_DECLARE(imx1_timer, "fsl,imx1-gpt", imx1_timer_init_dt); +CLOCKSOURCE_OF_DECLARE(imx21_timer, "fsl,imx21-gpt", imx21_timer_init_dt); +CLOCKSOURCE_OF_DECLARE(imx27_timer, "fsl,imx27-gpt", imx21_timer_init_dt); +CLOCKSOURCE_OF_DECLARE(imx31_timer, "fsl,imx31-gpt", imx31_timer_init_dt); +CLOCKSOURCE_OF_DECLARE(imx25_timer, "fsl,imx25-gpt", imx31_timer_init_dt); +CLOCKSOURCE_OF_DECLARE(imx50_timer, "fsl,imx50-gpt", imx31_timer_init_dt); +CLOCKSOURCE_OF_DECLARE(imx51_timer, "fsl,imx51-gpt", imx31_timer_init_dt); +CLOCKSOURCE_OF_DECLARE(imx53_timer, "fsl,imx53-gpt", imx31_timer_init_dt); +CLOCKSOURCE_OF_DECLARE(imx6q_timer, "fsl,imx6q-gpt", imx31_timer_init_dt); +CLOCKSOURCE_OF_DECLARE(imx6dl_timer, "fsl,imx6dl-gpt", imx6dl_timer_init_dt); +CLOCKSOURCE_OF_DECLARE(imx6sl_timer, "fsl,imx6sl-gpt", imx6dl_timer_init_dt); +CLOCKSOURCE_OF_DECLARE(imx6sx_timer, "fsl,imx6sx-gpt", imx6dl_timer_init_dt); diff --git a/drivers/clocksource/timer-integrator-ap.c b/drivers/clocksource/timer-integrator-ap.c index 675faceab5d6..df6e672afc04 100644 --- a/drivers/clocksource/timer-integrator-ap.c +++ b/drivers/clocksource/timer-integrator-ap.c @@ -232,5 +232,5 @@ static int __init integrator_ap_timer_init_of(struct device_node *node) return 0; } -CLOCKSOURCE_OF_DECLARE_RET(integrator_ap_timer, "arm,integrator-timer", +CLOCKSOURCE_OF_DECLARE(integrator_ap_timer, "arm,integrator-timer", integrator_ap_timer_init_of); diff --git a/drivers/clocksource/timer-keystone.c b/drivers/clocksource/timer-keystone.c index 4199823b2ec9..ab68a47ab3b4 100644 --- a/drivers/clocksource/timer-keystone.c +++ b/drivers/clocksource/timer-keystone.c @@ -226,5 +226,5 @@ err: return error; } -CLOCKSOURCE_OF_DECLARE_RET(keystone_timer, "ti,keystone-timer", +CLOCKSOURCE_OF_DECLARE(keystone_timer, "ti,keystone-timer", keystone_timer_init); diff --git a/drivers/clocksource/timer-nps.c b/drivers/clocksource/timer-nps.c index b5c7b2bd77bd..70c149af8ee0 100644 --- a/drivers/clocksource/timer-nps.c +++ b/drivers/clocksource/timer-nps.c @@ -96,5 +96,5 @@ static int __init nps_timer_init(struct device_node *node) return nps_setup_clocksource(node, clk); } -CLOCKSOURCE_OF_DECLARE_RET(ezchip_nps400_clksrc, "ezchip,nps400-timer", - nps_timer_init); +CLOCKSOURCE_OF_DECLARE(ezchip_nps400_clksrc, "ezchip,nps400-timer", + nps_timer_init); diff --git a/drivers/clocksource/timer-oxnas-rps.c 
b/drivers/clocksource/timer-oxnas-rps.c index 0d99f40ee757..bd887e2a8cf8 100644 --- a/drivers/clocksource/timer-oxnas-rps.c +++ b/drivers/clocksource/timer-oxnas-rps.c @@ -293,5 +293,5 @@ err_alloc: return ret; } -CLOCKSOURCE_OF_DECLARE_RET(ox810se_rps, - "oxsemi,ox810se-rps-timer", oxnas_rps_timer_init); +CLOCKSOURCE_OF_DECLARE(ox810se_rps, + "oxsemi,ox810se-rps-timer", oxnas_rps_timer_init); diff --git a/drivers/clocksource/timer-prima2.c b/drivers/clocksource/timer-prima2.c index 7b1084d0b45e..dae8a66301d7 100644 --- a/drivers/clocksource/timer-prima2.c +++ b/drivers/clocksource/timer-prima2.c @@ -246,5 +246,5 @@ static int __init sirfsoc_prima2_timer_init(struct device_node *np) return 0; } -CLOCKSOURCE_OF_DECLARE_RET(sirfsoc_prima2_timer, +CLOCKSOURCE_OF_DECLARE(sirfsoc_prima2_timer, "sirf,prima2-tick", sirfsoc_prima2_timer_init); diff --git a/drivers/clocksource/timer-sp804.c b/drivers/clocksource/timer-sp804.c index 3dc47efc9298..d07863388e05 100644 --- a/drivers/clocksource/timer-sp804.c +++ b/drivers/clocksource/timer-sp804.c @@ -287,7 +287,7 @@ err: iounmap(base); return ret; } -CLOCKSOURCE_OF_DECLARE_RET(sp804, "arm,sp804", sp804_of_init); +CLOCKSOURCE_OF_DECLARE(sp804, "arm,sp804", sp804_of_init); static int __init integrator_cp_of_init(struct device_node *np) { @@ -335,4 +335,4 @@ err: iounmap(base); return ret; } -CLOCKSOURCE_OF_DECLARE_RET(intcp, "arm,integrator-cp-timer", integrator_cp_of_init); +CLOCKSOURCE_OF_DECLARE(intcp, "arm,integrator-cp-timer", integrator_cp_of_init); diff --git a/drivers/clocksource/timer-stm32.c b/drivers/clocksource/timer-stm32.c index d5bf352905c8..1b2574c4fb97 100644 --- a/drivers/clocksource/timer-stm32.c +++ b/drivers/clocksource/timer-stm32.c @@ -187,4 +187,4 @@ err_clk_get: return ret; } -CLOCKSOURCE_OF_DECLARE_RET(stm32, "st,stm32-timer", stm32_clockevent_init); +CLOCKSOURCE_OF_DECLARE(stm32, "st,stm32-timer", stm32_clockevent_init); diff --git a/drivers/clocksource/timer-sun5i.c b/drivers/clocksource/timer-sun5i.c index f0a3ffbab431..c184eb84101e 100644 --- a/drivers/clocksource/timer-sun5i.c +++ b/drivers/clocksource/timer-sun5i.c @@ -346,7 +346,7 @@ static int __init sun5i_timer_init(struct device_node *node) return sun5i_setup_clockevent(node, timer_base, clk, irq); } -CLOCKSOURCE_OF_DECLARE_RET(sun5i_a13, "allwinner,sun5i-a13-hstimer", +CLOCKSOURCE_OF_DECLARE(sun5i_a13, "allwinner,sun5i-a13-hstimer", sun5i_timer_init); -CLOCKSOURCE_OF_DECLARE_RET(sun7i_a20, "allwinner,sun7i-a20-hstimer", +CLOCKSOURCE_OF_DECLARE(sun7i_a20, "allwinner,sun7i-a20-hstimer", sun5i_timer_init); diff --git a/drivers/clocksource/timer-ti-32k.c b/drivers/clocksource/timer-ti-32k.c index e4ad3c6e03f9..92b7e390f6c8 100644 --- a/drivers/clocksource/timer-ti-32k.c +++ b/drivers/clocksource/timer-ti-32k.c @@ -124,5 +124,5 @@ static int __init ti_32k_timer_init(struct device_node *np) return 0; } -CLOCKSOURCE_OF_DECLARE_RET(ti_32k_timer, "ti,omap-counter32k", +CLOCKSOURCE_OF_DECLARE(ti_32k_timer, "ti,omap-counter32k", ti_32k_timer_init); diff --git a/drivers/clocksource/timer-u300.c b/drivers/clocksource/timer-u300.c index a6a0dec41faa..704e40c6f151 100644 --- a/drivers/clocksource/timer-u300.c +++ b/drivers/clocksource/timer-u300.c @@ -458,5 +458,5 @@ static int __init u300_timer_init_of(struct device_node *np) return 0; } -CLOCKSOURCE_OF_DECLARE_RET(u300_timer, "stericsson,u300-apptimer", +CLOCKSOURCE_OF_DECLARE(u300_timer, "stericsson,u300-apptimer", u300_timer_init_of); diff --git a/drivers/clocksource/versatile.c b/drivers/clocksource/versatile.c index 
8daeffac300b..220b490a8142 100644 --- a/drivers/clocksource/versatile.c +++ b/drivers/clocksource/versatile.c @@ -38,7 +38,7 @@ static int __init versatile_sched_clock_init(struct device_node *node) return 0; } -CLOCKSOURCE_OF_DECLARE_RET(vexpress, "arm,vexpress-sysreg", +CLOCKSOURCE_OF_DECLARE(vexpress, "arm,vexpress-sysreg", versatile_sched_clock_init); -CLOCKSOURCE_OF_DECLARE_RET(versatile, "arm,versatile-sysreg", +CLOCKSOURCE_OF_DECLARE(versatile, "arm,versatile-sysreg", versatile_sched_clock_init); diff --git a/drivers/clocksource/vf_pit_timer.c b/drivers/clocksource/vf_pit_timer.c index ca4dff4d5684..55d8d8402d90 100644 --- a/drivers/clocksource/vf_pit_timer.c +++ b/drivers/clocksource/vf_pit_timer.c @@ -201,4 +201,4 @@ static int __init pit_timer_init(struct device_node *np) return pit_clockevent_init(clk_rate, irq); } -CLOCKSOURCE_OF_DECLARE_RET(vf610, "fsl,vf610-pit", pit_timer_init); +CLOCKSOURCE_OF_DECLARE(vf610, "fsl,vf610-pit", pit_timer_init); diff --git a/drivers/clocksource/vt8500_timer.c b/drivers/clocksource/vt8500_timer.c index 1bc8707b4f9d..b15069483fbd 100644 --- a/drivers/clocksource/vt8500_timer.c +++ b/drivers/clocksource/vt8500_timer.c @@ -165,4 +165,4 @@ static int __init vt8500_timer_init(struct device_node *np) return 0; } -CLOCKSOURCE_OF_DECLARE_RET(vt8500, "via,vt8500-timer", vt8500_timer_init); +CLOCKSOURCE_OF_DECLARE(vt8500, "via,vt8500-timer", vt8500_timer_init); diff --git a/drivers/clocksource/zevio-timer.c b/drivers/clocksource/zevio-timer.c index cb4cf056d56d..9a53f5ef6157 100644 --- a/drivers/clocksource/zevio-timer.c +++ b/drivers/clocksource/zevio-timer.c @@ -215,4 +215,4 @@ static int __init zevio_timer_init(struct device_node *node) return zevio_timer_add(node); } -CLOCKSOURCE_OF_DECLARE_RET(zevio_timer, "lsi,zevio-timer", zevio_timer_init); +CLOCKSOURCE_OF_DECLARE(zevio_timer, "lsi,zevio-timer", zevio_timer_init); diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h index 8c6c626285c0..6a67ab94b553 100644 --- a/include/asm-generic/vmlinux.lds.h +++ b/include/asm-generic/vmlinux.lds.h @@ -173,7 +173,6 @@ *(__##name##_of_table_end) #define CLKSRC_OF_TABLES() OF_TABLE(CONFIG_CLKSRC_OF, clksrc) -#define CLKSRC_RET_OF_TABLES() OF_TABLE(CONFIG_CLKSRC_OF, clksrc_ret) #define IRQCHIP_OF_MATCH_TABLE() OF_TABLE(CONFIG_IRQCHIP, irqchip) #define CLK_OF_TABLES() OF_TABLE(CONFIG_COMMON_CLK, clk) #define IOMMU_OF_TABLES() OF_TABLE(CONFIG_OF_IOMMU, iommu) @@ -532,7 +531,6 @@ CLK_OF_TABLES() \ RESERVEDMEM_OF_TABLES() \ CLKSRC_OF_TABLES() \ - CLKSRC_RET_OF_TABLES() \ IOMMU_OF_TABLES() \ CPU_METHOD_OF_TABLES() \ CPUIDLE_METHOD_OF_TABLES() \ diff --git a/include/linux/clocksource.h b/include/linux/clocksource.h index 15c3839850f4..08398182f56e 100644 --- a/include/linux/clocksource.h +++ b/include/linux/clocksource.h @@ -244,10 +244,7 @@ extern int clocksource_mmio_init(void __iomem *, const char *, extern int clocksource_i8253_init(void); #define CLOCKSOURCE_OF_DECLARE(name, compat, fn) \ - OF_DECLARE_1(clksrc, name, compat, fn) - -#define CLOCKSOURCE_OF_DECLARE_RET(name, compat, fn) \ - OF_DECLARE_1_RET(clksrc_ret, name, compat, fn) + OF_DECLARE_1_RET(clksrc, name, compat, fn) #ifdef CONFIG_CLKSRC_PROBE extern void clocksource_probe(void); -- cgit v1.2.3 From 2ea879a7cf544e05c7cd0bfd569857ec2a8a75a9 Mon Sep 17 00:00:00 2001 From: Daniel Lezcano Date: Thu, 2 Jun 2016 18:35:38 +0200 Subject: clocksource/drivers/bcm2835: Add the COMPILE_TEST option Change the Kconfig option logic to fullfil with the current approach. 
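The shape of the change, shared by this and the following clocksource COMPILE_TEST commits, is roughly the sketch below (FOO_TIMER and ARCH_FOO are placeholders; the real option names and help texts appear in the hunks that follow):

    config FOO_TIMER
            bool "Foo timer driver" if COMPILE_TEST
            depends on GENERIC_CLOCKEVENTS
            select CLKSRC_MMIO
            help
              Enables support for the Foo timer.

    # in the platform's Kconfig
    config ARCH_FOO
            ...
            select FOO_TIMER

The driver's Makefile rule is then keyed off CONFIG_FOO_TIMER rather than the platform symbol, so the timer driver can still be built for compile testing without the platform selected.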
A new Kconfig option is added, CONFIG_BCM2835_TIMER and is selected by the platform. Then the clocksource's Kconfig is changed to make this option selectable by the user if the COMPILE_TEST option is set. Otherwise, it is up to the platform's Kconfig to select the timer. Signed-off-by: Daniel Lezcano --- arch/arm/mach-bcm/Kconfig | 1 + drivers/clocksource/Kconfig | 7 +++++++ drivers/clocksource/Makefile | 2 +- 3 files changed, 9 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/arm/mach-bcm/Kconfig b/arch/arm/mach-bcm/Kconfig index 68ab6412392a..1284ce1d03e0 100644 --- a/arch/arm/mach-bcm/Kconfig +++ b/arch/arm/mach-bcm/Kconfig @@ -143,6 +143,7 @@ config ARCH_BCM2835 select ARM_TIMER_SP804 select HAVE_ARM_ARCH_TIMER if ARCH_MULTI_V7 select CLKSRC_OF + select BCM2835_TIMER select PINCTRL select PINCTRL_BCM2835 help diff --git a/drivers/clocksource/Kconfig b/drivers/clocksource/Kconfig index d4b9e04ee1a4..0275b166a598 100644 --- a/drivers/clocksource/Kconfig +++ b/drivers/clocksource/Kconfig @@ -27,6 +27,13 @@ config CLKBLD_I8253 config CLKSRC_MMIO bool +config BCM2835_TIMER + bool "BCM2835 timer driver" if COMPILE_TEST + depends on GENERIC_CLOCKEVENTS + select CLKSRC_MMIO + help + Enables the support for the BCM2835 timer driver. + config DIGICOLOR_TIMER bool "Digicolor timer driver" if COMPILE_TEST depends on GENERIC_CLOCKEVENTS diff --git a/drivers/clocksource/Makefile b/drivers/clocksource/Makefile index bc66981c8cc7..008d8a0da752 100644 --- a/drivers/clocksource/Makefile +++ b/drivers/clocksource/Makefile @@ -19,7 +19,7 @@ obj-$(CONFIG_CLKSRC_NOMADIK_MTU) += nomadik-mtu.o obj-$(CONFIG_CLKSRC_DBX500_PRCMU) += clksrc-dbx500-prcmu.o obj-$(CONFIG_ARMADA_370_XP_TIMER) += time-armada-370-xp.o obj-$(CONFIG_ORION_TIMER) += time-orion.o -obj-$(CONFIG_ARCH_BCM2835) += bcm2835_timer.o +obj-$(CONFIG_BCM2835_TIMER) += bcm2835_timer.o obj-$(CONFIG_ARCH_CLPS711X) += clps711x-timer.o obj-$(CONFIG_ARCH_ATLAS7) += timer-atlas7.o obj-$(CONFIG_ARCH_MOXART) += moxart_timer.o -- cgit v1.2.3 From 1cad71e35f88bd41b954e9984c7d2a8ce3924db0 Mon Sep 17 00:00:00 2001 From: Daniel Lezcano Date: Thu, 2 Jun 2016 19:20:36 +0200 Subject: clocksource/drivers/bcm_kona: Add the COMPILE_TEST option Change the Kconfig option logic to fullfil with the current approach. A new Kconfig option is added, CONFIG_BCM_KONA_TIMER and is selected by the platform. Then the clocksource's Kconfig is changed to make this option selectable by the user if the COMPILE_TEST option is set. Otherwise, it is up to the platform's Kconfig to select the timer. Signed-off-by: Daniel Lezcano --- arch/arm/mach-bcm/Kconfig | 1 + drivers/clocksource/Kconfig | 7 +++++++ drivers/clocksource/Makefile | 2 +- 3 files changed, 9 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/arm/mach-bcm/Kconfig b/arch/arm/mach-bcm/Kconfig index 1284ce1d03e0..4f1709b31822 100644 --- a/arch/arm/mach-bcm/Kconfig +++ b/arch/arm/mach-bcm/Kconfig @@ -89,6 +89,7 @@ config ARCH_BCM_MOBILE select HAVE_ARM_ARCH_TIMER select PINCTRL select ARCH_BCM_MOBILE_SMP if SMP + select BCM_KONA_TIMER help This enables support for systems based on Broadcom mobile SoCs. diff --git a/drivers/clocksource/Kconfig b/drivers/clocksource/Kconfig index 01ecd8833ed1..a3cccf568a9f 100644 --- a/drivers/clocksource/Kconfig +++ b/drivers/clocksource/Kconfig @@ -34,6 +34,13 @@ config BCM2835_TIMER help Enables the support for the BCM2835 timer driver. 
+config BCM_KONA_TIMER + bool "BCM mobile timer driver" if COMPILE_TEST + depends on GENERIC_CLOCKEVENTS + select CLKSRC_MMIO + help + Enables the support for the BCM Kona mobile timer driver. + config DIGICOLOR_TIMER bool "Digicolor timer driver" if COMPILE_TEST depends on GENERIC_CLOCKEVENTS diff --git a/drivers/clocksource/Makefile b/drivers/clocksource/Makefile index 008d8a0da752..df99c9279b6f 100644 --- a/drivers/clocksource/Makefile +++ b/drivers/clocksource/Makefile @@ -33,7 +33,7 @@ obj-$(CONFIG_MESON6_TIMER) += meson6_timer.o obj-$(CONFIG_TEGRA_TIMER) += tegra20_timer.o obj-$(CONFIG_VT8500_TIMER) += vt8500_timer.o obj-$(CONFIG_ARCH_NSPIRE) += zevio-timer.o -obj-$(CONFIG_ARCH_BCM_MOBILE) += bcm_kona_timer.o +obj-$(CONFIG_BCM_KONA_TIMER) += bcm_kona_timer.o obj-$(CONFIG_CADENCE_TTC_TIMER) += cadence_ttc_timer.o obj-$(CONFIG_CLKSRC_EFM32) += time-efm32.o obj-$(CONFIG_CLKSRC_STM32) += timer-stm32.o -- cgit v1.2.3 From ecf0efdc98cd46886fa119492f052ab3eaba9ddf Mon Sep 17 00:00:00 2001 From: Daniel Lezcano Date: Thu, 2 Jun 2016 20:06:54 +0200 Subject: clocksource/drivers/clps_711x: Add the COMPILE_TEST option Change the Kconfig option logic to fullfil with the current approach. A new Kconfig option is added, CONFIG_CLPS711X_TIMER and is selected by the platform. Then the clocksource's Kconfig is changed to make this option selectable by the user if the COMPILE_TEST option is set. Otherwise, it is up to the platform's Kconfig to select the timer. Signed-off-by: Daniel Lezcano --- arch/arm/Kconfig | 2 +- drivers/clocksource/Kconfig | 7 +++++++ drivers/clocksource/Makefile | 2 +- 3 files changed, 9 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index 90542db1220d..f0636ec94903 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -358,10 +358,10 @@ config ARCH_CLPS711X bool "Cirrus Logic CLPS711x/EP721x/EP731x-based" select ARCH_REQUIRE_GPIOLIB select AUTO_ZRELADDR - select CLKSRC_MMIO select COMMON_CLK select CPU_ARM720T select GENERIC_CLOCKEVENTS + select CLPS711X_TIMER select MFD_SYSCON select SOC_BUS help diff --git a/drivers/clocksource/Kconfig b/drivers/clocksource/Kconfig index a3cccf568a9f..f41eef2fafde 100644 --- a/drivers/clocksource/Kconfig +++ b/drivers/clocksource/Kconfig @@ -155,6 +155,13 @@ config CLKSRC_DBX500_PRCMU help Use the always on PRCMU Timer as clocksource +config CLPS711X_TIMER + bool "Cirrus logic timer driver" if COMPILE_TEST + depends on GENERIC_CLOCKEVENTS + select CLKSRC_MMIO + help + Enables support for the Cirrus Logic PS711 timer. 
+ config CLKSRC_DBX500_PRCMU_SCHED_CLOCK bool "Clocksource PRCMU Timer sched_clock" depends on (CLKSRC_DBX500_PRCMU && !CLKSRC_NOMADIK_MTU_SCHED_CLOCK) diff --git a/drivers/clocksource/Makefile b/drivers/clocksource/Makefile index df99c9279b6f..25e599c4eeb7 100644 --- a/drivers/clocksource/Makefile +++ b/drivers/clocksource/Makefile @@ -20,7 +20,7 @@ obj-$(CONFIG_CLKSRC_DBX500_PRCMU) += clksrc-dbx500-prcmu.o obj-$(CONFIG_ARMADA_370_XP_TIMER) += time-armada-370-xp.o obj-$(CONFIG_ORION_TIMER) += time-orion.o obj-$(CONFIG_BCM2835_TIMER) += bcm2835_timer.o -obj-$(CONFIG_ARCH_CLPS711X) += clps711x-timer.o +obj-$(CONFIG_CLPS711X_TIMER) += clps711x-timer.o obj-$(CONFIG_ARCH_ATLAS7) += timer-atlas7.o obj-$(CONFIG_ARCH_MOXART) += moxart_timer.o obj-$(CONFIG_ARCH_MXS) += mxs_timer.o -- cgit v1.2.3 From b56d5d218499404b2abfd6f33a6d480312bf8a92 Mon Sep 17 00:00:00 2001 From: Daniel Lezcano Date: Fri, 3 Jun 2016 13:11:39 +0200 Subject: clocksource/drivers/atlas7: Add the COMPILE_TEST option Change the Kconfig option logic to fullfil with the current approach. A new Kconfig option is added, CONFIG_ATLAS7_TIMER and is selected by the platform. Then the clocksource's Kconfig is changed to make this option selectable by the user if the COMPILE_TEST option is set. Otherwise, it is up to the platform's Kconfig to select the timer. Signed-off-by: Daniel Lezcano --- arch/arm/mach-prima2/Kconfig | 1 + drivers/clocksource/Kconfig | 7 +++++++ drivers/clocksource/Makefile | 2 +- 3 files changed, 9 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/arm/mach-prima2/Kconfig b/arch/arm/mach-prima2/Kconfig index 0cf4426183cf..2db8d24ab67a 100644 --- a/arch/arm/mach-prima2/Kconfig +++ b/arch/arm/mach-prima2/Kconfig @@ -28,6 +28,7 @@ config ARCH_ATLAS7 default y select ARM_GIC select CPU_V7 + select ATLAS7_TIMER select HAVE_ARM_SCU if SMP select HAVE_SMP help diff --git a/drivers/clocksource/Kconfig b/drivers/clocksource/Kconfig index f41eef2fafde..669dd3cd0291 100644 --- a/drivers/clocksource/Kconfig +++ b/drivers/clocksource/Kconfig @@ -162,6 +162,13 @@ config CLPS711X_TIMER help Enables support for the Cirrus Logic PS711 timer. +config ATLAS7_TIMER + bool "Atlas7 timer driver" if COMPILE_TEST + depends on GENERIC_CLOCKEVENTS + select CLKSRC_MMIO + help + Enables support for the Atlas7 timer. + config CLKSRC_DBX500_PRCMU_SCHED_CLOCK bool "Clocksource PRCMU Timer sched_clock" depends on (CLKSRC_DBX500_PRCMU && !CLKSRC_NOMADIK_MTU_SCHED_CLOCK) diff --git a/drivers/clocksource/Makefile b/drivers/clocksource/Makefile index 25e599c4eeb7..7abf144ac6cc 100644 --- a/drivers/clocksource/Makefile +++ b/drivers/clocksource/Makefile @@ -21,7 +21,7 @@ obj-$(CONFIG_ARMADA_370_XP_TIMER) += time-armada-370-xp.o obj-$(CONFIG_ORION_TIMER) += time-orion.o obj-$(CONFIG_BCM2835_TIMER) += bcm2835_timer.o obj-$(CONFIG_CLPS711X_TIMER) += clps711x-timer.o -obj-$(CONFIG_ARCH_ATLAS7) += timer-atlas7.o +obj-$(CONFIG_ATLAS7_TIMER) += timer-atlas7.o obj-$(CONFIG_ARCH_MOXART) += moxart_timer.o obj-$(CONFIG_ARCH_MXS) += mxs_timer.o obj-$(CONFIG_CLKSRC_PXA) += pxa_timer.o -- cgit v1.2.3 From 419be9e36cf2349f15d0b7280ba46be6f9da7a61 Mon Sep 17 00:00:00 2001 From: Daniel Lezcano Date: Fri, 3 Jun 2016 13:29:03 +0200 Subject: clocksource/drivers/moxart: Add the COMPILE_TEST option Change the Kconfig option logic to fullfil with the current approach. A new Kconfig option is added, CONFIG_MOXART_TIMER and is selected by the platform. 
Then the clocksource's Kconfig is changed to make this option selectable by the user if the COMPILE_TEST option is set. Otherwise, it is up to the platform's Kconfig to select the timer. Signed-off-by: Daniel Lezcano --- arch/arm/mach-moxart/Kconfig | 2 +- drivers/clocksource/Kconfig | 7 +++++++ drivers/clocksource/Makefile | 2 +- 3 files changed, 9 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/arm/mach-moxart/Kconfig b/arch/arm/mach-moxart/Kconfig index 180d9d216719..ddc79cea32d3 100644 --- a/arch/arm/mach-moxart/Kconfig +++ b/arch/arm/mach-moxart/Kconfig @@ -3,7 +3,7 @@ menuconfig ARCH_MOXART depends on ARCH_MULTI_V4 select CPU_FA526 select ARM_DMA_MEM_BUFFERABLE - select CLKSRC_MMIO + select MOXART_TIMER select GENERIC_IRQ_CHIP select ARCH_REQUIRE_GPIOLIB select PHYLIB if NETDEVICES diff --git a/drivers/clocksource/Kconfig b/drivers/clocksource/Kconfig index 669dd3cd0291..db2c5ff6957e 100644 --- a/drivers/clocksource/Kconfig +++ b/drivers/clocksource/Kconfig @@ -169,6 +169,13 @@ config ATLAS7_TIMER help Enables support for the Atlas7 timer. +config MOXART_TIMER + bool "Moxart timer driver" if COMPILE_TEST + depends on GENERIC_CLOCKEVENTS + select CLKSRC_MMIO + help + Enables support for the Moxart timer. + config CLKSRC_DBX500_PRCMU_SCHED_CLOCK bool "Clocksource PRCMU Timer sched_clock" depends on (CLKSRC_DBX500_PRCMU && !CLKSRC_NOMADIK_MTU_SCHED_CLOCK) diff --git a/drivers/clocksource/Makefile b/drivers/clocksource/Makefile index 7abf144ac6cc..e64e37a5f8a0 100644 --- a/drivers/clocksource/Makefile +++ b/drivers/clocksource/Makefile @@ -22,7 +22,7 @@ obj-$(CONFIG_ORION_TIMER) += time-orion.o obj-$(CONFIG_BCM2835_TIMER) += bcm2835_timer.o obj-$(CONFIG_CLPS711X_TIMER) += clps711x-timer.o obj-$(CONFIG_ATLAS7_TIMER) += timer-atlas7.o -obj-$(CONFIG_ARCH_MOXART) += moxart_timer.o +obj-$(CONFIG_MOXART_TIMER) += moxart_timer.o obj-$(CONFIG_ARCH_MXS) += mxs_timer.o obj-$(CONFIG_CLKSRC_PXA) += pxa_timer.o obj-$(CONFIG_ARCH_PRIMA2) += timer-prima2.o -- cgit v1.2.3 From d81c50a0360f8a186150b9bb572d5e0514c25ce9 Mon Sep 17 00:00:00 2001 From: Daniel Lezcano Date: Fri, 3 Jun 2016 13:36:18 +0200 Subject: clocksource/drivers/mxs: Add the COMPILE_TEST option Change the Kconfig option logic to fullfil with the current approach. A new Kconfig option is added, CONFIG_MXS_TIMER and is selected by the platform. Then the clocksource's Kconfig is changed to make this option selectable by the user if the COMPILE_TEST option is set. Otherwise, it is up to the platform's Kconfig to select the timer. Signed-off-by: Daniel Lezcano --- arch/arm/mach-mxs/Kconfig | 2 +- drivers/clocksource/Kconfig | 8 ++++++++ drivers/clocksource/Makefile | 2 +- 3 files changed, 10 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/arm/mach-mxs/Kconfig b/arch/arm/mach-mxs/Kconfig index 84794137b175..68a3a9ec605d 100644 --- a/arch/arm/mach-mxs/Kconfig +++ b/arch/arm/mach-mxs/Kconfig @@ -16,7 +16,7 @@ config ARCH_MXS bool "Freescale MXS (i.MX23, i.MX28) support" depends on ARCH_MULTI_V5 select ARCH_REQUIRE_GPIOLIB - select CLKSRC_MMIO + select MXS_TIMER select PINCTRL select SOC_BUS select SOC_IMX23 diff --git a/drivers/clocksource/Kconfig b/drivers/clocksource/Kconfig index db2c5ff6957e..5d70cdfb8f63 100644 --- a/drivers/clocksource/Kconfig +++ b/drivers/clocksource/Kconfig @@ -176,6 +176,14 @@ config MOXART_TIMER help Enables support for the Moxart timer. 
+config MXS_TIMER + bool "Mxs timer driver" if COMPILE_TEST + depends on GENERIC_CLOCKEVENTS + select CLKSRC_MMIO + select STMP_DEVICE + help + Enables support for the Mxs timer. + config CLKSRC_DBX500_PRCMU_SCHED_CLOCK bool "Clocksource PRCMU Timer sched_clock" depends on (CLKSRC_DBX500_PRCMU && !CLKSRC_NOMADIK_MTU_SCHED_CLOCK) diff --git a/drivers/clocksource/Makefile b/drivers/clocksource/Makefile index e64e37a5f8a0..1360bbab05e0 100644 --- a/drivers/clocksource/Makefile +++ b/drivers/clocksource/Makefile @@ -23,7 +23,7 @@ obj-$(CONFIG_BCM2835_TIMER) += bcm2835_timer.o obj-$(CONFIG_CLPS711X_TIMER) += clps711x-timer.o obj-$(CONFIG_ATLAS7_TIMER) += timer-atlas7.o obj-$(CONFIG_MOXART_TIMER) += moxart_timer.o -obj-$(CONFIG_ARCH_MXS) += mxs_timer.o +obj-$(CONFIG_MXS_TIMER) += mxs_timer.o obj-$(CONFIG_CLKSRC_PXA) += pxa_timer.o obj-$(CONFIG_ARCH_PRIMA2) += timer-prima2.o obj-$(CONFIG_ARCH_U300) += timer-u300.o -- cgit v1.2.3 From f3550d499576c423db0d902930cf8d848fe09744 Mon Sep 17 00:00:00 2001 From: Daniel Lezcano Date: Fri, 3 Jun 2016 14:28:38 +0200 Subject: clocksource/drivers/prima2: Add the COMPILE_TEST option Change the Kconfig option logic to fullfil with the current approach. A new Kconfig option is added, CONFIG_PRIMA2_TIMER and is selected by the platform. Then the clocksource's Kconfig is changed to make this option selectable by the user if the COMPILE_TEST option is set. Otherwise, it is up to the platform's Kconfig to select the timer. Signed-off-by: Daniel Lezcano --- arch/arm/mach-prima2/Kconfig | 1 + drivers/clocksource/Kconfig | 7 +++++++ drivers/clocksource/Makefile | 2 +- 3 files changed, 9 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/arm/mach-prima2/Kconfig b/arch/arm/mach-prima2/Kconfig index 2db8d24ab67a..9e938f2961cf 100644 --- a/arch/arm/mach-prima2/Kconfig +++ b/arch/arm/mach-prima2/Kconfig @@ -39,6 +39,7 @@ config ARCH_PRIMA2 default y select SIRF_IRQ select ZONE_DMA + select PRIMA2_TIMER help Support for CSR SiRFSoC ARM Cortex A9 Platform diff --git a/drivers/clocksource/Kconfig b/drivers/clocksource/Kconfig index 5d70cdfb8f63..c350fbd805d7 100644 --- a/drivers/clocksource/Kconfig +++ b/drivers/clocksource/Kconfig @@ -184,6 +184,13 @@ config MXS_TIMER help Enables support for the Mxs timer. +config PRIMA2_TIMER + bool "Prima2 timer driver" if COMPILE_TEST + depends on GENERIC_CLOCKEVENTS + select CLKSRC_MMIO + help + Enables support for the Prima2 timer. + config CLKSRC_DBX500_PRCMU_SCHED_CLOCK bool "Clocksource PRCMU Timer sched_clock" depends on (CLKSRC_DBX500_PRCMU && !CLKSRC_NOMADIK_MTU_SCHED_CLOCK) diff --git a/drivers/clocksource/Makefile b/drivers/clocksource/Makefile index 1360bbab05e0..b419d5da11c7 100644 --- a/drivers/clocksource/Makefile +++ b/drivers/clocksource/Makefile @@ -25,7 +25,7 @@ obj-$(CONFIG_ATLAS7_TIMER) += timer-atlas7.o obj-$(CONFIG_MOXART_TIMER) += moxart_timer.o obj-$(CONFIG_MXS_TIMER) += mxs_timer.o obj-$(CONFIG_CLKSRC_PXA) += pxa_timer.o -obj-$(CONFIG_ARCH_PRIMA2) += timer-prima2.o +obj-$(CONFIG_PRIMA2_TIMER) += timer-prima2.o obj-$(CONFIG_ARCH_U300) += timer-u300.o obj-$(CONFIG_SUN4I_TIMER) += sun4i_timer.o obj-$(CONFIG_SUN5I_HSTIMER) += timer-sun5i.o -- cgit v1.2.3 From 85f98db4adbbd3ec6b41537d31241b59bf47c66f Mon Sep 17 00:00:00 2001 From: Daniel Lezcano Date: Fri, 3 Jun 2016 14:31:16 +0200 Subject: clocksource/drivers/u300: Add the COMPILE_TEST option Change the Kconfig option logic to fullfil with the current approach. A new Kconfig option is added, CONFIG_U300_TIMER and is selected by the platform. 
Then the clocksource's Kconfig is changed to make this option selectable by the user if the COMPILE_TEST option is set. Otherwise, it is up to the platform's Kconfig to select the timer. Due on the delay specific code, this driver will compile only on the ARM architecture. Signed-off-by: Daniel Lezcano --- arch/arm/mach-u300/Kconfig | 2 +- drivers/clocksource/Kconfig | 8 ++++++++ drivers/clocksource/Makefile | 2 +- 3 files changed, 10 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/arm/mach-u300/Kconfig b/arch/arm/mach-u300/Kconfig index 301a98498453..4fdc3425ffbd 100644 --- a/arch/arm/mach-u300/Kconfig +++ b/arch/arm/mach-u300/Kconfig @@ -4,7 +4,7 @@ menuconfig ARCH_U300 select ARCH_REQUIRE_GPIOLIB select ARM_AMBA select ARM_VIC - select CLKSRC_MMIO + select U300_TIMER select CPU_ARM926T select HAVE_TCM select PINCTRL diff --git a/drivers/clocksource/Kconfig b/drivers/clocksource/Kconfig index c350fbd805d7..d425f80b1e42 100644 --- a/drivers/clocksource/Kconfig +++ b/drivers/clocksource/Kconfig @@ -191,6 +191,14 @@ config PRIMA2_TIMER help Enables support for the Prima2 timer. +config U300_TIMER + bool "U300 timer driver" if COMPILE_TEST + depends on GENERIC_CLOCKEVENTS + depends on ARM + select CLKSRC_MMIO + help + Enables support for the U300 timer. + config CLKSRC_DBX500_PRCMU_SCHED_CLOCK bool "Clocksource PRCMU Timer sched_clock" depends on (CLKSRC_DBX500_PRCMU && !CLKSRC_NOMADIK_MTU_SCHED_CLOCK) diff --git a/drivers/clocksource/Makefile b/drivers/clocksource/Makefile index b419d5da11c7..adbc3a8082d0 100644 --- a/drivers/clocksource/Makefile +++ b/drivers/clocksource/Makefile @@ -26,7 +26,7 @@ obj-$(CONFIG_MOXART_TIMER) += moxart_timer.o obj-$(CONFIG_MXS_TIMER) += mxs_timer.o obj-$(CONFIG_CLKSRC_PXA) += pxa_timer.o obj-$(CONFIG_PRIMA2_TIMER) += timer-prima2.o -obj-$(CONFIG_ARCH_U300) += timer-u300.o +obj-$(CONFIG_U300_TIMER) += timer-u300.o obj-$(CONFIG_SUN4I_TIMER) += sun4i_timer.o obj-$(CONFIG_SUN5I_HSTIMER) += timer-sun5i.o obj-$(CONFIG_MESON6_TIMER) += meson6_timer.o -- cgit v1.2.3 From d683b9dcc8a8d743f7b660ff3b77ccfbe652e4b9 Mon Sep 17 00:00:00 2001 From: Daniel Lezcano Date: Fri, 3 Jun 2016 15:03:21 +0200 Subject: clocksource/drivers/nspire: Add the COMPILE_TEST option Change the Kconfig option logic to fullfil with the current approach. A new Kconfig option is added, CONFIG_NSPIRE_TIMER and is selected by the platform. Then the clocksource's Kconfig is changed to make this option selectable by the user if the COMPILE_TEST option is set. Otherwise, it is up to the platform's Kconfig to select the timer. Signed-off-by: Daniel Lezcano --- arch/arm/mach-nspire/Kconfig | 1 + drivers/clocksource/Kconfig | 7 +++++++ drivers/clocksource/Makefile | 2 +- 3 files changed, 9 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/arm/mach-nspire/Kconfig b/arch/arm/mach-nspire/Kconfig index bc41f26c1a12..d4985305cab2 100644 --- a/arch/arm/mach-nspire/Kconfig +++ b/arch/arm/mach-nspire/Kconfig @@ -7,5 +7,6 @@ config ARCH_NSPIRE select ARM_AMBA select ARM_VIC select ARM_TIMER_SP804 + select NSPIRE_TIMER help This enables support for systems using the TI-NSPIRE CPU diff --git a/drivers/clocksource/Kconfig b/drivers/clocksource/Kconfig index d425f80b1e42..0cfc4bf1c8ac 100644 --- a/drivers/clocksource/Kconfig +++ b/drivers/clocksource/Kconfig @@ -199,6 +199,13 @@ config U300_TIMER help Enables support for the U300 timer. 
+config NSPIRE_TIMER + bool "NSpire timer driver" if COMPILE_TEST + depends on GENERIC_CLOCKEVENTS + select CLKSRC_MMIO + help + Enables support for the Nspire timer. + config CLKSRC_DBX500_PRCMU_SCHED_CLOCK bool "Clocksource PRCMU Timer sched_clock" depends on (CLKSRC_DBX500_PRCMU && !CLKSRC_NOMADIK_MTU_SCHED_CLOCK) diff --git a/drivers/clocksource/Makefile b/drivers/clocksource/Makefile index adbc3a8082d0..d888c986cc43 100644 --- a/drivers/clocksource/Makefile +++ b/drivers/clocksource/Makefile @@ -32,7 +32,7 @@ obj-$(CONFIG_SUN5I_HSTIMER) += timer-sun5i.o obj-$(CONFIG_MESON6_TIMER) += meson6_timer.o obj-$(CONFIG_TEGRA_TIMER) += tegra20_timer.o obj-$(CONFIG_VT8500_TIMER) += vt8500_timer.o -obj-$(CONFIG_ARCH_NSPIRE) += zevio-timer.o +obj-$(CONFIG_NSPIRE_TIMER) += zevio-timer.o obj-$(CONFIG_BCM_KONA_TIMER) += bcm_kona_timer.o obj-$(CONFIG_CADENCE_TTC_TIMER) += cadence_ttc_timer.o obj-$(CONFIG_CLKSRC_EFM32) += time-efm32.o -- cgit v1.2.3 From c12547a00dfd3aaacfd5ce362ee4b9585c320054 Mon Sep 17 00:00:00 2001 From: Daniel Lezcano Date: Fri, 3 Jun 2016 15:05:05 +0200 Subject: clocksource/drivers/keystone: Add the COMPILE_TEST option Change the Kconfig option logic to fullfil with the current approach. A new Kconfig option is added, CONFIG_KEYSTONE_TIMER and is selected by the platform. Then the clocksource's Kconfig is changed to make this option selectable by the user if the COMPILE_TEST option is set. Otherwise, it is up to the platform's Kconfig to select the timer. Signed-off-by: Daniel Lezcano --- arch/arm/mach-keystone/Kconfig | 2 +- drivers/clocksource/Kconfig | 8 ++++++++ drivers/clocksource/Makefile | 2 +- 3 files changed, 10 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/arm/mach-keystone/Kconfig b/arch/arm/mach-keystone/Kconfig index ea955f6db8b7..bac577badc7e 100644 --- a/arch/arm/mach-keystone/Kconfig +++ b/arch/arm/mach-keystone/Kconfig @@ -4,7 +4,7 @@ config ARCH_KEYSTONE depends on ARM_PATCH_PHYS_VIRT select ARM_GIC select HAVE_ARM_ARCH_TIMER - select CLKSRC_MMIO + select KEYSTONE_TIMER select ARM_ERRATA_798181 if SMP select COMMON_CLK_KEYSTONE select ARCH_SUPPORTS_BIG_ENDIAN diff --git a/drivers/clocksource/Kconfig b/drivers/clocksource/Kconfig index 0cfc4bf1c8ac..7e5709a1e4f0 100644 --- a/drivers/clocksource/Kconfig +++ b/drivers/clocksource/Kconfig @@ -206,6 +206,14 @@ config NSPIRE_TIMER help Enables support for the Nspire timer. +config KEYSTONE_TIMER + bool "Keystone timer driver" if COMPILE_TEST + depends on GENERIC_CLOCKEVENTS + depends on ARM || ARM64 + select CLKSRC_MMIO + help + Enables support for the Keystone timer. 
+ config CLKSRC_DBX500_PRCMU_SCHED_CLOCK bool "Clocksource PRCMU Timer sched_clock" depends on (CLKSRC_DBX500_PRCMU && !CLKSRC_NOMADIK_MTU_SCHED_CLOCK) diff --git a/drivers/clocksource/Makefile b/drivers/clocksource/Makefile index d888c986cc43..a8184319c41f 100644 --- a/drivers/clocksource/Makefile +++ b/drivers/clocksource/Makefile @@ -56,7 +56,7 @@ obj-$(CONFIG_ARMV7M_SYSTICK) += armv7m_systick.o obj-$(CONFIG_ARM_TIMER_SP804) += timer-sp804.o obj-$(CONFIG_CLKSRC_METAG_GENERIC) += metag_generic.o obj-$(CONFIG_ARCH_HAS_TICK_BROADCAST) += dummy_timer.o -obj-$(CONFIG_ARCH_KEYSTONE) += timer-keystone.o +obj-$(CONFIG_KEYSTONE_TIMER) += timer-keystone.o obj-$(CONFIG_ARCH_INTEGRATOR_AP) += timer-integrator-ap.o obj-$(CONFIG_CLKSRC_VERSATILE) += versatile.o obj-$(CONFIG_CLKSRC_MIPS_GIC) += mips-gic-timer.o -- cgit v1.2.3 From 568c0342e494fa4c05377c6c83c653afa350985a Mon Sep 17 00:00:00 2001 From: Daniel Lezcano Date: Fri, 3 Jun 2016 15:11:21 +0200 Subject: clocksource/drivers/integrator-ap: Add the COMPILE_TEST option Change the Kconfig option logic to fullfil with the current approach. A new Kconfig option is added, CONFIG_INTEGRATOR_AP_TIMER and is selected by the platform. Then the clocksource's Kconfig is changed to make this option selectable by the user if the COMPILE_TEST option is set. Otherwise, it is up to the platform's Kconfig to select the timer. Signed-off-by: Daniel Lezcano --- arch/arm/mach-integrator/Kconfig | 2 +- drivers/clocksource/Kconfig | 7 +++++++ drivers/clocksource/Makefile | 2 +- 3 files changed, 9 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/arm/mach-integrator/Kconfig b/arch/arm/mach-integrator/Kconfig index b2a85ba13f08..291262e5aeaf 100644 --- a/arch/arm/mach-integrator/Kconfig +++ b/arch/arm/mach-integrator/Kconfig @@ -20,7 +20,7 @@ if ARCH_INTEGRATOR config ARCH_INTEGRATOR_AP bool "Support Integrator/AP and Integrator/PP2 platforms" - select CLKSRC_MMIO + select INTEGRATOR_AP_TIMER select MIGHT_HAVE_PCI select SERIAL_AMBA_PL010 if TTY select SERIAL_AMBA_PL010_CONSOLE if TTY diff --git a/drivers/clocksource/Kconfig b/drivers/clocksource/Kconfig index 7e5709a1e4f0..8055b37347ff 100644 --- a/drivers/clocksource/Kconfig +++ b/drivers/clocksource/Kconfig @@ -214,6 +214,13 @@ config KEYSTONE_TIMER help Enables support for the Keystone timer. +config INTEGRATOR_AP_TIMER + bool "Integrator-ap timer driver" if COMPILE_TEST + depends on GENERIC_CLOCKEVENTS + select CLKSRC_MMIO + help + Enables support for the Integrator-ap timer. 
+ config CLKSRC_DBX500_PRCMU_SCHED_CLOCK bool "Clocksource PRCMU Timer sched_clock" depends on (CLKSRC_DBX500_PRCMU && !CLKSRC_NOMADIK_MTU_SCHED_CLOCK) diff --git a/drivers/clocksource/Makefile b/drivers/clocksource/Makefile index a8184319c41f..fd9d6df0bbc0 100644 --- a/drivers/clocksource/Makefile +++ b/drivers/clocksource/Makefile @@ -57,7 +57,7 @@ obj-$(CONFIG_ARM_TIMER_SP804) += timer-sp804.o obj-$(CONFIG_CLKSRC_METAG_GENERIC) += metag_generic.o obj-$(CONFIG_ARCH_HAS_TICK_BROADCAST) += dummy_timer.o obj-$(CONFIG_KEYSTONE_TIMER) += timer-keystone.o -obj-$(CONFIG_ARCH_INTEGRATOR_AP) += timer-integrator-ap.o +obj-$(CONFIG_INTEGRATOR_AP_TIMER) += timer-integrator-ap.o obj-$(CONFIG_CLKSRC_VERSATILE) += versatile.o obj-$(CONFIG_CLKSRC_MIPS_GIC) += mips-gic-timer.o obj-$(CONFIG_CLKSRC_TANGO_XTAL) += tango_xtal.o -- cgit v1.2.3 From eee25ab19de632af1ab4d2ac50bfc5006802e664 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Tue, 28 Jun 2016 22:11:14 +0200 Subject: ARM: dts: sun7i: Fix pll3x2 and pll7x2 not having a parent clock Fix pll3x2 and pll7x2 not having a parent clock, specifically this fixes the kernel turning of pll3 while simplefb is using it when uboot has configured things to use pll3x2 as lcd ch clk parent. Signed-off-by: Hans de Goede Signed-off-by: Maxime Ripard --- arch/arm/boot/dts/sun7i-a20.dtsi | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch') diff --git a/arch/arm/boot/dts/sun7i-a20.dtsi b/arch/arm/boot/dts/sun7i-a20.dtsi index f480051c1f8a..2c34bbbb9570 100644 --- a/arch/arm/boot/dts/sun7i-a20.dtsi +++ b/arch/arm/boot/dts/sun7i-a20.dtsi @@ -232,6 +232,7 @@ pll3x2: pll3x2_clk { #clock-cells = <0>; compatible = "fixed-factor-clock"; + clocks = <&pll3>; clock-div = <1>; clock-mult = <2>; clock-output-names = "pll3-2x"; @@ -273,6 +274,7 @@ pll7x2: pll7x2_clk { #clock-cells = <0>; compatible = "fixed-factor-clock"; + clocks = <&pll7>; clock-div = <1>; clock-mult = <2>; clock-output-names = "pll7-2x"; -- cgit v1.2.3 From b44439e42912b9dcc510a0ff891809ea2cadc46b Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 20 Jun 2016 18:08:22 +0200 Subject: ARM: mvebu: compile pm code conditionally A cleanup to include the headers correctly caused another build problem: arch/arm/mach-mvebu/kirkwood-pm.c:70:13: error: redefinition of 'kirkwood_pm_init' arch/arm/mach-mvebu/kirkwood-pm.h:23:20: note: previous definition of 'kirkwood_pm_init' was here The underlying issue is that kirkwood-pm.o is not actually meant to be used when CONFIG_PM is disabled, so we should also leave it out of the Makefile. The same seems to be true for the PM code in MACH_MVEBU_V7, and I'm treating it the same way here. 
Signed-off-by: Arnd Bergmann Fixes: d705c1a66e15 ("ARM: Kirkwood: fix kirkwood_pm_init() declaration/type") Signed-off-by: Gregory CLEMENT --- arch/arm/mach-mvebu/Makefile | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/arm/mach-mvebu/Makefile b/arch/arm/mach-mvebu/Makefile index ecf9e0c3b107..e53c6cfcab51 100644 --- a/arch/arm/mach-mvebu/Makefile +++ b/arch/arm/mach-mvebu/Makefile @@ -7,9 +7,15 @@ CFLAGS_pmsu.o := -march=armv7-a obj-$(CONFIG_MACH_MVEBU_ANY) += system-controller.o mvebu-soc-id.o ifeq ($(CONFIG_MACH_MVEBU_V7),y) -obj-y += cpu-reset.o board-v7.o coherency.o coherency_ll.o pmsu.o pmsu_ll.o pm.o pm-board.o +obj-y += cpu-reset.o board-v7.o coherency.o coherency_ll.o pmsu.o pmsu_ll.o + +obj-$(CONFIG_PM) += pm.o pm-board.o obj-$(CONFIG_SMP) += platsmp.o headsmp.o platsmp-a9.o headsmp-a9.o endif obj-$(CONFIG_MACH_DOVE) += dove.o -obj-$(CONFIG_MACH_KIRKWOOD) += kirkwood.o kirkwood-pm.o + +ifeq ($(CONFIG_MACH_KIRKWOOD),y) +obj-y += kirkwood.o +obj-$(CONFIG_PM) += kirkwood-pm.o +endif -- cgit v1.2.3 From 65c0554b73c920023cc8998802e508b798113b46 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Thu, 30 Jun 2016 18:11:41 +0200 Subject: x86/power/64: Fix kernel text mapping corruption during image restoration Logan Gunthorpe reports that hibernation stopped working reliably for him after commit ab76f7b4ab23 (x86/mm: Set NX on gap between __ex_table and rodata). That turns out to be a consequence of a long-standing issue with the 64-bit image restoration code on x86, which is that the temporary page tables set up by it to avoid page table corruption when the last bits of the image kernel's memory contents are copied into their original page frames re-use the boot kernel's text mapping, but that mapping may very well get corrupted just like any other part of the page tables. Of course, if that happens, the final jump to the image kernel's entry point will go nowhere. The exact reason why commit ab76f7b4ab23 matters here is that it sometimes causes a PMD of a large page to be split into PTEs that are allocated dynamically and get corrupted during image restoration as described above. To fix that issue note that the code copying the last bits of the image kernel's memory contents to the page frames occupied by them previously doesn't use the kernel text mapping, because it runs from a special page covered by the identity mapping set up for that code from scratch. Hence, the kernel text mapping is only needed before that code starts to run and then it will only be used just for the final jump to the image kernel's entry point. Accordingly, the temporary page tables set up in swsusp_arch_resume() on x86-64 need to contain the kernel text mapping too. That mapping is only going to be used for the final jump to the image kernel, so it only needs to cover the image kernel's entry point, because the first thing the image kernel does after getting control back is to switch over to its own original page tables. Moreover, the virtual address of the image kernel's entry point in that mapping has to be the same as the one mapped by the image kernel's page tables.
With that in mind, modify the x86-64 arch_hibernation_header_save() and arch_hibernation_header_restore() routines to pass the physical address of the image kernel's entry point (in addition to its virtual address) to the boot kernel (a small piece of assembly code involved in passing the entry point's virtual address to the image kernel is not necessary any more after that, so drop it). Update RESTORE_MAGIC too to reflect the image header format change. Next, in set_up_temporary_mappings(), use the physical and virtual addresses of the image kernel's entry point passed in the image header to set up a minimum kernel text mapping (using memory pages that won't be overwritten by the image kernel's memory contents) that will map those addresses to each other as appropriate. This makes the concern about the possible corruption of the original boot kernel text mapping go away and if the minimum kernel text mapping used for the final jump marks the image kernel's entry point memory as executable, the jump to it is guaranteed to succeed. Fixes: ab76f7b4ab23 (x86/mm: Set NX on gap between __ex_table and rodata) Link: http://marc.info/?l=linux-pm&m=146372852823760&w=2 Reported-by: Logan Gunthorpe Reported-and-tested-by: Borislav Petkov Tested-by: Kees Cook Signed-off-by: Rafael J. Wysocki --- arch/x86/power/hibernate_64.c | 97 ++++++++++++++++++++++++++++++++++----- arch/x86/power/hibernate_asm_64.S | 55 ++++++++++------------ 2 files changed, 109 insertions(+), 43 deletions(-) (limited to 'arch') diff --git a/arch/x86/power/hibernate_64.c b/arch/x86/power/hibernate_64.c index 009947d419a6..f2b5e6a5cf95 100644 --- a/arch/x86/power/hibernate_64.c +++ b/arch/x86/power/hibernate_64.c @@ -19,6 +19,7 @@ #include #include #include +#include /* Defined in hibernate_asm_64.S */ extern asmlinkage __visible int restore_image(void); @@ -28,6 +29,7 @@ extern asmlinkage __visible int restore_image(void); * kernel's text (this value is passed in the image header). */ unsigned long restore_jump_address __visible; +unsigned long jump_address_phys; /* * Value of the cr3 register from before the hibernation (this value is passed @@ -37,7 +39,43 @@ unsigned long restore_cr3 __visible; pgd_t *temp_level4_pgt __visible; -void *relocated_restore_code __visible; +unsigned long relocated_restore_code __visible; + +static int set_up_temporary_text_mapping(void) +{ + pmd_t *pmd; + pud_t *pud; + + /* + * The new mapping only has to cover the page containing the image + * kernel's entry point (jump_address_phys), because the switch over to + * it is carried out by relocated code running from a page allocated + * specifically for this purpose and covered by the identity mapping, so + * the temporary kernel text mapping is only needed for the final jump. + * Moreover, in that mapping the virtual address of the image kernel's + * entry point must be the same as its virtual address in the image + * kernel (restore_jump_address), so the image kernel's + * restore_registers() code doesn't find itself in a different area of + * the virtual address space after switching over to the original page + * tables used by the image kernel.
+ */ + pud = (pud_t *)get_safe_page(GFP_ATOMIC); + if (!pud) + return -ENOMEM; + + pmd = (pmd_t *)get_safe_page(GFP_ATOMIC); + if (!pmd) + return -ENOMEM; + + set_pmd(pmd + pmd_index(restore_jump_address), + __pmd((jump_address_phys & PMD_MASK) | __PAGE_KERNEL_LARGE_EXEC)); + set_pud(pud + pud_index(restore_jump_address), + __pud(__pa(pmd) | _KERNPG_TABLE)); + set_pgd(temp_level4_pgt + pgd_index(restore_jump_address), + __pgd(__pa(pud) | _KERNPG_TABLE)); + + return 0; +} static void *alloc_pgt_page(void *context) { @@ -59,9 +97,10 @@ static int set_up_temporary_mappings(void) if (!temp_level4_pgt) return -ENOMEM; - /* It is safe to reuse the original kernel mapping */ - set_pgd(temp_level4_pgt + pgd_index(__START_KERNEL_map), - init_level4_pgt[pgd_index(__START_KERNEL_map)]); + /* Prepare a temporary mapping for the kernel text */ + result = set_up_temporary_text_mapping(); + if (result) + return result; /* Set up the direct mapping from scratch */ for (i = 0; i < nr_pfn_mapped; i++) { @@ -78,19 +117,50 @@ static int set_up_temporary_mappings(void) return 0; } +static int relocate_restore_code(void) +{ + pgd_t *pgd; + pud_t *pud; + + relocated_restore_code = get_safe_page(GFP_ATOMIC); + if (!relocated_restore_code) + return -ENOMEM; + + memcpy((void *)relocated_restore_code, &core_restore_code, PAGE_SIZE); + + /* Make the page containing the relocated code executable */ + pgd = (pgd_t *)__va(read_cr3()) + pgd_index(relocated_restore_code); + pud = pud_offset(pgd, relocated_restore_code); + if (pud_large(*pud)) { + set_pud(pud, __pud(pud_val(*pud) & ~_PAGE_NX)); + } else { + pmd_t *pmd = pmd_offset(pud, relocated_restore_code); + + if (pmd_large(*pmd)) { + set_pmd(pmd, __pmd(pmd_val(*pmd) & ~_PAGE_NX)); + } else { + pte_t *pte = pte_offset_kernel(pmd, relocated_restore_code); + + set_pte(pte, __pte(pte_val(*pte) & ~_PAGE_NX)); + } + } + __flush_tlb_all(); + + return 0; +} + int swsusp_arch_resume(void) { int error; /* We have got enough memory and from now on we cannot recover */ - if ((error = set_up_temporary_mappings())) + error = set_up_temporary_mappings(); + if (error) return error; - relocated_restore_code = (void *)get_safe_page(GFP_ATOMIC); - if (!relocated_restore_code) - return -ENOMEM; - memcpy(relocated_restore_code, &core_restore_code, - &restore_registers - &core_restore_code); + error = relocate_restore_code(); + if (error) + return error; restore_image(); return 0; @@ -109,11 +179,12 @@ int pfn_is_nosave(unsigned long pfn) struct restore_data_record { unsigned long jump_address; + unsigned long jump_address_phys; unsigned long cr3; unsigned long magic; }; -#define RESTORE_MAGIC 0x0123456789ABCDEFUL +#define RESTORE_MAGIC 0x123456789ABCDEF0UL /** * arch_hibernation_header_save - populate the architecture specific part @@ -126,7 +197,8 @@ int arch_hibernation_header_save(void *addr, unsigned int max_size) if (max_size < sizeof(struct restore_data_record)) return -EOVERFLOW; - rdr->jump_address = restore_jump_address; + rdr->jump_address = (unsigned long)&restore_registers; + rdr->jump_address_phys = __pa_symbol(&restore_registers); rdr->cr3 = restore_cr3; rdr->magic = RESTORE_MAGIC; return 0; @@ -142,6 +214,7 @@ int arch_hibernation_header_restore(void *addr) struct restore_data_record *rdr = addr; restore_jump_address = rdr->jump_address; + jump_address_phys = rdr->jump_address_phys; restore_cr3 = rdr->cr3; return (rdr->magic == RESTORE_MAGIC) ? 
0 : -EINVAL; } diff --git a/arch/x86/power/hibernate_asm_64.S b/arch/x86/power/hibernate_asm_64.S index 4400a43b9e28..3177c2bc26f6 100644 --- a/arch/x86/power/hibernate_asm_64.S +++ b/arch/x86/power/hibernate_asm_64.S @@ -44,9 +44,6 @@ ENTRY(swsusp_arch_suspend) pushfq popq pt_regs_flags(%rax) - /* save the address of restore_registers */ - movq $restore_registers, %rax - movq %rax, restore_jump_address(%rip) /* save cr3 */ movq %cr3, %rax movq %rax, restore_cr3(%rip) @@ -57,31 +54,34 @@ ENTRY(swsusp_arch_suspend) ENDPROC(swsusp_arch_suspend) ENTRY(restore_image) - /* switch to temporary page tables */ - movq $__PAGE_OFFSET, %rdx - movq temp_level4_pgt(%rip), %rax - subq %rdx, %rax - movq %rax, %cr3 - /* Flush TLB */ - movq mmu_cr4_features(%rip), %rax - movq %rax, %rdx - andq $~(X86_CR4_PGE), %rdx - movq %rdx, %cr4; # turn off PGE - movq %cr3, %rcx; # flush TLB - movq %rcx, %cr3; - movq %rax, %cr4; # turn PGE back on - /* prepare to jump to the image kernel */ - movq restore_jump_address(%rip), %rax - movq restore_cr3(%rip), %rbx + movq restore_jump_address(%rip), %r8 + movq restore_cr3(%rip), %r9 + + /* prepare to switch to temporary page tables */ + movq temp_level4_pgt(%rip), %rax + movq mmu_cr4_features(%rip), %rbx /* prepare to copy image data to their original locations */ movq restore_pblist(%rip), %rdx + + /* jump to relocated restore code */ movq relocated_restore_code(%rip), %rcx jmpq *%rcx /* code below has been relocated to a safe page */ ENTRY(core_restore_code) + /* switch to temporary page tables */ + movq $__PAGE_OFFSET, %rcx + subq %rcx, %rax + movq %rax, %cr3 + /* flush TLB */ + movq %rbx, %rcx + andq $~(X86_CR4_PGE), %rcx + movq %rcx, %cr4; # turn off PGE + movq %cr3, %rcx; # flush TLB + movq %rcx, %cr3; + movq %rbx, %cr4; # turn PGE back on .Lloop: testq %rdx, %rdx jz .Ldone @@ -96,24 +96,17 @@ ENTRY(core_restore_code) /* progress to the next pbe */ movq pbe_next(%rdx), %rdx jmp .Lloop + .Ldone: /* jump to the restore_registers address from the image header */ - jmpq *%rax - /* - * NOTE: This assumes that the boot kernel's text mapping covers the - * image kernel's page containing restore_registers and the address of - * this page is the same as in the image kernel's text mapping (it - * should always be true, because the text mapping is linear, starting - * from 0, and is supposed to cover the entire kernel text for every - * kernel). - * - * code below belongs to the image kernel - */ + jmpq *%r8 + /* code below belongs to the image kernel */ + .align PAGE_SIZE ENTRY(restore_registers) FRAME_BEGIN /* go back to the original page tables */ - movq %rbx, %cr3 + movq %r9, %cr3 /* Flush TLB, including "global" things (vmalloc) */ movq mmu_cr4_features(%rip), %rax -- cgit v1.2.3 From 1ead852dd88779eda12cb09cc894a03d9abfe1ec Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Thu, 16 Jun 2016 19:13:49 +0200 Subject: x86/amd_nb: Fix boot crash on non-AMD systems Fix boot crash that triggers if this driver is built into a kernel and run on non-AMD systems. AMD northbridges users call amd_cache_northbridges() and it returns a negative value to signal that we weren't able to cache/detect any northbridges on the system. At least, it should do so as all its callers expect it to do so. But it does return a negative value only when kmalloc() fails. Fix it to return -ENODEV if there are no NBs cached as otherwise, amd_nb users like amd64_edac, for example, which relies on it to know whether it should load or not, gets loaded on systems like Intel Xeons where it shouldn't. 
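To illustrate why the return value matters: amd_nb consumers typically bail out of their init path on a negative return, so returning 0 while zero northbridges were cached lets them continue loading on non-AMD hardware. A minimal sketch of such a caller, with a hypothetical function name (not taken from amd64_edac):

/*
 * Hypothetical consumer, for illustration only; amd_cache_northbridges()
 * is declared in <asm/amd_nb.h>.
 */
static int __init example_nb_user_init(void)
{
	int err = amd_cache_northbridges();

	if (err < 0)
		return err;	/* now -ENODEV on systems without AMD NBs */

	/* safe to use the cached northbridge array from here on */
	return 0;
}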
Reported-and-tested-by: Tony Battersby Signed-off-by: Borislav Petkov Cc: Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1466097230-5333-2-git-send-email-bp@alien8.de Link: https://lkml.kernel.org/r/5761BEB0.9000807@cybernetics.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/amd_nb.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/amd_nb.c b/arch/x86/kernel/amd_nb.c index a147e676fc7b..e991d5c8bb3a 100644 --- a/arch/x86/kernel/amd_nb.c +++ b/arch/x86/kernel/amd_nb.c @@ -71,8 +71,8 @@ int amd_cache_northbridges(void) while ((misc = next_northbridge(misc, amd_nb_misc_ids)) != NULL) i++; - if (i == 0) - return 0; + if (!i) + return -ENODEV; nb = kzalloc(i * sizeof(struct amd_northbridge), GFP_KERNEL); if (!nb) -- cgit v1.2.3 From 0519e8b4cb2bda598f941088948129f9fe9e6acd Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Wed, 29 Jun 2016 22:01:34 +0300 Subject: x86/platform/intel-mid: Add pinctrl for Intel Merrifield Intel Merrifield uses a special address space reserved for Family-Level Interface Shim (FLIS) that allows consumers to mux and configure pins. Create a platform device for it. Signed-off-by: Andy Shevchenko Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1467226894-107109-1-git-send-email-andriy.shevchenko@linux.intel.com [ Fixed typo. ] Signed-off-by: Ingo Molnar --- arch/x86/platform/intel-mid/device_libs/Makefile | 2 + .../intel-mid/device_libs/platform_mrfld_pinctrl.c | 43 ++++++++++++++++++++++ 2 files changed, 45 insertions(+) create mode 100644 arch/x86/platform/intel-mid/device_libs/platform_mrfld_pinctrl.c (limited to 'arch') diff --git a/arch/x86/platform/intel-mid/device_libs/Makefile b/arch/x86/platform/intel-mid/device_libs/Makefile index abe8ba87c970..79e97ed5be5b 100644 --- a/arch/x86/platform/intel-mid/device_libs/Makefile +++ b/arch/x86/platform/intel-mid/device_libs/Makefile @@ -1,3 +1,5 @@ +# Family-Level Interface Shim (FLIS) +obj-$(subst m,y,$(CONFIG_PINCTRL_MERRIFIELD)) += platform_mrfld_pinctrl.o # IPC Devices obj-y += platform_ipc.o obj-$(subst m,y,$(CONFIG_MFD_INTEL_MSIC)) += platform_msic.o diff --git a/arch/x86/platform/intel-mid/device_libs/platform_mrfld_pinctrl.c b/arch/x86/platform/intel-mid/device_libs/platform_mrfld_pinctrl.c new file mode 100644 index 000000000000..4de8a664e6a1 --- /dev/null +++ b/arch/x86/platform/intel-mid/device_libs/platform_mrfld_pinctrl.c @@ -0,0 +1,43 @@ +/* + * Intel Merrifield FLIS platform device initialization file + * + * Copyright (C) 2016, Intel Corporation + * + * Author: Andy Shevchenko + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; version 2 + * of the License. 
+ */ + +#include +#include +#include + +#include + +#define FLIS_BASE_ADDR 0xff0c0000 +#define FLIS_LENGTH 0x8000 + +static struct resource mrfld_pinctrl_mmio_resource = { + .start = FLIS_BASE_ADDR, + .end = FLIS_BASE_ADDR + FLIS_LENGTH - 1, + .flags = IORESOURCE_MEM, +}; + +static struct platform_device mrfld_pinctrl_device = { + .name = "pinctrl-merrifield", + .id = PLATFORM_DEVID_NONE, + .resource = &mrfld_pinctrl_mmio_resource, + .num_resources = 1, +}; + +static int __init mrfld_pinctrl_init(void) +{ + if (intel_mid_identify_cpu() == INTEL_MID_CPU_CHIP_TANGIER) + return platform_device_register(&mrfld_pinctrl_device); + + return -ENODEV; +} +arch_initcall(mrfld_pinctrl_init); -- cgit v1.2.3 From 487cf917ed0d12afaf403d9d77684bf44b8c13be Mon Sep 17 00:00:00 2001 From: Sinan Kaya Date: Wed, 29 Jun 2016 04:27:36 -0400 Subject: Revert "ACPI, PCI, IRQ: remove redundant code in acpi_irq_penalty_init()" Trying to make the ISA and PCI init functionality common turned out to be a bad idea, because the ISA path depends on external functionality. Restore the previous behavior and limit the refactoring to PCI interrupts only. Fixes: 1fcb6a813c4f "ACPI,PCI,IRQ: remove redundant code in acpi_irq_penalty_init()" Signed-off-by: Sinan Kaya Tested-by: Wim Osterholt Signed-off-by: Rafael J. Wysocki --- arch/x86/pci/acpi.c | 1 + drivers/acpi/pci_link.c | 36 ++++++++++++++++++++++++++++++++++++ include/acpi/acpi_drivers.h | 1 + 3 files changed, 38 insertions(+) (limited to 'arch') diff --git a/arch/x86/pci/acpi.c b/arch/x86/pci/acpi.c index b2a4e2a61f6b..3cd69832d7f4 100644 --- a/arch/x86/pci/acpi.c +++ b/arch/x86/pci/acpi.c @@ -396,6 +396,7 @@ int __init pci_acpi_init(void) return -ENODEV; printk(KERN_INFO "PCI: Using ACPI for IRQ routing\n"); + acpi_irq_penalty_init(); pcibios_enable_irq = acpi_pci_irq_enable; pcibios_disable_irq = acpi_pci_irq_disable; x86_init.pci.init_irq = x86_init_noop; diff --git a/drivers/acpi/pci_link.c b/drivers/acpi/pci_link.c index db7be62a8222..606083bb3f00 100644 --- a/drivers/acpi/pci_link.c +++ b/drivers/acpi/pci_link.c @@ -517,6 +517,42 @@ static int acpi_irq_get_penalty(int irq) return penalty; } +int __init acpi_irq_penalty_init(void) +{ + struct acpi_pci_link *link; + int i; + + /* + * Update penalties to facilitate IRQ balancing. + */ + list_for_each_entry(link, &acpi_link_list, list) { + + /* + * reflect the possible and active irqs in the penalty table -- + * useful for breaking ties. + */ + if (link->irq.possible_count) { + int penalty = + PIRQ_PENALTY_PCI_POSSIBLE / + link->irq.possible_count; + + for (i = 0; i < link->irq.possible_count; i++) { + if (link->irq.possible[i] < ACPI_MAX_ISA_IRQS) + acpi_isa_irq_penalty[link->irq. 
+ possible[i]] += + penalty; + } + + } else if (link->irq.active && + (link->irq.active < ACPI_MAX_ISA_IRQS)) { + acpi_isa_irq_penalty[link->irq.active] += + PIRQ_PENALTY_PCI_POSSIBLE; + } + } + + return 0; +} + static int acpi_irq_balance = -1; /* 0: static, 1: balance */ static int acpi_pci_link_allocate(struct acpi_pci_link *link) diff --git a/include/acpi/acpi_drivers.h b/include/acpi/acpi_drivers.h index 797ae2ec8eee..29c691265b49 100644 --- a/include/acpi/acpi_drivers.h +++ b/include/acpi/acpi_drivers.h @@ -78,6 +78,7 @@ /* ACPI PCI Interrupt Link (pci_link.c) */ +int acpi_irq_penalty_init(void); int acpi_pci_link_allocate_irq(acpi_handle handle, int index, int *triggering, int *polarity, char **name); int acpi_pci_link_free_irq(acpi_handle handle); -- cgit v1.2.3 From 9010ae4a8dee29e5886e86682799dde0eee7f447 Mon Sep 17 00:00:00 2001 From: Stephane Eranian Date: Fri, 1 Jul 2016 15:22:22 -0700 Subject: perf/x86/intel: Update event constraints when HT is off This patch updates the event constraints for non-PEBS mode for Intel Broadwell and Skylake processors. When HT is off, each CPU gets 8 generic counters. However, not all events can be programmed on any of the 8 counters. This patch adds the constraints for the MEM_* events which can only be measured on the bottom 4 counters. The constraints are also valid when HT is off because, then, there are only 4 generic counters and they are the bottom counters. Signed-off-by: Stephane Eranian Cc: Alexander Shishkin Cc: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Vince Weaver Cc: kan.liang@intel.com Link: http://lkml.kernel.org/r/1467411742-13245-1-git-send-email-eranian@google.com Signed-off-by: Ingo Molnar --- arch/x86/events/intel/core.c | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) (limited to 'arch') diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c index 7c666958a625..9b4f9d3ce465 100644 --- a/arch/x86/events/intel/core.c +++ b/arch/x86/events/intel/core.c @@ -115,6 +115,10 @@ static struct event_constraint intel_snb_event_constraints[] __read_mostly = INTEL_UEVENT_CONSTRAINT(0x04a3, 0xf), /* CYCLE_ACTIVITY.CYCLES_NO_DISPATCH */ INTEL_UEVENT_CONSTRAINT(0x02a3, 0x4), /* CYCLE_ACTIVITY.CYCLES_L1D_PENDING */ + /* + * When HT is off these events can only run on the bottom 4 counters + * When HT is on, they are impacted by the HT bug and require EXCL access + */ INTEL_EXCLEVT_CONSTRAINT(0xd0, 0xf), /* MEM_UOPS_RETIRED.* */ INTEL_EXCLEVT_CONSTRAINT(0xd1, 0xf), /* MEM_LOAD_UOPS_RETIRED.* */ INTEL_EXCLEVT_CONSTRAINT(0xd2, 0xf), /* MEM_LOAD_UOPS_LLC_HIT_RETIRED.* */ @@ -139,6 +143,10 @@ static struct event_constraint intel_ivb_event_constraints[] __read_mostly = INTEL_UEVENT_CONSTRAINT(0x0ca3, 0x4), /* CYCLE_ACTIVITY.STALLS_L1D_PENDING */ INTEL_UEVENT_CONSTRAINT(0x01c0, 0x2), /* INST_RETIRED.PREC_DIST */ + /* + * When HT is off these events can only run on the bottom 4 counters + * When HT is on, they are impacted by the HT bug and require EXCL access + */ INTEL_EXCLEVT_CONSTRAINT(0xd0, 0xf), /* MEM_UOPS_RETIRED.* */ INTEL_EXCLEVT_CONSTRAINT(0xd1, 0xf), /* MEM_LOAD_UOPS_RETIRED.* */ INTEL_EXCLEVT_CONSTRAINT(0xd2, 0xf), /* MEM_LOAD_UOPS_LLC_HIT_RETIRED.* */ @@ -182,6 +190,16 @@ struct event_constraint intel_skl_event_constraints[] = { FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */ FIXED_EVENT_CONSTRAINT(0x0300, 2), /* CPU_CLK_UNHALTED.REF */ INTEL_UEVENT_CONSTRAINT(0x1c0, 0x2), /* INST_RETIRED.PREC_DIST */ + + /* + * when HT is 
off, these can only run on the bottom 4 counters + */ + INTEL_EVENT_CONSTRAINT(0xd0, 0xf), /* MEM_INST_RETIRED.* */ + INTEL_EVENT_CONSTRAINT(0xd1, 0xf), /* MEM_LOAD_RETIRED.* */ + INTEL_EVENT_CONSTRAINT(0xd2, 0xf), /* MEM_LOAD_L3_HIT_RETIRED.* */ + INTEL_EVENT_CONSTRAINT(0xcd, 0xf), /* MEM_TRANS_RETIRED.* */ + INTEL_EVENT_CONSTRAINT(0xc6, 0xf), /* FRONTEND_RETIRED.* */ + EVENT_CONSTRAINT_END }; @@ -250,6 +268,10 @@ static struct event_constraint intel_hsw_event_constraints[] = { /* CYCLE_ACTIVITY.CYCLES_NO_EXECUTE */ INTEL_UEVENT_CONSTRAINT(0x04a3, 0xf), + /* + * When HT is off these events can only run on the bottom 4 counters + * When HT is on, they are impacted by the HT bug and require EXCL access + */ INTEL_EXCLEVT_CONSTRAINT(0xd0, 0xf), /* MEM_UOPS_RETIRED.* */ INTEL_EXCLEVT_CONSTRAINT(0xd1, 0xf), /* MEM_LOAD_UOPS_RETIRED.* */ INTEL_EXCLEVT_CONSTRAINT(0xd2, 0xf), /* MEM_LOAD_UOPS_LLC_HIT_RETIRED.* */ @@ -264,6 +286,13 @@ struct event_constraint intel_bdw_event_constraints[] = { FIXED_EVENT_CONSTRAINT(0x0300, 2), /* CPU_CLK_UNHALTED.REF */ INTEL_UEVENT_CONSTRAINT(0x148, 0x4), /* L1D_PEND_MISS.PENDING */ INTEL_UBIT_EVENT_CONSTRAINT(0x8a3, 0x4), /* CYCLE_ACTIVITY.CYCLES_L1D_MISS */ + /* + * when HT is off, these can only run on the bottom 4 counters + */ + INTEL_EVENT_CONSTRAINT(0xd0, 0xf), /* MEM_INST_RETIRED.* */ + INTEL_EVENT_CONSTRAINT(0xd1, 0xf), /* MEM_LOAD_RETIRED.* */ + INTEL_EVENT_CONSTRAINT(0xd2, 0xf), /* MEM_LOAD_L3_HIT_RETIRED.* */ + INTEL_EVENT_CONSTRAINT(0xcd, 0xf), /* MEM_TRANS_RETIRED.* */ EVENT_CONSTRAINT_END }; -- cgit v1.2.3 From fc18822510721fe694d273c5211c71ea52796d76 Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Fri, 1 Jul 2016 23:02:05 -0500 Subject: perf/x86: Fix 32-bit perf user callgraph collection A basic perf callgraph record operation causes an immediate panic on a 32-bit kernel compiled with CONFIG_CC_STACKPROTECTOR=y: $ perf record -g ls Kernel panic - not syncing: stack-protector: Kernel stack is corrupted in: c0404fbd CPU: 0 PID: 998 Comm: ls Not tainted 4.7.0-rc5+ #1 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.9.1-1.fc24 04/01/2014 c0dd5967 ff7afe1c 00000086 f41dbc2c c07445a0 464c457f f41dbca8 f41dbc44 c05646f4 f41dbca8 464c457f f41dbca8 464c457f f41dbc54 c04625be c0ce56fc c0404fbd f41dbc88 c0404fbd b74668f0 f41dc000 00000000 c0000000 00000000 Call Trace: [] dump_stack+0x58/0x78 [] panic+0x8e/0x1c6 [] __stack_chk_fail+0x1e/0x30 [] ? perf_callchain_user+0x22d/0x230 [] perf_callchain_user+0x22d/0x230 [] get_perf_callchain+0x1ff/0x270 [] perf_callchain+0x78/0x90 [] perf_prepare_sample+0x24b/0x370 [] perf_event_output_forward+0x24/0x70 [] __perf_event_overflow+0xa0/0x210 [] ? cpu_clock_event_read+0x43/0x50 [] perf_swevent_hrtimer+0x101/0x180 [] ? kmap_atomic_prot+0x35/0x140 [] ? get_page_from_freelist+0x279/0x950 [] ? vma_interval_tree_remove+0x158/0x230 [] ? wp_page_copy.isra.82+0x2f4/0x630 [] ? page_add_file_rmap+0x1d/0x50 [] ? unlock_page+0x61/0x80 [] ? filemap_map_pages+0x305/0x320 [] ? handle_mm_fault+0xb7f/0x1560 [] ? timerqueue_del+0x1b/0x70 [] ? __remove_hrtimer+0x2e/0x60 [] __hrtimer_run_queues+0xcb/0x2a0 [] ? 
__perf_event_overflow+0x210/0x210 [] hrtimer_interrupt+0x8a/0x180 [] local_apic_timer_interrupt+0x32/0x60 [] smp_apic_timer_interrupt+0x33/0x50 [] apic_timer_interrupt+0x34/0x3c Kernel Offset: disabled ---[ end Kernel panic - not syncing: stack-protector: Kernel stack is corrupted in: c0404fbd The panic is caused by the fact that perf_callchain_user() mistakenly assumes it's 64-bit only and ends up corrupting the stack. Signed-off-by: Josh Poimboeuf Cc: Alexander Shishkin Cc: Arnaldo Carvalho de Melo Cc: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Thomas Gleixner Cc: Vince Weaver Cc: stable@vger.kernel.org # v4.5+ Fixes: 75925e1ad7f5 ("perf/x86: Optimize stack walk user accesses") Link: http://lkml.kernel.org/r/1a547f5077ec30f75f9b57074837c3c80df86e5e.1467432113.git.jpoimboe@redhat.com Signed-off-by: Ingo Molnar --- arch/x86/events/core.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) (limited to 'arch') diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c index 33787ee817f0..26ced536005a 100644 --- a/arch/x86/events/core.c +++ b/arch/x86/events/core.c @@ -2319,7 +2319,7 @@ void perf_callchain_user(struct perf_callchain_entry_ctx *entry, struct pt_regs *regs) { struct stack_frame frame; - const void __user *fp; + const unsigned long __user *fp; if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) { /* TODO: We don't support guest os callchain now */ @@ -2332,7 +2332,7 @@ perf_callchain_user(struct perf_callchain_entry_ctx *entry, struct pt_regs *regs if (regs->flags & (X86_VM_MASK | PERF_EFLAGS_VM)) return; - fp = (void __user *)regs->bp; + fp = (unsigned long __user *)regs->bp; perf_callchain_store(entry, regs->ip); @@ -2345,16 +2345,17 @@ perf_callchain_user(struct perf_callchain_entry_ctx *entry, struct pt_regs *regs pagefault_disable(); while (entry->nr < entry->max_stack) { unsigned long bytes; + frame.next_frame = NULL; frame.return_address = 0; - if (!access_ok(VERIFY_READ, fp, 16)) + if (!access_ok(VERIFY_READ, fp, sizeof(*fp) * 2)) break; - bytes = __copy_from_user_nmi(&frame.next_frame, fp, 8); + bytes = __copy_from_user_nmi(&frame.next_frame, fp, sizeof(*fp)); if (bytes != 0) break; - bytes = __copy_from_user_nmi(&frame.return_address, fp+8, 8); + bytes = __copy_from_user_nmi(&frame.return_address, fp + 1, sizeof(*fp)); if (bytes != 0) break; -- cgit v1.2.3 From 86a8280a7fe007d61b05fa8a352edc0595283dad Mon Sep 17 00:00:00 2001 From: Andrea Gelmini Date: Sat, 21 May 2016 13:57:20 +0200 Subject: m68k: Assorted spelling fixes - s/acccess/access/ - s/accoding/according/ - s/addad/added/ - s/addreess/address/ - s/allocatiom/allocation/ - s/Assember/Assembler/ - s/compactnes/compactness/ - s/conneced/connected/ - s/decending/descending/ - s/diectly/directly/ - s/diplacement/displacement/ Signed-off-by: Andrea Gelmini [geert: Squashed, fix arch/m68k/ifpsp060/src/pfpsp.S] Signed-off-by: Geert Uytterhoeven --- arch/m68k/coldfire/head.S | 2 +- arch/m68k/coldfire/m5272.c | 2 +- arch/m68k/coldfire/pci.c | 2 +- arch/m68k/ifpsp060/src/fpsp.S | 8 ++++---- arch/m68k/ifpsp060/src/pfpsp.S | 4 ++-- arch/m68k/include/asm/dma.h | 2 +- arch/m68k/include/asm/m525xsim.h | 4 ++-- arch/m68k/include/asm/mcfmmu.h | 2 +- arch/m68k/include/asm/q40_master.h | 2 +- arch/m68k/mac/iop.c | 2 +- arch/m68k/math-emu/fp_decode.h | 2 +- 11 files changed, 16 insertions(+), 16 deletions(-) (limited to 'arch') diff --git a/arch/m68k/coldfire/head.S b/arch/m68k/coldfire/head.S index fa31be297b85..73d92ea0ce65 100644 --- 
a/arch/m68k/coldfire/head.S +++ b/arch/m68k/coldfire/head.S @@ -288,7 +288,7 @@ _clear_bss: #endif /* - * Assember start up done, start code proper. + * Assembler start up done, start code proper. */ jsr start_kernel /* start Linux kernel */ diff --git a/arch/m68k/coldfire/m5272.c b/arch/m68k/coldfire/m5272.c index c525e4c08f84..9abb1a441da0 100644 --- a/arch/m68k/coldfire/m5272.c +++ b/arch/m68k/coldfire/m5272.c @@ -111,7 +111,7 @@ void __init config_BSP(char *commandp, int size) /***************************************************************************/ /* - * Some 5272 based boards have the FEC ethernet diectly connected to + * Some 5272 based boards have the FEC ethernet directly connected to * an ethernet switch. In this case we need to use the fixed phy type, * and we need to declare it early in boot. */ diff --git a/arch/m68k/coldfire/pci.c b/arch/m68k/coldfire/pci.c index 821de928dc3f..6a640be48568 100644 --- a/arch/m68k/coldfire/pci.c +++ b/arch/m68k/coldfire/pci.c @@ -42,7 +42,7 @@ static unsigned long iospace; /* * We need to be carefull probing on bus 0 (directly connected to host - * bridge). We should only acccess the well defined possible devices in + * bridge). We should only access the well defined possible devices in * use, ignore aliases and the like. */ static unsigned char mcf_host_slot2sid[32] = { diff --git a/arch/m68k/ifpsp060/src/fpsp.S b/arch/m68k/ifpsp060/src/fpsp.S index 78cb60f5bb4d..9bbffebe3eb5 100644 --- a/arch/m68k/ifpsp060/src/fpsp.S +++ b/arch/m68k/ifpsp060/src/fpsp.S @@ -10191,7 +10191,7 @@ xdnrm_con: xdnrm_sd: mov.l %a1,-(%sp) tst.b LOCAL_EX(%a0) # is denorm pos or neg? - smi.b %d1 # set d0 accodingly + smi.b %d1 # set d0 accordingly bsr.l unf_sub mov.l (%sp)+,%a1 xdnrm_exit: @@ -10990,7 +10990,7 @@ src_qnan_m: # routines where an instruction is selected by an index into # a large jump table corresponding to a given instruction which # has been decoded. Flow continues here where we now decode -# further accoding to the source operand type. +# further according to the source operand type. # global fsinh @@ -23196,14 +23196,14 @@ m_sign: # # 1. Branch on the sign of the adjusted exponent. # 2p.(positive exp) -# 2. Check M16 and the digits in lwords 2 and 3 in decending order. +# 2. Check M16 and the digits in lwords 2 and 3 in descending order. # 3. Add one for each zero encountered until a non-zero digit. # 4. Subtract the count from the exp. # 5. Check if the exp has crossed zero in #3 above; make the exp abs # and set SE. # 6. Multiply the mantissa by 10**count. # 2n.(negative exp) -# 2. Check the digits in lwords 3 and 2 in decending order. +# 2. Check the digits in lwords 3 and 2 in descending order. # 3. Add one for each zero encountered until a non-zero digit. # 4. Add the count to the exp. # 5. Check if the exp has crossed zero in #3 above; clear SE. diff --git a/arch/m68k/ifpsp060/src/pfpsp.S b/arch/m68k/ifpsp060/src/pfpsp.S index 4aedef973cf6..3535e6c87eec 100644 --- a/arch/m68k/ifpsp060/src/pfpsp.S +++ b/arch/m68k/ifpsp060/src/pfpsp.S @@ -13156,14 +13156,14 @@ m_sign: # # 1. Branch on the sign of the adjusted exponent. # 2p.(positive exp) -# 2. Check M16 and the digits in lwords 2 and 3 in decending order. +# 2. Check M16 and the digits in lwords 2 and 3 in descending order. # 3. Add one for each zero encountered until a non-zero digit. # 4. Subtract the count from the exp. # 5. Check if the exp has crossed zero in #3 above; make the exp abs # and set SE. # 6. Multiply the mantissa by 10**count. # 2n.(negative exp) -# 2. 
Check the digits in lwords 3 and 2 in decending order. +# 2. Check the digits in lwords 3 and 2 in descending order. # 3. Add one for each zero encountered until a non-zero digit. # 4. Add the count to the exp. # 5. Check if the exp has crossed zero in #3 above; clear SE. diff --git a/arch/m68k/include/asm/dma.h b/arch/m68k/include/asm/dma.h index 429fe26e320c..208b4daa14b3 100644 --- a/arch/m68k/include/asm/dma.h +++ b/arch/m68k/include/asm/dma.h @@ -18,7 +18,7 @@ * AUG/22/2000 : added support for 32-bit Dual-Address-Mode (K) 2000 * Oliver Kamphenkel (O.Kamphenkel@tu-bs.de) * - * AUG/25/2000 : addad support for 8, 16 and 32-bit Single-Address-Mode (K)2000 + * AUG/25/2000 : added support for 8, 16 and 32-bit Single-Address-Mode (K)2000 * Oliver Kamphenkel (O.Kamphenkel@tu-bs.de) * * APR/18/2002 : added proper support for MCF5272 DMA controller. diff --git a/arch/m68k/include/asm/m525xsim.h b/arch/m68k/include/asm/m525xsim.h index f186459072e9..699f20c8a0fe 100644 --- a/arch/m68k/include/asm/m525xsim.h +++ b/arch/m68k/include/asm/m525xsim.h @@ -123,10 +123,10 @@ /* * I2C module. */ -#define MCFI2C_BASE0 (MCF_MBAR + 0x280) /* Base addreess I2C0 */ +#define MCFI2C_BASE0 (MCF_MBAR + 0x280) /* Base address I2C0 */ #define MCFI2C_SIZE0 0x20 /* Register set size */ -#define MCFI2C_BASE1 (MCF_MBAR2 + 0x440) /* Base addreess I2C1 */ +#define MCFI2C_BASE1 (MCF_MBAR2 + 0x440) /* Base address I2C1 */ #define MCFI2C_SIZE1 0x20 /* Register set size */ /* diff --git a/arch/m68k/include/asm/mcfmmu.h b/arch/m68k/include/asm/mcfmmu.h index 26cc3d5a63f8..8824236e303f 100644 --- a/arch/m68k/include/asm/mcfmmu.h +++ b/arch/m68k/include/asm/mcfmmu.h @@ -38,7 +38,7 @@ /* * MMU Operation register. */ -#define MMUOR_UAA 0x00000001 /* Update allocatiom address */ +#define MMUOR_UAA 0x00000001 /* Update allocation address */ #define MMUOR_ACC 0x00000002 /* TLB access */ #define MMUOR_RD 0x00000004 /* TLB access read */ #define MMUOR_WR 0x00000000 /* TLB access write */ diff --git a/arch/m68k/include/asm/q40_master.h b/arch/m68k/include/asm/q40_master.h index fc5b36278d04..c48d21b68f04 100644 --- a/arch/m68k/include/asm/q40_master.h +++ b/arch/m68k/include/asm/q40_master.h @@ -1,6 +1,6 @@ /* * Q40 master Chip Control - * RTC stuff merged for compactnes.. + * RTC stuff merged for compactness. */ #ifndef _Q40_MASTER_H diff --git a/arch/m68k/mac/iop.c b/arch/m68k/mac/iop.c index 4d2adfb32a2a..7990b6f50105 100644 --- a/arch/m68k/mac/iop.c +++ b/arch/m68k/mac/iop.c @@ -60,7 +60,7 @@ * * The host talks to the IOPs using a rather simple message-passing scheme via * a shared memory area in the IOP RAM. Each IOP has seven "channels"; each - * channel is conneced to a specific software driver on the IOP. For example + * channel is connected to a specific software driver on the IOP. For example * on the SCC IOP there is one channel for each serial port. Each channel has * an incoming and and outgoing message queue with a depth of one. 
* diff --git a/arch/m68k/math-emu/fp_decode.h b/arch/m68k/math-emu/fp_decode.h index 759679d9ab96..6d1e760e2a0e 100644 --- a/arch/m68k/math-emu/fp_decode.h +++ b/arch/m68k/math-emu/fp_decode.h @@ -130,7 +130,7 @@ do_fscc=0 bfextu %d2{#13,#3},%d0 .endm -| decode the 8bit diplacement from the brief extension word +| decode the 8bit displacement from the brief extension word .macro fp_decode_disp8 move.b %d2,%d0 ext.w %d0 -- cgit v1.2.3 From 06ee6d571f0e350253a8fc3492316b2be007fae2 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 4 Jul 2016 17:39:24 +0900 Subject: genirq: Add affinity hint to irq allocation Add an extra argument to the irq(domain) allocation functions, so we can hand down affinity hints to the allocator. Thats necessary to implement proper support for multiqueue devices. Signed-off-by: Thomas Gleixner Cc: Christoph Hellwig Cc: linux-block@vger.kernel.org Cc: linux-pci@vger.kernel.org Cc: linux-nvme@lists.infradead.org Cc: axboe@fb.com Cc: agordeev@redhat.com Link: http://lkml.kernel.org/r/1467621574-8277-4-git-send-email-hch@lst.de Signed-off-by: Thomas Gleixner --- arch/sparc/kernel/irq_64.c | 2 +- arch/x86/kernel/apic/io_apic.c | 5 +++-- include/linux/irq.h | 4 ++-- include/linux/irqdomain.h | 9 ++++++--- kernel/irq/ipi.c | 2 +- kernel/irq/irqdesc.c | 12 ++++++++---- kernel/irq/irqdomain.c | 22 ++++++++++++++-------- kernel/irq/manage.c | 7 ++++--- kernel/irq/msi.c | 3 ++- 9 files changed, 41 insertions(+), 25 deletions(-) (limited to 'arch') diff --git a/arch/sparc/kernel/irq_64.c b/arch/sparc/kernel/irq_64.c index e22416ce56ea..34a7930b76ef 100644 --- a/arch/sparc/kernel/irq_64.c +++ b/arch/sparc/kernel/irq_64.c @@ -242,7 +242,7 @@ unsigned int irq_alloc(unsigned int dev_handle, unsigned int dev_ino) { int irq; - irq = __irq_alloc_descs(-1, 1, 1, numa_node_id(), NULL); + irq = __irq_alloc_descs(-1, 1, 1, numa_node_id(), NULL, NULL); if (irq <= 0) goto out; diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index 446702ed99dc..7c4f90dd4c2a 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -981,7 +981,7 @@ static int alloc_irq_from_domain(struct irq_domain *domain, int ioapic, u32 gsi, return __irq_domain_alloc_irqs(domain, irq, 1, ioapic_alloc_attr_node(info), - info, legacy); + info, legacy, NULL); } /* @@ -1014,7 +1014,8 @@ static int alloc_isa_irq_from_domain(struct irq_domain *domain, info->ioapic_pin)) return -ENOMEM; } else { - irq = __irq_domain_alloc_irqs(domain, irq, 1, node, info, true); + irq = __irq_domain_alloc_irqs(domain, irq, 1, node, info, true, + NULL); if (irq >= 0) { irq_data = irq_domain_get_irq_data(domain, irq); data = irq_data->chip_data; diff --git a/include/linux/irq.h b/include/linux/irq.h index f6074813688d..39ce46ac5c18 100644 --- a/include/linux/irq.h +++ b/include/linux/irq.h @@ -708,11 +708,11 @@ static inline struct cpumask *irq_data_get_affinity_mask(struct irq_data *d) unsigned int arch_dynirq_lower_bound(unsigned int from); int __irq_alloc_descs(int irq, unsigned int from, unsigned int cnt, int node, - struct module *owner); + struct module *owner, const struct cpumask *affinity); /* use macros to avoid needing export.h for THIS_MODULE */ #define irq_alloc_descs(irq, from, cnt, node) \ - __irq_alloc_descs(irq, from, cnt, node, THIS_MODULE) + __irq_alloc_descs(irq, from, cnt, node, THIS_MODULE, NULL) #define irq_alloc_desc(node) \ irq_alloc_descs(-1, 0, 1, node) diff --git a/include/linux/irqdomain.h b/include/linux/irqdomain.h index f1f36e04d885..1aee0fbe900e 100644 --- 
a/include/linux/irqdomain.h +++ b/include/linux/irqdomain.h @@ -39,6 +39,7 @@ struct irq_domain; struct of_device_id; struct irq_chip; struct irq_data; +struct cpumask; /* Number of irqs reserved for a legacy isa controller */ #define NUM_ISA_INTERRUPTS 16 @@ -217,7 +218,8 @@ extern struct irq_domain *irq_find_matching_fwspec(struct irq_fwspec *fwspec, enum irq_domain_bus_token bus_token); extern void irq_set_default_host(struct irq_domain *host); extern int irq_domain_alloc_descs(int virq, unsigned int nr_irqs, - irq_hw_number_t hwirq, int node); + irq_hw_number_t hwirq, int node, + const struct cpumask *affinity); static inline struct fwnode_handle *of_node_to_fwnode(struct device_node *node) { @@ -389,7 +391,7 @@ static inline struct irq_domain *irq_domain_add_hierarchy(struct irq_domain *par extern int __irq_domain_alloc_irqs(struct irq_domain *domain, int irq_base, unsigned int nr_irqs, int node, void *arg, - bool realloc); + bool realloc, const struct cpumask *affinity); extern void irq_domain_free_irqs(unsigned int virq, unsigned int nr_irqs); extern void irq_domain_activate_irq(struct irq_data *irq_data); extern void irq_domain_deactivate_irq(struct irq_data *irq_data); @@ -397,7 +399,8 @@ extern void irq_domain_deactivate_irq(struct irq_data *irq_data); static inline int irq_domain_alloc_irqs(struct irq_domain *domain, unsigned int nr_irqs, int node, void *arg) { - return __irq_domain_alloc_irqs(domain, -1, nr_irqs, node, arg, false); + return __irq_domain_alloc_irqs(domain, -1, nr_irqs, node, arg, false, + NULL); } extern int irq_domain_alloc_irqs_recursive(struct irq_domain *domain, diff --git a/kernel/irq/ipi.c b/kernel/irq/ipi.c index 89b49f6773f0..4fd23510d5f2 100644 --- a/kernel/irq/ipi.c +++ b/kernel/irq/ipi.c @@ -76,7 +76,7 @@ int irq_reserve_ipi(struct irq_domain *domain, } } - virq = irq_domain_alloc_descs(-1, nr_irqs, 0, NUMA_NO_NODE); + virq = irq_domain_alloc_descs(-1, nr_irqs, 0, NUMA_NO_NODE, NULL); if (virq <= 0) { pr_warn("Can't reserve IPI, failed to alloc descs\n"); return -ENOMEM; diff --git a/kernel/irq/irqdesc.c b/kernel/irq/irqdesc.c index 8731e1c5d1e7..b8df4fcdbb5f 100644 --- a/kernel/irq/irqdesc.c +++ b/kernel/irq/irqdesc.c @@ -223,7 +223,7 @@ static void free_desc(unsigned int irq) } static int alloc_descs(unsigned int start, unsigned int cnt, int node, - struct module *owner) + const struct cpumask *affinity, struct module *owner) { struct irq_desc *desc; int i; @@ -333,6 +333,7 @@ static void free_desc(unsigned int irq) } static inline int alloc_descs(unsigned int start, unsigned int cnt, int node, + const struct cpumask *affinity, struct module *owner) { u32 i; @@ -453,12 +454,15 @@ EXPORT_SYMBOL_GPL(irq_free_descs); * @cnt: Number of consecutive irqs to allocate. 
* @node: Preferred node on which the irq descriptor should be allocated * @owner: Owning module (can be NULL) + * @affinity: Optional pointer to an affinity mask which hints where the + * irq descriptors should be allocated and which default + * affinities to use * * Returns the first irq number or error code */ int __ref __irq_alloc_descs(int irq, unsigned int from, unsigned int cnt, int node, - struct module *owner) + struct module *owner, const struct cpumask *affinity) { int start, ret; @@ -494,7 +498,7 @@ __irq_alloc_descs(int irq, unsigned int from, unsigned int cnt, int node, bitmap_set(allocated_irqs, start, cnt); mutex_unlock(&sparse_irq_lock); - return alloc_descs(start, cnt, node, owner); + return alloc_descs(start, cnt, node, affinity, owner); err: mutex_unlock(&sparse_irq_lock); @@ -512,7 +516,7 @@ EXPORT_SYMBOL_GPL(__irq_alloc_descs); */ unsigned int irq_alloc_hwirqs(int cnt, int node) { - int i, irq = __irq_alloc_descs(-1, 0, cnt, node, NULL); + int i, irq = __irq_alloc_descs(-1, 0, cnt, node, NULL, NULL); if (irq < 0) return 0; diff --git a/kernel/irq/irqdomain.c b/kernel/irq/irqdomain.c index 8798b6c9e945..79459b732dc9 100644 --- a/kernel/irq/irqdomain.c +++ b/kernel/irq/irqdomain.c @@ -481,7 +481,7 @@ unsigned int irq_create_mapping(struct irq_domain *domain, } /* Allocate a virtual interrupt number */ - virq = irq_domain_alloc_descs(-1, 1, hwirq, of_node_to_nid(of_node)); + virq = irq_domain_alloc_descs(-1, 1, hwirq, of_node_to_nid(of_node), NULL); if (virq <= 0) { pr_debug("-> virq allocation failed\n"); return 0; @@ -835,19 +835,23 @@ const struct irq_domain_ops irq_domain_simple_ops = { EXPORT_SYMBOL_GPL(irq_domain_simple_ops); int irq_domain_alloc_descs(int virq, unsigned int cnt, irq_hw_number_t hwirq, - int node) + int node, const struct cpumask *affinity) { unsigned int hint; if (virq >= 0) { - virq = irq_alloc_descs(virq, virq, cnt, node); + virq = __irq_alloc_descs(virq, virq, cnt, node, THIS_MODULE, + affinity); } else { hint = hwirq % nr_irqs; if (hint == 0) hint++; - virq = irq_alloc_descs_from(hint, cnt, node); - if (virq <= 0 && hint > 1) - virq = irq_alloc_descs_from(1, cnt, node); + virq = __irq_alloc_descs(-1, hint, cnt, node, THIS_MODULE, + affinity); + if (virq <= 0 && hint > 1) { + virq = __irq_alloc_descs(-1, 1, cnt, node, THIS_MODULE, + affinity); + } } return virq; @@ -1160,6 +1164,7 @@ int irq_domain_alloc_irqs_recursive(struct irq_domain *domain, * @node: NUMA node id for memory allocation * @arg: domain specific argument * @realloc: IRQ descriptors have already been allocated if true + * @affinity: Optional irq affinity mask for multiqueue devices * * Allocate IRQ numbers and initialized all data structures to support * hierarchy IRQ domains. 
@@ -1175,7 +1180,7 @@ int irq_domain_alloc_irqs_recursive(struct irq_domain *domain, */ int __irq_domain_alloc_irqs(struct irq_domain *domain, int irq_base, unsigned int nr_irqs, int node, void *arg, - bool realloc) + bool realloc, const struct cpumask *affinity) { int i, ret, virq; @@ -1193,7 +1198,8 @@ int __irq_domain_alloc_irqs(struct irq_domain *domain, int irq_base, if (realloc && irq_base >= 0) { virq = irq_base; } else { - virq = irq_domain_alloc_descs(irq_base, nr_irqs, 0, node); + virq = irq_domain_alloc_descs(irq_base, nr_irqs, 0, node, + affinity); if (virq < 0) { pr_debug("cannot allocate IRQ(base %d, count %d)\n", irq_base, nr_irqs); diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c index 30658e9827f0..ad0aac6d1248 100644 --- a/kernel/irq/manage.c +++ b/kernel/irq/manage.c @@ -353,10 +353,11 @@ static int setup_affinity(struct irq_desc *desc, struct cpumask *mask) return 0; /* - * Preserve an userspace affinity setup, but make sure that - * one of the targets is online. + * Preserve the managed affinity setting and an userspace affinity + * setup, but make sure that one of the targets is online. */ - if (irqd_has_set(&desc->irq_data, IRQD_AFFINITY_SET)) { + if (irqd_affinity_is_managed(&desc->irq_data) || + irqd_has_set(&desc->irq_data, IRQD_AFFINITY_SET)) { if (cpumask_intersects(desc->irq_common_data.affinity, cpu_online_mask)) set = desc->irq_common_data.affinity; diff --git a/kernel/irq/msi.c b/kernel/irq/msi.c index eb5bf2b50b07..58dbbacc6fbb 100644 --- a/kernel/irq/msi.c +++ b/kernel/irq/msi.c @@ -334,7 +334,8 @@ int msi_domain_alloc_irqs(struct irq_domain *domain, struct device *dev, ops->set_desc(&arg, desc); virq = __irq_domain_alloc_irqs(domain, -1, desc->nvec_used, - dev_to_node(dev), &arg, false); + dev_to_node(dev), &arg, false, + NULL); if (virq < 0) { ret = -ENOSPC; if (ops->handle_error) -- cgit v1.2.3 From 175a20c16fdb7700fcac63f1eeb2caa7e1dddd2d Mon Sep 17 00:00:00 2001 From: Ville Syrjälä Date: Thu, 23 Jun 2016 18:06:49 +0300 Subject: x86/perf/intel/rapl: Fix module name collision with powercap intel-rapl MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Since commit 4b6e2571bf00 the rapl perf module calls itself intel-rapl. That name was already in use by the rapl powercap driver, which now fails to load if the perf module is loaded. Fix the problem by renaming the perf module to intel-rapl-perf, so that both modules can coexist. 
Fixes: 4b6e2571bf00 ("x86/perf/intel/rapl: Make the Intel RAPL PMU driver modular") Signed-off-by: Ville Syrjälä Cc: Vince Weaver Cc: Alexander Shishkin Cc: Kan Liang Cc: Stephane Eranian Cc: Arnaldo Carvalho de Melo Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Jiri Olsa Link: http://lkml.kernel.org/r/1466694409-3620-1-git-send-email-ville.syrjala@linux.intel.com Signed-off-by: Thomas Gleixner --- arch/x86/events/intel/Makefile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/x86/events/intel/Makefile b/arch/x86/events/intel/Makefile index 3660b2cf245a..06c2baa51814 100644 --- a/arch/x86/events/intel/Makefile +++ b/arch/x86/events/intel/Makefile @@ -1,8 +1,8 @@ obj-$(CONFIG_CPU_SUP_INTEL) += core.o bts.o cqm.o obj-$(CONFIG_CPU_SUP_INTEL) += ds.o knc.o obj-$(CONFIG_CPU_SUP_INTEL) += lbr.o p4.o p6.o pt.o -obj-$(CONFIG_PERF_EVENTS_INTEL_RAPL) += intel-rapl.o -intel-rapl-objs := rapl.o +obj-$(CONFIG_PERF_EVENTS_INTEL_RAPL) += intel-rapl-perf.o +intel-rapl-perf-objs := rapl.o obj-$(CONFIG_PERF_EVENTS_INTEL_UNCORE) += intel-uncore.o intel-uncore-objs := uncore.o uncore_nhmex.o uncore_snb.o uncore_snbep.o obj-$(CONFIG_PERF_EVENTS_INTEL_CSTATE) += intel-cstate.o -- cgit v1.2.3 From 88d02a2ba6c52350f9a73ff1b01a5be839c3ca17 Mon Sep 17 00:00:00 2001 From: David Daney Date: Thu, 16 Jun 2016 15:50:31 -0700 Subject: MIPS: Fix page table corruption on THP permission changes. When the core THP code is modifying the permissions of a huge page it calls pmd_modify(), which unfortunately was clearing the _PAGE_HUGE bit of the page table entry. The result can be kernel messages like: mm/memory.c:397: bad pmd 000000040080004d. mm/memory.c:397: bad pmd 00000003ff00004d. mm/memory.c:397: bad pmd 000000040100004d. or: ------------[ cut here ]------------ WARNING: at mm/mmap.c:3200 exit_mmap+0x150/0x158() Modules linked in: ipv6 at24 octeon3_ethernet octeon_srio_nexus m25p80 CPU: 12 PID: 1295 Comm: pmderr Not tainted 3.10.87-rt80-Cavium-Octeon #4 Stack : 0000000040808000 0000000014009ce1 0000000000400004 ffffffff81076ba0 0000000000000000 0000000000000000 ffffffff85110000 0000000000000119 0000000000000004 0000000000000000 0000000000000119 43617669756d2d4f 0000000000000000 ffffffff850fda40 ffffffff85110000 0000000000000000 0000000000000000 0000000000000009 ffffffff809207a0 0000000000000c80 ffffffff80f1bf20 0000000000000001 000000ffeca36828 0000000000000001 0000000000000000 0000000000000001 000000ffeca7e700 ffffffff80886924 80000003fd7a0000 80000003fd7a39b0 80000003fdea8000 ffffffff80885780 80000003fdea8000 ffffffff80f12218 000000000000000c 000000000000050f 0000000000000000 ffffffff80865c4c 0000000000000000 0000000000000000 ... Call Trace: [] show_stack+0x6c/0xf8 [] warn_slowpath_common+0x78/0xa8 [] exit_mmap+0x150/0x158 [] mmput+0x5c/0x110 [] do_exit+0x230/0xa68 [] do_group_exit+0x54/0x1d0 [] __wake_up_parent+0x0/0x18 ---[ end trace c7b38293191c57dc ]--- BUG: Bad rss-counter state mm:80000003fa168000 idx:1 val:1536 Fix by not clearing _PAGE_HUGE bit. 
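The underlying pattern is generic: when an entry is rebuilt from a new protection value, attribute bits outside the "change mask" have to be carried over explicitly or they are silently dropped. A standalone illustration with invented bit values (not the MIPS definitions):

#include <assert.h>
#include <stdint.h>

/* Illustration only: the bit layout is made up, not the MIPS one. */
#define CHG_MASK 0x00ffu	/* bits preserved from the old entry */
#define HUGE_BIT 0x0100u	/* attribute that must survive a modify */

static uint32_t modify_entry(uint32_t pmd, uint32_t newprot)
{
	/* keep CHG_MASK bits plus the huge flag, take the rest from newprot */
	return (pmd & (CHG_MASK | HUGE_BIT)) | (newprot & ~CHG_MASK);
}

int main(void)
{
	uint32_t huge_pmd = 0x0042u | HUGE_BIT;

	/* a permission change must not clear HUGE_BIT */
	assert(modify_entry(huge_pmd, 0xff00u) & HUGE_BIT);
	return 0;
}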
Signed-off-by: David Daney Tested-by: Aaro Koskinen Cc: stable@vger.kernel.org Cc: linux-mips@linux-mips.org Patchwork: https://patchwork.linux-mips.org/patch/13687/ Signed-off-by: Ralf Baechle --- arch/mips/include/asm/pgtable.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/mips/include/asm/pgtable.h b/arch/mips/include/asm/pgtable.h index f53816744d60..7d44e888134f 100644 --- a/arch/mips/include/asm/pgtable.h +++ b/arch/mips/include/asm/pgtable.h @@ -633,7 +633,7 @@ static inline struct page *pmd_page(pmd_t pmd) static inline pmd_t pmd_modify(pmd_t pmd, pgprot_t newprot) { - pmd_val(pmd) = (pmd_val(pmd) & _PAGE_CHG_MASK) | + pmd_val(pmd) = (pmd_val(pmd) & (_PAGE_CHG_MASK | _PAGE_HUGE)) | (pgprot_val(newprot) & ~_PAGE_CHG_MASK); return pmd; } -- cgit v1.2.3 From 5eb495349f5ec3b134f7341a2450392fc86d99d0 Mon Sep 17 00:00:00 2001 From: Lucas Stach Date: Thu, 30 Jun 2016 17:32:08 +0200 Subject: ARM: tegra: beaver: Allow SD card voltage to be changed This allows to switch the card signal voltage level to 1.8 V, which is needed for any ultra high speed modes to work. Signed-off-by: Lucas Stach Acked-by: Jon Hunter Signed-off-by: Thierry Reding Signed-off-by: Olof Johansson --- arch/arm/boot/dts/tegra30-beaver.dts | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/arm/boot/dts/tegra30-beaver.dts b/arch/arm/boot/dts/tegra30-beaver.dts index 1eca3b28ac64..b6da15d823a6 100644 --- a/arch/arm/boot/dts/tegra30-beaver.dts +++ b/arch/arm/boot/dts/tegra30-beaver.dts @@ -1843,7 +1843,7 @@ ldo5_reg: ldo5 { regulator-name = "vddio_sdmmc,avdd_vdac"; - regulator-min-microvolt = <3300000>; + regulator-min-microvolt = <1800000>; regulator-max-microvolt = <3300000>; regulator-always-on; }; @@ -1914,6 +1914,7 @@ sdhci@78000000 { status = "okay"; + vqmmc-supply = <&ldo5_reg>; cd-gpios = <&gpio TEGRA_GPIO(I, 5) GPIO_ACTIVE_LOW>; wp-gpios = <&gpio TEGRA_GPIO(T, 3) GPIO_ACTIVE_HIGH>; power-gpios = <&gpio TEGRA_GPIO(D, 7) GPIO_ACTIVE_HIGH>; -- cgit v1.2.3 From aefbc4d04c7b09cb6775a32cea7986c62e489ee2 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 30 Jun 2016 11:49:08 +0200 Subject: perf/x86/intel: Fix rdlbr_to() MSR reading typo It helps to actually read the right MSR.. Signed-off-by: Peter Zijlstra (Intel) Cc: Alexander Shishkin Cc: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Thomas Gleixner Cc: Vince Weaver Cc: linux-kernel@vger.kernel.org Fixes: d4cf1949f968 ("perf/x86/intel: Add {rd,wr}lbr_{to,from} wrappers") Signed-off-by: Ingo Molnar --- arch/x86/events/intel/lbr.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/events/intel/lbr.c b/arch/x86/events/intel/lbr.c index cc4555a9e876..707d358e0dff 100644 --- a/arch/x86/events/intel/lbr.c +++ b/arch/x86/events/intel/lbr.c @@ -323,7 +323,7 @@ static inline u64 rdlbr_to(unsigned int idx) { u64 val; - rdmsrl(x86_pmu.lbr_from + idx, val); + rdmsrl(x86_pmu.lbr_to + idx, val); return val; } -- cgit v1.2.3 From 46866b59dfbe9bf99bb1323ce1f3fd2073a81aa3 Mon Sep 17 00:00:00 2001 From: Kan Liang Date: Wed, 29 Jun 2016 07:01:51 -0700 Subject: perf/x86/intel/uncore: Add support for the Intel Skylake client uncore PMU This patch adds full support for Intel SKL client uncore PMU: - Add support for SKL client CPU uncore PMU, which is similar to the BDW client PMU driver. (There are some differences in CBOX numbering and uncore control MSR.) 
- Add new support for SkyLake Mobile uncore PMUs, for both CPU and PCI uncore functionality. Signed-off-by: Kan Liang Signed-off-by: Peter Zijlstra (Intel) Cc: Alexander Shishkin Cc: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Thomas Gleixner Cc: Vince Weaver Link: http://lkml.kernel.org/r/1467208912-8179-1-git-send-email-kan.liang@intel.com Signed-off-by: Ingo Molnar --- arch/x86/events/intel/uncore.c | 2 ++ arch/x86/events/intel/uncore.h | 1 + arch/x86/events/intel/uncore_snb.c | 67 +++++++++++++++++++++++++++++++++++++- 3 files changed, 69 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/events/intel/uncore.c b/arch/x86/events/intel/uncore.c index dc965d2cf076..59b4974c697f 100644 --- a/arch/x86/events/intel/uncore.c +++ b/arch/x86/events/intel/uncore.c @@ -1379,6 +1379,7 @@ static const struct intel_uncore_init_fun knl_uncore_init __initconst = { }; static const struct intel_uncore_init_fun skl_uncore_init __initconst = { + .cpu_init = skl_uncore_cpu_init, .pci_init = skl_uncore_pci_init, }; @@ -1403,6 +1404,7 @@ static const struct x86_cpu_id intel_uncore_match[] __initconst = { X86_UNCORE_MODEL_MATCH(INTEL_FAM6_BROADWELL_XEON_D, bdx_uncore_init), X86_UNCORE_MODEL_MATCH(INTEL_FAM6_XEON_PHI_KNL, knl_uncore_init), X86_UNCORE_MODEL_MATCH(INTEL_FAM6_SKYLAKE_DESKTOP,skl_uncore_init), + X86_UNCORE_MODEL_MATCH(INTEL_FAM6_SKYLAKE_MOBILE, skl_uncore_init), {}, }; diff --git a/arch/x86/events/intel/uncore.h b/arch/x86/events/intel/uncore.h index 66c3a3657a10..d6063e438158 100644 --- a/arch/x86/events/intel/uncore.h +++ b/arch/x86/events/intel/uncore.h @@ -364,6 +364,7 @@ int bdw_uncore_pci_init(void); int skl_uncore_pci_init(void); void snb_uncore_cpu_init(void); void nhm_uncore_cpu_init(void); +void skl_uncore_cpu_init(void); int snb_pci2phy_map_init(int devid); /* perf_event_intel_uncore_snbep.c */ diff --git a/arch/x86/events/intel/uncore_snb.c b/arch/x86/events/intel/uncore_snb.c index 96531d2b843f..97a69dbba649 100644 --- a/arch/x86/events/intel/uncore_snb.c +++ b/arch/x86/events/intel/uncore_snb.c @@ -1,4 +1,4 @@ -/* Nehalem/SandBridge/Haswell uncore support */ +/* Nehalem/SandBridge/Haswell/Broadwell/Skylake uncore support */ #include "uncore.h" /* Uncore IMC PCI IDs */ @@ -9,6 +9,7 @@ #define PCI_DEVICE_ID_INTEL_HSW_U_IMC 0x0a04 #define PCI_DEVICE_ID_INTEL_BDW_IMC 0x1604 #define PCI_DEVICE_ID_INTEL_SKL_IMC 0x191f +#define PCI_DEVICE_ID_INTEL_SKL_U_IMC 0x190c /* SNB event control */ #define SNB_UNC_CTL_EV_SEL_MASK 0x000000ff @@ -64,6 +65,10 @@ #define NHM_UNC_PERFEVTSEL0 0x3c0 #define NHM_UNC_UNCORE_PMC0 0x3b0 +/* SKL uncore global control */ +#define SKL_UNC_PERF_GLOBAL_CTL 0xe01 +#define SKL_UNC_GLOBAL_CTL_CORE_ALL ((1 << 5) - 1) + DEFINE_UNCORE_FORMAT_ATTR(event, event, "config:0-7"); DEFINE_UNCORE_FORMAT_ATTR(umask, umask, "config:8-15"); DEFINE_UNCORE_FORMAT_ATTR(edge, edge, "config:18"); @@ -179,6 +184,60 @@ void snb_uncore_cpu_init(void) snb_uncore_cbox.num_boxes = boot_cpu_data.x86_max_cores; } +static void skl_uncore_msr_init_box(struct intel_uncore_box *box) +{ + if (box->pmu->pmu_idx == 0) { + wrmsrl(SKL_UNC_PERF_GLOBAL_CTL, + SNB_UNC_GLOBAL_CTL_EN | SKL_UNC_GLOBAL_CTL_CORE_ALL); + } +} + +static void skl_uncore_msr_exit_box(struct intel_uncore_box *box) +{ + if (box->pmu->pmu_idx == 0) + wrmsrl(SKL_UNC_PERF_GLOBAL_CTL, 0); +} + +static struct intel_uncore_ops skl_uncore_msr_ops = { + .init_box = skl_uncore_msr_init_box, + .exit_box = skl_uncore_msr_exit_box, + .disable_event = 
snb_uncore_msr_disable_event, + .enable_event = snb_uncore_msr_enable_event, + .read_counter = uncore_msr_read_counter, +}; + +static struct intel_uncore_type skl_uncore_cbox = { + .name = "cbox", + .num_counters = 4, + .num_boxes = 5, + .perf_ctr_bits = 44, + .fixed_ctr_bits = 48, + .perf_ctr = SNB_UNC_CBO_0_PER_CTR0, + .event_ctl = SNB_UNC_CBO_0_PERFEVTSEL0, + .fixed_ctr = SNB_UNC_FIXED_CTR, + .fixed_ctl = SNB_UNC_FIXED_CTR_CTRL, + .single_fixed = 1, + .event_mask = SNB_UNC_RAW_EVENT_MASK, + .msr_offset = SNB_UNC_CBO_MSR_OFFSET, + .ops = &skl_uncore_msr_ops, + .format_group = &snb_uncore_format_group, + .event_descs = snb_uncore_events, +}; + +static struct intel_uncore_type *skl_msr_uncores[] = { + &skl_uncore_cbox, + &snb_uncore_arb, + NULL, +}; + +void skl_uncore_cpu_init(void) +{ + uncore_msr_uncores = skl_msr_uncores; + if (skl_uncore_cbox.num_boxes > boot_cpu_data.x86_max_cores) + skl_uncore_cbox.num_boxes = boot_cpu_data.x86_max_cores; + snb_uncore_arb.ops = &skl_uncore_msr_ops; +} + enum { SNB_PCI_UNCORE_IMC, }; @@ -544,6 +603,11 @@ static const struct pci_device_id skl_uncore_pci_ids[] = { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_SKL_IMC), .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0), }, + { /* IMC */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_SKL_U_IMC), + .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0), + }, + { /* end: all zeroes */ }, }; @@ -587,6 +651,7 @@ static const struct imc_uncore_pci_dev desktop_imc_pci_ids[] = { IMC_DEV(HSW_U_IMC, &hsw_uncore_pci_driver), /* 4th Gen Core ULT Mobile Processor */ IMC_DEV(BDW_IMC, &bdw_uncore_pci_driver), /* 5th Gen Core U */ IMC_DEV(SKL_IMC, &skl_uncore_pci_driver), /* 6th Gen Core */ + IMC_DEV(SKL_U_IMC, &skl_uncore_pci_driver), /* 6th Gen Core U */ { /* end marker */ } }; -- cgit v1.2.3 From 03e3c2b7edbe1e8758196b2c7843333eb328063d Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Mon, 27 Jun 2016 18:43:54 +0100 Subject: locking/barriers, arch/arm64: Implement LDXR+WFE based smp_cond_load_acquire() smp_cond_load_acquire() is used to spin on a variable until some expression involving that variable becomes true. On arm64, we can build this using the LDXR and WFE instructions, since clearing of the exclusive monitor as a result of the variable being changed by another CPU generates an event, which will wake us up out of WFE. This patch implements smp_cond_load_acquire() using LDXR and WFE, which themselves are contained in an internal __cmpwait() function. Signed-off-by: Will Deacon Signed-off-by: Peter Zijlstra (Intel) Cc: Andrew Morton Cc: Linus Torvalds Cc: Paul E. 
McKenney Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: catalin.marinas@arm.com Cc: linux-arm-kernel@lists.infradead.org Link: http://lkml.kernel.org/r/1467049434-30451-1-git-send-email-will.deacon@arm.com Signed-off-by: Ingo Molnar --- arch/arm64/include/asm/barrier.h | 13 ++++++++++ arch/arm64/include/asm/cmpxchg.h | 51 ++++++++++++++++++++++++++++++++++++++++ 2 files changed, 64 insertions(+) (limited to 'arch') diff --git a/arch/arm64/include/asm/barrier.h b/arch/arm64/include/asm/barrier.h index dae5c49618db..4eea7f618dce 100644 --- a/arch/arm64/include/asm/barrier.h +++ b/arch/arm64/include/asm/barrier.h @@ -91,6 +91,19 @@ do { \ __u.__val; \ }) +#define smp_cond_load_acquire(ptr, cond_expr) \ +({ \ + typeof(ptr) __PTR = (ptr); \ + typeof(*ptr) VAL; \ + for (;;) { \ + VAL = smp_load_acquire(__PTR); \ + if (cond_expr) \ + break; \ + __cmpwait_relaxed(__PTR, VAL); \ + } \ + VAL; \ +}) + #include #endif /* __ASSEMBLY__ */ diff --git a/arch/arm64/include/asm/cmpxchg.h b/arch/arm64/include/asm/cmpxchg.h index 510c7b404454..bd86a79491bc 100644 --- a/arch/arm64/include/asm/cmpxchg.h +++ b/arch/arm64/include/asm/cmpxchg.h @@ -224,4 +224,55 @@ __CMPXCHG_GEN(_mb) __ret; \ }) +#define __CMPWAIT_CASE(w, sz, name) \ +static inline void __cmpwait_case_##name(volatile void *ptr, \ + unsigned long val) \ +{ \ + unsigned long tmp; \ + \ + asm volatile( \ + " ldxr" #sz "\t%" #w "[tmp], %[v]\n" \ + " eor %" #w "[tmp], %" #w "[tmp], %" #w "[val]\n" \ + " cbnz %" #w "[tmp], 1f\n" \ + " wfe\n" \ + "1:" \ + : [tmp] "=&r" (tmp), [v] "+Q" (*(unsigned long *)ptr) \ + : [val] "r" (val)); \ +} + +__CMPWAIT_CASE(w, b, 1); +__CMPWAIT_CASE(w, h, 2); +__CMPWAIT_CASE(w, , 4); +__CMPWAIT_CASE( , , 8); + +#undef __CMPWAIT_CASE + +#define __CMPWAIT_GEN(sfx) \ +static inline void __cmpwait##sfx(volatile void *ptr, \ + unsigned long val, \ + int size) \ +{ \ + switch (size) { \ + case 1: \ + return __cmpwait_case##sfx##_1(ptr, (u8)val); \ + case 2: \ + return __cmpwait_case##sfx##_2(ptr, (u16)val); \ + case 4: \ + return __cmpwait_case##sfx##_4(ptr, val); \ + case 8: \ + return __cmpwait_case##sfx##_8(ptr, val); \ + default: \ + BUILD_BUG(); \ + } \ + \ + unreachable(); \ +} + +__CMPWAIT_GEN() + +#undef __CMPWAIT_GEN + +#define __cmpwait_relaxed(ptr, val) \ + __cmpwait((ptr), (unsigned long)(val), sizeof(*(ptr))) + #endif /* __ASM_CMPXCHG_H */ -- cgit v1.2.3 From 920a4a70c55058a9997f2e35bf41503acf87c301 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 4 Jul 2016 09:50:16 +0000 Subject: timers, x86/apic/uv: Initialize the UV heartbeat timer as pinned Pinned timers must carry the pinned attribute in the timer structure itself, so convert the code to the new API. No functional change. Signed-off-by: Thomas Gleixner Reviewed-by: Frederic Weisbecker Cc: Arjan van de Ven Cc: Chris Mason Cc: Eric Dumazet Cc: George Spelvin Cc: Josh Triplett Cc: Len Brown Cc: Linus Torvalds Cc: Paul E. 
McKenney Cc: Peter Zijlstra Cc: Rik van Riel Cc: rt@linutronix.de Link: http://lkml.kernel.org/r/20160704094341.133837204@linutronix.de Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic/x2apic_uv_x.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c index 29003154fafd..7a50519e6afc 100644 --- a/arch/x86/kernel/apic/x2apic_uv_x.c +++ b/arch/x86/kernel/apic/x2apic_uv_x.c @@ -919,7 +919,7 @@ static void uv_heartbeat(unsigned long ignored) uv_set_scir_bits(bits); /* enable next timer period */ - mod_timer_pinned(timer, jiffies + SCIR_CPU_HB_INTERVAL); + mod_timer(timer, jiffies + SCIR_CPU_HB_INTERVAL); } static void uv_heartbeat_enable(int cpu) @@ -928,7 +928,7 @@ static void uv_heartbeat_enable(int cpu) struct timer_list *timer = &uv_cpu_scir_info(cpu)->timer; uv_set_cpu_scir_bits(cpu, SCIR_CPU_HEARTBEAT|SCIR_CPU_ACTIVITY); - setup_timer(timer, uv_heartbeat, cpu); + setup_pinned_timer(timer, uv_heartbeat, cpu); timer->expires = jiffies + SCIR_CPU_HB_INTERVAL; add_timer_on(timer, cpu); uv_cpu_scir_info(cpu)->enabled = 1; -- cgit v1.2.3 From f9c287ba3861714a1959accf14e815c44291bec4 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 4 Jul 2016 09:50:17 +0000 Subject: timers, x86/mce: Initialize MCE restart timer as pinned Pinned timers must carry the pinned attribute in the timer structure itself, so convert the code to the new API. No functional change. Signed-off-by: Thomas Gleixner Reviewed-by: Frederic Weisbecker Cc: Arjan van de Ven Cc: Chris Mason Cc: Eric Dumazet Cc: George Spelvin Cc: Josh Triplett Cc: Len Brown Cc: Linus Torvalds Cc: Paul E. McKenney Cc: Peter Zijlstra Cc: Rik van Riel Cc: rt@linutronix.de Link: http://lkml.kernel.org/r/20160704094341.215783439@linutronix.de Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/mcheck/mce.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index 92e5e37d97bf..b80a6361a9e1 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c @@ -1309,7 +1309,7 @@ static void __restart_timer(struct timer_list *t, unsigned long interval) if (timer_pending(t)) { if (time_before(when, t->expires)) - mod_timer_pinned(t, when); + mod_timer(t, when); } else { t->expires = round_jiffies(when); add_timer_on(t, smp_processor_id()); @@ -1735,7 +1735,7 @@ static void __mcheck_cpu_init_timer(void) struct timer_list *t = this_cpu_ptr(&mce_timer); unsigned int cpu = smp_processor_id(); - setup_timer(t, mce_timer_fn, cpu); + setup_pinned_timer(t, mce_timer_fn, cpu); mce_start_timer(cpu, t); } -- cgit v1.2.3 From 54b880caf15034644b564e378abf67b7f9eaf4dc Mon Sep 17 00:00:00 2001 From: James Hogan Date: Fri, 24 Jun 2016 23:42:23 +0100 Subject: kbuild, x86: Track generated headers with generated-y Track generated header files which aren't already in genhdr-y, alongside generic-y wrappers in the */include/generated/[uapi/]asm/ directories. Currently only x86 generates extra headers in these directories, for the purposes of enumerating system calls for different ABIs, and xen hypercalls. This will allow the asm-generic wrapper handling code to remove stale wrappers when files are removed from generic-y, without also removing these headers which are generated separately. 
Reported-by: kbuild test robot Signed-off-by: James Hogan Acked-by: Arnd Bergmann Cc: Jonathan Corbet Cc: linux-kbuild@vger.kernel.org Cc: linux-doc@vger.kernel.org Cc: Michal Marek Link: http://lkml.kernel.org/r/1466808144-23209-2-git-send-email-james.hogan@imgtec.com Signed-off-by: Thomas Gleixner --- Documentation/kbuild/makefiles.txt | 14 ++++++++++++++ arch/x86/include/asm/Kbuild | 6 ++++++ 2 files changed, 20 insertions(+) (limited to 'arch') diff --git a/Documentation/kbuild/makefiles.txt b/Documentation/kbuild/makefiles.txt index 13f888a02a3d..385a5ef41c17 100644 --- a/Documentation/kbuild/makefiles.txt +++ b/Documentation/kbuild/makefiles.txt @@ -47,6 +47,7 @@ This document describes the Linux kernel Makefiles. --- 7.2 genhdr-y --- 7.3 destination-y --- 7.4 generic-y + --- 7.5 generated-y === 8 Kbuild Variables === 9 Makefile language @@ -1319,6 +1320,19 @@ See subsequent chapter for the syntax of the Kbuild file. Example: termios.h #include + --- 7.5 generated-y + + If an architecture generates other header files alongside generic-y + wrappers, and not included in genhdr-y, then generated-y specifies + them. + + This prevents them being treated as stale asm-generic wrappers and + removed. + + Example: + #arch/x86/include/asm/Kbuild + generated-y += syscalls_32.h + === 8 Kbuild Variables The top Makefile exports the following variables: diff --git a/arch/x86/include/asm/Kbuild b/arch/x86/include/asm/Kbuild index aeac434c9feb..2cfed174e3c9 100644 --- a/arch/x86/include/asm/Kbuild +++ b/arch/x86/include/asm/Kbuild @@ -1,5 +1,11 @@ +generated-y += syscalls_32.h +generated-y += syscalls_64.h +generated-y += unistd_32_ia32.h +generated-y += unistd_64_x32.h +generated-y += xen-hypercalls.h + genhdr-y += unistd_32.h genhdr-y += unistd_64.h genhdr-y += unistd_x32.h -- cgit v1.2.3 From e19a6ee2460bdd0d0055a6029383422773f9999a Mon Sep 17 00:00:00 2001 From: James Morse Date: Mon, 20 Jun 2016 18:28:01 +0100 Subject: arm64: kernel: Save and restore UAO and addr_limit on exception entry If we take an exception while at EL1, the exception handler inherits the original context's addr_limit and PSTATE.UAO values. To be consistent always reset addr_limit and PSTATE.UAO on (re-)entry to EL1. This prevents accidental re-use of the original context's addr_limit. Based on a similar patch for arm from Russell King. 
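For readers unfamiliar with the thread addr_limit, the situation guarded against here is the long-standing set_fs() idiom, where the fault-checked user-access helpers are briefly pointed at kernel memory. A minimal, hypothetical in-kernel sketch (names and includes are illustrative only, not part of this patch):

#include <linux/errno.h>
#include <linux/types.h>
#include <linux/uaccess.h>

/*
 * Hypothetical helper, for illustration only: read a kernel buffer through
 * the user-access routines by temporarily widening addr_limit.  An
 * exception taken between set_fs(KERNEL_DS) and the restore would, before
 * this patch, run its handler with the widened limit still in place.
 */
static int read_via_uaccess(void *dst, const void *kernel_src, size_t len)
{
	mm_segment_t old_fs = get_fs();
	unsigned long not_copied;

	set_fs(KERNEL_DS);
	not_copied = __copy_from_user(dst, (const void __user *)kernel_src, len);
	set_fs(old_fs);

	return not_copied ? -EFAULT : 0;
}

If an exception is taken inside that window, the handler previously inherited KERNEL_DS; the entry.S hunk below saves the caller's limit and resets it to USER_DS on every entry to EL1.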
Cc: # 4.6- Acked-by: Will Deacon Reviewed-by: Mark Rutland Signed-off-by: James Morse Signed-off-by: Will Deacon --- arch/arm64/include/asm/ptrace.h | 2 ++ arch/arm64/kernel/asm-offsets.c | 1 + arch/arm64/kernel/entry.S | 19 +++++++++++++++++-- arch/arm64/mm/fault.c | 3 ++- 4 files changed, 22 insertions(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/arm64/include/asm/ptrace.h b/arch/arm64/include/asm/ptrace.h index a307eb6e7fa8..7f94755089e2 100644 --- a/arch/arm64/include/asm/ptrace.h +++ b/arch/arm64/include/asm/ptrace.h @@ -117,6 +117,8 @@ struct pt_regs { }; u64 orig_x0; u64 syscallno; + u64 orig_addr_limit; + u64 unused; // maintain 16 byte alignment }; #define arch_has_single_step() (1) diff --git a/arch/arm64/kernel/asm-offsets.c b/arch/arm64/kernel/asm-offsets.c index f8e5d47f0880..2f4ba774488a 100644 --- a/arch/arm64/kernel/asm-offsets.c +++ b/arch/arm64/kernel/asm-offsets.c @@ -60,6 +60,7 @@ int main(void) DEFINE(S_PC, offsetof(struct pt_regs, pc)); DEFINE(S_ORIG_X0, offsetof(struct pt_regs, orig_x0)); DEFINE(S_SYSCALLNO, offsetof(struct pt_regs, syscallno)); + DEFINE(S_ORIG_ADDR_LIMIT, offsetof(struct pt_regs, orig_addr_limit)); DEFINE(S_FRAME_SIZE, sizeof(struct pt_regs)); BLANK(); DEFINE(MM_CONTEXT_ID, offsetof(struct mm_struct, context.id.counter)); diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S index 12e8d2bcb3f9..6c3b7345a6c4 100644 --- a/arch/arm64/kernel/entry.S +++ b/arch/arm64/kernel/entry.S @@ -28,6 +28,7 @@ #include #include #include +#include #include #include @@ -97,7 +98,14 @@ mov x29, xzr // fp pointed to user-space .else add x21, sp, #S_FRAME_SIZE - .endif + get_thread_info tsk + /* Save the task's original addr_limit and set USER_DS (TASK_SIZE_64) */ + ldr x20, [tsk, #TI_ADDR_LIMIT] + str x20, [sp, #S_ORIG_ADDR_LIMIT] + mov x20, #TASK_SIZE_64 + str x20, [tsk, #TI_ADDR_LIMIT] + ALTERNATIVE(nop, SET_PSTATE_UAO(0), ARM64_HAS_UAO, CONFIG_ARM64_UAO) + .endif /* \el == 0 */ mrs x22, elr_el1 mrs x23, spsr_el1 stp lr, x21, [sp, #S_LR] @@ -128,6 +136,14 @@ .endm .macro kernel_exit, el + .if \el != 0 + /* Restore the task's original addr_limit. */ + ldr x20, [sp, #S_ORIG_ADDR_LIMIT] + str x20, [tsk, #TI_ADDR_LIMIT] + + /* No need to restore UAO, it will be restored from SPSR_EL1 */ + .endif + ldp x21, x22, [sp, #S_PC] // load ELR, SPSR .if \el == 0 ct_user_enter @@ -406,7 +422,6 @@ el1_irq: bl trace_hardirqs_off #endif - get_thread_info tsk irq_handler #ifdef CONFIG_PREEMPT diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c index 013e2cbe7924..b1166d1e5955 100644 --- a/arch/arm64/mm/fault.c +++ b/arch/arm64/mm/fault.c @@ -280,7 +280,8 @@ static int __kprobes do_page_fault(unsigned long addr, unsigned int esr, } if (permission_fault(esr) && (addr < USER_DS)) { - if (get_fs() == KERNEL_DS) + /* regs->orig_addr_limit may be 0 if we entered from EL0 */ + if (regs->orig_addr_limit == KERNEL_DS) die("Accessing user space memory with fs=KERNEL_DS", regs, esr); if (!search_exception_tables(regs->pc)) -- cgit v1.2.3 From 47c459beabe969c6751e2ea8d1f85c5fa1652d6c Mon Sep 17 00:00:00 2001 From: Ganapatrao Kulkarni Date: Thu, 7 Jul 2016 10:18:17 +0530 Subject: arm64: Enable workaround for Cavium erratum 27456 on thunderx-81xx Cavium erratum 27456 commit 104a0c02e8b1 ("arm64: Add workaround for Cavium erratum 27456") is applicable for thunderx-81xx pass1.0 SoC as well. Adding code to enable to 81xx. 
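The MIDR_RANGE() entry added here matches only variant 0, revision 0 of the T81 part, i.e. pass 1.0 silicon. As a rough, self-contained illustration of that matching scheme, with masks mirroring the MIDR_EL1 field layout (the constants and helper are local to this sketch, not kernel API):

#include <stdbool.h>
#include <stdint.h>

/* Implementer [31:24], architecture [19:16] and part number [15:4] must
 * match exactly; variant [23:20] plus revision [3:0] must fall inside the
 * erratum's [rv_min, rv_max] window. */
#define EX_MIDR_MODEL_MASK	0xff0ffff0u
#define EX_MIDR_VAR_REV_MASK	0x00f0000fu

static bool ex_midr_in_range(uint32_t midr, uint32_t model,
			     uint32_t rv_min, uint32_t rv_max)
{
	uint32_t rv = midr & EX_MIDR_VAR_REV_MASK;

	if ((midr & EX_MIDR_MODEL_MASK) != (model & EX_MIDR_MODEL_MASK))
		return false;

	return rv >= rv_min && rv <= rv_max;
}

With rv_min == rv_max == 0x00, as in the new table entry, only the pass 1.0 part triggers the workaround.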
Signed-off-by: Ganapatrao Kulkarni Reviewed-by: Andrew Pinski Signed-off-by: Will Deacon --- arch/arm64/include/asm/cputype.h | 2 ++ arch/arm64/kernel/cpu_errata.c | 6 ++++++ 2 files changed, 8 insertions(+) (limited to 'arch') diff --git a/arch/arm64/include/asm/cputype.h b/arch/arm64/include/asm/cputype.h index 87e1985f3be8..9d9fd4b9a72e 100644 --- a/arch/arm64/include/asm/cputype.h +++ b/arch/arm64/include/asm/cputype.h @@ -80,12 +80,14 @@ #define APM_CPU_PART_POTENZA 0x000 #define CAVIUM_CPU_PART_THUNDERX 0x0A1 +#define CAVIUM_CPU_PART_THUNDERX_81XX 0x0A2 #define BRCM_CPU_PART_VULCAN 0x516 #define MIDR_CORTEX_A53 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A53) #define MIDR_CORTEX_A57 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A57) #define MIDR_THUNDERX MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_THUNDERX) +#define MIDR_THUNDERX_81XX MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_THUNDERX_81XX) #ifndef __ASSEMBLY__ diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c index d42789499f17..af716b65110d 100644 --- a/arch/arm64/kernel/cpu_errata.c +++ b/arch/arm64/kernel/cpu_errata.c @@ -98,6 +98,12 @@ const struct arm64_cpu_capabilities arm64_errata[] = { MIDR_RANGE(MIDR_THUNDERX, 0x00, (1 << MIDR_VARIANT_SHIFT) | 1), }, + { + /* Cavium ThunderX, T81 pass 1.0 */ + .desc = "Cavium erratum 27456", + .capability = ARM64_WORKAROUND_CAVIUM_27456, + MIDR_RANGE(MIDR_THUNDERX_81XX, 0x00, 0x00), + }, #endif { } -- cgit v1.2.3 From e99a0745bdf8a5f7e3126a686846af4aeb852cc9 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Tue, 5 Jul 2016 23:09:07 +0300 Subject: x86/pci, x86/platform/intel_mid_pci: Remove duplicate power off code Intel MID platforms (Moorestown, Medfield, Clovertrail, Merrifield) are sharing the code in the intel_mid_pci.c module. There is no need to power off specific Moorestown devices after the following commit: 5823d0893ec2 ("x86/platform/intel-mid: Add Power Management Unit driver") ... because the condition in mrfld_power_off_dev() is true for any platform from the above list. Remove duplicate power off certain devices on Intel Moorestown and rename the affected functions to show that they are applied to any of Intel MID platforms. 
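The key observation is that a DECLARE_PCI_FIXUP_FINAL() entry registered with PCI_ANY_ID already runs for every Intel device the per-ID entries named, so those entries only executed the same handler a second time. A minimal sketch of the quirk pattern (the handler name and body are illustrative, not taken from the file):

#include <linux/pci.h>

/* Runs once per matching device after normal enumeration; with PCI_ANY_ID
 * it matches every Intel device, so no per-device-ID registration of the
 * same handler is needed. */
static void example_d3hot_quirk(struct pci_dev *pdev)
{
	u16 pmcsr;

	if (!pdev->pm_cap)
		return;

	pci_read_config_word(pdev, pdev->pm_cap + PCI_PM_CTRL, &pmcsr);
	pmcsr &= ~PCI_PM_CTRL_STATE_MASK;
	pmcsr |= PCI_D3hot;
	pci_write_config_word(pdev, pdev->pm_cap + PCI_PM_CTRL, pmcsr);
}
DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, PCI_ANY_ID, example_d3hot_quirk);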
Signed-off-by: Andy Shevchenko Cc: Bjorn Helgaas Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1467749348-100518-1-git-send-email-andriy.shevchenko@linux.intel.com Signed-off-by: Ingo Molnar --- arch/x86/pci/intel_mid_pci.c | 13 ++++--------- 1 file changed, 4 insertions(+), 9 deletions(-) (limited to 'arch') diff --git a/arch/x86/pci/intel_mid_pci.c b/arch/x86/pci/intel_mid_pci.c index a9710433be4d..5413d6a9817c 100644 --- a/arch/x86/pci/intel_mid_pci.c +++ b/arch/x86/pci/intel_mid_pci.c @@ -316,7 +316,7 @@ static void pci_d3delay_fixup(struct pci_dev *dev) } DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, PCI_ANY_ID, pci_d3delay_fixup); -static void mid_power_off_dev(struct pci_dev *dev) +static void mid_power_off_one_device(struct pci_dev *dev) { u16 pmcsr; @@ -330,12 +330,7 @@ static void mid_power_off_dev(struct pci_dev *dev) pci_set_power_state(dev, PCI_D3hot); } -DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x0801, mid_power_off_dev); -DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x0809, mid_power_off_dev); -DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x080C, mid_power_off_dev); -DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x0815, mid_power_off_dev); - -static void mrfld_power_off_dev(struct pci_dev *dev) +static void mid_power_off_devices(struct pci_dev *dev) { int id; @@ -350,10 +345,10 @@ static void mrfld_power_off_dev(struct pci_dev *dev) * This sets only PMCSR bits. The actual power off will happen in * arch/x86/platform/intel-mid/pwr.c. */ - mid_power_off_dev(dev); + mid_power_off_one_device(dev); } -DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, PCI_ANY_ID, mrfld_power_off_dev); +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, PCI_ANY_ID, mid_power_off_devices); /* * Langwell devices reside at fixed offsets, don't try to move them. -- cgit v1.2.3 From ca22312dc840065206285626829ceed8bb4df88c Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Tue, 5 Jul 2016 23:09:08 +0300 Subject: x86/platform/intel-mid: Extend PWRMU to support Penwell Intel Penwell is one of the first SoCs in Intel MID series. It has slightly older version of PWRMU IP, though it is compatible with one found on Intel Tangier. Since we are not using (yet) any advanced stuff in the driver we may safely re-use what it's done for Intel Tangier for now. Extend PWRMU driver to support Intel Penwell by adding PCI ID and re-using existing ->set_initial_state() function. 
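What makes this a near one-line change is the .driver_data pointer in the PCI ID table: each supported SoC points at a structure of callbacks, so a new SoC that can reuse an existing callback only needs another table entry. A generic sketch of that pattern (structure and function names are illustrative; the two device IDs are the Penwell and Tangier IDs from the patch):

#include <linux/module.h>
#include <linux/pci.h>

struct example_pwr_info {
	int (*set_initial_state)(struct pci_dev *pdev);
};

static int example_set_initial_state(struct pci_dev *pdev)
{
	dev_info(&pdev->dev, "setting initial power state\n");
	return 0;
}

static const struct example_pwr_info example_info = {
	.set_initial_state = example_set_initial_state,
};

static int example_probe(struct pci_dev *pdev, const struct pci_device_id *id)
{
	const struct example_pwr_info *info =
		(const struct example_pwr_info *)id->driver_data;

	return info->set_initial_state(pdev);
}

static const struct pci_device_id example_pwr_ids[] = {
	{ PCI_VDEVICE(INTEL, 0x0828), (kernel_ulong_t)&example_info },
	{ PCI_VDEVICE(INTEL, 0x11a1), (kernel_ulong_t)&example_info },
	{ }
};
MODULE_DEVICE_TABLE(pci, example_pwr_ids);

static struct pci_driver example_pwr_driver = {
	.name		= "example_pwr",
	.id_table	= example_pwr_ids,
	.probe		= example_probe,
};
builtin_pci_driver(example_pwr_driver);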
Signed-off-by: Andy Shevchenko Cc: Bjorn Helgaas Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1467749348-100518-2-git-send-email-andriy.shevchenko@linux.intel.com Signed-off-by: Ingo Molnar --- arch/x86/platform/intel-mid/pwr.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) (limited to 'arch') diff --git a/arch/x86/platform/intel-mid/pwr.c b/arch/x86/platform/intel-mid/pwr.c index 59faf05d23f5..5bc90dd102d4 100644 --- a/arch/x86/platform/intel-mid/pwr.c +++ b/arch/x86/platform/intel-mid/pwr.c @@ -75,6 +75,7 @@ #define LSS_PWS_BITS 2 /* power state width */ /* Supported device IDs */ +#define PCI_DEVICE_ID_PENWELL 0x0828 #define PCI_DEVICE_ID_TANGIER 0x11a1 struct mid_pwr_dev { @@ -354,7 +355,7 @@ static int mid_pwr_probe(struct pci_dev *pdev, const struct pci_device_id *id) return 0; } -static int tng_set_initial_state(struct mid_pwr *pwr) +static int mid_set_initial_state(struct mid_pwr *pwr) { unsigned int i, j; int ret; @@ -397,12 +398,13 @@ static int tng_set_initial_state(struct mid_pwr *pwr) return 0; } -static const struct mid_pwr_device_info tng_info = { - .set_initial_state = tng_set_initial_state, +static const struct mid_pwr_device_info mid_info = { + .set_initial_state = mid_set_initial_state, }; static const struct pci_device_id mid_pwr_pci_ids[] = { - { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_TANGIER), (kernel_ulong_t)&tng_info }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_PENWELL), (kernel_ulong_t)&mid_info }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_TANGIER), (kernel_ulong_t)&mid_info }, {} }; MODULE_DEVICE_TABLE(pci, mid_pwr_pci_ids); -- cgit v1.2.3 From e81e11bc71573709352a5275e175a4b2ee1325e5 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Tue, 5 Jul 2016 03:14:50 +0300 Subject: x86/platform/intel-mid: Enable spidev on Intel Edison boards Intel Edison board provides one of the SPI bus for user's connected devices. Append platform data to get spidev enumerated over it. 
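Once the board file below has enumerated the device, user space drives it through the spidev character device with the standard ioctls. A small user-space example for context (the /dev node name and transfer parameters are illustrative; on Edison the actual bus and chip-select numbers come from the SFI tables):

#include <fcntl.h>
#include <stdint.h>
#include <string.h>
#include <sys/ioctl.h>
#include <unistd.h>
#include <linux/spi/spidev.h>

/* Performs one full-duplex 2-byte transfer and returns the second
 * received byte, or -1 on error. */
static int spidev_xfer_example(const char *node)
{
	uint8_t tx[2] = { 0x9f, 0x00 }, rx[2] = { 0, 0 };
	struct spi_ioc_transfer xfer;
	int ret, fd = open(node, O_RDWR);

	if (fd < 0)
		return -1;

	memset(&xfer, 0, sizeof(xfer));
	xfer.tx_buf = (unsigned long)tx;
	xfer.rx_buf = (unsigned long)rx;
	xfer.len = sizeof(tx);
	xfer.speed_hz = 1000000;
	xfer.bits_per_word = 8;

	ret = ioctl(fd, SPI_IOC_MESSAGE(1), &xfer);
	close(fd);

	return ret < 0 ? -1 : rx[1];
}

A caller would pass the node created for the enumerated chip select, e.g. spidev_xfer_example("/dev/spidev5.1").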
Signed-off-by: Andy Shevchenko Cc: Dan O'Donovan Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1467677690-90007-1-git-send-email-andriy.shevchenko@linux.intel.com Signed-off-by: Ingo Molnar --- arch/x86/platform/intel-mid/device_libs/Makefile | 2 + .../intel-mid/device_libs/platform_spidev.c | 50 ++++++++++++++++++++++ 2 files changed, 52 insertions(+) create mode 100644 arch/x86/platform/intel-mid/device_libs/platform_spidev.c (limited to 'arch') diff --git a/arch/x86/platform/intel-mid/device_libs/Makefile b/arch/x86/platform/intel-mid/device_libs/Makefile index 79e97ed5be5b..fc135bf70511 100644 --- a/arch/x86/platform/intel-mid/device_libs/Makefile +++ b/arch/x86/platform/intel-mid/device_libs/Makefile @@ -10,6 +10,8 @@ obj-$(subst m,y,$(CONFIG_MFD_INTEL_MSIC)) += platform_msic_battery.o obj-$(subst m,y,$(CONFIG_INTEL_MID_POWER_BUTTON)) += platform_msic_power_btn.o obj-$(subst m,y,$(CONFIG_GPIO_INTEL_PMIC)) += platform_pmic_gpio.o obj-$(subst m,y,$(CONFIG_INTEL_MFLD_THERMAL)) += platform_msic_thermal.o +# SPI Devices +obj-$(subst m,y,$(CONFIG_SPI_SPIDEV)) += platform_spidev.o # I2C Devices obj-$(subst m,y,$(CONFIG_SENSORS_EMC1403)) += platform_emc1403.o obj-$(subst m,y,$(CONFIG_SENSORS_LIS3LV02D)) += platform_lis331.o diff --git a/arch/x86/platform/intel-mid/device_libs/platform_spidev.c b/arch/x86/platform/intel-mid/device_libs/platform_spidev.c new file mode 100644 index 000000000000..30c601b399ee --- /dev/null +++ b/arch/x86/platform/intel-mid/device_libs/platform_spidev.c @@ -0,0 +1,50 @@ +/* + * spidev platform data initilization file + * + * (C) Copyright 2014, 2016 Intel Corporation + * Authors: Andy Shevchenko + * Dan O'Donovan + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; version 2 + * of the License. + */ + +#include +#include +#include +#include + +#include + +#define MRFLD_SPI_DEFAULT_DMA_BURST 8 +#define MRFLD_SPI_DEFAULT_TIMEOUT 500 + +/* GPIO pin for spidev chipselect */ +#define MRFLD_SPIDEV_GPIO_CS 111 + +static struct pxa2xx_spi_chip spidev_spi_chip = { + .dma_burst_size = MRFLD_SPI_DEFAULT_DMA_BURST, + .timeout = MRFLD_SPI_DEFAULT_TIMEOUT, + .gpio_cs = MRFLD_SPIDEV_GPIO_CS, +}; + +static void __init *spidev_platform_data(void *info) +{ + struct spi_board_info *spi_info = info; + + spi_info->mode = SPI_MODE_0; + spi_info->controller_data = &spidev_spi_chip; + + return NULL; +} + +static const struct devs_id spidev_dev_id __initconst = { + .name = "spidev", + .type = SFI_DEV_TYPE_SPI, + .delay = 0, + .get_platform_data = &spidev_platform_data, +}; + +sfi_device(spidev_dev_id); -- cgit v1.2.3 From 955d1427a91b18f53e082bd7c19c40ce13b0a0f4 Mon Sep 17 00:00:00 2001 From: Aravind Gopalakrishnan Date: Fri, 8 Jul 2016 11:09:38 +0200 Subject: x86/mce/AMD: Increase size of the bank_map type Change bank_map type from 'char' to 'int' since we now have more than eight banks in a system. Signed-off-by: Aravind Gopalakrishnan Signed-off-by: Yazen Ghannam Signed-off-by: Borislav Petkov Cc: Andy Lutomirski Cc: Aravind Gopalakrishnan Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. 
Peter Anvin Cc: Josh Poimboeuf Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Tony Luck Cc: linux-edac Link: http://lkml.kernel.org/r/1467968983-4874-2-git-send-email-bp@alien8.de Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/mcheck/mce_amd.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd.c b/arch/x86/kernel/cpu/mcheck/mce_amd.c index 10b0661651e0..7b7f3be783d4 100644 --- a/arch/x86/kernel/cpu/mcheck/mce_amd.c +++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c @@ -93,7 +93,7 @@ const char * const amd_df_mcablock_names[] = { EXPORT_SYMBOL_GPL(amd_df_mcablock_names); static DEFINE_PER_CPU(struct threshold_bank **, threshold_banks); -static DEFINE_PER_CPU(unsigned char, bank_map); /* see which banks are on */ +static DEFINE_PER_CPU(unsigned int, bank_map); /* see which banks are on */ static void amd_threshold_interrupt(void); static void amd_deferred_error_interrupt(void); -- cgit v1.2.3 From 340e983ab8afd02b59d698dd1365d7773bf136b3 Mon Sep 17 00:00:00 2001 From: Yazen Ghannam Date: Fri, 8 Jul 2016 11:09:39 +0200 Subject: x86/RAS/AMD: Reduce the number of IPIs when prepping error injection We currently use wrmsr_on_cpu() 4 times when prepping for an error injection. This will generate 4 IPIs for each MSR write. We can reduce the number of IPIs to 1 by grouping the MSR writes and executing them serially on the appropriate CPU. Suggested-by: Borislav Petkov Signed-off-by: Yazen Ghannam Signed-off-by: Borislav Petkov Cc: Andy Lutomirski Cc: Aravind Gopalakrishnan Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Josh Poimboeuf Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Tony Luck Cc: linux-edac Link: http://lkml.kernel.org/r/1467968983-4874-3-git-send-email-bp@alien8.de Signed-off-by: Ingo Molnar --- arch/x86/ras/mce_amd_inj.c | 58 ++++++++++++++++++++++------------------------ 1 file changed, 28 insertions(+), 30 deletions(-) (limited to 'arch') diff --git a/arch/x86/ras/mce_amd_inj.c b/arch/x86/ras/mce_amd_inj.c index e69f4701a076..1104515d5ad2 100644 --- a/arch/x86/ras/mce_amd_inj.c +++ b/arch/x86/ras/mce_amd_inj.c @@ -241,6 +241,31 @@ static void toggle_nb_mca_mst_cpu(u16 nid) __func__, PCI_FUNC(F3->devfn), NBCFG); } +static void prepare_msrs(void *info) +{ + struct mce i_mce = *(struct mce *)info; + u8 b = i_mce.bank; + + wrmsrl(MSR_IA32_MCG_STATUS, i_mce.mcgstatus); + + if (boot_cpu_has(X86_FEATURE_SMCA)) { + if (i_mce.inject_flags == DFR_INT_INJ) { + wrmsrl(MSR_AMD64_SMCA_MCx_DESTAT(b), i_mce.status); + wrmsrl(MSR_AMD64_SMCA_MCx_DEADDR(b), i_mce.addr); + } else { + wrmsrl(MSR_AMD64_SMCA_MCx_STATUS(b), i_mce.status); + wrmsrl(MSR_AMD64_SMCA_MCx_ADDR(b), i_mce.addr); + } + + wrmsrl(MSR_AMD64_SMCA_MCx_MISC(b), i_mce.misc); + } else { + wrmsrl(MSR_IA32_MCx_STATUS(b), i_mce.status); + wrmsrl(MSR_IA32_MCx_ADDR(b), i_mce.addr); + wrmsrl(MSR_IA32_MCx_MISC(b), i_mce.misc); + } + +} + static void do_inject(void) { u64 mcg_status = 0; @@ -287,36 +312,9 @@ static void do_inject(void) toggle_hw_mce_inject(cpu, true); - wrmsr_on_cpu(cpu, MSR_IA32_MCG_STATUS, - (u32)mcg_status, (u32)(mcg_status >> 32)); - - if (boot_cpu_has(X86_FEATURE_SMCA)) { - if (inj_type == DFR_INT_INJ) { - wrmsr_on_cpu(cpu, MSR_AMD64_SMCA_MCx_DESTAT(b), - (u32)i_mce.status, (u32)(i_mce.status >> 32)); - - wrmsr_on_cpu(cpu, MSR_AMD64_SMCA_MCx_DEADDR(b), - (u32)i_mce.addr, (u32)(i_mce.addr >> 32)); - } else { - wrmsr_on_cpu(cpu, MSR_AMD64_SMCA_MCx_STATUS(b), - (u32)i_mce.status, (u32)(i_mce.status >> 
32)); - - wrmsr_on_cpu(cpu, MSR_AMD64_SMCA_MCx_ADDR(b), - (u32)i_mce.addr, (u32)(i_mce.addr >> 32)); - } - - wrmsr_on_cpu(cpu, MSR_AMD64_SMCA_MCx_MISC(b), - (u32)i_mce.misc, (u32)(i_mce.misc >> 32)); - } else { - wrmsr_on_cpu(cpu, MSR_IA32_MCx_STATUS(b), - (u32)i_mce.status, (u32)(i_mce.status >> 32)); - - wrmsr_on_cpu(cpu, MSR_IA32_MCx_ADDR(b), - (u32)i_mce.addr, (u32)(i_mce.addr >> 32)); - - wrmsr_on_cpu(cpu, MSR_IA32_MCx_MISC(b), - (u32)i_mce.misc, (u32)(i_mce.misc >> 32)); - } + i_mce.mcgstatus = mcg_status; + i_mce.inject_flags = inj_type; + smp_call_function_single(cpu, prepare_msrs, &i_mce, 0); toggle_hw_mce_inject(cpu, false); -- cgit v1.2.3 From 38c54ccb2ded3e93d8a353baeb7b9e12e1b77e23 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Fri, 8 Jul 2016 11:09:41 +0200 Subject: x86/mce: Fix mce_rdmsrl() warning message The MSR address we're dumping in there should be in hex, otherwise we get funsies like: [ 0.016000] WARNING: CPU: 1 PID: 0 at arch/x86/kernel/cpu/mcheck/mce.c:428 mce_rdmsrl+0xd9/0xe0 [ 0.016000] mce: Unable to read msr -1073733631! ^^^^^^^^^^^ Signed-off-by: Borislav Petkov Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Josh Poimboeuf Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Tony Luck Link: http://lkml.kernel.org/r/1467968983-4874-5-git-send-email-bp@alien8.de [ Fixed capitalization of 'MSR'. ] Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/mcheck/mce.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index 92e5e37d97bf..58af6300992d 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c @@ -425,7 +425,7 @@ static u64 mce_rdmsrl(u32 msr) } if (rdmsrl_safe(msr, &v)) { - WARN_ONCE(1, "mce: Unable to read msr %d!\n", msr); + WARN_ONCE(1, "mce: Unable to read MSR 0x%x!\n", msr); /* * Return zero in case the access faulted. This should * not happen normally but can happen if the CPU does -- cgit v1.2.3 From ef16dd0c2a523d2e3975bb1bea9f5727e3e7146f Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Tue, 5 Jul 2016 00:31:25 +0200 Subject: x86/dumpstack: Honor supplied @regs arg The comment suggests that show_stack(NULL, NULL) should backtrace the current context, but the code doesn't match the comment. If regs are given, start the "Stack:" hexdump at regs->sp. Signed-off-by: Andy Lutomirski Signed-off-by: Borislav Petkov Cc: Andrew Morton Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: Frederic Weisbecker Cc: H. 
Peter Anvin Cc: Josh Poimboeuf Cc: Kees Cook Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1467671487-10344-2-git-send-email-bp@alien8.de Link: http://lkml.kernel.org/r/efcd79bf4106d61f1cd258c2caa87f3a0618eeac.1466036668.git.luto@kernel.org Signed-off-by: Ingo Molnar --- arch/x86/kernel/dumpstack_32.c | 4 +++- arch/x86/kernel/dumpstack_64.c | 4 +++- 2 files changed, 6 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/dumpstack_32.c b/arch/x86/kernel/dumpstack_32.c index fef917e79b9d..948d77da3881 100644 --- a/arch/x86/kernel/dumpstack_32.c +++ b/arch/x86/kernel/dumpstack_32.c @@ -96,7 +96,9 @@ show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs, int i; if (sp == NULL) { - if (task) + if (regs) + sp = (unsigned long *)regs->sp; + else if (task) sp = (unsigned long *)task->thread.sp; else sp = (unsigned long *)&sp; diff --git a/arch/x86/kernel/dumpstack_64.c b/arch/x86/kernel/dumpstack_64.c index d558a8a49016..a81e1ef73bf2 100644 --- a/arch/x86/kernel/dumpstack_64.c +++ b/arch/x86/kernel/dumpstack_64.c @@ -264,7 +264,9 @@ show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs, * back trace for this cpu: */ if (sp == NULL) { - if (task) + if (regs) + sp = (unsigned long *)regs->sp; + else if (task) sp = (unsigned long *)task->thread.sp; else sp = (unsigned long *)&sp; -- cgit v1.2.3 From 81c2949f7fdcf8ff681326669afde24962232670 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Tue, 5 Jul 2016 00:31:27 +0200 Subject: x86/dumpstack: Add show_stack_regs() and use it Add a helper to dump supplied pt_regs and use it in the MSR exception handling code to have precise stack traces pointing to the actual function causing the MSR access exception and not the stack frame of the exception handler itself. The new output looks like this: unchecked MSR access error: RDMSR from 0xdeadbeef at rIP: 0xffffffff8102ddb6 (early_init_intel+0x16/0x3a0) 00000000756e6547 ffffffff81c03f68 ffffffff81dd0940 ffffffff81c03f10 ffffffff81d42e65 0000000001000000 ffffffff81c03f58 ffffffff81d3e5a3 0000800000000000 ffffffff81800080 ffffffffffffffff 0000000000000000 Call Trace: [] early_cpu_init+0xe7/0x136 [] setup_arch+0xa5/0x9df [] start_kernel+0x9f/0x43a [] x86_64_start_reservations+0x2f/0x31 [] x86_64_start_kernel+0x168/0x176 Signed-off-by: Borislav Petkov Reviewed-by: Andy Lutomirski Cc: Andrew Morton Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: Frederic Weisbecker Cc: H. 
Peter Anvin Cc: Josh Poimboeuf Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1467671487-10344-4-git-send-email-bp@alien8.de Signed-off-by: Ingo Molnar --- arch/x86/include/asm/kdebug.h | 1 + arch/x86/kernel/dumpstack.c | 5 +++++ arch/x86/mm/extable.c | 13 ++++++++----- 3 files changed, 14 insertions(+), 5 deletions(-) (limited to 'arch') diff --git a/arch/x86/include/asm/kdebug.h b/arch/x86/include/asm/kdebug.h index e5f5dc9787d5..1ef9d581b5d9 100644 --- a/arch/x86/include/asm/kdebug.h +++ b/arch/x86/include/asm/kdebug.h @@ -26,6 +26,7 @@ extern void die(const char *, struct pt_regs *,long); extern int __must_check __die(const char *, struct pt_regs *, long); extern void show_trace(struct task_struct *t, struct pt_regs *regs, unsigned long *sp, unsigned long bp); +extern void show_stack_regs(struct pt_regs *regs); extern void __show_regs(struct pt_regs *regs, int all); extern unsigned long oops_begin(void); extern void oops_end(unsigned long, struct pt_regs *, int signr); diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c index ef8017ca5ba9..d66e5ac823b2 100644 --- a/arch/x86/kernel/dumpstack.c +++ b/arch/x86/kernel/dumpstack.c @@ -197,6 +197,11 @@ void show_stack(struct task_struct *task, unsigned long *sp) show_stack_log_lvl(task, NULL, sp, bp, ""); } +void show_stack_regs(struct pt_regs *regs) +{ + show_stack_log_lvl(current, regs, (unsigned long *)regs->sp, regs->bp, ""); +} + static arch_spinlock_t die_lock = __ARCH_SPIN_LOCK_UNLOCKED; static int die_owner = -1; static unsigned int die_nest_count; diff --git a/arch/x86/mm/extable.c b/arch/x86/mm/extable.c index 4bb53b89f3c5..fafc771568c7 100644 --- a/arch/x86/mm/extable.c +++ b/arch/x86/mm/extable.c @@ -1,6 +1,7 @@ #include #include #include +#include typedef bool (*ex_handler_t)(const struct exception_table_entry *, struct pt_regs *, int); @@ -46,8 +47,9 @@ EXPORT_SYMBOL(ex_handler_ext); bool ex_handler_rdmsr_unsafe(const struct exception_table_entry *fixup, struct pt_regs *regs, int trapnr) { - WARN_ONCE(1, "unchecked MSR access error: RDMSR from 0x%x\n", - (unsigned int)regs->cx); + if (pr_warn_once("unchecked MSR access error: RDMSR from 0x%x at rIP: 0x%lx (%pF)\n", + (unsigned int)regs->cx, regs->ip, (void *)regs->ip)) + show_stack_regs(regs); /* Pretend that the read succeeded and returned 0. */ regs->ip = ex_fixup_addr(fixup); @@ -60,9 +62,10 @@ EXPORT_SYMBOL(ex_handler_rdmsr_unsafe); bool ex_handler_wrmsr_unsafe(const struct exception_table_entry *fixup, struct pt_regs *regs, int trapnr) { - WARN_ONCE(1, "unchecked MSR access error: WRMSR to 0x%x (tried to write 0x%08x%08x)\n", - (unsigned int)regs->cx, - (unsigned int)regs->dx, (unsigned int)regs->ax); + if (pr_warn_once("unchecked MSR access error: WRMSR to 0x%x (tried to write 0x%08x%08x) at rIP: 0x%lx (%pF)\n", + (unsigned int)regs->cx, (unsigned int)regs->dx, + (unsigned int)regs->ax, regs->ip, (void *)regs->ip)) + show_stack_regs(regs); /* Pretend that the write succeeded. 
*/ regs->ip = ex_fixup_addr(fixup); -- cgit v1.2.3 From 39380b80d72723282f0ea1d1bbf2294eae45013e Mon Sep 17 00:00:00 2001 From: Jiri Kosina Date: Fri, 8 Jul 2016 11:38:28 +0200 Subject: x86/mm/pat, /dev/mem: Remove superfluous error message Currently it's possible for broken (or malicious) userspace to flood a kernel log indefinitely with messages a-la Program dmidecode tried to access /dev/mem between f0000->100000 because range_is_allowed() is case of CONFIG_STRICT_DEVMEM being turned on dumps this information each and every time devmem_is_allowed() fails. Reportedly userspace that is able to trigger contignuous flow of these messages exists. It would be possible to rate limit this message, but that'd have a questionable value; the administrator wouldn't get information about all the failing accessess, so then the information would be both superfluous and incomplete at the same time :) Returning EPERM (which is what is actually happening) is enough indication for userspace what has happened; no need to log this particular error as some sort of special condition. Signed-off-by: Jiri Kosina Cc: Andrew Morton Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Josh Poimboeuf Cc: Kees Cook Cc: Linus Torvalds Cc: Luis R. Rodriguez Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Toshi Kani Link: http://lkml.kernel.org/r/alpine.LNX.2.00.1607081137020.24757@cbobk.fhfr.pm Signed-off-by: Ingo Molnar --- arch/x86/mm/pat.c | 5 +---- drivers/char/mem.c | 6 +----- 2 files changed, 2 insertions(+), 9 deletions(-) (limited to 'arch') diff --git a/arch/x86/mm/pat.c b/arch/x86/mm/pat.c index fb0604f11eec..db00e3e2f3dc 100644 --- a/arch/x86/mm/pat.c +++ b/arch/x86/mm/pat.c @@ -755,11 +755,8 @@ static inline int range_is_allowed(unsigned long pfn, unsigned long size) return 1; while (cursor < to) { - if (!devmem_is_allowed(pfn)) { - pr_info("x86/PAT: Program %s tried to access /dev/mem between [mem %#010Lx-%#010Lx], PAT prevents it\n", - current->comm, from, to - 1); + if (!devmem_is_allowed(pfn)) return 0; - } cursor += PAGE_SIZE; pfn++; } diff --git a/drivers/char/mem.c b/drivers/char/mem.c index 71025c2f6bbb..d633974e7f8b 100644 --- a/drivers/char/mem.c +++ b/drivers/char/mem.c @@ -66,12 +66,8 @@ static inline int range_is_allowed(unsigned long pfn, unsigned long size) u64 cursor = from; while (cursor < to) { - if (!devmem_is_allowed(pfn)) { - printk(KERN_INFO - "Program %s tried to access /dev/mem between %Lx->%Lx.\n", - current->comm, from, to); + if (!devmem_is_allowed(pfn)) return 0; - } cursor += PAGE_SIZE; pfn++; } -- cgit v1.2.3 From b059a453b1cf1c8453c2b2ed373d3147d6264ebd Mon Sep 17 00:00:00 2001 From: Dmitry Safonov Date: Tue, 28 Jun 2016 14:35:38 +0300 Subject: x86/vdso: Add mremap hook to vm_special_mapping Add possibility for 32-bit user-space applications to move the vDSO mapping. Previously, when a user-space app called mremap() for the vDSO address, in the syscall return path it would land on the previous address of the vDSOpage, resulting in segmentation violation. Now it lands fine and returns to userspace with a remapped vDSO. This will also fix the context.vdso pointer for 64-bit, which does not affect the user of vDSO after mremap() currently, but this may change in the future. As suggested by Andy, return -EINVAL for mremap() that would split the vDSO image: that operation cannot possibly result in a working system so reject it. 
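A minimal user-space sketch of the operation this enables: locate the [vdso] mapping in /proc/self/maps and relocate it with MREMAP_MAYMOVE | MREMAP_FIXED. As written this is the x86-64 case, where subsequent syscalls do not go through the vDSO entry point; for 32-bit glibc binaries the relinking caveat noted further below still applies. Error handling is minimal and the maps parsing is simplified:

#define _GNU_SOURCE
#include <stdio.h>
#include <string.h>
#include <sys/mman.h>

int main(void)
{
	unsigned long start = 0, end = 0;
	char line[256];
	FILE *maps = fopen("/proc/self/maps", "r");

	if (!maps)
		return 1;

	while (fgets(line, sizeof(line), maps)) {
		if (strstr(line, "[vdso]")) {
			sscanf(line, "%lx-%lx", &start, &end);
			break;
		}
	}
	fclose(maps);
	if (!start)
		return 1;

	/* Reserve a destination, then move the whole image onto it.  A
	 * size-changing remap is rejected with -EINVAL by the new hook. */
	void *dst = mmap(NULL, end - start, PROT_NONE,
			 MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
	if (dst == MAP_FAILED)
		return 1;

	if (mremap((void *)start, end - start, end - start,
		   MREMAP_MAYMOVE | MREMAP_FIXED, dst) == MAP_FAILED)
		return 1;

	puts("vdso relocated; still able to make syscalls");
	return 0;
}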
Renamed and moved the text_mapping structure declaration inside map_vdso(), as it used only there and now it complements the vvar_mapping variable. There is still a problem for remapping the vDSO in glibc applications: the linker relocates addresses for syscalls on the vDSO page, so you need to relink with the new addresses. Without that the next syscall through glibc may fail: Program received signal SIGSEGV, Segmentation fault. #0 0xf7fd9b80 in __kernel_vsyscall () #1 0xf7ec8238 in _exit () from /usr/lib32/libc.so.6 Signed-off-by: Dmitry Safonov Acked-by: Andy Lutomirski Cc: 0x7f454c46@gmail.com Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Josh Poimboeuf Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-mm@kvack.org Link: http://lkml.kernel.org/r/20160628113539.13606-2-dsafonov@virtuozzo.com Signed-off-by: Ingo Molnar --- arch/x86/entry/vdso/vma.c | 47 ++++++++++++++++++++++++++++++++++++++++++----- include/linux/mm_types.h | 3 +++ mm/mmap.c | 10 ++++++++++ 3 files changed, 55 insertions(+), 5 deletions(-) (limited to 'arch') diff --git a/arch/x86/entry/vdso/vma.c b/arch/x86/entry/vdso/vma.c index ab220ac9b3b9..3329844e3c43 100644 --- a/arch/x86/entry/vdso/vma.c +++ b/arch/x86/entry/vdso/vma.c @@ -12,6 +12,7 @@ #include #include #include +#include #include #include #include @@ -97,10 +98,40 @@ static int vdso_fault(const struct vm_special_mapping *sm, return 0; } -static const struct vm_special_mapping text_mapping = { - .name = "[vdso]", - .fault = vdso_fault, -}; +static void vdso_fix_landing(const struct vdso_image *image, + struct vm_area_struct *new_vma) +{ +#if defined CONFIG_X86_32 || defined CONFIG_IA32_EMULATION + if (in_ia32_syscall() && image == &vdso_image_32) { + struct pt_regs *regs = current_pt_regs(); + unsigned long vdso_land = image->sym_int80_landing_pad; + unsigned long old_land_addr = vdso_land + + (unsigned long)current->mm->context.vdso; + + /* Fixing userspace landing - look at do_fast_syscall_32 */ + if (regs->ip == old_land_addr) + regs->ip = new_vma->vm_start + vdso_land; + } +#endif +} + +static int vdso_mremap(const struct vm_special_mapping *sm, + struct vm_area_struct *new_vma) +{ + unsigned long new_size = new_vma->vm_end - new_vma->vm_start; + const struct vdso_image *image = current->mm->context.vdso_image; + + if (image->size != new_size) + return -EINVAL; + + if (WARN_ON_ONCE(current->mm != new_vma->vm_mm)) + return -EFAULT; + + vdso_fix_landing(image, new_vma); + current->mm->context.vdso = (void __user *)new_vma->vm_start; + + return 0; +} static int vvar_fault(const struct vm_special_mapping *sm, struct vm_area_struct *vma, struct vm_fault *vmf) @@ -151,6 +182,12 @@ static int map_vdso(const struct vdso_image *image, bool calculate_addr) struct vm_area_struct *vma; unsigned long addr, text_start; int ret = 0; + + static const struct vm_special_mapping vdso_mapping = { + .name = "[vdso]", + .fault = vdso_fault, + .mremap = vdso_mremap, + }; static const struct vm_special_mapping vvar_mapping = { .name = "[vvar]", .fault = vvar_fault, @@ -185,7 +222,7 @@ static int map_vdso(const struct vdso_image *image, bool calculate_addr) image->size, VM_READ|VM_EXEC| VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC, - &text_mapping); + &vdso_mapping); if (IS_ERR(vma)) { ret = PTR_ERR(vma); diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index ca3e517980a0..917f2b6a0cde 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -594,6 +594,9 @@ struct vm_special_mapping { int (*fault)(const struct 
vm_special_mapping *sm, struct vm_area_struct *vma, struct vm_fault *vmf); + + int (*mremap)(const struct vm_special_mapping *sm, + struct vm_area_struct *new_vma); }; enum tlb_flush_reason { diff --git a/mm/mmap.c b/mm/mmap.c index de2c1769cc68..234edffec1d0 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -2943,9 +2943,19 @@ static const char *special_mapping_name(struct vm_area_struct *vma) return ((struct vm_special_mapping *)vma->vm_private_data)->name; } +static int special_mapping_mremap(struct vm_area_struct *new_vma) +{ + struct vm_special_mapping *sm = new_vma->vm_private_data; + + if (sm->mremap) + return sm->mremap(sm, new_vma); + return 0; +} + static const struct vm_operations_struct special_mapping_vmops = { .close = special_mapping_close, .fault = special_mapping_fault, + .mremap = special_mapping_mremap, .name = special_mapping_name, }; -- cgit v1.2.3 From 6daa2ec0b3e3808c55329d12de3c157cf38b17b0 Mon Sep 17 00:00:00 2001 From: Baoquan He Date: Fri, 1 Jul 2016 15:34:40 +0800 Subject: x86/KASLR: Fix boot crash with certain memory configurations Ye Xiaolong reported this boot crash: | | XZ-compressed data is corrupt | | -- System halted | Fix the bug in mem_avoid_overlap() of finding the earliest overlap. Reported-and-tested-by: Ye Xiaolong Signed-off-by: Baoquan He Cc: Kees Cook Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- arch/x86/boot/compressed/kaslr.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch') diff --git a/arch/x86/boot/compressed/kaslr.c b/arch/x86/boot/compressed/kaslr.c index 749c9e00c674..010ea16e5f77 100644 --- a/arch/x86/boot/compressed/kaslr.c +++ b/arch/x86/boot/compressed/kaslr.c @@ -285,6 +285,7 @@ static bool mem_avoid_overlap(struct mem_vector *img, if (mem_overlaps(img, &mem_avoid[i]) && mem_avoid[i].start < earliest) { *overlap = mem_avoid[i]; + earliest = overlap->start; is_overlapping = true; } } @@ -299,6 +300,7 @@ static bool mem_avoid_overlap(struct mem_vector *img, if (mem_overlaps(img, &avoid) && (avoid.start < earliest)) { *overlap = avoid; + earliest = overlap->start; is_overlapping = true; } -- cgit v1.2.3 From 9a7e7b571826c4399aa639af4a670642d96d935c Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Fri, 8 Jul 2016 16:01:48 +0200 Subject: x86/asm/entry: Make thunk's restore a local label No need to have it appear in objdump output. No functionality change. Signed-off-by: Borislav Petkov Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. 
Peter Anvin Cc: Josh Poimboeuf Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/20160708141016.GH3808@pd.tnic Signed-off-by: Ingo Molnar --- arch/x86/entry/thunk_64.S | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/x86/entry/thunk_64.S b/arch/x86/entry/thunk_64.S index 027aec4a74df..627ecbcb2e62 100644 --- a/arch/x86/entry/thunk_64.S +++ b/arch/x86/entry/thunk_64.S @@ -33,7 +33,7 @@ .endif call \func - jmp restore + jmp .L_restore _ASM_NOKPROBE(\name) .endm @@ -54,7 +54,7 @@ #if defined(CONFIG_TRACE_IRQFLAGS) \ || defined(CONFIG_DEBUG_LOCK_ALLOC) \ || defined(CONFIG_PREEMPT) -restore: +.L_restore: popq %r11 popq %r10 popq %r9 @@ -66,5 +66,5 @@ restore: popq %rdi popq %rbp ret - _ASM_NOKPROBE(restore) + _ASM_NOKPROBE(.L_restore) #endif -- cgit v1.2.3 From d899a7d146a2ed8a7e6c2f61bcd232908bcbaabc Mon Sep 17 00:00:00 2001 From: Thomas Garnier Date: Tue, 21 Jun 2016 17:46:58 -0700 Subject: x86/mm: Refactor KASLR entropy functions Move the KASLR entropy functions into arch/x86/lib to be used in early kernel boot for KASLR memory randomization. Signed-off-by: Thomas Garnier Signed-off-by: Kees Cook Cc: Alexander Kuleshov Cc: Alexander Popov Cc: Andrew Morton Cc: Andy Lutomirski Cc: Aneesh Kumar K.V Cc: Baoquan He Cc: Boris Ostrovsky Cc: Borislav Petkov Cc: Borislav Petkov Cc: Brian Gerst Cc: Christian Borntraeger Cc: Dan Williams Cc: Dave Hansen Cc: Dave Young Cc: Denys Vlasenko Cc: Dmitry Vyukov Cc: H. Peter Anvin Cc: Jan Beulich Cc: Joerg Roedel Cc: Jonathan Corbet Cc: Josh Poimboeuf Cc: Juergen Gross Cc: Kirill A. Shutemov Cc: Linus Torvalds Cc: Lv Zheng Cc: Mark Salter Cc: Martin Schwidefsky Cc: Matt Fleming Cc: Peter Zijlstra Cc: Stephen Smalley Cc: Thomas Gleixner Cc: Toshi Kani Cc: Xiao Guangrong Cc: Yinghai Lu Cc: kernel-hardening@lists.openwall.com Cc: linux-doc@vger.kernel.org Link: http://lkml.kernel.org/r/1466556426-32664-2-git-send-email-keescook@chromium.org Signed-off-by: Ingo Molnar --- arch/x86/boot/compressed/kaslr.c | 76 +++------------------------------ arch/x86/include/asm/kaslr.h | 6 +++ arch/x86/lib/Makefile | 1 + arch/x86/lib/kaslr.c | 90 ++++++++++++++++++++++++++++++++++++++++ 4 files changed, 102 insertions(+), 71 deletions(-) create mode 100644 arch/x86/include/asm/kaslr.h create mode 100644 arch/x86/lib/kaslr.c (limited to 'arch') diff --git a/arch/x86/boot/compressed/kaslr.c b/arch/x86/boot/compressed/kaslr.c index 010ea16e5f77..a66854d99ee1 100644 --- a/arch/x86/boot/compressed/kaslr.c +++ b/arch/x86/boot/compressed/kaslr.c @@ -12,10 +12,6 @@ #include "misc.h" #include "error.h" -#include -#include -#include - #include #include #include @@ -26,26 +22,6 @@ static const char build_str[] = UTS_RELEASE " (" LINUX_COMPILE_BY "@" LINUX_COMPILE_HOST ") (" LINUX_COMPILER ") " UTS_VERSION; -#define I8254_PORT_CONTROL 0x43 -#define I8254_PORT_COUNTER0 0x40 -#define I8254_CMD_READBACK 0xC0 -#define I8254_SELECT_COUNTER0 0x02 -#define I8254_STATUS_NOTREADY 0x40 -static inline u16 i8254(void) -{ - u16 status, timer; - - do { - outb(I8254_PORT_CONTROL, - I8254_CMD_READBACK | I8254_SELECT_COUNTER0); - status = inb(I8254_PORT_COUNTER0); - timer = inb(I8254_PORT_COUNTER0); - timer |= inb(I8254_PORT_COUNTER0) << 8; - } while (status & I8254_STATUS_NOTREADY); - - return timer; -} - static unsigned long rotate_xor(unsigned long hash, const void *area, size_t size) { @@ -62,7 +38,7 @@ static unsigned long rotate_xor(unsigned long hash, const void *area, } /* Attempt to create a simple but 
unpredictable starting entropy. */ -static unsigned long get_random_boot(void) +static unsigned long get_boot_seed(void) { unsigned long hash = 0; @@ -72,50 +48,8 @@ static unsigned long get_random_boot(void) return hash; } -static unsigned long get_random_long(const char *purpose) -{ -#ifdef CONFIG_X86_64 - const unsigned long mix_const = 0x5d6008cbf3848dd3UL; -#else - const unsigned long mix_const = 0x3f39e593UL; -#endif - unsigned long raw, random = get_random_boot(); - bool use_i8254 = true; - - debug_putstr(purpose); - debug_putstr(" KASLR using"); - - if (has_cpuflag(X86_FEATURE_RDRAND)) { - debug_putstr(" RDRAND"); - if (rdrand_long(&raw)) { - random ^= raw; - use_i8254 = false; - } - } - - if (has_cpuflag(X86_FEATURE_TSC)) { - debug_putstr(" RDTSC"); - raw = rdtsc(); - - random ^= raw; - use_i8254 = false; - } - - if (use_i8254) { - debug_putstr(" i8254"); - random ^= i8254(); - } - - /* Circular multiply for better bit diffusion */ - asm("mul %3" - : "=a" (random), "=d" (raw) - : "a" (random), "rm" (mix_const)); - random += raw; - - debug_putstr("...\n"); - - return random; -} +#define KASLR_COMPRESSED_BOOT +#include "../../lib/kaslr.c" struct mem_vector { unsigned long start; @@ -349,7 +283,7 @@ static unsigned long slots_fetch_random(void) if (slot_max == 0) return 0; - slot = get_random_long("Physical") % slot_max; + slot = kaslr_get_random_long("Physical") % slot_max; for (i = 0; i < slot_area_index; i++) { if (slot >= slot_areas[i].num) { @@ -479,7 +413,7 @@ static unsigned long find_random_virt_addr(unsigned long minimum, slots = (KERNEL_IMAGE_SIZE - minimum - image_size) / CONFIG_PHYSICAL_ALIGN + 1; - random_addr = get_random_long("Virtual") % slots; + random_addr = kaslr_get_random_long("Virtual") % slots; return random_addr * CONFIG_PHYSICAL_ALIGN + minimum; } diff --git a/arch/x86/include/asm/kaslr.h b/arch/x86/include/asm/kaslr.h new file mode 100644 index 000000000000..5547438db5ea --- /dev/null +++ b/arch/x86/include/asm/kaslr.h @@ -0,0 +1,6 @@ +#ifndef _ASM_KASLR_H_ +#define _ASM_KASLR_H_ + +unsigned long kaslr_get_random_long(const char *purpose); + +#endif diff --git a/arch/x86/lib/Makefile b/arch/x86/lib/Makefile index 72a576752a7e..cfa6d076f4f2 100644 --- a/arch/x86/lib/Makefile +++ b/arch/x86/lib/Makefile @@ -24,6 +24,7 @@ lib-y += usercopy_$(BITS).o usercopy.o getuser.o putuser.o lib-y += memcpy_$(BITS).o lib-$(CONFIG_RWSEM_XCHGADD_ALGORITHM) += rwsem.o lib-$(CONFIG_INSTRUCTION_DECODER) += insn.o inat.o +lib-$(CONFIG_RANDOMIZE_BASE) += kaslr.o obj-y += msr.o msr-reg.o msr-reg-export.o diff --git a/arch/x86/lib/kaslr.c b/arch/x86/lib/kaslr.c new file mode 100644 index 000000000000..f7dfeda83e5c --- /dev/null +++ b/arch/x86/lib/kaslr.c @@ -0,0 +1,90 @@ +/* + * Entropy functions used on early boot for KASLR base and memory + * randomization. The base randomization is done in the compressed + * kernel and memory randomization is done early when the regular + * kernel starts. This file is included in the compressed kernel and + * normally linked in the regular. + */ +#include +#include +#include +#include +#include + +/* + * When built for the regular kernel, several functions need to be stubbed out + * or changed to their regular kernel equivalent. 
+ */ +#ifndef KASLR_COMPRESSED_BOOT +#include +#include + +#define debug_putstr(v) early_printk(v) +#define has_cpuflag(f) boot_cpu_has(f) +#define get_boot_seed() kaslr_offset() +#endif + +#define I8254_PORT_CONTROL 0x43 +#define I8254_PORT_COUNTER0 0x40 +#define I8254_CMD_READBACK 0xC0 +#define I8254_SELECT_COUNTER0 0x02 +#define I8254_STATUS_NOTREADY 0x40 +static inline u16 i8254(void) +{ + u16 status, timer; + + do { + outb(I8254_PORT_CONTROL, + I8254_CMD_READBACK | I8254_SELECT_COUNTER0); + status = inb(I8254_PORT_COUNTER0); + timer = inb(I8254_PORT_COUNTER0); + timer |= inb(I8254_PORT_COUNTER0) << 8; + } while (status & I8254_STATUS_NOTREADY); + + return timer; +} + +unsigned long kaslr_get_random_long(const char *purpose) +{ +#ifdef CONFIG_X86_64 + const unsigned long mix_const = 0x5d6008cbf3848dd3UL; +#else + const unsigned long mix_const = 0x3f39e593UL; +#endif + unsigned long raw, random = get_boot_seed(); + bool use_i8254 = true; + + debug_putstr(purpose); + debug_putstr(" KASLR using"); + + if (has_cpuflag(X86_FEATURE_RDRAND)) { + debug_putstr(" RDRAND"); + if (rdrand_long(&raw)) { + random ^= raw; + use_i8254 = false; + } + } + + if (has_cpuflag(X86_FEATURE_TSC)) { + debug_putstr(" RDTSC"); + raw = rdtsc(); + + random ^= raw; + use_i8254 = false; + } + + if (use_i8254) { + debug_putstr(" i8254"); + random ^= i8254(); + } + + /* Circular multiply for better bit diffusion */ + asm("mul %3" + : "=a" (random), "=d" (raw) + : "a" (random), "rm" (mix_const)); + random += raw; + + debug_putstr("...\n"); + + return random; +} -- cgit v1.2.3 From 59b3d0206d74a700069e49160e8194b2ca93b703 Mon Sep 17 00:00:00 2001 From: Thomas Garnier Date: Tue, 21 Jun 2016 17:46:59 -0700 Subject: x86/mm: Update physical mapping variable names Change the variable names in kernel_physical_mapping_init() and related functions to correctly reflect physical and virtual memory addresses. Also add comments on each function to describe usage and alignment constraints. Signed-off-by: Thomas Garnier Signed-off-by: Kees Cook Cc: Alexander Kuleshov Cc: Alexander Popov Cc: Andrew Morton Cc: Andy Lutomirski Cc: Aneesh Kumar K.V Cc: Baoquan He Cc: Boris Ostrovsky Cc: Borislav Petkov Cc: Borislav Petkov Cc: Brian Gerst Cc: Christian Borntraeger Cc: Dan Williams Cc: Dave Hansen Cc: Dave Young Cc: Denys Vlasenko Cc: Dmitry Vyukov Cc: H. Peter Anvin Cc: Jan Beulich Cc: Joerg Roedel Cc: Jonathan Corbet Cc: Josh Poimboeuf Cc: Juergen Gross Cc: Kirill A. Shutemov Cc: Linus Torvalds Cc: Lv Zheng Cc: Mark Salter Cc: Martin Schwidefsky Cc: Matt Fleming Cc: Peter Zijlstra Cc: Stephen Smalley Cc: Thomas Gleixner Cc: Toshi Kani Cc: Xiao Guangrong Cc: Yinghai Lu Cc: kernel-hardening@lists.openwall.com Cc: linux-doc@vger.kernel.org Link: http://lkml.kernel.org/r/1466556426-32664-3-git-send-email-keescook@chromium.org Signed-off-by: Ingo Molnar --- arch/x86/mm/init_64.c | 162 ++++++++++++++++++++++++++++++-------------------- 1 file changed, 96 insertions(+), 66 deletions(-) (limited to 'arch') diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index bce2e5d9edd4..6714712bd5da 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c @@ -328,22 +328,30 @@ void __init cleanup_highmap(void) } } +/* + * Create PTE level page table mapping for physical addresses. + * It returns the last physical address mapped. 
+ */ static unsigned long __meminit -phys_pte_init(pte_t *pte_page, unsigned long addr, unsigned long end, +phys_pte_init(pte_t *pte_page, unsigned long paddr, unsigned long paddr_end, pgprot_t prot) { - unsigned long pages = 0, next; - unsigned long last_map_addr = end; + unsigned long pages = 0, paddr_next; + unsigned long paddr_last = paddr_end; + pte_t *pte; int i; - pte_t *pte = pte_page + pte_index(addr); + pte = pte_page + pte_index(paddr); + i = pte_index(paddr); - for (i = pte_index(addr); i < PTRS_PER_PTE; i++, addr = next, pte++) { - next = (addr & PAGE_MASK) + PAGE_SIZE; - if (addr >= end) { + for (; i < PTRS_PER_PTE; i++, paddr = paddr_next, pte++) { + paddr_next = (paddr & PAGE_MASK) + PAGE_SIZE; + if (paddr >= paddr_end) { if (!after_bootmem && - !e820_any_mapped(addr & PAGE_MASK, next, E820_RAM) && - !e820_any_mapped(addr & PAGE_MASK, next, E820_RESERVED_KERN)) + !e820_any_mapped(paddr & PAGE_MASK, paddr_next, + E820_RAM) && + !e820_any_mapped(paddr & PAGE_MASK, paddr_next, + E820_RESERVED_KERN)) set_pte(pte, __pte(0)); continue; } @@ -361,37 +369,44 @@ phys_pte_init(pte_t *pte_page, unsigned long addr, unsigned long end, } if (0) - printk(" pte=%p addr=%lx pte=%016lx\n", - pte, addr, pfn_pte(addr >> PAGE_SHIFT, PAGE_KERNEL).pte); + pr_info(" pte=%p addr=%lx pte=%016lx\n", pte, paddr, + pfn_pte(paddr >> PAGE_SHIFT, PAGE_KERNEL).pte); pages++; - set_pte(pte, pfn_pte(addr >> PAGE_SHIFT, prot)); - last_map_addr = (addr & PAGE_MASK) + PAGE_SIZE; + set_pte(pte, pfn_pte(paddr >> PAGE_SHIFT, prot)); + paddr_last = (paddr & PAGE_MASK) + PAGE_SIZE; } update_page_count(PG_LEVEL_4K, pages); - return last_map_addr; + return paddr_last; } +/* + * Create PMD level page table mapping for physical addresses. The virtual + * and physical address have to be aligned at this level. + * It returns the last physical address mapped. 
+ */ static unsigned long __meminit -phys_pmd_init(pmd_t *pmd_page, unsigned long address, unsigned long end, +phys_pmd_init(pmd_t *pmd_page, unsigned long paddr, unsigned long paddr_end, unsigned long page_size_mask, pgprot_t prot) { - unsigned long pages = 0, next; - unsigned long last_map_addr = end; + unsigned long pages = 0, paddr_next; + unsigned long paddr_last = paddr_end; - int i = pmd_index(address); + int i = pmd_index(paddr); - for (; i < PTRS_PER_PMD; i++, address = next) { - pmd_t *pmd = pmd_page + pmd_index(address); + for (; i < PTRS_PER_PMD; i++, paddr = paddr_next) { + pmd_t *pmd = pmd_page + pmd_index(paddr); pte_t *pte; pgprot_t new_prot = prot; - next = (address & PMD_MASK) + PMD_SIZE; - if (address >= end) { + paddr_next = (paddr & PMD_MASK) + PMD_SIZE; + if (paddr >= paddr_end) { if (!after_bootmem && - !e820_any_mapped(address & PMD_MASK, next, E820_RAM) && - !e820_any_mapped(address & PMD_MASK, next, E820_RESERVED_KERN)) + !e820_any_mapped(paddr & PMD_MASK, paddr_next, + E820_RAM) && + !e820_any_mapped(paddr & PMD_MASK, paddr_next, + E820_RESERVED_KERN)) set_pmd(pmd, __pmd(0)); continue; } @@ -400,8 +415,8 @@ phys_pmd_init(pmd_t *pmd_page, unsigned long address, unsigned long end, if (!pmd_large(*pmd)) { spin_lock(&init_mm.page_table_lock); pte = (pte_t *)pmd_page_vaddr(*pmd); - last_map_addr = phys_pte_init(pte, address, - end, prot); + paddr_last = phys_pte_init(pte, paddr, + paddr_end, prot); spin_unlock(&init_mm.page_table_lock); continue; } @@ -420,7 +435,7 @@ phys_pmd_init(pmd_t *pmd_page, unsigned long address, unsigned long end, if (page_size_mask & (1 << PG_LEVEL_2M)) { if (!after_bootmem) pages++; - last_map_addr = next; + paddr_last = paddr_next; continue; } new_prot = pte_pgprot(pte_clrhuge(*(pte_t *)pmd)); @@ -430,42 +445,49 @@ phys_pmd_init(pmd_t *pmd_page, unsigned long address, unsigned long end, pages++; spin_lock(&init_mm.page_table_lock); set_pte((pte_t *)pmd, - pfn_pte((address & PMD_MASK) >> PAGE_SHIFT, + pfn_pte((paddr & PMD_MASK) >> PAGE_SHIFT, __pgprot(pgprot_val(prot) | _PAGE_PSE))); spin_unlock(&init_mm.page_table_lock); - last_map_addr = next; + paddr_last = paddr_next; continue; } pte = alloc_low_page(); - last_map_addr = phys_pte_init(pte, address, end, new_prot); + paddr_last = phys_pte_init(pte, paddr, paddr_end, new_prot); spin_lock(&init_mm.page_table_lock); pmd_populate_kernel(&init_mm, pmd, pte); spin_unlock(&init_mm.page_table_lock); } update_page_count(PG_LEVEL_2M, pages); - return last_map_addr; + return paddr_last; } +/* + * Create PUD level page table mapping for physical addresses. The virtual + * and physical address have to be aligned at this level. + * It returns the last physical address mapped. 
+ */ static unsigned long __meminit -phys_pud_init(pud_t *pud_page, unsigned long addr, unsigned long end, - unsigned long page_size_mask) +phys_pud_init(pud_t *pud_page, unsigned long paddr, unsigned long paddr_end, + unsigned long page_size_mask) { - unsigned long pages = 0, next; - unsigned long last_map_addr = end; - int i = pud_index(addr); + unsigned long pages = 0, paddr_next; + unsigned long paddr_last = paddr_end; + int i = pud_index(paddr); - for (; i < PTRS_PER_PUD; i++, addr = next) { - pud_t *pud = pud_page + pud_index(addr); + for (; i < PTRS_PER_PUD; i++, paddr = paddr_next) { + pud_t *pud = pud_page + pud_index(paddr); pmd_t *pmd; pgprot_t prot = PAGE_KERNEL; - next = (addr & PUD_MASK) + PUD_SIZE; - if (addr >= end) { + paddr_next = (paddr & PUD_MASK) + PUD_SIZE; + if (paddr >= paddr_end) { if (!after_bootmem && - !e820_any_mapped(addr & PUD_MASK, next, E820_RAM) && - !e820_any_mapped(addr & PUD_MASK, next, E820_RESERVED_KERN)) + !e820_any_mapped(paddr & PUD_MASK, paddr_next, + E820_RAM) && + !e820_any_mapped(paddr & PUD_MASK, paddr_next, + E820_RESERVED_KERN)) set_pud(pud, __pud(0)); continue; } @@ -473,8 +495,10 @@ phys_pud_init(pud_t *pud_page, unsigned long addr, unsigned long end, if (pud_val(*pud)) { if (!pud_large(*pud)) { pmd = pmd_offset(pud, 0); - last_map_addr = phys_pmd_init(pmd, addr, end, - page_size_mask, prot); + paddr_last = phys_pmd_init(pmd, paddr, + paddr_end, + page_size_mask, + prot); __flush_tlb_all(); continue; } @@ -493,7 +517,7 @@ phys_pud_init(pud_t *pud_page, unsigned long addr, unsigned long end, if (page_size_mask & (1 << PG_LEVEL_1G)) { if (!after_bootmem) pages++; - last_map_addr = next; + paddr_last = paddr_next; continue; } prot = pte_pgprot(pte_clrhuge(*(pte_t *)pud)); @@ -503,16 +527,16 @@ phys_pud_init(pud_t *pud_page, unsigned long addr, unsigned long end, pages++; spin_lock(&init_mm.page_table_lock); set_pte((pte_t *)pud, - pfn_pte((addr & PUD_MASK) >> PAGE_SHIFT, + pfn_pte((paddr & PUD_MASK) >> PAGE_SHIFT, PAGE_KERNEL_LARGE)); spin_unlock(&init_mm.page_table_lock); - last_map_addr = next; + paddr_last = paddr_next; continue; } pmd = alloc_low_page(); - last_map_addr = phys_pmd_init(pmd, addr, end, page_size_mask, - prot); + paddr_last = phys_pmd_init(pmd, paddr, paddr_end, + page_size_mask, prot); spin_lock(&init_mm.page_table_lock); pud_populate(&init_mm, pud, pmd); @@ -522,38 +546,44 @@ phys_pud_init(pud_t *pud_page, unsigned long addr, unsigned long end, update_page_count(PG_LEVEL_1G, pages); - return last_map_addr; + return paddr_last; } +/* + * Create page table mapping for the physical memory for specific physical + * addresses. The virtual and physical addresses have to be aligned on PUD level + * down. It returns the last physical address mapped. 
+ */ unsigned long __meminit -kernel_physical_mapping_init(unsigned long start, - unsigned long end, +kernel_physical_mapping_init(unsigned long paddr_start, + unsigned long paddr_end, unsigned long page_size_mask) { bool pgd_changed = false; - unsigned long next, last_map_addr = end; - unsigned long addr; + unsigned long vaddr, vaddr_start, vaddr_end, vaddr_next, paddr_last; - start = (unsigned long)__va(start); - end = (unsigned long)__va(end); - addr = start; + paddr_last = paddr_end; + vaddr = (unsigned long)__va(paddr_start); + vaddr_end = (unsigned long)__va(paddr_end); + vaddr_start = vaddr; - for (; start < end; start = next) { - pgd_t *pgd = pgd_offset_k(start); + for (; vaddr < vaddr_end; vaddr = vaddr_next) { + pgd_t *pgd = pgd_offset_k(vaddr); pud_t *pud; - next = (start & PGDIR_MASK) + PGDIR_SIZE; + vaddr_next = (vaddr & PGDIR_MASK) + PGDIR_SIZE; if (pgd_val(*pgd)) { pud = (pud_t *)pgd_page_vaddr(*pgd); - last_map_addr = phys_pud_init(pud, __pa(start), - __pa(end), page_size_mask); + paddr_last = phys_pud_init(pud, __pa(vaddr), + __pa(vaddr_end), + page_size_mask); continue; } pud = alloc_low_page(); - last_map_addr = phys_pud_init(pud, __pa(start), __pa(end), - page_size_mask); + paddr_last = phys_pud_init(pud, __pa(vaddr), __pa(vaddr_end), + page_size_mask); spin_lock(&init_mm.page_table_lock); pgd_populate(&init_mm, pgd, pud); @@ -562,11 +592,11 @@ kernel_physical_mapping_init(unsigned long start, } if (pgd_changed) - sync_global_pgds(addr, end - 1, 0); + sync_global_pgds(vaddr_start, vaddr_end - 1, 0); __flush_tlb_all(); - return last_map_addr; + return paddr_last; } #ifndef CONFIG_NUMA -- cgit v1.2.3 From faa379332f3cb3375db1849e27386f8bc9b97da4 Mon Sep 17 00:00:00 2001 From: Thomas Garnier Date: Tue, 21 Jun 2016 17:47:00 -0700 Subject: x86/mm: Add PUD VA support for physical mapping Minor change that allows early boot physical mapping of PUD level virtual addresses. The current implementation expects the virtual address to be PUD aligned. For KASLR memory randomization, we need to be able to randomize the offset used on the PUD table. It has no impact on current usage. Signed-off-by: Thomas Garnier Signed-off-by: Kees Cook Cc: Alexander Kuleshov Cc: Alexander Popov Cc: Andrew Morton Cc: Andy Lutomirski Cc: Aneesh Kumar K.V Cc: Baoquan He Cc: Boris Ostrovsky Cc: Borislav Petkov Cc: Borislav Petkov Cc: Brian Gerst Cc: Christian Borntraeger Cc: Dan Williams Cc: Dave Hansen Cc: Dave Young Cc: Denys Vlasenko Cc: Dmitry Vyukov Cc: H. Peter Anvin Cc: Jan Beulich Cc: Joerg Roedel Cc: Jonathan Corbet Cc: Josh Poimboeuf Cc: Juergen Gross Cc: Kirill A. Shutemov Cc: Linus Torvalds Cc: Lv Zheng Cc: Mark Salter Cc: Martin Schwidefsky Cc: Matt Fleming Cc: Peter Zijlstra Cc: Stephen Smalley Cc: Thomas Gleixner Cc: Toshi Kani Cc: Xiao Guangrong Cc: Yinghai Lu Cc: kernel-hardening@lists.openwall.com Cc: linux-doc@vger.kernel.org Link: http://lkml.kernel.org/r/1466556426-32664-4-git-send-email-keescook@chromium.org Signed-off-by: Ingo Molnar --- arch/x86/mm/init_64.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) (limited to 'arch') diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index 6714712bd5da..7bf1ddb54537 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c @@ -465,7 +465,8 @@ phys_pmd_init(pmd_t *pmd_page, unsigned long paddr, unsigned long paddr_end, /* * Create PUD level page table mapping for physical addresses. The virtual - * and physical address have to be aligned at this level. 
+ * and physical address do not have to be aligned at this level. KASLR can + * randomize virtual addresses up to this level. * It returns the last physical address mapped. */ static unsigned long __meminit @@ -474,14 +475,18 @@ phys_pud_init(pud_t *pud_page, unsigned long paddr, unsigned long paddr_end, { unsigned long pages = 0, paddr_next; unsigned long paddr_last = paddr_end; - int i = pud_index(paddr); + unsigned long vaddr = (unsigned long)__va(paddr); + int i = pud_index(vaddr); for (; i < PTRS_PER_PUD; i++, paddr = paddr_next) { - pud_t *pud = pud_page + pud_index(paddr); + pud_t *pud; pmd_t *pmd; pgprot_t prot = PAGE_KERNEL; + vaddr = (unsigned long)__va(paddr); + pud = pud_page + pud_index(vaddr); paddr_next = (paddr & PUD_MASK) + PUD_SIZE; + if (paddr >= paddr_end) { if (!after_bootmem && !e820_any_mapped(paddr & PUD_MASK, paddr_next, @@ -551,7 +556,7 @@ phys_pud_init(pud_t *pud_page, unsigned long paddr, unsigned long paddr_end, /* * Create page table mapping for the physical memory for specific physical - * addresses. The virtual and physical addresses have to be aligned on PUD level + * addresses. The virtual and physical addresses have to be aligned on PMD level * down. It returns the last physical address mapped. */ unsigned long __meminit -- cgit v1.2.3 From b234e8a09003af108d3573f0369e25c080676b14 Mon Sep 17 00:00:00 2001 From: Thomas Garnier Date: Tue, 21 Jun 2016 17:47:01 -0700 Subject: x86/mm: Separate variable for trampoline PGD Use a separate global variable to define the trampoline PGD used to start other processors. This change will allow KALSR memory randomization to change the trampoline PGD to be correctly aligned with physical memory. Signed-off-by: Thomas Garnier Signed-off-by: Kees Cook Cc: Alexander Kuleshov Cc: Alexander Popov Cc: Andrew Morton Cc: Andy Lutomirski Cc: Aneesh Kumar K.V Cc: Baoquan He Cc: Boris Ostrovsky Cc: Borislav Petkov Cc: Borislav Petkov Cc: Brian Gerst Cc: Christian Borntraeger Cc: Dan Williams Cc: Dave Hansen Cc: Dave Young Cc: Denys Vlasenko Cc: Dmitry Vyukov Cc: H. Peter Anvin Cc: Jan Beulich Cc: Joerg Roedel Cc: Jonathan Corbet Cc: Josh Poimboeuf Cc: Juergen Gross Cc: Kirill A. Shutemov Cc: Linus Torvalds Cc: Lv Zheng Cc: Mark Salter Cc: Martin Schwidefsky Cc: Matt Fleming Cc: Peter Zijlstra Cc: Stephen Smalley Cc: Thomas Gleixner Cc: Toshi Kani Cc: Xiao Guangrong Cc: Yinghai Lu Cc: kernel-hardening@lists.openwall.com Cc: linux-doc@vger.kernel.org Link: http://lkml.kernel.org/r/1466556426-32664-5-git-send-email-keescook@chromium.org Signed-off-by: Ingo Molnar --- arch/x86/include/asm/pgtable.h | 12 ++++++++++++ arch/x86/mm/init.c | 3 +++ arch/x86/realmode/init.c | 5 ++++- 3 files changed, 19 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h index 1a27396b6ea0..d455bef39e9c 100644 --- a/arch/x86/include/asm/pgtable.h +++ b/arch/x86/include/asm/pgtable.h @@ -729,6 +729,18 @@ extern int direct_gbpages; void init_mem_mapping(void); void early_alloc_pgt_buf(void); +#ifdef CONFIG_X86_64 +/* Realmode trampoline initialization. 
*/ +extern pgd_t trampoline_pgd_entry; +static inline void __meminit init_trampoline(void) +{ + /* Default trampoline pgd value */ + trampoline_pgd_entry = init_level4_pgt[pgd_index(__PAGE_OFFSET)]; +} +#else +static inline void init_trampoline(void) { } +#endif + /* local pte updates need not use xchg for locking */ static inline pte_t native_local_ptep_get_and_clear(pte_t *ptep) { diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c index 372aad2b3291..4252acdfcbbd 100644 --- a/arch/x86/mm/init.c +++ b/arch/x86/mm/init.c @@ -590,6 +590,9 @@ void __init init_mem_mapping(void) /* the ISA range is always mapped regardless of memory holes */ init_memory_mapping(0, ISA_END_ADDRESS); + /* Init the trampoline, possibly with KASLR memory offset */ + init_trampoline(); + /* * If the allocation is in bottom-up direction, we setup direct mapping * in bottom-up, otherwise we setup direct mapping in top-down. diff --git a/arch/x86/realmode/init.c b/arch/x86/realmode/init.c index 0b7a63d98440..705e3fffb4a1 100644 --- a/arch/x86/realmode/init.c +++ b/arch/x86/realmode/init.c @@ -8,6 +8,9 @@ struct real_mode_header *real_mode_header; u32 *trampoline_cr4_features; +/* Hold the pgd entry used on booting additional CPUs */ +pgd_t trampoline_pgd_entry; + void __init reserve_real_mode(void) { phys_addr_t mem; @@ -84,7 +87,7 @@ void __init setup_real_mode(void) *trampoline_cr4_features = __read_cr4(); trampoline_pgd = (u64 *) __va(real_mode_header->trampoline_pgd); - trampoline_pgd[0] = init_level4_pgt[pgd_index(__PAGE_OFFSET)].pgd; + trampoline_pgd[0] = trampoline_pgd_entry.pgd; trampoline_pgd[511] = init_level4_pgt[511].pgd; #endif } -- cgit v1.2.3 From 0483e1fa6e09d4948272680f691dccb1edb9677f Mon Sep 17 00:00:00 2001 From: Thomas Garnier Date: Tue, 21 Jun 2016 17:47:02 -0700 Subject: x86/mm: Implement ASLR for kernel memory regions Randomizes the virtual address space of kernel memory regions for x86_64. This first patch adds the infrastructure and does not randomize any region. The following patches will randomize the physical memory mapping, vmalloc and vmemmap regions. This security feature mitigates exploits relying on predictable kernel addresses. These addresses can be used to disclose the kernel modules base addresses or corrupt specific structures to elevate privileges bypassing the current implementation of KASLR. This feature can be enabled with the CONFIG_RANDOMIZE_MEMORY option. The order of each memory region is not changed. The feature looks at the available space for the regions based on different configuration options and randomizes the base and space between each. The size of the physical memory mapping is the available physical memory. No performance impact was detected while testing the feature. Entropy is generated using the KASLR early boot functions now shared in the lib directory (originally written by Kees Cook). Randomization is done on PGD & PUD page table levels to increase possible addresses. The physical memory mapping code was adapted to support PUD level virtual addresses. This implementation on the best configuration provides 30,000 possible virtual addresses in average for each memory region. An additional low memory page is used to ensure each CPU can start with a PGD aligned virtual address (for realmode). x86/dump_pagetable was updated to correctly display each region. Updated documentation on x86_64 memory layout accordingly. 
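As a rough, illustrative sanity check of that figure (the exact numbers depend on the configuration and on how much physical memory is installed): the randomized bases are PUD aligned, and a PUD entry covers 1 GiB with 4-level paging, so a region left with roughly 30 TiB of spare virtual address space has about 30 * 1024 = 30,720 candidate base addresses, consistent with the ~30,000 average quoted above.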
Performance data, after all patches in the series: Kernbench shows almost no difference (-+ less than 1%): Before: Average Optimal load -j 12 Run (std deviation): Elapsed Time 102.63 (1.2695) User Time 1034.89 (1.18115) System Time 87.056 (0.456416) Percent CPU 1092.9 (13.892) Context Switches 199805 (3455.33) Sleeps 97907.8 (900.636) After: Average Optimal load -j 12 Run (std deviation): Elapsed Time 102.489 (1.10636) User Time 1034.86 (1.36053) System Time 87.764 (0.49345) Percent CPU 1095 (12.7715) Context Switches 199036 (4298.1) Sleeps 97681.6 (1031.11) Hackbench shows 0% difference on average (hackbench 90 repeated 10 times): attemp,before,after 1,0.076,0.069 2,0.072,0.069 3,0.066,0.066 4,0.066,0.068 5,0.066,0.067 6,0.066,0.069 7,0.067,0.066 8,0.063,0.067 9,0.067,0.065 10,0.068,0.071 average,0.0677,0.0677 Signed-off-by: Thomas Garnier Signed-off-by: Kees Cook Cc: Alexander Kuleshov Cc: Alexander Popov Cc: Andrew Morton Cc: Andy Lutomirski Cc: Aneesh Kumar K.V Cc: Baoquan He Cc: Boris Ostrovsky Cc: Borislav Petkov Cc: Borislav Petkov Cc: Brian Gerst Cc: Christian Borntraeger Cc: Dan Williams Cc: Dave Hansen Cc: Dave Young Cc: Denys Vlasenko Cc: Dmitry Vyukov Cc: H. Peter Anvin Cc: Jan Beulich Cc: Joerg Roedel Cc: Jonathan Corbet Cc: Josh Poimboeuf Cc: Juergen Gross Cc: Kirill A. Shutemov Cc: Linus Torvalds Cc: Lv Zheng Cc: Mark Salter Cc: Martin Schwidefsky Cc: Matt Fleming Cc: Peter Zijlstra Cc: Stephen Smalley Cc: Thomas Gleixner Cc: Toshi Kani Cc: Xiao Guangrong Cc: Yinghai Lu Cc: kernel-hardening@lists.openwall.com Cc: linux-doc@vger.kernel.org Link: http://lkml.kernel.org/r/1466556426-32664-6-git-send-email-keescook@chromium.org Signed-off-by: Ingo Molnar --- Documentation/x86/x86_64/mm.txt | 4 ++ arch/x86/Kconfig | 17 +++++ arch/x86/include/asm/kaslr.h | 6 ++ arch/x86/include/asm/pgtable.h | 7 +- arch/x86/kernel/setup.c | 3 + arch/x86/mm/Makefile | 1 + arch/x86/mm/dump_pagetables.c | 16 +++-- arch/x86/mm/init.c | 1 + arch/x86/mm/kaslr.c | 152 ++++++++++++++++++++++++++++++++++++++++ 9 files changed, 202 insertions(+), 5 deletions(-) create mode 100644 arch/x86/mm/kaslr.c (limited to 'arch') diff --git a/Documentation/x86/x86_64/mm.txt b/Documentation/x86/x86_64/mm.txt index 5aa738346062..8c7dd5957ae1 100644 --- a/Documentation/x86/x86_64/mm.txt +++ b/Documentation/x86/x86_64/mm.txt @@ -39,4 +39,8 @@ memory window (this size is arbitrary, it can be raised later if needed). The mappings are not part of any other kernel PGD and are only available during EFI runtime calls. +Note that if CONFIG_RANDOMIZE_MEMORY is enabled, the direct mapping of all +physical memory, vmalloc/ioremap space and virtual memory map are randomized. +Their order is preserved but their base will be offset early at boot time. + -Andi Kleen, Jul 2004 diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 930fe88095d3..9719b8eb38d3 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -1993,6 +1993,23 @@ config PHYSICAL_ALIGN Don't change this unless you know what you are doing. +config RANDOMIZE_MEMORY + bool "Randomize the kernel memory sections" + depends on X86_64 + depends on RANDOMIZE_BASE + default RANDOMIZE_BASE + ---help--- + Randomizes the base virtual address of kernel memory sections + (physical memory mapping, vmalloc & vmemmap). This security feature + makes exploits relying on predictable memory locations less reliable. + + The order of allocations remains unchanged. Entropy is generated in + the same way as RANDOMIZE_BASE. 
Current implementation in the optimal + configuration have in average 30,000 different possible virtual + addresses for each memory section. + + If unsure, say N. + config HOTPLUG_CPU bool "Support for hot-pluggable CPUs" depends on SMP diff --git a/arch/x86/include/asm/kaslr.h b/arch/x86/include/asm/kaslr.h index 5547438db5ea..683c9d736314 100644 --- a/arch/x86/include/asm/kaslr.h +++ b/arch/x86/include/asm/kaslr.h @@ -3,4 +3,10 @@ unsigned long kaslr_get_random_long(const char *purpose); +#ifdef CONFIG_RANDOMIZE_MEMORY +void kernel_randomize_memory(void); +#else +static inline void kernel_randomize_memory(void) { } +#endif /* CONFIG_RANDOMIZE_MEMORY */ + #endif diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h index d455bef39e9c..5472682a307f 100644 --- a/arch/x86/include/asm/pgtable.h +++ b/arch/x86/include/asm/pgtable.h @@ -732,11 +732,16 @@ void early_alloc_pgt_buf(void); #ifdef CONFIG_X86_64 /* Realmode trampoline initialization. */ extern pgd_t trampoline_pgd_entry; -static inline void __meminit init_trampoline(void) +static inline void __meminit init_trampoline_default(void) { /* Default trampoline pgd value */ trampoline_pgd_entry = init_level4_pgt[pgd_index(__PAGE_OFFSET)]; } +# ifdef CONFIG_RANDOMIZE_MEMORY +void __meminit init_trampoline(void); +# else +# define init_trampoline init_trampoline_default +# endif #else static inline void init_trampoline(void) { } #endif diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index c4e7b3991b60..a2616584b6e9 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -113,6 +113,7 @@ #include #include #include +#include /* * max_low_pfn_mapped: highest direct mapped pfn under 4GB @@ -942,6 +943,8 @@ void __init setup_arch(char **cmdline_p) x86_init.oem.arch_setup(); + kernel_randomize_memory(); + iomem_resource.end = (1ULL << boot_cpu_data.x86_phys_bits) - 1; setup_memory_map(); parse_setup_data(); diff --git a/arch/x86/mm/Makefile b/arch/x86/mm/Makefile index 62c0043a5fd5..96d2b847e09e 100644 --- a/arch/x86/mm/Makefile +++ b/arch/x86/mm/Makefile @@ -37,4 +37,5 @@ obj-$(CONFIG_NUMA_EMU) += numa_emulation.o obj-$(CONFIG_X86_INTEL_MPX) += mpx.o obj-$(CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS) += pkeys.o +obj-$(CONFIG_RANDOMIZE_MEMORY) += kaslr.o diff --git a/arch/x86/mm/dump_pagetables.c b/arch/x86/mm/dump_pagetables.c index 99bfb192803f..9a17250bcbe0 100644 --- a/arch/x86/mm/dump_pagetables.c +++ b/arch/x86/mm/dump_pagetables.c @@ -72,9 +72,9 @@ static struct addr_marker address_markers[] = { { 0, "User Space" }, #ifdef CONFIG_X86_64 { 0x8000000000000000UL, "Kernel Space" }, - { PAGE_OFFSET, "Low Kernel Mapping" }, - { VMALLOC_START, "vmalloc() Area" }, - { VMEMMAP_START, "Vmemmap" }, + { 0/* PAGE_OFFSET */, "Low Kernel Mapping" }, + { 0/* VMALLOC_START */, "vmalloc() Area" }, + { 0/* VMEMMAP_START */, "Vmemmap" }, # ifdef CONFIG_X86_ESPFIX64 { ESPFIX_BASE_ADDR, "ESPfix Area", 16 }, # endif @@ -434,8 +434,16 @@ void ptdump_walk_pgd_level_checkwx(void) static int __init pt_dump_init(void) { + /* + * Various markers are not compile-time constants, so assign them + * here. 
+ */ +#ifdef CONFIG_X86_64 + address_markers[LOW_KERNEL_NR].start_address = PAGE_OFFSET; + address_markers[VMALLOC_START_NR].start_address = VMALLOC_START; + address_markers[VMEMMAP_START_NR].start_address = VMEMMAP_START; +#endif #ifdef CONFIG_X86_32 - /* Not a compile-time constant on x86-32 */ address_markers[VMALLOC_START_NR].start_address = VMALLOC_START; address_markers[VMALLOC_END_NR].start_address = VMALLOC_END; # ifdef CONFIG_HIGHMEM diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c index 4252acdfcbbd..cc82830bc8c4 100644 --- a/arch/x86/mm/init.c +++ b/arch/x86/mm/init.c @@ -17,6 +17,7 @@ #include #include /* for MAX_DMA_PFN */ #include +#include /* * We need to define the tracepoints somewhere, and tlb.c diff --git a/arch/x86/mm/kaslr.c b/arch/x86/mm/kaslr.c new file mode 100644 index 000000000000..d5380a48e8fb --- /dev/null +++ b/arch/x86/mm/kaslr.c @@ -0,0 +1,152 @@ +/* + * This file implements KASLR memory randomization for x86_64. It randomizes + * the virtual address space of kernel memory regions (physical memory + * mapping, vmalloc & vmemmap) for x86_64. This security feature mitigates + * exploits relying on predictable kernel addresses. + * + * Entropy is generated using the KASLR early boot functions now shared in + * the lib directory (originally written by Kees Cook). Randomization is + * done on PGD & PUD page table levels to increase possible addresses. The + * physical memory mapping code was adapted to support PUD level virtual + * addresses. This implementation on the best configuration provides 30,000 + * possible virtual addresses in average for each memory region. An additional + * low memory page is used to ensure each CPU can start with a PGD aligned + * virtual address (for realmode). + * + * The order of each memory region is not changed. The feature looks at + * the available space for the regions based on different configuration + * options and randomizes the base and space between each. The size of the + * physical memory mapping is the available physical memory. + */ + +#include +#include +#include + +#include +#include +#include +#include + +#include "mm_internal.h" + +#define TB_SHIFT 40 + +/* + * Virtual address start and end range for randomization. The end changes base + * on configuration to have the highest amount of space for randomization. + * It increases the possible random position for each randomized region. + * + * You need to add an if/def entry if you introduce a new memory region + * compatible with KASLR. Your entry must be in logical order with memory + * layout. For example, ESPFIX is before EFI because its virtual address is + * before. You also need to add a BUILD_BUG_ON in kernel_randomize_memory to + * ensure that this order is correct and won't be changed. + */ +static const unsigned long vaddr_start; +static const unsigned long vaddr_end; + +/* + * Memory regions randomized by KASLR (except modules that use a separate logic + * earlier during boot). The list is ordered based on virtual addresses. This + * order is kept after randomization. + */ +static __initdata struct kaslr_memory_region { + unsigned long *base; + unsigned long size_tb; +} kaslr_regions[] = { +}; + +/* Get size in bytes used by the memory region */ +static inline unsigned long get_padding(struct kaslr_memory_region *region) +{ + return (region->size_tb << TB_SHIFT); +} + +/* + * Apply no randomization if KASLR was disabled at boot or if KASAN + * is enabled. KASAN shadow mappings rely on regions being PGD aligned. 
+ */ +static inline bool kaslr_memory_enabled(void) +{ + return kaslr_enabled() && !config_enabled(CONFIG_KASAN); +} + +/* Initialize base and padding for each memory region randomized with KASLR */ +void __init kernel_randomize_memory(void) +{ + size_t i; + unsigned long vaddr = vaddr_start; + unsigned long rand; + struct rnd_state rand_state; + unsigned long remain_entropy; + + if (!kaslr_memory_enabled()) + return; + + /* Calculate entropy available between regions */ + remain_entropy = vaddr_end - vaddr_start; + for (i = 0; i < ARRAY_SIZE(kaslr_regions); i++) + remain_entropy -= get_padding(&kaslr_regions[i]); + + prandom_seed_state(&rand_state, kaslr_get_random_long("Memory")); + + for (i = 0; i < ARRAY_SIZE(kaslr_regions); i++) { + unsigned long entropy; + + /* + * Select a random virtual address using the extra entropy + * available. + */ + entropy = remain_entropy / (ARRAY_SIZE(kaslr_regions) - i); + prandom_bytes_state(&rand_state, &rand, sizeof(rand)); + entropy = (rand % (entropy + 1)) & PUD_MASK; + vaddr += entropy; + *kaslr_regions[i].base = vaddr; + + /* + * Jump the region and add a minimum padding based on + * randomization alignment. + */ + vaddr += get_padding(&kaslr_regions[i]); + vaddr = round_up(vaddr + 1, PUD_SIZE); + remain_entropy -= entropy; + } +} + +/* + * Create PGD aligned trampoline table to allow real mode initialization + * of additional CPUs. Consume only 1 low memory page. + */ +void __meminit init_trampoline(void) +{ + unsigned long paddr, paddr_next; + pgd_t *pgd; + pud_t *pud_page, *pud_page_tramp; + int i; + + if (!kaslr_memory_enabled()) { + init_trampoline_default(); + return; + } + + pud_page_tramp = alloc_low_page(); + + paddr = 0; + pgd = pgd_offset_k((unsigned long)__va(paddr)); + pud_page = (pud_t *) pgd_page_vaddr(*pgd); + + for (i = pud_index(paddr); i < PTRS_PER_PUD; i++, paddr = paddr_next) { + pud_t *pud, *pud_tramp; + unsigned long vaddr = (unsigned long)__va(paddr); + + pud_tramp = pud_page_tramp + pud_index(paddr); + pud = pud_page + pud_index(vaddr); + paddr_next = (paddr & PUD_MASK) + PUD_SIZE; + + *pud_tramp = *pud; + } + + set_pgd(&trampoline_pgd_entry, + __pgd(_KERNPG_TABLE | __pa(pud_page_tramp))); +} -- cgit v1.2.3 From 021182e52fe01c1f7b126f97fd6ba048dc4234fd Mon Sep 17 00:00:00 2001 From: Thomas Garnier Date: Tue, 21 Jun 2016 17:47:03 -0700 Subject: x86/mm: Enable KASLR for physical mapping memory regions Add the physical mapping in the list of randomized memory regions. The physical memory mapping holds most allocations from boot and heap allocators. Knowing the base address and physical memory size, an attacker can deduce the PDE virtual address for the vDSO memory page. This attack was demonstrated at CanSecWest 2016, in the following presentation: "Getting Physical: Extreme Abuse of Intel Based Paged Systems": https://github.com/n3k/CansecWest2016_Getting_Physical_Extreme_Abuse_of_Intel_Based_Paging_Systems/blob/master/Presentation/CanSec2016_Presentation.pdf (See second part of the presentation). The exploits used against Linux worked successfully against 4.6+ but fail with KASLR memory enabled: https://github.com/n3k/CansecWest2016_Getting_Physical_Extreme_Abuse_of_Intel_Based_Paging_Systems/tree/master/Demos/Linux/exploits Similar research was done at Google leading to this patch proposal. Variants exists to overwrite /proc or /sys objects ACLs leading to elevation of privileges. These variants were tested against 4.6+. 
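To make the effect of the change below concrete, here is a simplified sketch (illustrative helpers only, not the real asm/page.h definitions, which are more involved): once __PAGE_OFFSET resolves to the page_offset_base variable, every direct-mapping translation depends on a per-boot randomized base instead of the fixed 0xffff880000000000 constant:

	/* Illustrative only -- assumes a plain direct-mapping translation */
	extern unsigned long page_offset_base;	/* chosen early at boot */

	static inline void *sketch_va(unsigned long paddr)
	{
		/* virtual = physical + randomized direct-mapping base */
		return (void *)(paddr + page_offset_base);
	}

	static inline unsigned long sketch_pa(const void *vaddr)
	{
		return (unsigned long)vaddr - page_offset_base;
	}

An attacker who knows the physical layout can therefore no longer precompute these virtual addresses without also recovering the per-boot base.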
The page offset used by the compressed kernel retains the static value since it is not yet randomized during this boot stage. Signed-off-by: Thomas Garnier Signed-off-by: Kees Cook Cc: Alexander Kuleshov Cc: Alexander Popov Cc: Andrew Morton Cc: Andy Lutomirski Cc: Aneesh Kumar K.V Cc: Baoquan He Cc: Boris Ostrovsky Cc: Borislav Petkov Cc: Borislav Petkov Cc: Brian Gerst Cc: Christian Borntraeger Cc: Dan Williams Cc: Dave Hansen Cc: Dave Young Cc: Denys Vlasenko Cc: Dmitry Vyukov Cc: H. Peter Anvin Cc: Jan Beulich Cc: Joerg Roedel Cc: Jonathan Corbet Cc: Josh Poimboeuf Cc: Juergen Gross Cc: Kirill A. Shutemov Cc: Linus Torvalds Cc: Lv Zheng Cc: Mark Salter Cc: Martin Schwidefsky Cc: Matt Fleming Cc: Peter Zijlstra Cc: Stephen Smalley Cc: Thomas Gleixner Cc: Toshi Kani Cc: Xiao Guangrong Cc: Yinghai Lu Cc: kernel-hardening@lists.openwall.com Cc: linux-doc@vger.kernel.org Link: http://lkml.kernel.org/r/1466556426-32664-7-git-send-email-keescook@chromium.org Signed-off-by: Ingo Molnar --- arch/x86/boot/compressed/pagetable.c | 3 +++ arch/x86/include/asm/kaslr.h | 2 ++ arch/x86/include/asm/page_64_types.h | 11 ++++++++++- arch/x86/kernel/head_64.S | 2 +- arch/x86/mm/kaslr.c | 18 +++++++++++++++--- 5 files changed, 31 insertions(+), 5 deletions(-) (limited to 'arch') diff --git a/arch/x86/boot/compressed/pagetable.c b/arch/x86/boot/compressed/pagetable.c index 6e31a6aac4d3..56589d0a804b 100644 --- a/arch/x86/boot/compressed/pagetable.c +++ b/arch/x86/boot/compressed/pagetable.c @@ -20,6 +20,9 @@ /* These actually do the work of building the kernel identity maps. */ #include #include +/* Use the static base for this part of the boot process */ +#undef __PAGE_OFFSET +#define __PAGE_OFFSET __PAGE_OFFSET_BASE #include "../../mm/ident_map.c" /* Used by pgtable.h asm code to force instruction serialization. */ diff --git a/arch/x86/include/asm/kaslr.h b/arch/x86/include/asm/kaslr.h index 683c9d736314..62b1b815a83a 100644 --- a/arch/x86/include/asm/kaslr.h +++ b/arch/x86/include/asm/kaslr.h @@ -4,6 +4,8 @@ unsigned long kaslr_get_random_long(const char *purpose); #ifdef CONFIG_RANDOMIZE_MEMORY +extern unsigned long page_offset_base; + void kernel_randomize_memory(void); #else static inline void kernel_randomize_memory(void) { } diff --git a/arch/x86/include/asm/page_64_types.h b/arch/x86/include/asm/page_64_types.h index d5c2f8b40faa..9215e0527647 100644 --- a/arch/x86/include/asm/page_64_types.h +++ b/arch/x86/include/asm/page_64_types.h @@ -1,6 +1,10 @@ #ifndef _ASM_X86_PAGE_64_DEFS_H #define _ASM_X86_PAGE_64_DEFS_H +#ifndef __ASSEMBLY__ +#include +#endif + #ifdef CONFIG_KASAN #define KASAN_STACK_ORDER 1 #else @@ -32,7 +36,12 @@ * hypervisor to fit. Choosing 16 slots here is arbitrary, but it's * what Xen requires. 
*/ -#define __PAGE_OFFSET _AC(0xffff880000000000, UL) +#define __PAGE_OFFSET_BASE _AC(0xffff880000000000, UL) +#ifdef CONFIG_RANDOMIZE_MEMORY +#define __PAGE_OFFSET page_offset_base +#else +#define __PAGE_OFFSET __PAGE_OFFSET_BASE +#endif /* CONFIG_RANDOMIZE_MEMORY */ #define __START_KERNEL_map _AC(0xffffffff80000000, UL) diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S index c7920ba69563..9f8efc9f0075 100644 --- a/arch/x86/kernel/head_64.S +++ b/arch/x86/kernel/head_64.S @@ -38,7 +38,7 @@ #define pud_index(x) (((x) >> PUD_SHIFT) & (PTRS_PER_PUD-1)) -L4_PAGE_OFFSET = pgd_index(__PAGE_OFFSET) +L4_PAGE_OFFSET = pgd_index(__PAGE_OFFSET_BASE) L4_START_KERNEL = pgd_index(__START_KERNEL_map) L3_START_KERNEL = pud_index(__START_KERNEL_map) diff --git a/arch/x86/mm/kaslr.c b/arch/x86/mm/kaslr.c index d5380a48e8fb..609ecf2b37ed 100644 --- a/arch/x86/mm/kaslr.c +++ b/arch/x86/mm/kaslr.c @@ -43,8 +43,12 @@ * before. You also need to add a BUILD_BUG_ON in kernel_randomize_memory to * ensure that this order is correct and won't be changed. */ -static const unsigned long vaddr_start; -static const unsigned long vaddr_end; +static const unsigned long vaddr_start = __PAGE_OFFSET_BASE; +static const unsigned long vaddr_end = VMALLOC_START; + +/* Default values */ +unsigned long page_offset_base = __PAGE_OFFSET_BASE; +EXPORT_SYMBOL(page_offset_base); /* * Memory regions randomized by KASLR (except modules that use a separate logic @@ -55,6 +59,7 @@ static __initdata struct kaslr_memory_region { unsigned long *base; unsigned long size_tb; } kaslr_regions[] = { + { &page_offset_base, 64/* Maximum */ }, }; /* Get size in bytes used by the memory region */ @@ -77,13 +82,20 @@ void __init kernel_randomize_memory(void) { size_t i; unsigned long vaddr = vaddr_start; - unsigned long rand; + unsigned long rand, memory_tb; struct rnd_state rand_state; unsigned long remain_entropy; if (!kaslr_memory_enabled()) return; + BUG_ON(kaslr_regions[0].base != &page_offset_base); + memory_tb = ((max_pfn << PAGE_SHIFT) >> TB_SHIFT); + + /* Adapt phyiscal memory region size based on available memory */ + if (memory_tb < kaslr_regions[0].size_tb) + kaslr_regions[0].size_tb = memory_tb; + /* Calculate entropy available between regions */ remain_entropy = vaddr_end - vaddr_start; for (i = 0; i < ARRAY_SIZE(kaslr_regions); i++) -- cgit v1.2.3 From a95ae27c2ee1cba5f4f6b9dea43ffe88252e79b1 Mon Sep 17 00:00:00 2001 From: Thomas Garnier Date: Tue, 21 Jun 2016 17:47:04 -0700 Subject: x86/mm: Enable KASLR for vmalloc memory regions Add vmalloc to the list of randomized memory regions. The vmalloc memory region contains the allocation made through the vmalloc() API. The allocations are done sequentially to prevent fragmentation and each allocation address can easily be deduced especially from boot. Signed-off-by: Thomas Garnier Signed-off-by: Kees Cook Cc: Alexander Kuleshov Cc: Alexander Popov Cc: Andrew Morton Cc: Andy Lutomirski Cc: Aneesh Kumar K.V Cc: Baoquan He Cc: Boris Ostrovsky Cc: Borislav Petkov Cc: Borislav Petkov Cc: Brian Gerst Cc: Christian Borntraeger Cc: Dan Williams Cc: Dave Hansen Cc: Dave Young Cc: Denys Vlasenko Cc: Dmitry Vyukov Cc: H. Peter Anvin Cc: Jan Beulich Cc: Joerg Roedel Cc: Jonathan Corbet Cc: Josh Poimboeuf Cc: Juergen Gross Cc: Kirill A. 
Shutemov Cc: Linus Torvalds Cc: Lv Zheng Cc: Mark Salter Cc: Martin Schwidefsky Cc: Matt Fleming Cc: Peter Zijlstra Cc: Stephen Smalley Cc: Thomas Gleixner Cc: Toshi Kani Cc: Xiao Guangrong Cc: Yinghai Lu Cc: kernel-hardening@lists.openwall.com Cc: linux-doc@vger.kernel.org Link: http://lkml.kernel.org/r/1466556426-32664-8-git-send-email-keescook@chromium.org Signed-off-by: Ingo Molnar --- arch/x86/include/asm/kaslr.h | 1 + arch/x86/include/asm/pgtable_64_types.h | 15 +++++++++++---- arch/x86/mm/kaslr.c | 5 ++++- 3 files changed, 16 insertions(+), 5 deletions(-) (limited to 'arch') diff --git a/arch/x86/include/asm/kaslr.h b/arch/x86/include/asm/kaslr.h index 62b1b815a83a..2674ee3de748 100644 --- a/arch/x86/include/asm/kaslr.h +++ b/arch/x86/include/asm/kaslr.h @@ -5,6 +5,7 @@ unsigned long kaslr_get_random_long(const char *purpose); #ifdef CONFIG_RANDOMIZE_MEMORY extern unsigned long page_offset_base; +extern unsigned long vmalloc_base; void kernel_randomize_memory(void); #else diff --git a/arch/x86/include/asm/pgtable_64_types.h b/arch/x86/include/asm/pgtable_64_types.h index e6844dfb4471..6fdef9eef2d5 100644 --- a/arch/x86/include/asm/pgtable_64_types.h +++ b/arch/x86/include/asm/pgtable_64_types.h @@ -5,6 +5,7 @@ #ifndef __ASSEMBLY__ #include +#include /* * These are used to make use of C type-checking.. @@ -53,10 +54,16 @@ typedef struct { pteval_t pte; } pte_t; #define PGDIR_MASK (~(PGDIR_SIZE - 1)) /* See Documentation/x86/x86_64/mm.txt for a description of the memory map. */ -#define MAXMEM _AC(__AC(1, UL) << MAX_PHYSMEM_BITS, UL) -#define VMALLOC_START _AC(0xffffc90000000000, UL) -#define VMALLOC_END _AC(0xffffe8ffffffffff, UL) -#define VMEMMAP_START _AC(0xffffea0000000000, UL) +#define MAXMEM _AC(__AC(1, UL) << MAX_PHYSMEM_BITS, UL) +#define VMALLOC_SIZE_TB _AC(32, UL) +#define __VMALLOC_BASE _AC(0xffffc90000000000, UL) +#define VMEMMAP_START _AC(0xffffea0000000000, UL) +#ifdef CONFIG_RANDOMIZE_MEMORY +#define VMALLOC_START vmalloc_base +#else +#define VMALLOC_START __VMALLOC_BASE +#endif /* CONFIG_RANDOMIZE_MEMORY */ +#define VMALLOC_END (VMALLOC_START + _AC((VMALLOC_SIZE_TB << 40) - 1, UL)) #define MODULES_VADDR (__START_KERNEL_map + KERNEL_IMAGE_SIZE) #define MODULES_END _AC(0xffffffffff000000, UL) #define MODULES_LEN (MODULES_END - MODULES_VADDR) diff --git a/arch/x86/mm/kaslr.c b/arch/x86/mm/kaslr.c index 609ecf2b37ed..c939cfe1b516 100644 --- a/arch/x86/mm/kaslr.c +++ b/arch/x86/mm/kaslr.c @@ -44,11 +44,13 @@ * ensure that this order is correct and won't be changed. 
*/ static const unsigned long vaddr_start = __PAGE_OFFSET_BASE; -static const unsigned long vaddr_end = VMALLOC_START; +static const unsigned long vaddr_end = VMEMMAP_START; /* Default values */ unsigned long page_offset_base = __PAGE_OFFSET_BASE; EXPORT_SYMBOL(page_offset_base); +unsigned long vmalloc_base = __VMALLOC_BASE; +EXPORT_SYMBOL(vmalloc_base); /* * Memory regions randomized by KASLR (except modules that use a separate logic @@ -60,6 +62,7 @@ static __initdata struct kaslr_memory_region { unsigned long size_tb; } kaslr_regions[] = { { &page_offset_base, 64/* Maximum */ }, + { &vmalloc_base, VMALLOC_SIZE_TB }, }; /* Get size in bytes used by the memory region */ -- cgit v1.2.3 From 90397a41779645d3abba5599f6bb538fdcab9339 Mon Sep 17 00:00:00 2001 From: Thomas Garnier Date: Tue, 21 Jun 2016 17:47:06 -0700 Subject: x86/mm: Add memory hotplug support for KASLR memory randomization Add a new option (CONFIG_RANDOMIZE_MEMORY_PHYSICAL_PADDING) to define the padding used for the physical memory mapping section when KASLR memory is enabled. It ensures there is enough virtual address space when CONFIG_MEMORY_HOTPLUG is used. The default value is 10 terabytes. If CONFIG_MEMORY_HOTPLUG is not used, no space is reserved increasing the entropy available. Signed-off-by: Thomas Garnier Signed-off-by: Kees Cook Cc: Alexander Kuleshov Cc: Alexander Popov Cc: Andrew Morton Cc: Andy Lutomirski Cc: Aneesh Kumar K.V Cc: Baoquan He Cc: Boris Ostrovsky Cc: Borislav Petkov Cc: Borislav Petkov Cc: Brian Gerst Cc: Christian Borntraeger Cc: Dan Williams Cc: Dave Hansen Cc: Dave Young Cc: Denys Vlasenko Cc: Dmitry Vyukov Cc: H. Peter Anvin Cc: Jan Beulich Cc: Joerg Roedel Cc: Jonathan Corbet Cc: Josh Poimboeuf Cc: Juergen Gross Cc: Kirill A. Shutemov Cc: Linus Torvalds Cc: Lv Zheng Cc: Mark Salter Cc: Martin Schwidefsky Cc: Matt Fleming Cc: Peter Zijlstra Cc: Stephen Smalley Cc: Thomas Gleixner Cc: Toshi Kani Cc: Xiao Guangrong Cc: Yinghai Lu Cc: kernel-hardening@lists.openwall.com Cc: linux-doc@vger.kernel.org Link: http://lkml.kernel.org/r/1466556426-32664-10-git-send-email-keescook@chromium.org Signed-off-by: Ingo Molnar --- arch/x86/Kconfig | 15 +++++++++++++++ arch/x86/mm/kaslr.c | 7 ++++++- 2 files changed, 21 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 9719b8eb38d3..703413fb233a 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -2010,6 +2010,21 @@ config RANDOMIZE_MEMORY If unsure, say N. +config RANDOMIZE_MEMORY_PHYSICAL_PADDING + hex "Physical memory mapping padding" if EXPERT + depends on RANDOMIZE_MEMORY + default "0xa" if MEMORY_HOTPLUG + default "0x0" + range 0x1 0x40 if MEMORY_HOTPLUG + range 0x0 0x40 + ---help--- + Define the padding in terabytes added to the existing physical + memory size during kernel memory randomization. It is useful + for memory hotplug support but reduces the entropy available for + address randomization. + + If unsure, leave at the default value. + config HOTPLUG_CPU bool "Support for hot-pluggable CPUs" depends on SMP diff --git a/arch/x86/mm/kaslr.c b/arch/x86/mm/kaslr.c index c939cfe1b516..26dccd6c0df1 100644 --- a/arch/x86/mm/kaslr.c +++ b/arch/x86/mm/kaslr.c @@ -92,8 +92,13 @@ void __init kernel_randomize_memory(void) if (!kaslr_memory_enabled()) return; + /* + * Update Physical memory mapping to available and + * add padding if needed (especially for memory hotplug support). 
+ */ BUG_ON(kaslr_regions[0].base != &page_offset_base); - memory_tb = ((max_pfn << PAGE_SHIFT) >> TB_SHIFT); + memory_tb = ((max_pfn << PAGE_SHIFT) >> TB_SHIFT) + + CONFIG_RANDOMIZE_MEMORY_PHYSICAL_PADDING; /* Adapt phyiscal memory region size based on available memory */ if (memory_tb < kaslr_regions[0].size_tb) -- cgit v1.2.3 From 62d855d3e725f4e4b0d2786f7cad3f0660a03a59 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Sat, 18 Jun 2016 18:51:34 +0300 Subject: x86/platform/intel-mid: Rename mrfl.c to mrfld.c Use mrfld as an abbreviation of Merrifield to be consistent with the rest of the code. In the future we are going to add more files here prefixed with 'mrfld'. Signed-off-by: Andy Shevchenko Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1466265094-146113-1-git-send-email-andriy.shevchenko@linux.intel.com Signed-off-by: Ingo Molnar --- arch/x86/platform/intel-mid/Makefile | 2 +- arch/x86/platform/intel-mid/mrfl.c | 100 ----------------------------------- arch/x86/platform/intel-mid/mrfld.c | 100 +++++++++++++++++++++++++++++++++++ 3 files changed, 101 insertions(+), 101 deletions(-) delete mode 100644 arch/x86/platform/intel-mid/mrfl.c create mode 100644 arch/x86/platform/intel-mid/mrfld.c (limited to 'arch') diff --git a/arch/x86/platform/intel-mid/Makefile b/arch/x86/platform/intel-mid/Makefile index aebb5b9ea80a..fa021dfab088 100644 --- a/arch/x86/platform/intel-mid/Makefile +++ b/arch/x86/platform/intel-mid/Makefile @@ -1,4 +1,4 @@ -obj-$(CONFIG_X86_INTEL_MID) += intel-mid.o intel_mid_vrtc.o mfld.o mrfl.o pwr.o +obj-$(CONFIG_X86_INTEL_MID) += intel-mid.o intel_mid_vrtc.o mfld.o mrfld.o pwr.o # SFI specific code ifdef CONFIG_X86_INTEL_MID diff --git a/arch/x86/platform/intel-mid/mrfl.c b/arch/x86/platform/intel-mid/mrfl.c deleted file mode 100644 index bd1adc621781..000000000000 --- a/arch/x86/platform/intel-mid/mrfl.c +++ /dev/null @@ -1,100 +0,0 @@ -/* - * mrfl.c: Intel Merrifield platform specific setup code - * - * (C) Copyright 2013 Intel Corporation - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; version 2 - * of the License. - */ - -#include - -#include -#include - -#include "intel_mid_weak_decls.h" - -static unsigned long __init tangier_calibrate_tsc(void) -{ - unsigned long fast_calibrate; - u32 lo, hi, ratio, fsb, bus_freq; - - /* *********************** */ - /* Compute TSC:Ratio * FSB */ - /* *********************** */ - - /* Compute Ratio */ - rdmsr(MSR_PLATFORM_INFO, lo, hi); - pr_debug("IA32 PLATFORM_INFO is 0x%x : %x\n", hi, lo); - - ratio = (lo >> 8) & 0xFF; - pr_debug("ratio is %d\n", ratio); - if (!ratio) { - pr_err("Read a zero ratio, force tsc ratio to 4 ...\n"); - ratio = 4; - } - - /* Compute FSB */ - rdmsr(MSR_FSB_FREQ, lo, hi); - pr_debug("Actual FSB frequency detected by SOC 0x%x : %x\n", - hi, lo); - - bus_freq = lo & 0x7; - pr_debug("bus_freq = 0x%x\n", bus_freq); - - if (bus_freq == 0) - fsb = FSB_FREQ_100SKU; - else if (bus_freq == 1) - fsb = FSB_FREQ_100SKU; - else if (bus_freq == 2) - fsb = FSB_FREQ_133SKU; - else if (bus_freq == 3) - fsb = FSB_FREQ_167SKU; - else if (bus_freq == 4) - fsb = FSB_FREQ_83SKU; - else if (bus_freq == 5) - fsb = FSB_FREQ_400SKU; - else if (bus_freq == 6) - fsb = FSB_FREQ_267SKU; - else if (bus_freq == 7) - fsb = FSB_FREQ_333SKU; - else { - BUG(); - pr_err("Invalid bus_freq! 
Setting to minimal value!\n"); - fsb = FSB_FREQ_100SKU; - } - - /* TSC = FSB Freq * Resolved HFM Ratio */ - fast_calibrate = ratio * fsb; - pr_debug("calculate tangier tsc %lu KHz\n", fast_calibrate); - - /* ************************************ */ - /* Calculate Local APIC Timer Frequency */ - /* ************************************ */ - lapic_timer_frequency = (fsb * 1000) / HZ; - - pr_debug("Setting lapic_timer_frequency = %d\n", - lapic_timer_frequency); - - /* mark tsc clocksource as reliable */ - set_cpu_cap(&boot_cpu_data, X86_FEATURE_TSC_RELIABLE); - - return fast_calibrate; -} - -static void __init tangier_arch_setup(void) -{ - x86_platform.calibrate_tsc = tangier_calibrate_tsc; -} - -/* tangier arch ops */ -static struct intel_mid_ops tangier_ops = { - .arch_setup = tangier_arch_setup, -}; - -void *get_tangier_ops(void) -{ - return &tangier_ops; -} diff --git a/arch/x86/platform/intel-mid/mrfld.c b/arch/x86/platform/intel-mid/mrfld.c new file mode 100644 index 000000000000..59253db41bbc --- /dev/null +++ b/arch/x86/platform/intel-mid/mrfld.c @@ -0,0 +1,100 @@ +/* + * Intel Merrifield platform specific setup code + * + * (C) Copyright 2013 Intel Corporation + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; version 2 + * of the License. + */ + +#include + +#include +#include + +#include "intel_mid_weak_decls.h" + +static unsigned long __init tangier_calibrate_tsc(void) +{ + unsigned long fast_calibrate; + u32 lo, hi, ratio, fsb, bus_freq; + + /* *********************** */ + /* Compute TSC:Ratio * FSB */ + /* *********************** */ + + /* Compute Ratio */ + rdmsr(MSR_PLATFORM_INFO, lo, hi); + pr_debug("IA32 PLATFORM_INFO is 0x%x : %x\n", hi, lo); + + ratio = (lo >> 8) & 0xFF; + pr_debug("ratio is %d\n", ratio); + if (!ratio) { + pr_err("Read a zero ratio, force tsc ratio to 4 ...\n"); + ratio = 4; + } + + /* Compute FSB */ + rdmsr(MSR_FSB_FREQ, lo, hi); + pr_debug("Actual FSB frequency detected by SOC 0x%x : %x\n", + hi, lo); + + bus_freq = lo & 0x7; + pr_debug("bus_freq = 0x%x\n", bus_freq); + + if (bus_freq == 0) + fsb = FSB_FREQ_100SKU; + else if (bus_freq == 1) + fsb = FSB_FREQ_100SKU; + else if (bus_freq == 2) + fsb = FSB_FREQ_133SKU; + else if (bus_freq == 3) + fsb = FSB_FREQ_167SKU; + else if (bus_freq == 4) + fsb = FSB_FREQ_83SKU; + else if (bus_freq == 5) + fsb = FSB_FREQ_400SKU; + else if (bus_freq == 6) + fsb = FSB_FREQ_267SKU; + else if (bus_freq == 7) + fsb = FSB_FREQ_333SKU; + else { + BUG(); + pr_err("Invalid bus_freq! 
Setting to minimal value!\n"); + fsb = FSB_FREQ_100SKU; + } + + /* TSC = FSB Freq * Resolved HFM Ratio */ + fast_calibrate = ratio * fsb; + pr_debug("calculate tangier tsc %lu KHz\n", fast_calibrate); + + /* ************************************ */ + /* Calculate Local APIC Timer Frequency */ + /* ************************************ */ + lapic_timer_frequency = (fsb * 1000) / HZ; + + pr_debug("Setting lapic_timer_frequency = %d\n", + lapic_timer_frequency); + + /* mark tsc clocksource as reliable */ + set_cpu_cap(&boot_cpu_data, X86_FEATURE_TSC_RELIABLE); + + return fast_calibrate; +} + +static void __init tangier_arch_setup(void) +{ + x86_platform.calibrate_tsc = tangier_calibrate_tsc; +} + +/* tangier arch ops */ +static struct intel_mid_ops tangier_ops = { + .arch_setup = tangier_arch_setup, +}; + +void *get_tangier_ops(void) +{ + return &tangier_ops; +} -- cgit v1.2.3 From 8709ed4d4b0eab04561c1ec9e6ea50fd1e3897ff Mon Sep 17 00:00:00 2001 From: Dave Hansen Date: Fri, 17 Jun 2016 17:15:03 -0700 Subject: x86/cpu: Fix duplicated X86_BUG(9) macro cpufeatures.h currently defines X86_BUG(9) twice on 32-bit: #define X86_BUG_NULL_SEG X86_BUG(9) /* Nulling a selector preserves the base */ ... #ifdef CONFIG_X86_32 #define X86_BUG_ESPFIX X86_BUG(9) /* "" IRET to 16-bit SS corrupts ESP/RSP high bits */ #endif I think what happened was that this added the X86_BUG_ESPFIX, but in an #ifdef below most of the bugs: 58a5aac53313 x86/entry/32: Introduce and use X86_BUG_ESPFIX instead of paravirt_enabled Then this came along and added X86_BUG_NULL_SEG, but collided with the earlier one that did the bug below the main block defining all the X86_BUG()s. 7a5d67048745 x86/cpu: Probe the behavior of nulling out a segment at boot time Signed-off-by: Dave Hansen Acked-by: Andy Lutomirski Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Dave Hansen Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Josh Poimboeuf Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: stable@vger.kernel.org Link: http://lkml.kernel.org/r/20160618001503.CEE1B141@viggo.jf.intel.com Signed-off-by: Ingo Molnar --- arch/x86/include/asm/cpufeatures.h | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'arch') diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index 4a413485f9eb..c64b1e9c5d1a 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -301,10 +301,6 @@ #define X86_BUG_FXSAVE_LEAK X86_BUG(6) /* FXSAVE leaks FOP/FIP/FOP */ #define X86_BUG_CLFLUSH_MONITOR X86_BUG(7) /* AAI65, CLFLUSH required before MONITOR */ #define X86_BUG_SYSRET_SS_ATTRS X86_BUG(8) /* SYSRET doesn't fix up SS attrs */ -#define X86_BUG_NULL_SEG X86_BUG(9) /* Nulling a selector preserves the base */ -#define X86_BUG_SWAPGS_FENCE X86_BUG(10) /* SWAPGS without input dep on GS */ - - #ifdef CONFIG_X86_32 /* * 64-bit kernels don't use X86_BUG_ESPFIX. Make the define conditional @@ -312,5 +308,7 @@ */ #define X86_BUG_ESPFIX X86_BUG(9) /* "" IRET to 16-bit SS corrupts ESP/RSP high bits */ #endif +#define X86_BUG_NULL_SEG X86_BUG(10) /* Nulling a selector preserves the base */ +#define X86_BUG_SWAPGS_FENCE X86_BUG(11) /* SWAPGS without input dep on GS */ #endif /* _ASM_X86_CPUFEATURES_H */ -- cgit v1.2.3 From a11836fa5a67ba56d8338138e37b42384af73e5e Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Sat, 9 Jul 2016 16:45:29 +0300 Subject: x86/platform/intel-mid: Mark regulators explicitly defined Intel MID platforms are using explicitly defined regulators. 
Let the regulator core know that we do not have any additional regulators left. This lets it substitute unprovided regulators with dummy ones. Without this change when CONFIG_REGULATOR=y the USB driver fails on getting "vbus" regulator and SDHCI can't get "vmmc" and "vqmmc" regulators either. Signed-off-by: Andy Shevchenko Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1468071929-77383-1-git-send-email-andriy.shevchenko@linux.intel.com Signed-off-by: Ingo Molnar --- arch/x86/platform/intel-mid/intel-mid.c | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'arch') diff --git a/arch/x86/platform/intel-mid/intel-mid.c b/arch/x86/platform/intel-mid/intel-mid.c index 90bb997ed0a2..abbf49c6e9d3 100644 --- a/arch/x86/platform/intel-mid/intel-mid.c +++ b/arch/x86/platform/intel-mid/intel-mid.c @@ -16,6 +16,7 @@ #include #include #include +#include #include #include #include @@ -144,6 +145,15 @@ static void intel_mid_arch_setup(void) out: if (intel_mid_ops->arch_setup) intel_mid_ops->arch_setup(); + + /* + * Intel MID platforms are using explicitly defined regulators. + * + * Let the regulator core know that we do not have any additional + * regulators left. This lets it substitute unprovided regulators with + * dummy ones: + */ + regulator_has_full_constraints(); } /* MID systems don't have i8042 controller */ -- cgit v1.2.3 From 2e9d1e150abf88cb63e5d34ca286edbb95b4c53d Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Mon, 20 Jun 2016 16:58:29 +0200 Subject: x86/entry: Avoid interrupt flag save and restore Thanks to all the work that was done by Andy Lutomirski and others, enter_from_user_mode() and prepare_exit_to_usermode() are now called only with interrupts disabled. Let's provide them a version of user_enter()/user_exit() that skips saving and restoring the interrupt flag. On an AMD-based machine I tested this patch on, with force-enabled context tracking, the speed-up in system calls was 90 clock cycles or 6%, measured with the following simple benchmark: #include #include #include #include unsigned long rdtsc() { unsigned long result; asm volatile("rdtsc; shl $32, %%rdx; mov %%eax, %%eax\n" "or %%rdx, %%rax" : "=a" (result) : : "rdx"); return result; } int main() { unsigned long tsc1, tsc2; int pid = getpid(); int i; tsc1 = rdtsc(); for (i = 0; i < 100000000; i++) kill(pid, SIGWINCH); tsc2 = rdtsc(); printf("%ld\n", tsc2 - tsc1); } Signed-off-by: Paolo Bonzini Reviewed-by: Rik van Riel Reviewed-by: Andy Lutomirski Acked-by: Paolo Bonzini Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. 
Peter Anvin Cc: Josh Poimboeuf Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: kvm@vger.kernel.org Link: http://lkml.kernel.org/r/1466434712-31440-2-git-send-email-pbonzini@redhat.com Signed-off-by: Ingo Molnar --- arch/x86/entry/common.c | 4 ++-- include/linux/context_tracking.h | 15 +++++++++++++++ 2 files changed, 17 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/x86/entry/common.c b/arch/x86/entry/common.c index ec138e538c44..618bc61d35b7 100644 --- a/arch/x86/entry/common.c +++ b/arch/x86/entry/common.c @@ -43,7 +43,7 @@ static struct thread_info *pt_regs_to_thread_info(struct pt_regs *regs) __visible void enter_from_user_mode(void) { CT_WARN_ON(ct_state() != CONTEXT_USER); - user_exit(); + user_exit_irqoff(); } #else static inline void enter_from_user_mode(void) {} @@ -274,7 +274,7 @@ __visible inline void prepare_exit_to_usermode(struct pt_regs *regs) ti->status &= ~TS_COMPAT; #endif - user_enter(); + user_enter_irqoff(); } #define SYSCALL_EXIT_WORK_FLAGS \ diff --git a/include/linux/context_tracking.h b/include/linux/context_tracking.h index d259274238db..d9aef2a0ec8e 100644 --- a/include/linux/context_tracking.h +++ b/include/linux/context_tracking.h @@ -31,6 +31,19 @@ static inline void user_exit(void) context_tracking_exit(CONTEXT_USER); } +/* Called with interrupts disabled. */ +static inline void user_enter_irqoff(void) +{ + if (context_tracking_is_enabled()) + __context_tracking_enter(CONTEXT_USER); + +} +static inline void user_exit_irqoff(void) +{ + if (context_tracking_is_enabled()) + __context_tracking_exit(CONTEXT_USER); +} + static inline enum ctx_state exception_enter(void) { enum ctx_state prev_ctx; @@ -69,6 +82,8 @@ static inline enum ctx_state ct_state(void) #else static inline void user_enter(void) { } static inline void user_exit(void) { } +static inline void user_enter_irqoff(void) { } +static inline void user_exit_irqoff(void) { } static inline enum ctx_state exception_enter(void) { return 0; } static inline void exception_exit(enum ctx_state prev_ctx) { } static inline enum ctx_state ct_state(void) { return CONTEXT_DISABLED; } -- cgit v1.2.3 From be8a18e2e98e04a5def5887d913b267865562448 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Mon, 20 Jun 2016 16:58:30 +0200 Subject: x86/entry: Inline enter_from_user_mode() This matches what is already done for prepare_exit_to_usermode(), and saves about 60 clock cycles (4% speedup) with the benchmark in the previous commit message. Signed-off-by: Paolo Bonzini Reviewed-by: Rik van Riel Reviewed-by: Andy Lutomirski Acked-by: Paolo Bonzini Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Josh Poimboeuf Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: kvm@vger.kernel.org Link: http://lkml.kernel.org/r/1466434712-31440-3-git-send-email-pbonzini@redhat.com Signed-off-by: Ingo Molnar --- arch/x86/entry/common.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/entry/common.c b/arch/x86/entry/common.c index 618bc61d35b7..9e1e27d31c6d 100644 --- a/arch/x86/entry/common.c +++ b/arch/x86/entry/common.c @@ -40,7 +40,7 @@ static struct thread_info *pt_regs_to_thread_info(struct pt_regs *regs) #ifdef CONFIG_CONTEXT_TRACKING /* Called on entry from user mode with IRQs off. 
*/ -__visible void enter_from_user_mode(void) +__visible inline void enter_from_user_mode(void) { CT_WARN_ON(ct_state() != CONTEXT_USER); user_exit_irqoff(); -- cgit v1.2.3 From fc5f3ac24720012909c224a63ca3217f4759967d Mon Sep 17 00:00:00 2001 From: Len Brown Date: Fri, 17 Jun 2016 01:22:43 -0400 Subject: Revert "x86/tsc: Add missing Cherrytrail frequency to the table" This reverts commit: e2724e9d9692 ("x86/tsc: Add missing Cherrytrail frequency to the table") ... as it is incomplete, and is replaced by a more complete patch later in this series. Signed-off-by: Len Brown Reviewed-by: Thomas Gleixner Cc: Linus Torvalds Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/2199d0e959f7f71a18827268b5d060f8d3831639.1466138954.git.len.brown@intel.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/tsc_msr.c | 3 --- 1 file changed, 3 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/tsc_msr.c b/arch/x86/kernel/tsc_msr.c index 9911a0620f9a..6aa0f4d9eea6 100644 --- a/arch/x86/kernel/tsc_msr.c +++ b/arch/x86/kernel/tsc_msr.c @@ -23,7 +23,6 @@ #include /* CPU reference clock frequency: in KHz */ -#define FREQ_80 80000 #define FREQ_83 83200 #define FREQ_100 99840 #define FREQ_133 133200 @@ -57,8 +56,6 @@ static struct freq_desc freq_desc_tables[] = { { 6, 0x37, 1, { FREQ_83, FREQ_100, FREQ_133, FREQ_166, 0, 0, 0, 0 } }, /* ANN */ { 6, 0x5a, 1, { FREQ_83, FREQ_100, FREQ_133, FREQ_100, 0, 0, 0, 0 } }, - /* AIRMONT */ - { 6, 0x4c, 1, { FREQ_83, FREQ_100, FREQ_133, FREQ_166, FREQ_80, 0, 0, 0 } }, }; static int match_cpu(u8 family, u8 model) -- cgit v1.2.3 From ba8268330dc18d309a39175ea4d2c5d86c2cef09 Mon Sep 17 00:00:00 2001 From: Len Brown Date: Fri, 17 Jun 2016 01:22:44 -0400 Subject: x86/tsc_msr: Identify Intel-specific code try_msr_calibrate_tsc() is currently Intel-specific, and should not execute on any other vendor's parts. Signed-off-by: Len Brown Reviewed-by: Thomas Gleixner Cc: Linus Torvalds Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1fe23c052826bdcfeb3d45045aa02246078cb5a7.1466138954.git.len.brown@intel.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/tsc_msr.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'arch') diff --git a/arch/x86/kernel/tsc_msr.c b/arch/x86/kernel/tsc_msr.c index 6aa0f4d9eea6..4ec5e560ed73 100644 --- a/arch/x86/kernel/tsc_msr.c +++ b/arch/x86/kernel/tsc_msr.c @@ -86,6 +86,9 @@ unsigned long try_msr_calibrate_tsc(void) unsigned long res; int cpu_index; + if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL) + return 0; + cpu_index = match_cpu(boot_cpu_data.x86, boot_cpu_data.x86_model); if (cpu_index < 0) return 0; -- cgit v1.2.3 From 14bb4e34860af48ef1ea0f52b11611ce4db987fe Mon Sep 17 00:00:00 2001 From: Len Brown Date: Fri, 17 Jun 2016 01:22:45 -0400 Subject: x86/tsc_msr: Remove debugging messages Debugging messages are not necessary after all of the possible hardware failures that never occur. Instead, this code can simply return 0. This code also doesn't need to print in the success case. tsc_init() already prints the TSC frequency, and apic=debug is available if anybody really is interested in printing the LAPIC frequency. 
Signed-off-by: Len Brown Reviewed-by: Thomas Gleixner Cc: Linus Torvalds Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/cf03279a125b95dfa9b8d3d5b4a66de09cd04050.1466138954.git.len.brown@intel.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/tsc_msr.c | 19 +++---------------- 1 file changed, 3 insertions(+), 16 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/tsc_msr.c b/arch/x86/kernel/tsc_msr.c index 4ec5e560ed73..f7ba44b89cc4 100644 --- a/arch/x86/kernel/tsc_msr.c +++ b/arch/x86/kernel/tsc_msr.c @@ -76,9 +76,10 @@ static int match_cpu(u8 family, u8 model) (freq_desc_tables[cpu_index].freqs[freq_id]) /* - * Do MSR calibration only for known/supported CPUs. + * MSR-based CPU/TSC frequency discovery for certain CPUs. * - * Returns the calibration value or 0 if MSR calibration failed. + * Set global "lapic_timer_frequency" to bus_clock_cycles/jiffy + * Return processor base frequency in KHz, or 0 on failure. */ unsigned long try_msr_calibrate_tsc(void) { @@ -100,31 +101,17 @@ unsigned long try_msr_calibrate_tsc(void) rdmsr(MSR_IA32_PERF_STATUS, lo, hi); ratio = (hi >> 8) & 0x1f; } - pr_info("Maximum core-clock to bus-clock ratio: 0x%x\n", ratio); - - if (!ratio) - goto fail; /* Get FSB FREQ ID */ rdmsr(MSR_FSB_FREQ, lo, hi); freq_id = lo & 0x7; freq = id_to_freq(cpu_index, freq_id); - pr_info("Resolved frequency ID: %u, frequency: %u KHz\n", - freq_id, freq); - if (!freq) - goto fail; /* TSC frequency = maximum resolved freq * maximum resolved bus ratio */ res = freq * ratio; - pr_info("TSC runs at %lu KHz\n", res); #ifdef CONFIG_X86_LOCAL_APIC lapic_timer_frequency = (freq * 1000) / HZ; - pr_info("lapic_timer_frequency = %d\n", lapic_timer_frequency); #endif return res; - -fail: - pr_warn("Fast TSC calibration using MSR failed\n"); - return 0; } -- cgit v1.2.3 From 9e0cae9f6227f946fb0076b6a68c88156137f618 Mon Sep 17 00:00:00 2001 From: Len Brown Date: Fri, 17 Jun 2016 01:22:46 -0400 Subject: x86/tsc_msr: Update comments, expand definitions Syntax only, no functional change. Signed-off-by: Len Brown Reviewed-by: Thomas Gleixner Cc: Linus Torvalds Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/8653a2dba21fef122fc7b29eafb750e2004d3976.1466138954.git.len.brown@intel.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/tsc_msr.c | 36 ++++++++++-------------------------- 1 file changed, 10 insertions(+), 26 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/tsc_msr.c b/arch/x86/kernel/tsc_msr.c index f7ba44b89cc4..4110f723fd0f 100644 --- a/arch/x86/kernel/tsc_msr.c +++ b/arch/x86/kernel/tsc_msr.c @@ -1,14 +1,5 @@ /* - * tsc_msr.c - MSR based TSC calibration on Intel Atom SoC platforms. - * - * TSC in Intel Atom SoC runs at a constant rate which can be figured - * by this formula: - * * - * See Intel 64 and IA-32 System Programming Guid section 16.12 and 30.11.5 - * for details. - * Especially some Intel Atom SoCs don't have PIT(i8254) or HPET, so MSR - * based calibration is the only option. 
- * + * tsc_msr.c - TSC frequency enumeration via MSR * * Copyright (C) 2013 Intel Corporation * Author: Bin Gao @@ -22,17 +13,10 @@ #include #include -/* CPU reference clock frequency: in KHz */ -#define FREQ_83 83200 -#define FREQ_100 99840 -#define FREQ_133 133200 -#define FREQ_166 166400 - #define MAX_NUM_FREQS 8 /* - * According to Intel 64 and IA-32 System Programming Guide, - * if MSR_PERF_STAT[31] is set, the maximum resolved bus ratio can be + * If MSR_PERF_STAT[31] is set, the maximum resolved bus ratio can be * read in MSR_PLATFORM_ID[12:8], otherwise in MSR_PERF_STAT[44:40]. * Unfortunately some Intel Atom SoCs aren't quite compliant to this, * so we need manually differentiate SoC families. This is what the @@ -47,15 +31,15 @@ struct freq_desc { static struct freq_desc freq_desc_tables[] = { /* PNW */ - { 6, 0x27, 0, { 0, 0, 0, 0, 0, FREQ_100, 0, FREQ_83 } }, + { 6, 0x27, 0, { 0, 0, 0, 0, 0, 99840, 0, 83200 } }, /* CLV+ */ - { 6, 0x35, 0, { 0, FREQ_133, 0, 0, 0, FREQ_100, 0, FREQ_83 } }, - /* TNG */ - { 6, 0x4a, 1, { 0, FREQ_100, FREQ_133, 0, 0, 0, 0, 0 } }, - /* VLV2 */ - { 6, 0x37, 1, { FREQ_83, FREQ_100, FREQ_133, FREQ_166, 0, 0, 0, 0 } }, - /* ANN */ - { 6, 0x5a, 1, { FREQ_83, FREQ_100, FREQ_133, FREQ_100, 0, 0, 0, 0 } }, + { 6, 0x35, 0, { 0, 133200, 0, 0, 0, 99840, 0, 83200 } }, + /* TNG - Intel Atom processor Z3400 series */ + { 6, 0x4a, 1, { 0, 99840, 133200, 0, 0, 0, 0, 0 } }, + /* VLV2 - Intel Atom processor E3000, Z3600, Z3700 series */ + { 6, 0x37, 1, { 83200, 99840, 133200, 166400, 0, 0, 0, 0 } }, + /* ANN - Intel Atom processor Z3500 series */ + { 6, 0x5a, 1, { 83200, 99840, 133200, 99840, 0, 0, 0, 0 } }, }; static int match_cpu(u8 family, u8 model) -- cgit v1.2.3 From 05680e7fa8a4e700e031a5e72cd8c18265f0031a Mon Sep 17 00:00:00 2001 From: Len Brown Date: Fri, 17 Jun 2016 01:22:47 -0400 Subject: x86/tsc_msr: Correct Silvermont reference clock values Atom processors use a 19.2 MHz crystal oscillator. Early processors generate 100 MHz via 19.2 MHz * 26 / 5 = 99.84 MHz. Later preocessor generate 100 MHz via 19.2 MHz * 125 / 24 = 100 MHz. Update the Silvermont-based tables accordingly, matching the Software Developers Manual. Also, correct a 166 MHz entry that should have been 116 MHz, and add a missing 80 MHz entry. 
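For the record, the crystal arithmetic above is easy to verify with a throwaway user-space snippet; the 19.2 MHz crystal and the multiplier/divider pairs are the ones quoted in this commit message, nothing more is implied:

#include <stdio.h>

int main(void)
{
	const unsigned int xtal_khz = 19200;	/* 19.2 MHz crystal */

	/* Early parts: 19.2 MHz * 26 / 5 = 99.84 MHz */
	printf("early 100 MHz SKU: %u kHz\n", xtal_khz * 26 / 5);

	/* Later parts: 19.2 MHz * 125 / 24 = 100 MHz exactly */
	printf("later 100 MHz SKU: %u kHz\n", xtal_khz * 125 / 24);

	return 0;
}

which is exactly the 99840 kHz -> 100000 kHz change made to the Silvermont rows below.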
Reported-by: Stephane Gasparini Signed-off-by: Len Brown Reviewed-by: Thomas Gleixner Cc: Linus Torvalds Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/5d7561655dfb066ff10801b423405bae4d1cfbe2.1466138954.git.len.brown@intel.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/tsc_msr.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/tsc_msr.c b/arch/x86/kernel/tsc_msr.c index 4110f723fd0f..20487e2382c6 100644 --- a/arch/x86/kernel/tsc_msr.c +++ b/arch/x86/kernel/tsc_msr.c @@ -35,11 +35,11 @@ static struct freq_desc freq_desc_tables[] = { /* CLV+ */ { 6, 0x35, 0, { 0, 133200, 0, 0, 0, 99840, 0, 83200 } }, /* TNG - Intel Atom processor Z3400 series */ - { 6, 0x4a, 1, { 0, 99840, 133200, 0, 0, 0, 0, 0 } }, + { 6, 0x4a, 1, { 0, 100000, 133300, 0, 0, 0, 0, 0 } }, /* VLV2 - Intel Atom processor E3000, Z3600, Z3700 series */ - { 6, 0x37, 1, { 83200, 99840, 133200, 166400, 0, 0, 0, 0 } }, + { 6, 0x37, 1, { 83300, 100000, 133300, 116700, 80000, 0, 0, 0 } }, /* ANN - Intel Atom processor Z3500 series */ - { 6, 0x5a, 1, { 83200, 99840, 133200, 99840, 0, 0, 0, 0 } }, + { 6, 0x5a, 1, { 83300, 100000, 133300, 100000, 0, 0, 0, 0 } }, }; static int match_cpu(u8 family, u8 model) -- cgit v1.2.3 From 6fcb41cdaee5056c96de88ee095bddd27a7697de Mon Sep 17 00:00:00 2001 From: Len Brown Date: Fri, 17 Jun 2016 01:22:48 -0400 Subject: x86/tsc_msr: Add Airmont reference clock values per the Intel 64 and IA-32 Architecture Software Developer's Manual... Add the reference clock for Intel Atom Processors Based on the Airmont Microarchitecture. Reported-by: Stephane Gasparini Signed-off-by: Len Brown Reviewed-by: Thomas Gleixner Cc: Linus Torvalds Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/abc6a0f4b18281410da1a3f26e2819d8e03e144f.1466138954.git.len.brown@intel.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/tsc_msr.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/kernel/tsc_msr.c b/arch/x86/kernel/tsc_msr.c index 20487e2382c6..65b3d8cb8325 100644 --- a/arch/x86/kernel/tsc_msr.c +++ b/arch/x86/kernel/tsc_msr.c @@ -13,7 +13,7 @@ #include #include -#define MAX_NUM_FREQS 8 +#define MAX_NUM_FREQS 9 /* * If MSR_PERF_STAT[31] is set, the maximum resolved bus ratio can be @@ -40,6 +40,9 @@ static struct freq_desc freq_desc_tables[] = { { 6, 0x37, 1, { 83300, 100000, 133300, 116700, 80000, 0, 0, 0 } }, /* ANN - Intel Atom processor Z3500 series */ { 6, 0x5a, 1, { 83300, 100000, 133300, 100000, 0, 0, 0, 0 } }, + /* AMT - Intel Atom processor X7-Z8000 and X5-Z8000 series */ + { 6, 0x4c, 1, { 83300, 100000, 133300, 116700, + 80000, 93300, 90000, 88900, 87500 } }, }; static int match_cpu(u8 family, u8 model) -- cgit v1.2.3 From 03482e08a87d24e5c8c23e6981c482e832cf3bdc Mon Sep 17 00:00:00 2001 From: Yu-cheng Yu Date: Fri, 17 Jun 2016 13:07:15 -0700 Subject: x86/fpu/xstate: Align xstate components according to CPUID CPUID function 0x0d, sub function (i, i > 1) returns in ecx[1] the alignment requirement of component 'i' when the compacted format is used. If ecx[1] is 0, component 'i' is located immediately following the preceding component. If ecx[1] is 1, component 'i' is located on the next 64-byte boundary following the preceding component. Signed-off-by: Yu-cheng Yu Reviewed-by: Dave Hansen Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Dave Hansen Cc: Denys Vlasenko Cc: Fenghua Yu Cc: H. 
Peter Anvin Cc: Josh Poimboeuf Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Quentin Casasnovas Cc: Ravi V. Shankar Cc: Sai Praneeth Prakhya Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/331e2bef1a0a7a584f06adde095b6bbfbe166472.1466179491.git.yu-cheng.yu@intel.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/fpu/xstate.c | 60 +++++++++++++++++++++++--------------------- 1 file changed, 32 insertions(+), 28 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c index 0b01f003df8b..7963029cb4ad 100644 --- a/arch/x86/kernel/fpu/xstate.c +++ b/arch/x86/kernel/fpu/xstate.c @@ -269,6 +269,33 @@ static void __init print_xstate_features(void) print_xstate_feature(XFEATURE_MASK_PKRU); } +/* + * This check is important because it is easy to get XSTATE_* + * confused with XSTATE_BIT_*. + */ +#define CHECK_XFEATURE(nr) do { \ + WARN_ON(nr < FIRST_EXTENDED_XFEATURE); \ + WARN_ON(nr >= XFEATURE_MAX); \ +} while (0) + +/* + * We could cache this like xstate_size[], but we only use + * it here, so it would be a waste of space. + */ +static int xfeature_is_aligned(int xfeature_nr) +{ + u32 eax, ebx, ecx, edx; + + CHECK_XFEATURE(xfeature_nr); + cpuid_count(XSTATE_CPUID, xfeature_nr, &eax, &ebx, &ecx, &edx); + /* + * The value returned by ECX[1] indicates the alignment + * of state component 'i' when the compacted format + * of the extended region of an XSAVE area is used: + */ + return !!(ecx & 2); +} + /* * This function sets up offsets and sizes of all extended states in * xsave area. This supports both standard format and compacted format @@ -306,10 +333,14 @@ static void __init setup_xstate_comp(void) else xstate_comp_sizes[i] = 0; - if (i > FIRST_EXTENDED_XFEATURE) + if (i > FIRST_EXTENDED_XFEATURE) { xstate_comp_offsets[i] = xstate_comp_offsets[i-1] + xstate_comp_sizes[i-1]; + if (xfeature_is_aligned(i)) + xstate_comp_offsets[i] = + ALIGN(xstate_comp_offsets[i], 64); + } } } @@ -366,33 +397,6 @@ static int xfeature_is_user(int xfeature_nr) } */ -/* - * This check is important because it is easy to get XSTATE_* - * confused with XSTATE_BIT_*. - */ -#define CHECK_XFEATURE(nr) do { \ - WARN_ON(nr < FIRST_EXTENDED_XFEATURE); \ - WARN_ON(nr >= XFEATURE_MAX); \ -} while (0) - -/* - * We could cache this like xstate_size[], but we only use - * it here, so it would be a waste of space. - */ -static int xfeature_is_aligned(int xfeature_nr) -{ - u32 eax, ebx, ecx, edx; - - CHECK_XFEATURE(xfeature_nr); - cpuid_count(XSTATE_CPUID, xfeature_nr, &eax, &ebx, &ecx, &edx); - /* - * The value returned by ECX[1] indicates the alignment - * of state component i when the compacted format - * of the extended region of an XSAVE area is used - */ - return !!(ecx & 2); -} - static int xfeature_uncompacted_offset(int xfeature_nr) { u32 eax, ebx, ecx, edx; -- cgit v1.2.3 From 1499ce2dd45afddea2e84f9f920890cf88384c4e Mon Sep 17 00:00:00 2001 From: Yu-cheng Yu Date: Fri, 17 Jun 2016 13:07:16 -0700 Subject: x86/fpu/xstate: Fix supervisor xstate component offset CPUID function 0x0d, sub function (i, i > 1) returns in ebx the offset of xstate component i. Zero is returned for a supervisor state. A supervisor state can only be saved by XSAVES and XSAVES uses a compacted format. There is no fixed offset for a supervisor state. This patch checks and makes sure a supervisor state offset is not recorded or mis-used. This has no effect in practice as we currently use no supervisor states, but it would be good to fix. 
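Both this change and the alignment patch before it read the same CPUID leaf. A small user-space sketch of how those sub-leaves decode may help when spot-checking a given machine; it assumes a GCC/clang toolchain providing <cpuid.h> and a CPU that implements leaf 0x0d, and the EAX/EBX/ECX interpretation is the one quoted from the SDM in these commit messages, so treat it as an illustration rather than the kernel's helper:

#include <stdio.h>
#include <cpuid.h>

/* Decode one CPUID(0x0d, nr) sub-leaf the way the xstate code reads it. */
static void dump_xfeature(unsigned int nr)
{
	unsigned int eax, ebx, ecx, edx;

	__cpuid_count(0x0d, nr, eax, ebx, ecx, edx);
	(void)edx;
	if (!eax)
		return;				/* component not reported */

	printf("xfeature %2u: size %u bytes\n", nr, eax);

	if (ecx & 1)				/* ECX[0]: supervisor state */
		printf("  supervisor state, EBX offset not usable\n");
	else
		printf("  user state, standard-format offset %u\n", ebx);

	/* ECX[1]: 64-byte aligned when the compacted (XSAVES) format is used */
	printf("  64-byte aligned in compacted format: %s\n",
	       (ecx & 2) ? "yes" : "no");
}

int main(void)
{
	unsigned int nr;

	for (nr = 2; nr < 10; nr++)		/* extended states only */
		dump_xfeature(nr);

	return 0;
}

Running it shows at a glance which components XSAVES would place on a 64-byte boundary, which is precisely what setup_xstate_comp() now accounts for.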
Signed-off-by: Yu-cheng Yu Reviewed-by: Dave Hansen Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Dave Hansen Cc: Denys Vlasenko Cc: Fenghua Yu Cc: H. Peter Anvin Cc: Josh Poimboeuf Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Quentin Casasnovas Cc: Ravi V. Shankar Cc: Sai Praneeth Prakhya Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/81b29e40d35d4cec9f2511a856fe769f34935a3f.1466179491.git.yu-cheng.yu@intel.com Signed-off-by: Ingo Molnar --- arch/x86/include/asm/fpu/types.h | 1 + arch/x86/include/asm/fpu/xstate.h | 3 ++ arch/x86/kernel/fpu/xstate.c | 62 ++++++++++++++++++++++++--------------- 3 files changed, 43 insertions(+), 23 deletions(-) (limited to 'arch') diff --git a/arch/x86/include/asm/fpu/types.h b/arch/x86/include/asm/fpu/types.h index 36b90bbfc69f..12dd648735b6 100644 --- a/arch/x86/include/asm/fpu/types.h +++ b/arch/x86/include/asm/fpu/types.h @@ -122,6 +122,7 @@ enum xfeature { #define XFEATURE_MASK_OPMASK (1 << XFEATURE_OPMASK) #define XFEATURE_MASK_ZMM_Hi256 (1 << XFEATURE_ZMM_Hi256) #define XFEATURE_MASK_Hi16_ZMM (1 << XFEATURE_Hi16_ZMM) +#define XFEATURE_MASK_PT (1 << XFEATURE_PT_UNIMPLEMENTED_SO_FAR) #define XFEATURE_MASK_PKRU (1 << XFEATURE_PKRU) #define XFEATURE_MASK_FPSSE (XFEATURE_MASK_FP | XFEATURE_MASK_SSE) diff --git a/arch/x86/include/asm/fpu/xstate.h b/arch/x86/include/asm/fpu/xstate.h index d812cf361282..92f376ccc999 100644 --- a/arch/x86/include/asm/fpu/xstate.h +++ b/arch/x86/include/asm/fpu/xstate.h @@ -18,6 +18,9 @@ #define XSAVE_YMM_SIZE 256 #define XSAVE_YMM_OFFSET (XSAVE_HDR_SIZE + XSAVE_HDR_OFFSET) +/* Supervisor features */ +#define XFEATURE_MASK_SUPERVISOR (XFEATURE_MASK_PT) + /* Supported features which support lazy state saving */ #define XFEATURE_MASK_LAZY (XFEATURE_MASK_FP | \ XFEATURE_MASK_SSE | \ diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c index 7963029cb4ad..02786fb7a1e8 100644 --- a/arch/x86/kernel/fpu/xstate.c +++ b/arch/x86/kernel/fpu/xstate.c @@ -112,6 +112,27 @@ int cpu_has_xfeatures(u64 xfeatures_needed, const char **feature_name) } EXPORT_SYMBOL_GPL(cpu_has_xfeatures); +static int xfeature_is_supervisor(int xfeature_nr) +{ + /* + * We currently do not support supervisor states, but if + * we did, we could find out like this. + * + * SDM says: If state component 'i' is a user state component, + * ECX[0] return 0; if state component i is a supervisor + * state component, ECX[0] returns 1. + */ + u32 eax, ebx, ecx, edx; + + cpuid_count(XSTATE_CPUID, xfeature_nr, &eax, &ebx, &ecx, &edx); + return !!(ecx & 1); +} + +static int xfeature_is_user(int xfeature_nr) +{ + return !xfeature_is_supervisor(xfeature_nr); +} + /* * When executing XSAVEOPT (or other optimized XSAVE instructions), if * a processor implementation detects that an FPU state component is still @@ -230,7 +251,14 @@ static void __init setup_xstate_features(void) continue; cpuid_count(XSTATE_CPUID, i, &eax, &ebx, &ecx, &edx); - xstate_offsets[i] = ebx; + + /* + * If an xfeature is supervisor state, the offset + * in EBX is invalid. We leave it to -1. + */ + if (xfeature_is_user(i)) + xstate_offsets[i] = ebx; + xstate_sizes[i] = eax; /* * In our xstate size checks, we assume that the @@ -375,32 +403,20 @@ static void __init setup_init_fpu_buf(void) copy_xregs_to_kernel_booting(&init_fpstate.xsave); } -static int xfeature_is_supervisor(int xfeature_nr) -{ - /* - * We currently do not support supervisor states, but if - * we did, we could find out like this. 
- * - * SDM says: If state component i is a user state component, - * ECX[0] return 0; if state component i is a supervisor - * state component, ECX[0] returns 1. - u32 eax, ebx, ecx, edx; - cpuid_count(XSTATE_CPUID, xfeature_nr, &eax, &ebx, &ecx, &edx; - return !!(ecx & 1); - */ - return 0; -} -/* -static int xfeature_is_user(int xfeature_nr) -{ - return !xfeature_is_supervisor(xfeature_nr); -} -*/ - static int xfeature_uncompacted_offset(int xfeature_nr) { u32 eax, ebx, ecx, edx; + /* + * Only XSAVES supports supervisor states and it uses compacted + * format. Checking a supervisor state's uncompacted offset is + * an error. + */ + if (XFEATURE_MASK_SUPERVISOR & (1 << xfeature_nr)) { + WARN_ONCE(1, "No fixed offset for xstate %d\n", xfeature_nr); + return -1; + } + CHECK_XFEATURE(xfeature_nr); cpuid_count(XSTATE_CPUID, xfeature_nr, &eax, &ebx, &ecx, &edx); return ebx; -- cgit v1.2.3 From 91c3dba7dbc199191272f4a9863f86ea3bfd679f Mon Sep 17 00:00:00 2001 From: Yu-cheng Yu Date: Fri, 17 Jun 2016 13:07:17 -0700 Subject: x86/fpu/xstate: Fix PTRACE frames for XSAVES XSAVES uses compacted format and is a kernel instruction. The kernel should use standard-format, non-supervisor state data for PTRACE. Signed-off-by: Yu-cheng Yu [ Edited away artificial linebreaks. ] Reviewed-by: Dave Hansen Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Dave Hansen Cc: Denys Vlasenko Cc: Fenghua Yu Cc: H. Peter Anvin Cc: Josh Poimboeuf Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Quentin Casasnovas Cc: Ravi V. Shankar Cc: Sai Praneeth Prakhya Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/de3d80949001305fe389799973b675cab055c457.1466179491.git.yu-cheng.yu@intel.com [ Made various readability edits. ] Signed-off-by: Ingo Molnar --- arch/x86/include/asm/fpu/xstate.h | 5 +- arch/x86/kernel/fpu/regset.c | 52 +++++++---- arch/x86/kernel/fpu/xstate.c | 183 ++++++++++++++++++++++++++++++++++++-- 3 files changed, 216 insertions(+), 24 deletions(-) (limited to 'arch') diff --git a/arch/x86/include/asm/fpu/xstate.h b/arch/x86/include/asm/fpu/xstate.h index 92f376ccc999..ae55a43e09c0 100644 --- a/arch/x86/include/asm/fpu/xstate.h +++ b/arch/x86/include/asm/fpu/xstate.h @@ -51,5 +51,8 @@ void fpu__xstate_clear_all_cpu_caps(void); void *get_xsave_addr(struct xregs_state *xsave, int xstate); const void *get_xsave_field_ptr(int xstate_field); int using_compacted_format(void); - +int copyout_from_xsaves(unsigned int pos, unsigned int count, void *kbuf, + void __user *ubuf, struct xregs_state *xsave); +int copyin_to_xsaves(const void *kbuf, const void __user *ubuf, + struct xregs_state *xsave); #endif diff --git a/arch/x86/kernel/fpu/regset.c b/arch/x86/kernel/fpu/regset.c index 81422dfb152b..c114b132d121 100644 --- a/arch/x86/kernel/fpu/regset.c +++ b/arch/x86/kernel/fpu/regset.c @@ -4,6 +4,7 @@ #include #include #include +#include /* * The xstateregs_active() routine is the same as the regset_fpregs_active() routine, @@ -85,21 +86,26 @@ int xstateregs_get(struct task_struct *target, const struct user_regset *regset, if (!boot_cpu_has(X86_FEATURE_XSAVE)) return -ENODEV; - fpu__activate_fpstate_read(fpu); - xsave = &fpu->state.xsave; - /* - * Copy the 48bytes defined by the software first into the xstate - * memory layout in the thread struct, so that we can copy the entire - * xstateregs to the user using one user_regset_copyout(). - */ - memcpy(&xsave->i387.sw_reserved, - xstate_fx_sw_bytes, sizeof(xstate_fx_sw_bytes)); - /* - * Copy the xstate memory layout. 
- */ - ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, xsave, 0, -1); + fpu__activate_fpstate_read(fpu); + + if (using_compacted_format()) { + ret = copyout_from_xsaves(pos, count, kbuf, ubuf, xsave); + } else { + fpstate_sanitize_xstate(fpu); + /* + * Copy the 48 bytes defined by the software into the xsave + * area in the thread struct, so that we can copy the whole + * area to user using one user_regset_copyout(). + */ + memcpy(&xsave->i387.sw_reserved, xstate_fx_sw_bytes, sizeof(xstate_fx_sw_bytes)); + + /* + * Copy the xstate memory layout. + */ + ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, xsave, 0, -1); + } return ret; } @@ -114,11 +120,27 @@ int xstateregs_set(struct task_struct *target, const struct user_regset *regset, if (!boot_cpu_has(X86_FEATURE_XSAVE)) return -ENODEV; - fpu__activate_fpstate_write(fpu); + /* + * A whole standard-format XSAVE buffer is needed: + */ + if ((pos != 0) || (count < fpu_user_xstate_size)) + return -EFAULT; xsave = &fpu->state.xsave; - ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, xsave, 0, -1); + fpu__activate_fpstate_write(fpu); + + if (boot_cpu_has(X86_FEATURE_XSAVES)) + ret = copyin_to_xsaves(kbuf, ubuf, xsave); + else + ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, xsave, 0, -1); + + /* + * In case of failure, mark all states as init: + */ + if (ret) + fpstate_init(&fpu->state); + /* * mxcsr reserved bits must be masked to zero for security reasons. */ diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c index 02786fb7a1e8..56c0e707af21 100644 --- a/arch/x86/kernel/fpu/xstate.c +++ b/arch/x86/kernel/fpu/xstate.c @@ -11,6 +11,7 @@ #include #include #include +#include #include @@ -697,7 +698,12 @@ void __init fpu__init_system_xstate(void) return; } - update_regset_xstate_info(fpu_kernel_xstate_size, xfeatures_mask); + /* + * Update info used for ptrace frames; use standard-format size and no + * supervisor xstates: + */ + update_regset_xstate_info(fpu_user_xstate_size, xfeatures_mask & ~XFEATURE_MASK_SUPERVISOR); + fpu__init_prepare_fx_sw_frame(); setup_init_fpu_buf(); setup_xstate_comp(); @@ -925,16 +931,16 @@ int arch_set_user_pkey_access(struct task_struct *tsk, int pkey, if (!boot_cpu_has(X86_FEATURE_OSPKE)) return -EINVAL; - /* Set the bits we need in PKRU */ + /* Set the bits we need in PKRU: */ if (init_val & PKEY_DISABLE_ACCESS) new_pkru_bits |= PKRU_AD_BIT; if (init_val & PKEY_DISABLE_WRITE) new_pkru_bits |= PKRU_WD_BIT; - /* Shift the bits in to the correct place in PKRU for pkey. 
*/ + /* Shift the bits in to the correct place in PKRU for pkey: */ new_pkru_bits <<= pkey_shift; - /* Locate old copy of the state in the xsave buffer */ + /* Locate old copy of the state in the xsave buffer: */ old_pkru_state = get_xsave_addr(xsave, XFEATURE_MASK_PKRU); /* @@ -947,9 +953,10 @@ int arch_set_user_pkey_access(struct task_struct *tsk, int pkey, else new_pkru_state.pkru = old_pkru_state->pkru; - /* mask off any old bits in place */ + /* Mask off any old bits in place: */ new_pkru_state.pkru &= ~((PKRU_AD_BIT|PKRU_WD_BIT) << pkey_shift); - /* Set the newly-requested bits */ + + /* Set the newly-requested bits: */ new_pkru_state.pkru |= new_pkru_bits; /* @@ -963,8 +970,168 @@ int arch_set_user_pkey_access(struct task_struct *tsk, int pkey, */ new_pkru_state.pad = 0; - fpu__xfeature_set_state(XFEATURE_MASK_PKRU, &new_pkru_state, - sizeof(new_pkru_state)); + fpu__xfeature_set_state(XFEATURE_MASK_PKRU, &new_pkru_state, sizeof(new_pkru_state)); + + return 0; +} + +/* + * This is similar to user_regset_copyout(), but will not add offset to + * the source data pointer or increment pos, count, kbuf, and ubuf. + */ +static inline int xstate_copyout(unsigned int pos, unsigned int count, + void *kbuf, void __user *ubuf, + const void *data, const int start_pos, + const int end_pos) +{ + if ((count == 0) || (pos < start_pos)) + return 0; + + if (end_pos < 0 || pos < end_pos) { + unsigned int copy = (end_pos < 0 ? count : min(count, end_pos - pos)); + + if (kbuf) { + memcpy(kbuf + pos, data, copy); + } else { + if (__copy_to_user(ubuf + pos, data, copy)) + return -EFAULT; + } + } + return 0; +} + +/* + * Convert from kernel XSAVES compacted format to standard format and copy + * to a ptrace buffer. It supports partial copy but pos always starts from + * zero. This is called from xstateregs_get() and there we check the CPU + * has XSAVES. + */ +int copyout_from_xsaves(unsigned int pos, unsigned int count, void *kbuf, + void __user *ubuf, struct xregs_state *xsave) +{ + unsigned int offset, size; + int ret, i; + struct xstate_header header; + + /* + * Currently copy_regset_to_user() starts from pos 0: + */ + if (unlikely(pos != 0)) + return -EFAULT; + + /* + * The destination is a ptrace buffer; we put in only user xstates: + */ + memset(&header, 0, sizeof(header)); + header.xfeatures = xsave->header.xfeatures; + header.xfeatures &= ~XFEATURE_MASK_SUPERVISOR; + + /* + * Copy xregs_state->header: + */ + offset = offsetof(struct xregs_state, header); + size = sizeof(header); + + ret = xstate_copyout(offset, size, kbuf, ubuf, &header, 0, count); + + if (ret) + return ret; + + for (i = 0; i < XFEATURE_MAX; i++) { + /* + * Copy only in-use xstates: + */ + if ((header.xfeatures >> i) & 1) { + void *src = __raw_xsave_addr(xsave, 1 << i); + + offset = xstate_offsets[i]; + size = xstate_sizes[i]; + + ret = xstate_copyout(offset, size, kbuf, ubuf, src, 0, count); + + if (ret) + return ret; + + if (offset + size >= count) + break; + } + + } + + /* + * Fill xsave->i387.sw_reserved value for ptrace frame: + */ + offset = offsetof(struct fxregs_state, sw_reserved); + size = sizeof(xstate_fx_sw_bytes); + + ret = xstate_copyout(offset, size, kbuf, ubuf, xstate_fx_sw_bytes, 0, count); + + if (ret) + return ret; + + return 0; +} + +/* + * Convert from a ptrace standard-format buffer to kernel XSAVES format + * and copy to the target thread. This is called from xstateregs_set() and + * there we check the CPU has XSAVES and a whole standard-sized buffer + * exists. 
+ */ +int copyin_to_xsaves(const void *kbuf, const void __user *ubuf, + struct xregs_state *xsave) +{ + unsigned int offset, size; + int i; + u64 xfeatures; + u64 allowed_features; + + offset = offsetof(struct xregs_state, header); + size = sizeof(xfeatures); + + if (kbuf) { + memcpy(&xfeatures, kbuf + offset, size); + } else { + if (__copy_from_user(&xfeatures, ubuf + offset, size)) + return -EFAULT; + } + + /* + * Reject if the user sets any disabled or supervisor features: + */ + allowed_features = xfeatures_mask & ~XFEATURE_MASK_SUPERVISOR; + + if (xfeatures & ~allowed_features) + return -EINVAL; + + for (i = 0; i < XFEATURE_MAX; i++) { + u64 mask = ((u64)1 << i); + + if (xfeatures & mask) { + void *dst = __raw_xsave_addr(xsave, 1 << i); + + offset = xstate_offsets[i]; + size = xstate_sizes[i]; + + if (kbuf) { + memcpy(dst, kbuf + offset, size); + } else { + if (__copy_from_user(dst, ubuf + offset, size)) + return -EFAULT; + } + } + } + + /* + * The state that came in from userspace was user-state only. + * Mask all the user states out of 'xfeatures': + */ + xsave->header.xfeatures &= XFEATURE_MASK_SUPERVISOR; + + /* + * Add back in the features that came in from userspace: + */ + xsave->header.xfeatures |= xfeatures; return 0; } -- cgit v1.2.3 From 996952e0148026ac0e512db5cad26e14f4267e8b Mon Sep 17 00:00:00 2001 From: Yu-cheng Yu Date: Fri, 17 Jun 2016 13:07:18 -0700 Subject: x86/fpu/xstate: Fix XSTATE component offset print out Component offset print out was incorrect for XSAVES. Correct it and move to a separate function. Signed-off-by: Yu-cheng Yu Reviewed-by: Dave Hansen Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Dave Hansen Cc: Denys Vlasenko Cc: Fenghua Yu Cc: H. Peter Anvin Cc: Josh Poimboeuf Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Quentin Casasnovas Cc: Ravi V. 
Shankar Cc: Sai Praneeth Prakhya Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/86602a8ac400626c6eca7125c3e15934866fc38e.1466179491.git.yu-cheng.yu@intel.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/fpu/xstate.c | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c index 56c0e707af21..09bac979b8a2 100644 --- a/arch/x86/kernel/fpu/xstate.c +++ b/arch/x86/kernel/fpu/xstate.c @@ -269,8 +269,6 @@ static void __init setup_xstate_features(void) WARN_ONCE(last_good_offset > xstate_offsets[i], "x86/fpu: misordered xstate at %d\n", last_good_offset); last_good_offset = xstate_offsets[i]; - - printk(KERN_INFO "x86/fpu: xstate_offset[%d]: %4d, xstate_sizes[%d]: %4d\n", i, ebx, i, eax); } } @@ -373,6 +371,21 @@ static void __init setup_xstate_comp(void) } } +/* + * Print out xstate component offsets and sizes + */ +static void __init print_xstate_offset_size(void) +{ + int i; + + for (i = FIRST_EXTENDED_XFEATURE; i < XFEATURE_MAX; i++) { + if (!xfeature_enabled(i)) + continue; + pr_info("x86/fpu: xstate_offset[%d]: %4d, xstate_sizes[%d]: %4d\n", + i, xstate_comp_offsets[i], i, xstate_sizes[i]); + } +} + /* * setup the xstate image representing the init state */ @@ -707,6 +720,7 @@ void __init fpu__init_system_xstate(void) fpu__init_prepare_fx_sw_frame(); setup_init_fpu_buf(); setup_xstate_comp(); + print_xstate_offset_size(); pr_info("x86/fpu: Enabled xstate features 0x%llx, context size is %d bytes, using '%s' format.\n", xfeatures_mask, -- cgit v1.2.3 From ac73b27aea4eacdd7555f664d5fc6e1d4d1c8bf6 Mon Sep 17 00:00:00 2001 From: Yu-cheng Yu Date: Fri, 17 Jun 2016 13:07:19 -0700 Subject: x86/fpu/xstate: Fix xstate_offsets, xstate_sizes for non-extended xstates The arrays xstate_offsets[] and xstate_sizes[] record XSAVE standard- format offsets and sizes. Values for non-extended state components fpu and xmm's were not initialized or used. Ptrace format conversion needs them. Fix it. Signed-off-by: Yu-cheng Yu Reviewed-by: Dave Hansen Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Dave Hansen Cc: Denys Vlasenko Cc: Fenghua Yu Cc: H. Peter Anvin Cc: Josh Poimboeuf Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Quentin Casasnovas Cc: Ravi V. Shankar Cc: Sai Praneeth Prakhya Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/cf3ea36cf30e2a99e37da6483e65446d018ff0a7.1466179491.git.yu-cheng.yu@intel.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/fpu/xstate.c | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'arch') diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c index 09bac979b8a2..f8d1aff10f69 100644 --- a/arch/x86/kernel/fpu/xstate.c +++ b/arch/x86/kernel/fpu/xstate.c @@ -246,6 +246,15 @@ static void __init setup_xstate_features(void) /* start at the beginnning of the "extended state" */ unsigned int last_good_offset = offsetof(struct xregs_state, extended_state_area); + /* + * The FP xstates and SSE xstates are legacy states. They are always + * in the fixed offsets in the xsave area in either compacted form + * or standard form. 
+ */ + xstate_offsets[0] = 0; + xstate_sizes[0] = offsetof(struct fxregs_state, xmm_space); + xstate_offsets[1] = xstate_sizes[0]; + xstate_sizes[1] = FIELD_SIZEOF(struct fxregs_state, xmm_space); for (i = FIRST_EXTENDED_XFEATURE; i < XFEATURE_MAX; i++) { if (!xfeature_enabled(i)) -- cgit v1.2.3 From 4ff5308744f5858e4e49e56a0445e2f8b73e47e0 Mon Sep 17 00:00:00 2001 From: Thomas Garnier Date: Wed, 15 Jun 2016 12:05:45 -0700 Subject: x86/mm: Do not reference phys addr beyond kernel The new physical address randomized KASLR implementation can cause the kernel to be aligned close to the end of physical memory. In this case, _brk_end aligned to PMD will go beyond what is expected safe and hit the assert in __phys_addr_symbol(): VIRTUAL_BUG_ON(y >= KERNEL_IMAGE_SIZE); Instead, perform an inclusive range check to avoid incorrectly triggering the assert: kernel BUG at arch/x86/mm/physaddr.c:38! invalid opcode: 0000 [#1] SMP ... RIP: 0010:[] __phys_addr_symbol+0x41/0x50 ... Call Trace: [] cpa_process_alias+0xa9/0x210 [] ? do_raw_spin_unlock+0xc1/0x100 [] __change_page_attr_set_clr+0x8cf/0xbd0 [] ? vm_unmap_aliases+0x7d/0x210 [] change_page_attr_set_clr+0x18c/0x4e0 [] set_memory_4k+0x2c/0x40 [] check_bugs+0x28/0x2a [] start_kernel+0x49d/0x4b9 [] ? early_idt_handler_array+0x120/0x120 [] x86_64_start_reservations+0x29/0x2b [] x86_64_start_kernel+0x143/0x152 Signed-off-by: Thomas Garnier Signed-off-by: Kees Cook Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Borislav Petkov Cc: Brian Gerst Cc: Chris Wilson Cc: Christian Borntraeger Cc: Denys Vlasenko Cc: Dexuan Cui Cc: H. Peter Anvin Cc: Josh Poimboeuf Cc: Linus Torvalds Cc: Matt Fleming Cc: Peter Zijlstra Cc: Sai Praneeth Cc: Thomas Gleixner Cc: Toshi Kani Link: http://lkml.kernel.org/r/20160615190545.GA26071@www.outflux.net Signed-off-by: Ingo Molnar --- arch/x86/mm/pageattr.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c index 7a1f7bbf4105..379b5111ac6b 100644 --- a/arch/x86/mm/pageattr.c +++ b/arch/x86/mm/pageattr.c @@ -101,7 +101,8 @@ static inline unsigned long highmap_start_pfn(void) static inline unsigned long highmap_end_pfn(void) { - return __pa_symbol(roundup(_brk_end, PMD_SIZE)) >> PAGE_SHIFT; + /* Do not reference physical address outside the kernel. */ + return __pa_symbol(roundup(_brk_end, PMD_SIZE) - 1) >> PAGE_SHIFT; } #endif @@ -112,6 +113,12 @@ within(unsigned long addr, unsigned long start, unsigned long end) return addr >= start && addr < end; } +static inline int +within_inclusive(unsigned long addr, unsigned long start, unsigned long end) +{ + return addr >= start && addr <= end; +} + /* * Flushing functions */ @@ -1316,7 +1323,8 @@ static int cpa_process_alias(struct cpa_data *cpa) * to touch the high mapped kernel as well: */ if (!within(vaddr, (unsigned long)_text, _brk_end) && - within(cpa->pfn, highmap_start_pfn(), highmap_end_pfn())) { + within_inclusive(cpa->pfn, highmap_start_pfn(), + highmap_end_pfn())) { unsigned long temp_cpa_vaddr = (cpa->pfn << PAGE_SHIFT) + __START_KERNEL_map - phys_base; alias_cpa = *cpa; -- cgit v1.2.3 From 06a3fcc44d98d6b05afeeae2fbb32938dc3233c7 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Wed, 15 Jun 2016 15:04:20 +0300 Subject: x86/platform/intel-mid: Make vertical indentation consistent The vertical indentation is kinda chaotic in intel-mid.h. Let's be consistent with it. 
Suggested-by: Ingo Molnar Signed-off-by: Andy Shevchenko Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1465992260-29897-1-git-send-email-andriy.shevchenko@linux.intel.com Signed-off-by: Ingo Molnar --- arch/x86/include/asm/intel-mid.h | 65 +++++++++++++++++++++------------------- 1 file changed, 34 insertions(+), 31 deletions(-) (limited to 'arch') diff --git a/arch/x86/include/asm/intel-mid.h b/arch/x86/include/asm/intel-mid.h index 38498a4fb44f..59013a2ac713 100644 --- a/arch/x86/include/asm/intel-mid.h +++ b/arch/x86/include/asm/intel-mid.h @@ -42,11 +42,11 @@ struct devs_id { void *(*get_platform_data)(void *info); /* Custom handler for devices */ void (*device_handler)(struct sfi_device_table_entry *pentry, - struct devs_id *dev); + struct devs_id *dev); }; -#define sfi_device(i) \ - static const struct devs_id *const __intel_mid_sfi_##i##_dev __used \ +#define sfi_device(i) \ + static const struct devs_id *const __intel_mid_sfi_##i##_dev __used \ __attribute__((__section__(".x86_intel_mid_dev.init"))) = &i /* @@ -68,7 +68,7 @@ extern enum intel_mid_cpu_type __intel_mid_cpu_chip; /** * struct intel_mid_ops - Interface between intel-mid & sub archs * @arch_setup: arch_setup function to re-initialize platform - * structures (x86_init, x86_platform_init) + * structures (x86_init, x86_platform_init) * * This structure can be extended if any new interface is required * between intel-mid & its sub arch files. @@ -78,20 +78,20 @@ struct intel_mid_ops { }; /* Helper API's for INTEL_MID_OPS_INIT */ -#define DECLARE_INTEL_MID_OPS_INIT(cpuname, cpuid) \ - [cpuid] = get_##cpuname##_ops +#define DECLARE_INTEL_MID_OPS_INIT(cpuname, cpuid) \ + [cpuid] = get_##cpuname##_ops /* Maximum number of CPU ops */ -#define MAX_CPU_OPS(a) (sizeof(a)/sizeof(void *)) +#define MAX_CPU_OPS(a) (sizeof(a)/sizeof(void *)) /* * For every new cpu addition, a weak get__ops() function needs be * declared in arch/x86/platform/intel_mid/intel_mid_weak_decls.h. */ -#define INTEL_MID_OPS_INIT {\ - DECLARE_INTEL_MID_OPS_INIT(penwell, INTEL_MID_CPU_CHIP_PENWELL), \ - DECLARE_INTEL_MID_OPS_INIT(cloverview, INTEL_MID_CPU_CHIP_CLOVERVIEW), \ - DECLARE_INTEL_MID_OPS_INIT(tangier, INTEL_MID_CPU_CHIP_TANGIER) \ +#define INTEL_MID_OPS_INIT { \ + DECLARE_INTEL_MID_OPS_INIT(penwell, INTEL_MID_CPU_CHIP_PENWELL), \ + DECLARE_INTEL_MID_OPS_INIT(cloverview, INTEL_MID_CPU_CHIP_CLOVERVIEW), \ + DECLARE_INTEL_MID_OPS_INIT(tangier, INTEL_MID_CPU_CHIP_TANGIER) \ }; #ifdef CONFIG_X86_INTEL_MID @@ -108,8 +108,8 @@ static inline bool intel_mid_has_msic(void) #else /* !CONFIG_X86_INTEL_MID */ -#define intel_mid_identify_cpu() (0) -#define intel_mid_has_msic() (0) +#define intel_mid_identify_cpu() 0 +#define intel_mid_has_msic() 0 #endif /* !CONFIG_X86_INTEL_MID */ @@ -125,35 +125,38 @@ extern enum intel_mid_timer_options intel_mid_timer_options; * Penwell uses spread spectrum clock, so the freq number is not exactly * the same as reported by MSR based on SDM. 
*/ -#define FSB_FREQ_83SKU 83200 -#define FSB_FREQ_100SKU 99840 -#define FSB_FREQ_133SKU 133000 +#define FSB_FREQ_83SKU 83200 +#define FSB_FREQ_100SKU 99840 +#define FSB_FREQ_133SKU 133000 -#define FSB_FREQ_167SKU 167000 -#define FSB_FREQ_200SKU 200000 -#define FSB_FREQ_267SKU 267000 -#define FSB_FREQ_333SKU 333000 -#define FSB_FREQ_400SKU 400000 +#define FSB_FREQ_167SKU 167000 +#define FSB_FREQ_200SKU 200000 +#define FSB_FREQ_267SKU 267000 +#define FSB_FREQ_333SKU 333000 +#define FSB_FREQ_400SKU 400000 /* Bus Select SoC Fuse value */ -#define BSEL_SOC_FUSE_MASK 0x7 -#define BSEL_SOC_FUSE_001 0x1 /* FSB 133MHz */ -#define BSEL_SOC_FUSE_101 0x5 /* FSB 100MHz */ -#define BSEL_SOC_FUSE_111 0x7 /* FSB 83MHz */ +#define BSEL_SOC_FUSE_MASK 0x7 +/* FSB 133MHz */ +#define BSEL_SOC_FUSE_001 0x1 +/* FSB 100MHz */ +#define BSEL_SOC_FUSE_101 0x5 +/* FSB 83MHz */ +#define BSEL_SOC_FUSE_111 0x7 -#define SFI_MTMR_MAX_NUM 8 -#define SFI_MRTC_MAX 8 +#define SFI_MTMR_MAX_NUM 8 +#define SFI_MRTC_MAX 8 extern void intel_scu_devices_create(void); extern void intel_scu_devices_destroy(void); /* VRTC timer */ -#define MRST_VRTC_MAP_SZ (1024) -/*#define MRST_VRTC_PGOFFSET (0xc00) */ +#define MRST_VRTC_MAP_SZ 1024 +/* #define MRST_VRTC_PGOFFSET 0xc00 */ extern void intel_mid_rtc_init(void); -/* the offset for the mapping of global gpio pin to irq */ -#define INTEL_MID_IRQ_OFFSET 0x100 +/* The offset for the mapping of global gpio pin to irq */ +#define INTEL_MID_IRQ_OFFSET 0x100 #endif /* _ASM_X86_INTEL_MID_H */ -- cgit v1.2.3 From eb019503569c8c701f1e9c70e848d99c6680839b Mon Sep 17 00:00:00 2001 From: Vegard Nossum Date: Sun, 10 Jul 2016 19:14:01 +0200 Subject: perf/x86: Fix bogus kernel printk, again This showed up as "6Failed to access..." here. Signed-off-by: Vegard Nossum Cc: Alexander Shishkin Cc: Arnaldo Carvalho de Melo Cc: Chen Yucong Cc: Jiri Olsa Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Fixes: 1b74dde7c47c ("x86/cpu: Convert printk(KERN_ ...) to pr_(...)") Link: http://lkml.kernel.org/r/1468170841-17045-1-git-send-email-vegard.nossum@oracle.com Signed-off-by: Ingo Molnar --- arch/x86/events/core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c index 26ced536005a..91eac39625be 100644 --- a/arch/x86/events/core.c +++ b/arch/x86/events/core.c @@ -263,7 +263,7 @@ static bool check_hw_exists(void) msr_fail: pr_cont("Broken PMU hardware detected, using software events only.\n"); - pr_info("%sFailed to access perfctr msr (MSR %x is %Lx)\n", + printk("%sFailed to access perfctr msr (MSR %x is %Lx)\n", boot_cpu_has(X86_FEATURE_HYPERVISOR) ? KERN_INFO : KERN_ERR, reg, val_new); -- cgit v1.2.3 From 447d29d1d3aed839e74c2401ef63387780ac51ed Mon Sep 17 00:00:00 2001 From: Lukas Wunner Date: Sun, 12 Jun 2016 12:31:53 +0200 Subject: x86/quirks: Apply nvidia_bugs quirk only on root bus Since the following commit: 8659c406ade3 ("x86: only scan the root bus in early PCI quirks") ... early quirks are only applied to devices on the root bus. The motivation was to prevent application of the nvidia_bugs quirk on secondary buses. We're about to reintroduce scanning of secondary buses for a quirk to reset the Broadcom 4331 wireless card on 2011/2012 Macs. To prevent regressions, open code the requirement to apply nvidia_bugs only on the root bus. Signed-off-by: Lukas Wunner Cc: Andy Lutomirski Cc: Bjorn Helgaas Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. 
Peter Anvin Cc: Josh Poimboeuf Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Yinghai Lu Link: http://lkml.kernel.org/r/4d5477c1d76b2f0387a780f2142bbcdd9fee869b.1465690253.git.lukas@wunner.de Signed-off-by: Ingo Molnar --- arch/x86/kernel/early-quirks.c | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'arch') diff --git a/arch/x86/kernel/early-quirks.c b/arch/x86/kernel/early-quirks.c index bca14c899137..256976fe2666 100644 --- a/arch/x86/kernel/early-quirks.c +++ b/arch/x86/kernel/early-quirks.c @@ -75,6 +75,13 @@ static void __init nvidia_bugs(int num, int slot, int func) { #ifdef CONFIG_ACPI #ifdef CONFIG_X86_IO_APIC + /* + * Only applies to Nvidia root ports (bus 0) and not to + * Nvidia graphics cards with PCI ports on secondary buses. + */ + if (num) + return; + /* * All timer overrides on Nvidia are * wrong unless HPET is enabled. -- cgit v1.2.3 From 850c321027c2e31d0afc71588974719a4b565550 Mon Sep 17 00:00:00 2001 From: Lukas Wunner Date: Sun, 12 Jun 2016 12:31:53 +0200 Subject: x86/quirks: Reintroduce scanning of secondary buses We used to scan secondary buses until the following commit that was applied in 2009: 8659c406ade3 ("x86: only scan the root bus in early PCI quirks") which commit constrained early quirks to the root bus only. Its motivation was to prevent application of the nvidia_bugs quirk on secondary buses. We're about to add a quirk to reset the Broadcom 4331 wireless card on 2011/2012 Macs, which is located on a secondary bus behind a PCIe root port. To facilitate that, reintroduce scanning of secondary buses. The commit message of 8659c406ade3 notes that scanning only the root bus "saves quite some unnecessary scanning work". The algorithm used prior to 8659c406ade3 was particularly time consuming because it scanned buses 0 to 31 brute force. To avoid lengthening boot time, employ a recursive strategy which only scans buses that are actually reachable from the root bus. Yinghai Lu pointed out that the secondary bus number read from a bridge's config space may be invalid, in particular a value of 0 would cause an infinite loop. The PCI core goes beyond that and recurses to a child bus only if its bus number is greater than the parent bus number (see pci_scan_bridge()). Since the root bus is numbered 0, this implies that secondary buses may not be 0. Do the same on early scanning. If this algorithm is found to significantly impact boot time or cause infinite loops on broken hardware, it would be possible to limit its recursion depth: The Broadcom 4331 quirk applies at depth 1, all others at depth 0, so the bus need not be scanned deeper than that for now. An alternative approach would be to revert to scanning only the root bus, and apply the Broadcom 4331 quirk to the root ports 8086:1c12, 8086:1e12 and 8086:1e16. Apple always positioned the card behind either of these three ports. The quirk would then check presence of the card in slot 0 below the root port and do its deed. Signed-off-by: Lukas Wunner Cc: Andy Lutomirski Cc: Bjorn Helgaas Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. 
Peter Anvin Cc: Josh Poimboeuf Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Yinghai Lu Cc: linux-pci@vger.kernel.org Link: http://lkml.kernel.org/r/f0daa70dac1a9b2483abdb31887173eb6ab77bdf.1465690253.git.lukas@wunner.de Signed-off-by: Ingo Molnar --- arch/x86/kernel/early-quirks.c | 34 +++++++++++++++++++++------------- 1 file changed, 21 insertions(+), 13 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/early-quirks.c b/arch/x86/kernel/early-quirks.c index 256976fe2666..ea60c05c2487 100644 --- a/arch/x86/kernel/early-quirks.c +++ b/arch/x86/kernel/early-quirks.c @@ -610,12 +610,6 @@ struct chipset { void (*f)(int num, int slot, int func); }; -/* - * Only works for devices on the root bus. If you add any devices - * not on bus 0 readd another loop level in early_quirks(). But - * be careful because at least the Nvidia quirk here relies on - * only matching on bus 0. - */ static struct chipset early_qrk[] __initdata = { { PCI_VENDOR_ID_NVIDIA, PCI_ANY_ID, PCI_CLASS_BRIDGE_PCI, PCI_ANY_ID, QFLAG_APPLY_ONCE, nvidia_bugs }, @@ -648,6 +642,8 @@ static struct chipset early_qrk[] __initdata = { {} }; +static void __init early_pci_scan_bus(int bus); + /** * check_dev_quirk - apply early quirks to a given PCI device * @num: bus number @@ -656,7 +652,7 @@ static struct chipset early_qrk[] __initdata = { * * Check the vendor & device ID against the early quirks table. * - * If the device is single function, let early_quirks() know so we don't + * If the device is single function, let early_pci_scan_bus() know so we don't * poke at this device again. */ static int __init check_dev_quirk(int num, int slot, int func) @@ -665,6 +661,7 @@ static int __init check_dev_quirk(int num, int slot, int func) u16 vendor; u16 device; u8 type; + u8 sec; int i; class = read_pci_config_16(num, slot, func, PCI_CLASS_DEVICE); @@ -692,25 +689,36 @@ static int __init check_dev_quirk(int num, int slot, int func) type = read_pci_config_byte(num, slot, func, PCI_HEADER_TYPE); + + if ((type & 0x7f) == PCI_HEADER_TYPE_BRIDGE) { + sec = read_pci_config_byte(num, slot, func, PCI_SECONDARY_BUS); + if (sec > num) + early_pci_scan_bus(sec); + } + if (!(type & 0x80)) return -1; return 0; } -void __init early_quirks(void) +static void __init early_pci_scan_bus(int bus) { int slot, func; - if (!early_pci_allowed()) - return; - /* Poor man's PCI discovery */ - /* Only scan the root bus */ for (slot = 0; slot < 32; slot++) for (func = 0; func < 8; func++) { /* Only probe function 0 on single fn devices */ - if (check_dev_quirk(0, slot, func)) + if (check_dev_quirk(bus, slot, func)) break; } } + +void __init early_quirks(void) +{ + if (!early_pci_allowed()) + return; + + early_pci_scan_bus(0); +} -- cgit v1.2.3 From abb2bafd295fe962bbadc329dbfb2146457283ac Mon Sep 17 00:00:00 2001 From: Lukas Wunner Date: Sun, 12 Jun 2016 12:31:53 +0200 Subject: x86/quirks: Add early quirk to reset Apple AirPort card MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The EFI firmware on Macs contains a full-fledged network stack for downloading OS X images from osrecovery.apple.com. Unfortunately on Macs introduced 2011 and 2012, EFI brings up the Broadcom 4331 wireless card on every boot and leaves it enabled even after ExitBootServices has been called. The card continues to assert its IRQ line, causing spurious interrupts if the IRQ is shared. It also corrupts memory by DMAing received packets, allowing for remote code execution over the air. 
This only stops when a driver is loaded for the wireless card, which may be never if the driver is not installed or blacklisted. The issue seems to be constrained to the Broadcom 4331. Chris Milsted has verified that the newer Broadcom 4360 built into the MacBookPro11,3 (2013/2014) does not exhibit this behaviour. The chances that Apple will ever supply a firmware fix for the older machines appear to be zero. The solution is to reset the card on boot by writing to a reset bit in its mmio space. This must be done as an early quirk and not as a plain vanilla PCI quirk to successfully combat memory corruption by DMAed packets: Matthew Garrett found out in 2012 that the packets are written to EfiBootServicesData memory (http://mjg59.dreamwidth.org/11235.html). This type of memory is made available to the page allocator by efi_free_boot_services(). Plain vanilla PCI quirks run much later, in subsys initcall level. In-between a time window would be open for memory corruption. Random crashes occurring in this time window and attributed to DMAed packets have indeed been observed in the wild by Chris Bainbridge. When Matthew Garrett analyzed the memory corruption issue in 2012, he sought to fix it with a grub quirk which transitions the card to D3hot: http://git.savannah.gnu.org/cgit/grub.git/commit/?id=9d34bb85da56 This approach does not help users with other bootloaders and while it may prevent DMAed packets, it does not cure the spurious interrupts emanating from the card. Unfortunately the card's mmio space is inaccessible in D3hot, so to reset it, we have to undo the effect of Matthew's grub patch and transition the card back to D0. Note that the quirk takes a few shortcuts to reduce the amount of code: The size of BAR 0 and the location of the PM capability is identical on all affected machines and therefore hardcoded. Only the address of BAR 0 differs between models. Also, it is assumed that the BCMA core currently mapped is the 802.11 core. The EFI driver seems to always take care of this. Michael Büsch, Bjorn Helgaas and Matt Fleming contributed feedback towards finding the best solution to this problem. 
The following should be a comprehensive list of affected models: iMac13,1 2012 21.5" [Root Port 00:1c.3 = 8086:1e16] iMac13,2 2012 27" [Root Port 00:1c.3 = 8086:1e16] Macmini5,1 2011 i5 2.3 GHz [Root Port 00:1c.1 = 8086:1c12] Macmini5,2 2011 i5 2.5 GHz [Root Port 00:1c.1 = 8086:1c12] Macmini5,3 2011 i7 2.0 GHz [Root Port 00:1c.1 = 8086:1c12] Macmini6,1 2012 i5 2.5 GHz [Root Port 00:1c.1 = 8086:1e12] Macmini6,2 2012 i7 2.3 GHz [Root Port 00:1c.1 = 8086:1e12] MacBookPro8,1 2011 13" [Root Port 00:1c.1 = 8086:1c12] MacBookPro8,2 2011 15" [Root Port 00:1c.1 = 8086:1c12] MacBookPro8,3 2011 17" [Root Port 00:1c.1 = 8086:1c12] MacBookPro9,1 2012 15" [Root Port 00:1c.1 = 8086:1e12] MacBookPro9,2 2012 13" [Root Port 00:1c.1 = 8086:1e12] MacBookPro10,1 2012 15" [Root Port 00:1c.1 = 8086:1e12] MacBookPro10,2 2012 13" [Root Port 00:1c.1 = 8086:1e12] For posterity, spurious interrupts caused by the Broadcom 4331 wireless card resulted in splats like this (stacktrace omitted): irq 17: nobody cared (try booting with the "irqpoll" option) handlers: [] pcie_isr [] sdhci_irq [sdhci] threaded [] sdhci_thread_irq [sdhci] [] azx_interrupt [snd_hda_codec] Disabling IRQ #17 Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=79301 Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=111781 Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=728916 Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=895951#c16 Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1009819 Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1098621 Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1149632#c5 Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1279130 Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1332732 Tested-by: Konstantin Simanov # [MacBookPro8,1] Tested-by: Lukas Wunner # [MacBookPro9,1] Tested-by: Bryan Paradis # [MacBookPro9,2] Tested-by: Andrew Worsley # [MacBookPro10,1] Tested-by: Chris Bainbridge # [MacBookPro10,2] Signed-off-by: Lukas Wunner Acked-by: Rafał Miłecki Acked-by: Matt Fleming Cc: Andy Lutomirski Cc: Bjorn Helgaas Cc: Borislav Petkov Cc: Brian Gerst Cc: Chris Milsted Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Josh Poimboeuf Cc: Linus Torvalds Cc: Matthew Garrett Cc: Michael Buesch Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Yinghai Lu Cc: b43-dev@lists.infradead.org Cc: linux-pci@vger.kernel.org Cc: linux-wireless@vger.kernel.org Cc: stable@vger.kernel.org Cc: stable@vger.kernel.org # 123456789abc: x86/quirks: Apply nvidia_bugs quirk only on root bus Cc: stable@vger.kernel.org # 123456789abc: x86/quirks: Reintroduce scanning of secondary buses Link: http://lkml.kernel.org/r/48d0972ac82a53d460e5fce77a07b2560db95203.1465690253.git.lukas@wunner.de [ Did minor readability edits. 
] Signed-off-by: Ingo Molnar --- arch/x86/kernel/early-quirks.c | 64 ++++++++++++++++++++++++++++++++++++++++++ drivers/bcma/bcma_private.h | 2 -- include/linux/bcma/bcma.h | 1 + 3 files changed, 65 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/early-quirks.c b/arch/x86/kernel/early-quirks.c index ea60c05c2487..57b71373bae3 100644 --- a/arch/x86/kernel/early-quirks.c +++ b/arch/x86/kernel/early-quirks.c @@ -11,7 +11,11 @@ #include #include +#include +#include #include +#include +#include #include #include #include @@ -21,6 +25,9 @@ #include #include #include +#include + +#define dev_err(msg) pr_err("pci 0000:%02x:%02x.%d: %s", bus, slot, func, msg) static void __init fix_hypertransport_config(int num, int slot, int func) { @@ -597,6 +604,61 @@ static void __init force_disable_hpet(int num, int slot, int func) #endif } +#define BCM4331_MMIO_SIZE 16384 +#define BCM4331_PM_CAP 0x40 +#define bcma_aread32(reg) ioread32(mmio + 1 * BCMA_CORE_SIZE + reg) +#define bcma_awrite32(reg, val) iowrite32(val, mmio + 1 * BCMA_CORE_SIZE + reg) + +static void __init apple_airport_reset(int bus, int slot, int func) +{ + void __iomem *mmio; + u16 pmcsr; + u64 addr; + int i; + + if (!dmi_match(DMI_SYS_VENDOR, "Apple Inc.")) + return; + + /* Card may have been put into PCI_D3hot by grub quirk */ + pmcsr = read_pci_config_16(bus, slot, func, BCM4331_PM_CAP + PCI_PM_CTRL); + + if ((pmcsr & PCI_PM_CTRL_STATE_MASK) != PCI_D0) { + pmcsr &= ~PCI_PM_CTRL_STATE_MASK; + write_pci_config_16(bus, slot, func, BCM4331_PM_CAP + PCI_PM_CTRL, pmcsr); + mdelay(10); + + pmcsr = read_pci_config_16(bus, slot, func, BCM4331_PM_CAP + PCI_PM_CTRL); + if ((pmcsr & PCI_PM_CTRL_STATE_MASK) != PCI_D0) { + dev_err("Cannot power up Apple AirPort card\n"); + return; + } + } + + addr = read_pci_config(bus, slot, func, PCI_BASE_ADDRESS_0); + addr |= (u64)read_pci_config(bus, slot, func, PCI_BASE_ADDRESS_1) << 32; + addr &= PCI_BASE_ADDRESS_MEM_MASK; + + mmio = early_ioremap(addr, BCM4331_MMIO_SIZE); + if (!mmio) { + dev_err("Cannot iomap Apple AirPort card\n"); + return; + } + + pr_info("Resetting Apple AirPort card (left enabled by EFI)\n"); + + for (i = 0; bcma_aread32(BCMA_RESET_ST) && i < 30; i++) + udelay(10); + + bcma_awrite32(BCMA_RESET_CTL, BCMA_RESET_CTL_RESET); + bcma_aread32(BCMA_RESET_CTL); + udelay(1); + + bcma_awrite32(BCMA_RESET_CTL, 0); + bcma_aread32(BCMA_RESET_CTL); + udelay(10); + + early_iounmap(mmio, BCM4331_MMIO_SIZE); +} #define QFLAG_APPLY_ONCE 0x1 #define QFLAG_APPLIED 0x2 @@ -639,6 +701,8 @@ static struct chipset early_qrk[] __initdata = { */ { PCI_VENDOR_ID_INTEL, 0x0f00, PCI_CLASS_BRIDGE_HOST, PCI_ANY_ID, 0, force_disable_hpet}, + { PCI_VENDOR_ID_BROADCOM, 0x4331, + PCI_CLASS_NETWORK_OTHER, PCI_ANY_ID, 0, apple_airport_reset}, {} }; diff --git a/drivers/bcma/bcma_private.h b/drivers/bcma/bcma_private.h index eda09090cb52..f642c4264c27 100644 --- a/drivers/bcma/bcma_private.h +++ b/drivers/bcma/bcma_private.h @@ -8,8 +8,6 @@ #include #include -#define BCMA_CORE_SIZE 0x1000 - #define bcma_err(bus, fmt, ...) \ pr_err("bus%d: " fmt, (bus)->num, ##__VA_ARGS__) #define bcma_warn(bus, fmt, ...) 
\ diff --git a/include/linux/bcma/bcma.h b/include/linux/bcma/bcma.h index e6b41f42602b..3db25df396cb 100644 --- a/include/linux/bcma/bcma.h +++ b/include/linux/bcma/bcma.h @@ -159,6 +159,7 @@ struct bcma_host_ops { #define BCMA_CORE_DEFAULT 0xFFF #define BCMA_MAX_NR_CORES 16 +#define BCMA_CORE_SIZE 0x1000 /* Chip IDs of PCIe devices */ #define BCMA_CHIP_ID_BCM4313 0x4313 -- cgit v1.2.3 From 44530d588e142a96cf0cd345a7cb8911c4f88720 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sun, 10 Jul 2016 20:58:36 +0200 Subject: Revert "perf/x86/intel, watchdog: Switch NMI watchdog to ref cycles on x86" This reverts commit 2c95afc1e83d93fac3be6923465e1753c2c53b0a. Stephane reported the following regression: > Since Andi added: > > commit 2c95afc1e83d93fac3be6923465e1753c2c53b0a > Author: Andi Kleen > Date: Thu Jun 9 06:14:38 2016 -0700 > > perf/x86/intel, watchdog: Switch NMI watchdog to ref cycles on x86 > > $ perf stat -e ref-cycles ls > .... > > fails systematically because the ref-cycles is now used by the > watchdog and given this is a system-wide pinned event, it monopolizes > the fixed counter 2 which is the only counter able to measure this event. Since the next merge window is near, fix the regression for now by reverting the commit. Reported-by: Stephane Eranian Cc: Andi Kleen Cc: Peter Zijlstra Cc: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Vince Weaver Cc: Alexander Shishkin Cc: Linus Torvalds Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic/hw_nmi.c | 8 -------- include/linux/nmi.h | 1 - kernel/watchdog.c | 7 ------- 3 files changed, 16 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/apic/hw_nmi.c b/arch/x86/kernel/apic/hw_nmi.c index 016f4263fad4..7788ce643bf4 100644 --- a/arch/x86/kernel/apic/hw_nmi.c +++ b/arch/x86/kernel/apic/hw_nmi.c @@ -18,16 +18,8 @@ #include #include #include -#include #ifdef CONFIG_HARDLOCKUP_DETECTOR -int hw_nmi_get_event(void) -{ - if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) - return PERF_COUNT_HW_REF_CPU_CYCLES; - return PERF_COUNT_HW_CPU_CYCLES; -} - u64 hw_nmi_get_sample_period(int watchdog_thresh) { return (u64)(cpu_khz) * 1000 * watchdog_thresh; diff --git a/include/linux/nmi.h b/include/linux/nmi.h index 79858af27209..4630eeae18e0 100644 --- a/include/linux/nmi.h +++ b/include/linux/nmi.h @@ -66,7 +66,6 @@ static inline bool trigger_allbutself_cpu_backtrace(void) #ifdef CONFIG_LOCKUP_DETECTOR u64 hw_nmi_get_sample_period(int watchdog_thresh); -int hw_nmi_get_event(void); extern int nmi_watchdog_enabled; extern int soft_watchdog_enabled; extern int watchdog_user_enabled; diff --git a/kernel/watchdog.c b/kernel/watchdog.c index 8dd30fcd91be..9acb29f280ec 100644 --- a/kernel/watchdog.c +++ b/kernel/watchdog.c @@ -315,12 +315,6 @@ static int is_softlockup(unsigned long touch_ts) #ifdef CONFIG_HARDLOCKUP_DETECTOR -/* Can be overriden by architecture */ -__weak int hw_nmi_get_event(void) -{ - return PERF_COUNT_HW_CPU_CYCLES; -} - static struct perf_event_attr wd_hw_attr = { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_CPU_CYCLES, @@ -610,7 +604,6 @@ static int watchdog_nmi_enable(unsigned int cpu) wd_attr = &wd_hw_attr; wd_attr->sample_period = hw_nmi_get_sample_period(watchdog_thresh); - wd_attr->config = hw_nmi_get_event(); /* Try to register using hardware perf events */ event = perf_event_create_kernel_counter(wd_attr, cpu, NULL, watchdog_overflow_callback, NULL); -- cgit v1.2.3 From 1fc2b67b43d5001b92b3a002b988884ad0137e99 Mon Sep 17 00:00:00 2001 From: Yu-cheng Yu Date: 
Mon, 11 Jul 2016 09:18:54 -0700 Subject: x86/fpu/xstate: Fix __fpu_restore_sig() for XSAVES When the kernel is using XSAVES compacted format, we cannot do __copy_from_user() from a signal frame, which has standard-format data. Fix it by using copyin_to_xsaves(), which converts between formats and filters out all supervisor states that we do not allow userspace to write. Signed-off-by: Yu-cheng Yu Signed-off-by: Fenghua Yu Reviewed-by: Dave Hansen Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Ravi V Shankar Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1468253937-40008-2-git-send-email-fenghua.yu@intel.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/fpu/signal.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/fpu/signal.c b/arch/x86/kernel/fpu/signal.c index 8aa96cbb5dfb..9e231d88bb33 100644 --- a/arch/x86/kernel/fpu/signal.c +++ b/arch/x86/kernel/fpu/signal.c @@ -323,8 +323,15 @@ static int __fpu__restore_sig(void __user *buf, void __user *buf_fx, int size) */ fpu__drop(fpu); - if (__copy_from_user(&fpu->state.xsave, buf_fx, state_size) || - __copy_from_user(&env, buf, sizeof(env))) { + if (using_compacted_format()) { + err = copyin_to_xsaves(NULL, buf_fx, + &fpu->state.xsave); + } else { + err = __copy_from_user(&fpu->state.xsave, + buf_fx, state_size); + } + + if (err || __copy_from_user(&env, buf, sizeof(env))) { fpstate_init(&fpu->state); trace_x86_fpu_init_state(fpu); err = -1; -- cgit v1.2.3 From 5060b91513b866f774da15dfd82157864c4b1683 Mon Sep 17 00:00:00 2001 From: Yu-cheng Yu Date: Mon, 11 Jul 2016 09:18:55 -0700 Subject: x86/fpu/xstate: Return NULL for disabled xstate component address It is an error to request a disabled XSAVE/XSAVES component address. For that case, make __raw_xsave_addr() return a NULL and issue a warning. Signed-off-by: Yu-cheng Yu Signed-off-by: Fenghua Yu Reviewed-by: Dave Hansen Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Ravi V Shankar Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1468253937-40008-3-git-send-email-fenghua.yu@intel.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/fpu/xstate.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'arch') diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c index f8d1aff10f69..4fb8dd7697c5 100644 --- a/arch/x86/kernel/fpu/xstate.c +++ b/arch/x86/kernel/fpu/xstate.c @@ -760,6 +760,11 @@ void *__raw_xsave_addr(struct xregs_state *xsave, int xstate_feature_mask) { int feature_nr = fls64(xstate_feature_mask) - 1; + if (!xfeature_enabled(feature_nr)) { + WARN_ON_FPU(1); + return NULL; + } + return (void *)xsave + xstate_comp_offsets[feature_nr]; } /* -- cgit v1.2.3 From 35ac2d7ba787eb4b7418a5a6f5919c25e10a780a Mon Sep 17 00:00:00 2001 From: Yu-cheng Yu Date: Mon, 11 Jul 2016 09:18:56 -0700 Subject: x86/fpu/xstate: Fix fpstate_init() for XRSTORS In XSAVES mode if fpstate_init() is used to initialize a task's extended state area, xsave.header.xcomp_bv[63] must be set. Otherwise, when the task is scheduled, a warning is triggered from copy_kernel_to_xregs(). One such test case is: setting an invalid extended state through PTRACE. When xstateregs_set() rejects the syscall and re-initializes the task's extended state area. This triggers the warning mentioned above. Signed-off-by: Yu-cheng Yu Signed-off-by: Fenghua Yu Reviewed-by: Dave Hansen Cc: H. 
Peter Anvin Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Ravi V Shankar Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1468253937-40008-4-git-send-email-fenghua.yu@intel.com Signed-off-by: Ingo Molnar --- arch/x86/include/asm/fpu/types.h | 6 ++++++ arch/x86/kernel/fpu/core.c | 8 ++++++++ 2 files changed, 14 insertions(+) (limited to 'arch') diff --git a/arch/x86/include/asm/fpu/types.h b/arch/x86/include/asm/fpu/types.h index 12dd648735b6..48df486b02f9 100644 --- a/arch/x86/include/asm/fpu/types.h +++ b/arch/x86/include/asm/fpu/types.h @@ -231,6 +231,12 @@ struct xstate_header { u64 reserved[6]; } __attribute__((packed)); +/* + * xstate_header.xcomp_bv[63] indicates that the extended_state_area + * is in compacted format. + */ +#define XCOMP_BV_COMPACTED_FORMAT ((u64)1 << 63) + /* * This is our most modern FPU state format, as saved by the XSAVE * and restored by the XRSTOR instructions. diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c index c759bd01ec99..3fc03a09a93b 100644 --- a/arch/x86/kernel/fpu/core.c +++ b/arch/x86/kernel/fpu/core.c @@ -8,6 +8,7 @@ #include #include #include +#include #include #include @@ -229,6 +230,13 @@ void fpstate_init(union fpregs_state *state) memset(state, 0, fpu_kernel_xstate_size); + /* + * XRSTORS requires that this bit is set in xcomp_bv, or + * it will #GP. Make sure it is replaced after the memset(). + */ + if (static_cpu_has(X86_FEATURE_XSAVES)) + state->xsave.header.xcomp_bv = XCOMP_BV_COMPACTED_FORMAT; + if (static_cpu_has(X86_FEATURE_FXSR)) fpstate_init_fxstate(&state->fxsave); else -- cgit v1.2.3 From b8be15d588060a03569ac85dc4a0247460988f5b Mon Sep 17 00:00:00 2001 From: Yu-cheng Yu Date: Mon, 11 Jul 2016 09:18:57 -0700 Subject: x86/fpu/xstate: Re-enable XSAVES We did not handle XSAVES instructions correctly. There were issues in converting between standard and compacted format when interfacing with user-space. These issues have been corrected. Add a WARN_ONCE() to make it clear that XSAVES supervisor states are not yet implemented. Signed-off-by: Yu-cheng Yu Signed-off-by: Fenghua Yu Reviewed-by: Dave Hansen Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Ravi V Shankar Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1468253937-40008-5-git-send-email-fenghua.yu@intel.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/fpu/init.c | 15 --------------- arch/x86/kernel/fpu/xstate.c | 9 +++++++++ 2 files changed, 9 insertions(+), 15 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/fpu/init.c b/arch/x86/kernel/fpu/init.c index 60f3839c5bfa..93982aebb398 100644 --- a/arch/x86/kernel/fpu/init.c +++ b/arch/x86/kernel/fpu/init.c @@ -230,21 +230,6 @@ static void __init fpu__init_system_xstate_size_legacy(void) } fpu_user_xstate_size = fpu_kernel_xstate_size; - - /* - * Quirk: we don't yet handle the XSAVES* instructions - * correctly, as we don't correctly convert between - * standard and compacted format when interfacing - * with user-space - so disable it for now. - * - * The difference is small: with recent CPUs the - * compacted format is only marginally smaller than - * the standard FPU state format. - * - * ( This is easy to backport while we are fixing - * XSAVES* support. 
) - */ - setup_clear_cpu_cap(X86_FEATURE_XSAVES); } /* diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c index 4fb8dd7697c5..3169bcaf9391 100644 --- a/arch/x86/kernel/fpu/xstate.c +++ b/arch/x86/kernel/fpu/xstate.c @@ -221,6 +221,15 @@ void fpu__init_cpu_xstate(void) { if (!boot_cpu_has(X86_FEATURE_XSAVE) || !xfeatures_mask) return; + /* + * Make it clear that XSAVES supervisor states are not yet + * implemented should anyone expect it to work by changing + * bits in XFEATURE_MASK_* macros and XCR0. + */ + WARN_ONCE((xfeatures_mask & XFEATURE_MASK_SUPERVISOR), + "x86/fpu: XSAVES supervisor states are not yet implemented.\n"); + + xfeatures_mask &= ~XFEATURE_MASK_SUPERVISOR; cr4_set_bits(X86_CR4_OSXSAVE); xsetbv(XCR_XFEATURE_ENABLED_MASK, xfeatures_mask); -- cgit v1.2.3 From 02c0cd2dcf7fdc47d054b855b148ea8b82dbb7eb Mon Sep 17 00:00:00 2001 From: Len Brown Date: Fri, 17 Jun 2016 01:22:50 -0400 Subject: x86/tsc_msr: Remove irqoff around MSR-based TSC enumeration Remove the irqoff/irqon around MSR-based TSC enumeration, as it is not necessary. Also rename: try_msr_calibrate_tsc() to cpu_khz_from_msr(), as that better describes what the routine does. Signed-off-by: Len Brown Reviewed-by: Thomas Gleixner Cc: Linus Torvalds Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/a6b5c3ecd3b068175d2309599ab28163fc34215e.1466138954.git.len.brown@intel.com Signed-off-by: Ingo Molnar --- arch/x86/include/asm/tsc.h | 3 +-- arch/x86/kernel/tsc.c | 5 +---- arch/x86/kernel/tsc_msr.c | 2 +- 3 files changed, 3 insertions(+), 7 deletions(-) (limited to 'arch') diff --git a/arch/x86/include/asm/tsc.h b/arch/x86/include/asm/tsc.h index 7428697c5b8d..db1f779a3766 100644 --- a/arch/x86/include/asm/tsc.h +++ b/arch/x86/include/asm/tsc.h @@ -52,7 +52,6 @@ extern int notsc_setup(char *); extern void tsc_save_sched_clock_state(void); extern void tsc_restore_sched_clock_state(void); -/* MSR based TSC calibration for Intel Atom SoC platforms */ -unsigned long try_msr_calibrate_tsc(void); +unsigned long cpu_khz_from_msr(void); #endif /* _ASM_X86_TSC_H */ diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c index 38ba6de56ede..35a3976c19cc 100644 --- a/arch/x86/kernel/tsc.c +++ b/arch/x86/kernel/tsc.c @@ -674,10 +674,7 @@ unsigned long native_calibrate_tsc(void) unsigned long flags, latch, ms, fast_calibrate; int hpet = is_hpet_enabled(), i, loopmin; - /* Calibrate TSC using MSR for Intel Atom SoCs */ - local_irq_save(flags); - fast_calibrate = try_msr_calibrate_tsc(); - local_irq_restore(flags); + fast_calibrate = cpu_khz_from_msr(); if (fast_calibrate) return fast_calibrate; diff --git a/arch/x86/kernel/tsc_msr.c b/arch/x86/kernel/tsc_msr.c index 65b3d8cb8325..0fe720d64fef 100644 --- a/arch/x86/kernel/tsc_msr.c +++ b/arch/x86/kernel/tsc_msr.c @@ -68,7 +68,7 @@ static int match_cpu(u8 family, u8 model) * Set global "lapic_timer_frequency" to bus_clock_cycles/jiffy * Return processor base frequency in KHz, or 0 on failure. */ -unsigned long try_msr_calibrate_tsc(void) +unsigned long cpu_khz_from_msr(void) { u32 lo, hi, ratio, freq_id, freq; unsigned long res; -- cgit v1.2.3 From aa297292d708e89773b3b2cdcaf33f01bfa095d8 Mon Sep 17 00:00:00 2001 From: Len Brown Date: Fri, 17 Jun 2016 01:22:51 -0400 Subject: x86/tsc: Enumerate SKL cpu_khz and tsc_khz via CPUID Skylake CPU base-frequency and TSC frequency may differ by up to 2%. Enumerate CPU and TSC frequencies separately, allowing cpu_khz and tsc_khz to differ. The existing CPU frequency calibration mechanism is unchanged. 
However, CPUID extensions are preferred, when available. CPUID.0x16 is preferred over MSR and timer calibration for CPU frequency discovery. CPUID.0x15 takes precedence over CPU-frequency for TSC frequency discovery. Signed-off-by: Len Brown Reviewed-by: Thomas Gleixner Cc: Linus Torvalds Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/b27ec289fd005833b27d694d9c2dbb716c5cdff7.1466138954.git.len.brown@intel.com Signed-off-by: Ingo Molnar --- arch/x86/include/asm/tsc.h | 1 + arch/x86/include/asm/x86_init.h | 4 ++- arch/x86/kernel/tsc.c | 75 +++++++++++++++++++++++++++++++++++++---- arch/x86/kernel/x86_init.c | 1 + 4 files changed, 73 insertions(+), 8 deletions(-) (limited to 'arch') diff --git a/arch/x86/include/asm/tsc.h b/arch/x86/include/asm/tsc.h index db1f779a3766..a30591e1567c 100644 --- a/arch/x86/include/asm/tsc.h +++ b/arch/x86/include/asm/tsc.h @@ -36,6 +36,7 @@ extern void mark_tsc_unstable(char *reason); extern int unsynchronized_tsc(void); extern int check_tsc_unstable(void); extern int check_tsc_disabled(void); +extern unsigned long native_calibrate_cpu(void); extern unsigned long native_calibrate_tsc(void); extern unsigned long long native_sched_clock_from_tsc(u64 tsc); diff --git a/arch/x86/include/asm/x86_init.h b/arch/x86/include/asm/x86_init.h index 4dcdf74dfed8..08a08a800e17 100644 --- a/arch/x86/include/asm/x86_init.h +++ b/arch/x86/include/asm/x86_init.h @@ -181,7 +181,8 @@ struct x86_legacy_features { /** * struct x86_platform_ops - platform specific runtime functions - * @calibrate_tsc: calibrate TSC + * @calibrate_cpu: calibrate CPU + * @calibrate_tsc: calibrate TSC, if different from CPU * @get_wallclock: get time from HW clock like RTC etc. * @set_wallclock: set time back to HW clock * @is_untracked_pat_range exclude from PAT logic @@ -200,6 +201,7 @@ struct x86_legacy_features { * semantics. */ struct x86_platform_ops { + unsigned long (*calibrate_cpu)(void); unsigned long (*calibrate_tsc)(void); void (*get_wallclock)(struct timespec *ts); int (*set_wallclock)(const struct timespec *ts); diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c index 35a3976c19cc..e1496b79c28a 100644 --- a/arch/x86/kernel/tsc.c +++ b/arch/x86/kernel/tsc.c @@ -239,7 +239,7 @@ static inline unsigned long long cycles_2_ns(unsigned long long cyc) return ns; } -static void set_cyc2ns_scale(unsigned long cpu_khz, int cpu) +static void set_cyc2ns_scale(unsigned long khz, int cpu) { unsigned long long tsc_now, ns_now; struct cyc2ns_data *data; @@ -248,7 +248,7 @@ static void set_cyc2ns_scale(unsigned long cpu_khz, int cpu) local_irq_save(flags); sched_clock_idle_sleep_event(); - if (!cpu_khz) + if (!khz) goto done; data = cyc2ns_write_begin(cpu); @@ -261,7 +261,7 @@ static void set_cyc2ns_scale(unsigned long cpu_khz, int cpu) * time function is continuous; see the comment near struct * cyc2ns_data. */ - clocks_calc_mult_shift(&data->cyc2ns_mul, &data->cyc2ns_shift, cpu_khz, + clocks_calc_mult_shift(&data->cyc2ns_mul, &data->cyc2ns_shift, khz, NSEC_PER_MSEC, 0); /* @@ -665,15 +665,72 @@ success: } /** - * native_calibrate_tsc - calibrate the tsc on boot + * native_calibrate_tsc + * Determine TSC frequency via CPUID, else return 0. 
*/ unsigned long native_calibrate_tsc(void) +{ + unsigned int eax_denominator, ebx_numerator, ecx_hz, edx; + unsigned int crystal_khz; + + if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL) + return 0; + + if (boot_cpu_data.cpuid_level < 0x15) + return 0; + + eax_denominator = ebx_numerator = ecx_hz = edx = 0; + + /* CPUID 15H TSC/Crystal ratio, plus optionally Crystal Hz */ + cpuid(0x15, &eax_denominator, &ebx_numerator, &ecx_hz, &edx); + + if (ebx_numerator == 0 || eax_denominator == 0) + return 0; + + crystal_khz = ecx_hz / 1000; + + if (crystal_khz == 0) { + switch (boot_cpu_data.x86_model) { + case 0x4E: /* SKL */ + case 0x5E: /* SKL */ + crystal_khz = 24000; /* 24 MHz */ + } + } + + return crystal_khz * ebx_numerator / eax_denominator; +} + +static unsigned long cpu_khz_from_cpuid(void) +{ + unsigned int eax_base_mhz, ebx_max_mhz, ecx_bus_mhz, edx; + + if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL) + return 0; + + if (boot_cpu_data.cpuid_level < 0x16) + return 0; + + eax_base_mhz = ebx_max_mhz = ecx_bus_mhz = edx = 0; + + cpuid(0x16, &eax_base_mhz, &ebx_max_mhz, &ecx_bus_mhz, &edx); + + return eax_base_mhz * 1000; +} + +/** + * native_calibrate_cpu - calibrate the cpu on boot + */ +unsigned long native_calibrate_cpu(void) { u64 tsc1, tsc2, delta, ref1, ref2; unsigned long tsc_pit_min = ULONG_MAX, tsc_ref_min = ULONG_MAX; unsigned long flags, latch, ms, fast_calibrate; int hpet = is_hpet_enabled(), i, loopmin; + fast_calibrate = cpu_khz_from_cpuid(); + if (fast_calibrate) + return fast_calibrate; + fast_calibrate = cpu_khz_from_msr(); if (fast_calibrate) return fast_calibrate; @@ -834,8 +891,10 @@ int recalibrate_cpu_khz(void) if (!boot_cpu_has(X86_FEATURE_TSC)) return -ENODEV; + cpu_khz = x86_platform.calibrate_cpu(); tsc_khz = x86_platform.calibrate_tsc(); - cpu_khz = tsc_khz; + if (tsc_khz == 0) + tsc_khz = cpu_khz; cpu_data(0).loops_per_jiffy = cpufreq_scale(cpu_data(0).loops_per_jiffy, cpu_khz_old, cpu_khz); @@ -1241,8 +1300,10 @@ void __init tsc_init(void) return; } + cpu_khz = x86_platform.calibrate_cpu(); tsc_khz = x86_platform.calibrate_tsc(); - cpu_khz = tsc_khz; + if (tsc_khz == 0) + tsc_khz = cpu_khz; if (!tsc_khz) { mark_tsc_unstable("could not calculate TSC khz"); @@ -1262,7 +1323,7 @@ void __init tsc_init(void) */ for_each_possible_cpu(cpu) { cyc2ns_init(cpu); - set_cyc2ns_scale(cpu_khz, cpu); + set_cyc2ns_scale(tsc_khz, cpu); } if (tsc_disabled > 0) diff --git a/arch/x86/kernel/x86_init.c b/arch/x86/kernel/x86_init.c index dad5fe9633a3..58b459296e13 100644 --- a/arch/x86/kernel/x86_init.c +++ b/arch/x86/kernel/x86_init.c @@ -92,6 +92,7 @@ static void default_nmi_init(void) { }; static int default_i8042_detect(void) { return 1; }; struct x86_platform_ops x86_platform = { + .calibrate_cpu = native_calibrate_cpu, .calibrate_tsc = native_calibrate_tsc, .get_wallclock = mach_get_cmos_time, .set_wallclock = mach_set_rtc_mmss, -- cgit v1.2.3 From ff4c86635ee12461fd3bd911d7d5253394da8f9d Mon Sep 17 00:00:00 2001 From: Len Brown Date: Fri, 17 Jun 2016 01:22:52 -0400 Subject: x86/tsc: Enumerate BXT tsc_khz via CPUID Hard code the BXT crystal clock (aka ART - Always Running Timer) to 19.200 MHz, and use CPUID leaf 0x15 to determine the BXT TSC frequency. Use tsc_khz to sanity check BXT cpu_khz, which can be erroneous in some configurations. (I simplified the original patch from Bin Gao.) 
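A rough standalone sketch of the CPUID.15H arithmetic this series relies on (user-space C for illustration only, not the kernel code; the SKL/BXT crystal fallback values simply mirror the table in the patches above):

#include <stdio.h>
#include <cpuid.h>

/* tsc_khz = crystal_khz * CPUID.15H:EBX / CPUID.15H:EAX, or 0 if unknown */
static unsigned long tsc_khz_from_cpuid_15h(unsigned int model)
{
	unsigned int eax_denominator = 0, ebx_numerator = 0, ecx_hz = 0, edx = 0;
	unsigned int crystal_khz;

	if (!__get_cpuid(0x15, &eax_denominator, &ebx_numerator, &ecx_hz, &edx))
		return 0;				/* leaf 0x15 not supported */
	if (!eax_denominator || !ebx_numerator)
		return 0;				/* ratio not enumerated */

	crystal_khz = ecx_hz / 1000;
	if (!crystal_khz) {
		switch (model) {			/* hard-coded fallbacks from the patches */
		case 0x4E:
		case 0x5E:
			crystal_khz = 24000;		/* SKL: 24.0 MHz */
			break;
		case 0x5C:
			crystal_khz = 19200;		/* BXT: 19.2 MHz */
			break;
		}
	}
	return (unsigned long)crystal_khz * ebx_numerator / eax_denominator;
}

int main(void)
{
	/* 0x5E (Skylake) is just an example model number supplied by the caller */
	printf("TSC ~ %lu kHz\n", tsc_khz_from_cpuid_15h(0x5E));
	return 0;
}
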
Original-From: Bin Gao Signed-off-by: Len Brown Reviewed-by: Thomas Gleixner Cc: Linus Torvalds Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/bf4e7c175acd6d09719c47c319b10ff1f0627ff8.1466138954.git.len.brown@intel.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/tsc.c | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c index e1496b79c28a..2a952fcb1516 100644 --- a/arch/x86/kernel/tsc.c +++ b/arch/x86/kernel/tsc.c @@ -693,7 +693,11 @@ unsigned long native_calibrate_tsc(void) switch (boot_cpu_data.x86_model) { case 0x4E: /* SKL */ case 0x5E: /* SKL */ - crystal_khz = 24000; /* 24 MHz */ + crystal_khz = 24000; /* 24.0 MHz */ + break; + case 0x5C: /* BXT */ + crystal_khz = 19200; /* 19.2 MHz */ + break; } } @@ -895,6 +899,8 @@ int recalibrate_cpu_khz(void) tsc_khz = x86_platform.calibrate_tsc(); if (tsc_khz == 0) tsc_khz = cpu_khz; + else if (abs(cpu_khz - tsc_khz) * 10 > tsc_khz) + cpu_khz = tsc_khz; cpu_data(0).loops_per_jiffy = cpufreq_scale(cpu_data(0).loops_per_jiffy, cpu_khz_old, cpu_khz); @@ -1302,8 +1308,16 @@ void __init tsc_init(void) cpu_khz = x86_platform.calibrate_cpu(); tsc_khz = x86_platform.calibrate_tsc(); + + /* + * Trust non-zero tsc_khz as authorative, + * and use it to sanity check cpu_khz, + * which will be off if system timer is off. + */ if (tsc_khz == 0) tsc_khz = cpu_khz; + else if (abs(cpu_khz - tsc_khz) * 10 > tsc_khz) + cpu_khz = tsc_khz; if (!tsc_khz) { mark_tsc_unstable("could not calculate TSC khz"); -- cgit v1.2.3 From f2d3adf46d5763e7154e303e972c891999a4da43 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 12 Jul 2016 10:54:45 -0300 Subject: kvm arm/arm64: Remove trailing whitespace from headers Noticed while making a copy of these files to tools/ where those kernel files were being directly accessed, which we're not allowing anymore to avoid that changes in the kernel side break tooling. Cc: Christoffer Dall Cc: Eric Auger Cc: Marc Zyngier Cc: Adrian Hunter Cc: David Ahern Cc: Jiri Olsa Cc: Namhyung Kim Cc: Wang Nan Link: http://lkml.kernel.org/n/tip-82thftcdhj2j5wt6ir4vuyhk@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- arch/arm/include/uapi/asm/kvm.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/arm/include/uapi/asm/kvm.h b/arch/arm/include/uapi/asm/kvm.h index df3f60cb1168..a2b3eb313a25 100644 --- a/arch/arm/include/uapi/asm/kvm.h +++ b/arch/arm/include/uapi/asm/kvm.h @@ -139,8 +139,8 @@ struct kvm_arch_memory_slot { #define ARM_CP15_REG64(...) __ARM_CP15_REG64(__VA_ARGS__) #define KVM_REG_ARM_TIMER_CTL ARM_CP15_REG32(0, 14, 3, 1) -#define KVM_REG_ARM_TIMER_CNT ARM_CP15_REG64(1, 14) -#define KVM_REG_ARM_TIMER_CVAL ARM_CP15_REG64(3, 14) +#define KVM_REG_ARM_TIMER_CNT ARM_CP15_REG64(1, 14) +#define KVM_REG_ARM_TIMER_CVAL ARM_CP15_REG64(3, 14) /* Normal registers are mapped as coprocessor 16. */ #define KVM_REG_ARM_CORE (0x0010 << KVM_REG_ARM_COPROC_SHIFT) -- cgit v1.2.3 From 707a605b5a1732e548f4ff51ccf0199a14d95f0f Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Tue, 12 Jul 2016 14:04:22 +0300 Subject: x86/pci: Use MRFLD abbreviation for Merrifield Everywhere in the kernel the MRFLD is used as abbreviation of Intel Merrifield. Do the same in intel_mid_pci.c module. 
Signed-off-by: Andy Shevchenko Cc: Bjorn Helgaas Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1468321462-136016-1-git-send-email-andriy.shevchenko@linux.intel.com Signed-off-by: Ingo Molnar --- arch/x86/pci/intel_mid_pci.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'arch') diff --git a/arch/x86/pci/intel_mid_pci.c b/arch/x86/pci/intel_mid_pci.c index 5413d6a9817c..5a18aedcb341 100644 --- a/arch/x86/pci/intel_mid_pci.c +++ b/arch/x86/pci/intel_mid_pci.c @@ -36,8 +36,8 @@ #define PCIE_CAP_OFFSET 0x100 /* Quirks for the listed devices */ -#define PCI_DEVICE_ID_INTEL_MRFL_MMC 0x1190 -#define PCI_DEVICE_ID_INTEL_MRFL_HSU 0x1191 +#define PCI_DEVICE_ID_INTEL_MRFLD_MMC 0x1190 +#define PCI_DEVICE_ID_INTEL_MRFLD_HSU 0x1191 /* Fixed BAR fields */ #define PCIE_VNDR_CAP_ID_FIXED_BAR 0x00 /* Fixed BAR (TBD) */ @@ -229,7 +229,7 @@ static int intel_mid_pci_irq_enable(struct pci_dev *dev) * Skip HS UART common registers device since it has * IRQ0 assigned and not used by the kernel. */ - if (dev->device == PCI_DEVICE_ID_INTEL_MRFL_HSU) + if (dev->device == PCI_DEVICE_ID_INTEL_MRFLD_HSU) return -EBUSY; /* * TNG has IRQ0 assigned to eMMC controller. But there @@ -238,7 +238,7 @@ static int intel_mid_pci_irq_enable(struct pci_dev *dev) * eMMC gets it. The rest of devices still could be * enabled without interrupt line being allocated. */ - if (dev->device != PCI_DEVICE_ID_INTEL_MRFL_MMC) + if (dev->device != PCI_DEVICE_ID_INTEL_MRFLD_MMC) return 0; } break; -- cgit v1.2.3 From 05f310e26fe9d97fec0ce1752edc16bf1ea55a2d Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Tue, 12 Jul 2016 14:16:32 +0300 Subject: x86/sfi: Enable enumeration of SD devices SFI specification v0.8.2 defines type of devices which are connected to SD bus. In particularly WiFi dongle is a such. Add a callback to enumerate the devices connected to SD bus. Signed-off-by: Andy Shevchenko Cc: Bjorn Helgaas Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1468322192-62080-1-git-send-email-andriy.shevchenko@linux.intel.com Signed-off-by: Ingo Molnar --- arch/x86/include/asm/intel-mid.h | 15 +++++++++++++++ arch/x86/platform/intel-mid/sfi.c | 29 +++++++++++++++++++++++++++++ include/linux/sfi.h | 1 + 3 files changed, 45 insertions(+) (limited to 'arch') diff --git a/arch/x86/include/asm/intel-mid.h b/arch/x86/include/asm/intel-mid.h index 59013a2ac713..9d6b097aa73d 100644 --- a/arch/x86/include/asm/intel-mid.h +++ b/arch/x86/include/asm/intel-mid.h @@ -49,6 +49,21 @@ struct devs_id { static const struct devs_id *const __intel_mid_sfi_##i##_dev __used \ __attribute__((__section__(".x86_intel_mid_dev.init"))) = &i +/** +* struct mid_sd_board_info - template for SD device creation +* @name: identifies the driver +* @bus_num: board-specific identifier for a given SD controller +* @max_clk: the maximum frequency device supports +* @platform_data: the particular data stored there is driver-specific +*/ +struct mid_sd_board_info { + char name[SFI_NAME_LEN]; + int bus_num; + unsigned short addr; + u32 max_clk; + void *platform_data; +}; + /* * Medfield is the follow-up of Moorestown, it combines two chip solution into * one. 
Other than that it also added always-on and constant tsc and lapic diff --git a/arch/x86/platform/intel-mid/sfi.c b/arch/x86/platform/intel-mid/sfi.c index 5ee360a951ce..1555672d436f 100644 --- a/arch/x86/platform/intel-mid/sfi.c +++ b/arch/x86/platform/intel-mid/sfi.c @@ -407,6 +407,32 @@ static void __init sfi_handle_i2c_dev(struct sfi_device_table_entry *pentry, i2c_register_board_info(pentry->host_num, &i2c_info, 1); } +static void __init sfi_handle_sd_dev(struct sfi_device_table_entry *pentry, + struct devs_id *dev) +{ + struct mid_sd_board_info sd_info; + void *pdata; + + memset(&sd_info, 0, sizeof(sd_info)); + strncpy(sd_info.name, pentry->name, SFI_NAME_LEN); + sd_info.bus_num = pentry->host_num; + sd_info.max_clk = pentry->max_freq; + sd_info.addr = pentry->addr; + pr_debug("SD bus = %d, name = %16.16s, max_clk = %d, addr = 0x%x\n", + sd_info.bus_num, + sd_info.name, + sd_info.max_clk, + sd_info.addr); + pdata = intel_mid_sfi_get_pdata(dev, &sd_info); + if (IS_ERR(pdata)) + return; + + /* Nothing we can do with this for now */ + sd_info.platform_data = pdata; + + pr_debug("Successfully registered %16.16s", sd_info.name); +} + extern struct devs_id *const __x86_intel_mid_dev_start[], *const __x86_intel_mid_dev_end[]; @@ -490,6 +516,9 @@ static int __init sfi_parse_devs(struct sfi_table_header *table) case SFI_DEV_TYPE_I2C: sfi_handle_i2c_dev(pentry, dev); break; + case SFI_DEV_TYPE_SD: + sfi_handle_sd_dev(pentry, dev); + break; case SFI_DEV_TYPE_UART: case SFI_DEV_TYPE_HSI: default: diff --git a/include/linux/sfi.h b/include/linux/sfi.h index d9b436f09925..e0e1597ef9e6 100644 --- a/include/linux/sfi.h +++ b/include/linux/sfi.h @@ -156,6 +156,7 @@ struct sfi_device_table_entry { #define SFI_DEV_TYPE_UART 2 #define SFI_DEV_TYPE_HSI 3 #define SFI_DEV_TYPE_IPC 4 +#define SFI_DEV_TYPE_SD 5 u8 host_num; /* attached to host 0, 1...*/ u16 addr; -- cgit v1.2.3 From 00839ee3b299303c6a5e26a0a2485427a3afcbbf Mon Sep 17 00:00:00 2001 From: Dave Hansen Date: Thu, 7 Jul 2016 17:19:11 -0700 Subject: x86/mm: Move swap offset/type up in PTE to work around erratum This erratum can result in Accessed/Dirty getting set by the hardware when we do not expect them to be (on !Present PTEs). Instead of trying to fix them up after this happens, we just allow the bits to get set and try to ignore them. We do this by shifting the layout of the bits we use for swap offset/type in our 64-bit PTEs. It looks like this: bitnrs: | ... | 11| 10| 9|8|7|6|5| 4| 3|2|1|0| names: | ... |SW3|SW2|SW1|G|L|D|A|CD|WT|U|W|P| before: | OFFSET (9-63) |0|X|X| TYPE(1-5) |0| after: | OFFSET (14-63) | TYPE (9-13) |0|X|X|X| X| X|X|X|0| Note that D was already a don't care (X) even before. We just move TYPE up and turn its old spot (which could be hit by the A bit) into all don't cares. We take 5 bits away from the offset, but that still leaves us with 50 bits which lets us index into a 62-bit swapfile (4 EiB). I think that's probably fine for the moment. We could theoretically reclaim 5 of the bits (1, 2, 3, 4, 7) but it doesn't gain us anything. Signed-off-by: Dave Hansen Cc: Andrew Morton Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Dave Hansen Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Josh Poimboeuf Cc: Linus Torvalds Cc: Luis R. 
Rodriguez Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Toshi Kani Cc: dave.hansen@intel.com Cc: linux-mm@kvack.org Cc: mhocko@suse.com Link: http://lkml.kernel.org/r/20160708001911.9A3FD2B6@viggo.jf.intel.com Signed-off-by: Ingo Molnar --- arch/x86/include/asm/pgtable_64.h | 26 ++++++++++++++++++++------ 1 file changed, 20 insertions(+), 6 deletions(-) (limited to 'arch') diff --git a/arch/x86/include/asm/pgtable_64.h b/arch/x86/include/asm/pgtable_64.h index 2ee781114d34..7e8ec7ae10fa 100644 --- a/arch/x86/include/asm/pgtable_64.h +++ b/arch/x86/include/asm/pgtable_64.h @@ -140,18 +140,32 @@ static inline int pgd_large(pgd_t pgd) { return 0; } #define pte_offset_map(dir, address) pte_offset_kernel((dir), (address)) #define pte_unmap(pte) ((void)(pte))/* NOP */ -/* Encode and de-code a swap entry */ +/* + * Encode and de-code a swap entry + * + * | ... | 11| 10| 9|8|7|6|5| 4| 3|2|1|0| <- bit number + * | ... |SW3|SW2|SW1|G|L|D|A|CD|WT|U|W|P| <- bit names + * | OFFSET (14->63) | TYPE (10-13) |0|X|X|X| X| X|X|X|0| <- swp entry + * + * G (8) is aliased and used as a PROT_NONE indicator for + * !present ptes. We need to start storing swap entries above + * there. We also need to avoid using A and D because of an + * erratum where they can be incorrectly set by hardware on + * non-present PTEs. + */ +#define SWP_TYPE_FIRST_BIT (_PAGE_BIT_PROTNONE + 1) #define SWP_TYPE_BITS 5 -#define SWP_OFFSET_SHIFT (_PAGE_BIT_PROTNONE + 1) +/* Place the offset above the type: */ +#define SWP_OFFSET_FIRST_BIT (SWP_TYPE_FIRST_BIT + SWP_TYPE_BITS + 1) #define MAX_SWAPFILES_CHECK() BUILD_BUG_ON(MAX_SWAPFILES_SHIFT > SWP_TYPE_BITS) -#define __swp_type(x) (((x).val >> (_PAGE_BIT_PRESENT + 1)) \ +#define __swp_type(x) (((x).val >> (SWP_TYPE_FIRST_BIT)) \ & ((1U << SWP_TYPE_BITS) - 1)) -#define __swp_offset(x) ((x).val >> SWP_OFFSET_SHIFT) +#define __swp_offset(x) ((x).val >> SWP_OFFSET_FIRST_BIT) #define __swp_entry(type, offset) ((swp_entry_t) { \ - ((type) << (_PAGE_BIT_PRESENT + 1)) \ - | ((offset) << SWP_OFFSET_SHIFT) }) + ((type) << (SWP_TYPE_FIRST_BIT)) \ + | ((offset) << SWP_OFFSET_FIRST_BIT) }) #define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val((pte)) }) #define __swp_entry_to_pte(x) ((pte_t) { .pte = (x).val }) -- cgit v1.2.3 From 97e3c602ccbdd7db54e92fe05675c664c052a466 Mon Sep 17 00:00:00 2001 From: Dave Hansen Date: Thu, 7 Jul 2016 17:19:12 -0700 Subject: x86/mm: Ignore A/D bits in pte/pmd/pud_none() The erratum we are fixing here can lead to stray setting of the A and D bits. That means that a pte that we cleared might suddenly have A/D set. So, stop considering those bits when determining if a pte is pte_none(). The same goes for the other pmd_none() and pud_none(). pgd_none() can be skipped because it is not affected; we do not use PGD entries for anything other than pagetables on affected configurations. This adds a tiny amount of overhead to all pte_none() checks. I doubt we'll be able to measure it anywhere. Signed-off-by: Dave Hansen Cc: Andrew Morton Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Dave Hansen Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Josh Poimboeuf Cc: Linus Torvalds Cc: Luis R. 
Rodriguez Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Toshi Kani Cc: dave.hansen@intel.com Cc: linux-mm@kvack.org Cc: mhocko@suse.com Link: http://lkml.kernel.org/r/20160708001912.5216F89C@viggo.jf.intel.com Signed-off-by: Ingo Molnar --- arch/x86/include/asm/pgtable.h | 13 ++++++++++--- arch/x86/include/asm/pgtable_types.h | 6 ++++++ 2 files changed, 16 insertions(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h index 1a27396b6ea0..2815d268af8b 100644 --- a/arch/x86/include/asm/pgtable.h +++ b/arch/x86/include/asm/pgtable.h @@ -480,7 +480,7 @@ pte_t *populate_extra_pte(unsigned long vaddr); static inline int pte_none(pte_t pte) { - return !pte.pte; + return !(pte.pte & ~(_PAGE_KNL_ERRATUM_MASK)); } #define __HAVE_ARCH_PTE_SAME @@ -552,7 +552,8 @@ static inline int pmd_none(pmd_t pmd) { /* Only check low word on 32-bit platforms, since it might be out of sync with upper half. */ - return (unsigned long)native_pmd_val(pmd) == 0; + unsigned long val = native_pmd_val(pmd); + return (val & ~_PAGE_KNL_ERRATUM_MASK) == 0; } static inline unsigned long pmd_page_vaddr(pmd_t pmd) @@ -616,7 +617,7 @@ static inline unsigned long pages_to_mb(unsigned long npg) #if CONFIG_PGTABLE_LEVELS > 2 static inline int pud_none(pud_t pud) { - return native_pud_val(pud) == 0; + return (native_pud_val(pud) & ~(_PAGE_KNL_ERRATUM_MASK)) == 0; } static inline int pud_present(pud_t pud) @@ -694,6 +695,12 @@ static inline int pgd_bad(pgd_t pgd) static inline int pgd_none(pgd_t pgd) { + /* + * There is no need to do a workaround for the KNL stray + * A/D bit erratum here. PGDs only point to page tables + * except on 32-bit non-PAE which is not supported on + * KNL. + */ return !native_pgd_val(pgd); } #endif /* CONFIG_PGTABLE_LEVELS > 3 */ diff --git a/arch/x86/include/asm/pgtable_types.h b/arch/x86/include/asm/pgtable_types.h index 7b5efe264eff..d14d0a55322a 100644 --- a/arch/x86/include/asm/pgtable_types.h +++ b/arch/x86/include/asm/pgtable_types.h @@ -70,6 +70,12 @@ _PAGE_PKEY_BIT2 | \ _PAGE_PKEY_BIT3) +#if defined(CONFIG_X86_64) || defined(CONFIG_X86_PAE) +#define _PAGE_KNL_ERRATUM_MASK (_PAGE_DIRTY | _PAGE_ACCESSED) +#else +#define _PAGE_KNL_ERRATUM_MASK 0 +#endif + #ifdef CONFIG_KMEMCHECK #define _PAGE_HIDDEN (_AT(pteval_t, 1) << _PAGE_BIT_HIDDEN) #else -- cgit v1.2.3 From e4a84be6f05eab4778732d799f63b3cd15427885 Mon Sep 17 00:00:00 2001 From: Dave Hansen Date: Thu, 7 Jul 2016 17:19:14 -0700 Subject: x86/mm: Disallow running with 32-bit PTEs to work around erratum The Intel(R) Xeon Phi(TM) Processor x200 Family (codename: Knights Landing) has an erratum where a processor thread setting the Accessed or Dirty bits may not do so atomically against its checks for the Present bit. This may cause a thread (which is about to page fault) to set A and/or D, even though the Present bit had already been atomically cleared. These bits are truly "stray". In the case of the Dirty bit, the thread associated with the stray set was *not* allowed to write to the page. This means that we do not have to launder the bit(s); we can simply ignore them. If the PTE is used for storing a swap index or a NUMA migration index, the A bit could be misinterpreted as part of the swap type. The stray bits being set cause a software-cleared PTE to be interpreted as a swap entry. In some cases (like when the swap index ends up being for a non-existent swapfile), the kernel detects the stray value and WARN()s about it, but there is no guarantee that the kernel can always detect it. 
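A toy illustration of the layout point made above, using the bit positions quoted in the swap-offset patch earlier in this series (standalone user-space arithmetic, not kernel code): the Accessed bit is PTE bit 5, which sat inside the old TYPE(1-5) field, so a stray A bit silently changes the decoded swap type; the new macros start the type at bit 9, out of reach of A/D.

#include <stdio.h>
#include <stdint.h>

#define _PAGE_ACCESSED	(1ULL << 5)
#define _PAGE_DIRTY	(1ULL << 6)

#define OLD_SWP_TYPE(v)	(((v) >> 1) & 0x1f)	/* old layout: type in bits 1-5 */
#define NEW_SWP_TYPE(v)	(((v) >> 9) & 0x1f)	/* new layout: type in bits 9-13, per the new macros */

int main(void)
{
	uint64_t old_entry = (1ULL << 1) | (0x1234ULL << 9);	/* old encoding: type 1, offset at bit 9 */
	uint64_t new_entry = (1ULL << 9) | (0x1234ULL << 15);	/* new encoding: type 1, offset above the type */

	printf("old layout: type %llu -> %llu once A/D are stray-set\n",
	       (unsigned long long)OLD_SWP_TYPE(old_entry),
	       (unsigned long long)OLD_SWP_TYPE(old_entry | _PAGE_ACCESSED | _PAGE_DIRTY));
	printf("new layout: type %llu -> %llu once A/D are stray-set\n",
	       (unsigned long long)NEW_SWP_TYPE(new_entry),
	       (unsigned long long)NEW_SWP_TYPE(new_entry | _PAGE_ACCESSED | _PAGE_DIRTY));
	return 0;
}
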
When we have 64-bit PTEs (64-bit mode or 32-bit PAE), we were able to move the swap PTE format around to avoid these troublesome bits. But, 32-bit non-PAE is tight on bits. So, disallow it from running on this hardware. I can't imagine anyone wanting to run 32-bit non-highmem kernels on this hardware, but disallowing them from running entirely is surely the safe thing to do. Signed-off-by: Dave Hansen Cc: Andrew Morton Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Dave Hansen Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Josh Poimboeuf Cc: Linus Torvalds Cc: Luis R. Rodriguez Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Toshi Kani Cc: dave.hansen@intel.com Cc: linux-mm@kvack.org Cc: mhocko@suse.com Link: http://lkml.kernel.org/r/20160708001914.D0B50110@viggo.jf.intel.com Signed-off-by: Ingo Molnar --- arch/x86/boot/boot.h | 1 + arch/x86/boot/cpu.c | 2 ++ arch/x86/boot/cpucheck.c | 33 +++++++++++++++++++++++++++++++++ arch/x86/boot/cpuflags.c | 1 + arch/x86/boot/cpuflags.h | 1 + 5 files changed, 38 insertions(+) (limited to 'arch') diff --git a/arch/x86/boot/boot.h b/arch/x86/boot/boot.h index 9011a88353de..a5ce666a2423 100644 --- a/arch/x86/boot/boot.h +++ b/arch/x86/boot/boot.h @@ -294,6 +294,7 @@ static inline int cmdline_find_option_bool(const char *option) /* cpu.c, cpucheck.c */ int check_cpu(int *cpu_level_ptr, int *req_level_ptr, u32 **err_flags_ptr); +int check_knl_erratum(void); int validate_cpu(void); /* early_serial_console.c */ diff --git a/arch/x86/boot/cpu.c b/arch/x86/boot/cpu.c index 29207f69ae8c..26240dde081e 100644 --- a/arch/x86/boot/cpu.c +++ b/arch/x86/boot/cpu.c @@ -93,6 +93,8 @@ int validate_cpu(void) show_cap_strs(err_flags); putchar('\n'); return -1; + } else if (check_knl_erratum()) { + return -1; } else { return 0; } diff --git a/arch/x86/boot/cpucheck.c b/arch/x86/boot/cpucheck.c index 1fd7d575092e..4ad7d70e8739 100644 --- a/arch/x86/boot/cpucheck.c +++ b/arch/x86/boot/cpucheck.c @@ -24,6 +24,7 @@ # include "boot.h" #endif #include +#include #include #include #include @@ -175,6 +176,8 @@ int check_cpu(int *cpu_level_ptr, int *req_level_ptr, u32 **err_flags_ptr) puts("WARNING: PAE disabled. Use parameter 'forcepae' to enable at your own risk!\n"); } } + if (!err) + err = check_knl_erratum(); if (err_flags_ptr) *err_flags_ptr = err ? err_flags : NULL; @@ -185,3 +188,33 @@ int check_cpu(int *cpu_level_ptr, int *req_level_ptr, u32 **err_flags_ptr) return (cpu.level < req_level || err) ? -1 : 0; } + +int check_knl_erratum(void) +{ + /* + * First check for the affected model/family: + */ + if (!is_intel() || + cpu.family != 6 || + cpu.model != INTEL_FAM6_XEON_PHI_KNL) + return 0; + + /* + * This erratum affects the Accessed/Dirty bits, and can + * cause stray bits to be set in !Present PTEs. We have + * enough bits in our 64-bit PTEs (which we have on real + * 64-bit mode or PAE) to avoid using these troublesome + * bits. But, we do not have enough space in our 32-bit + * PTEs. So, refuse to run on 32-bit non-PAE kernels. + */ + if (IS_ENABLED(CONFIG_X86_64) || IS_ENABLED(CONFIG_X86_PAE)) + return 0; + + puts("This 32-bit kernel can not run on this Xeon Phi x200\n" + "processor due to a processor erratum. 
Use a 64-bit\n" + "kernel, or enable PAE in this 32-bit kernel.\n\n"); + + return -1; +} + + diff --git a/arch/x86/boot/cpuflags.c b/arch/x86/boot/cpuflags.c index 431fa5f84537..6687ab953257 100644 --- a/arch/x86/boot/cpuflags.c +++ b/arch/x86/boot/cpuflags.c @@ -102,6 +102,7 @@ void get_cpuflags(void) cpuid(0x1, &tfms, &ignored, &cpu.flags[4], &cpu.flags[0]); cpu.level = (tfms >> 8) & 15; + cpu.family = cpu.level; cpu.model = (tfms >> 4) & 15; if (cpu.level >= 6) cpu.model += ((tfms >> 16) & 0xf) << 4; diff --git a/arch/x86/boot/cpuflags.h b/arch/x86/boot/cpuflags.h index 4cb404fd45ce..15ad56a3f905 100644 --- a/arch/x86/boot/cpuflags.h +++ b/arch/x86/boot/cpuflags.h @@ -6,6 +6,7 @@ struct cpu_features { int level; /* Family, or 64 for x86-64 */ + int family; /* Family, always */ int model; u32 flags[NCAPINTS]; }; -- cgit v1.2.3 From dcb32d9913b7ed527b135a7e221f8d14b67bb952 Mon Sep 17 00:00:00 2001 From: Dave Hansen Date: Thu, 7 Jul 2016 17:19:15 -0700 Subject: x86/mm: Use pte_none() to test for empty PTE The page table manipulation code seems to have grown a couple of sites that are looking for empty PTEs. Just in case one of these entries got a stray bit set, use pte_none() instead of checking for a zero pte_val(). The use pte_same() makes me a bit nervous. If we were doing a pte_same() check against two cleared entries and one of them had a stray bit set, it might fail the pte_same() check. But, I don't think we ever _do_ pte_same() for cleared entries. It is almost entirely used for checking for races in fault-in paths. Signed-off-by: Dave Hansen Cc: Andrew Morton Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Dave Hansen Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Josh Poimboeuf Cc: Linus Torvalds Cc: Luis R. Rodriguez Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Toshi Kani Cc: dave.hansen@intel.com Cc: linux-mm@kvack.org Cc: mhocko@suse.com Link: http://lkml.kernel.org/r/20160708001915.813703D9@viggo.jf.intel.com Signed-off-by: Ingo Molnar --- arch/x86/mm/init_64.c | 12 ++++++------ arch/x86/mm/pageattr.c | 2 +- arch/x86/mm/pgtable_32.c | 2 +- 3 files changed, 8 insertions(+), 8 deletions(-) (limited to 'arch') diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index bce2e5d9edd4..bb88fbc0a288 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c @@ -354,7 +354,7 @@ phys_pte_init(pte_t *pte_page, unsigned long addr, unsigned long end, * pagetable pages as RO. So assume someone who pre-setup * these mappings are more intelligent. 
*/ - if (pte_val(*pte)) { + if (!pte_none(*pte)) { if (!after_bootmem) pages++; continue; @@ -396,7 +396,7 @@ phys_pmd_init(pmd_t *pmd_page, unsigned long address, unsigned long end, continue; } - if (pmd_val(*pmd)) { + if (!pmd_none(*pmd)) { if (!pmd_large(*pmd)) { spin_lock(&init_mm.page_table_lock); pte = (pte_t *)pmd_page_vaddr(*pmd); @@ -470,7 +470,7 @@ phys_pud_init(pud_t *pud_page, unsigned long addr, unsigned long end, continue; } - if (pud_val(*pud)) { + if (!pud_none(*pud)) { if (!pud_large(*pud)) { pmd = pmd_offset(pud, 0); last_map_addr = phys_pmd_init(pmd, addr, end, @@ -673,7 +673,7 @@ static void __meminit free_pte_table(pte_t *pte_start, pmd_t *pmd) for (i = 0; i < PTRS_PER_PTE; i++) { pte = pte_start + i; - if (pte_val(*pte)) + if (!pte_none(*pte)) return; } @@ -691,7 +691,7 @@ static void __meminit free_pmd_table(pmd_t *pmd_start, pud_t *pud) for (i = 0; i < PTRS_PER_PMD; i++) { pmd = pmd_start + i; - if (pmd_val(*pmd)) + if (!pmd_none(*pmd)) return; } @@ -710,7 +710,7 @@ static bool __meminit free_pud_table(pud_t *pud_start, pgd_t *pgd) for (i = 0; i < PTRS_PER_PUD; i++) { pud = pud_start + i; - if (pud_val(*pud)) + if (!pud_none(*pud)) return false; } diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c index 7a1f7bbf4105..75142159b0a5 100644 --- a/arch/x86/mm/pageattr.c +++ b/arch/x86/mm/pageattr.c @@ -1185,7 +1185,7 @@ repeat: return __cpa_process_fault(cpa, address, primary); old_pte = *kpte; - if (!pte_val(old_pte)) + if (pte_none(old_pte)) return __cpa_process_fault(cpa, address, primary); if (level == PG_LEVEL_4K) { diff --git a/arch/x86/mm/pgtable_32.c b/arch/x86/mm/pgtable_32.c index 75cc0978d45d..e67ae0e6c59d 100644 --- a/arch/x86/mm/pgtable_32.c +++ b/arch/x86/mm/pgtable_32.c @@ -47,7 +47,7 @@ void set_pte_vaddr(unsigned long vaddr, pte_t pteval) return; } pte = pte_offset_kernel(pmd, vaddr); - if (pte_val(pteval)) + if (!pte_none(pteval)) set_pte_at(&init_mm, vaddr, pte, pteval); else pte_clear(&init_mm, vaddr, pte); -- cgit v1.2.3 From 4d581259b7d44c8120a614b4e9244094c824d51f Mon Sep 17 00:00:00 2001 From: Alex Hung Date: Thu, 14 Jul 2016 18:05:56 +0800 Subject: x86/reboot: Add Dell Optiplex 7450 AIO reboot quirk Dell Optiplex 7450 AIO works with BOOT_ACPI; however, the quirk for "OptiPlex 745" changes its boot method to BOOT_BIOS and causes 7450 AIO hangs when rebooting; as a result, 7450 AIO is appended to overwrite BOOT_BIOS by BOOT_ACPI in order not to break the original 745 series Signed-off-by: Alex Hung Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- arch/x86/kernel/reboot.c | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) (limited to 'arch') diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c index a9b31eb815f2..15ed70f8278b 100644 --- a/arch/x86/kernel/reboot.c +++ b/arch/x86/kernel/reboot.c @@ -54,6 +54,19 @@ bool port_cf9_safe = false; * Dell Inc. so their systems "just work". :-) */ +/* + * Some machines require the "reboot=a" commandline options + */ +static int __init set_acpi_reboot(const struct dmi_system_id *d) +{ + if (reboot_type != BOOT_ACPI) { + reboot_type = BOOT_ACPI; + pr_info("%s series board detected. Selecting %s-method for reboots.\n", + d->ident, "ACPI"); + } + return 0; +} + /* * Some machines require the "reboot=b" or "reboot=k" commandline options, * this quirk makes that automatic. 
@@ -395,6 +408,14 @@ static struct dmi_system_id __initdata reboot_dmi_table[] = { DMI_MATCH(DMI_PRODUCT_NAME, "Dell XPS710"), }, }, + { /* Handle problems with rebooting on Dell Optiplex 7450 AIO */ + .callback = set_acpi_reboot, + .ident = "Dell OptiPlex 7450 AIO", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."), + DMI_MATCH(DMI_PRODUCT_NAME, "OptiPlex 7450 AIO"), + }, + }, /* Hewlett-Packard */ { /* Handle problems with rebooting on HP laptops */ -- cgit v1.2.3 From 2ba78056acfe8d63a29565f91dae4678ed6b81ca Mon Sep 17 00:00:00 2001 From: Dmitry Vyukov Date: Thu, 14 Jul 2016 12:06:53 -0700 Subject: kasan: add newline to messages Currently GPF messages with KASAN look as follows: kasan: GPF could be caused by NULL-ptr deref or user memory accessgeneral protection fault: 0000 [#1] SMP DEBUG_PAGEALLOC KASAN Add newlines. Link: http://lkml.kernel.org/r/1467294357-98002-1-git-send-email-dvyukov@google.com Signed-off-by: Dmitry Vyukov Acked-by: Andrey Ryabinin Cc: Alexander Potapenko Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/x86/mm/kasan_init_64.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/x86/mm/kasan_init_64.c b/arch/x86/mm/kasan_init_64.c index 1b1110fa0057..0493c17b8a51 100644 --- a/arch/x86/mm/kasan_init_64.c +++ b/arch/x86/mm/kasan_init_64.c @@ -54,8 +54,8 @@ static int kasan_die_handler(struct notifier_block *self, void *data) { if (val == DIE_GPF) { - pr_emerg("CONFIG_KASAN_INLINE enabled"); - pr_emerg("GPF could be caused by NULL-ptr deref or user memory access"); + pr_emerg("CONFIG_KASAN_INLINE enabled\n"); + pr_emerg("GPF could be caused by NULL-ptr deref or user memory access\n"); } return NOTIFY_OK; } -- cgit v1.2.3 From 9babed6a66b5577628d9e76e5a6cb6104d7ddd4c Mon Sep 17 00:00:00 2001 From: Sudip Mukherjee Date: Thu, 14 Jul 2016 12:07:43 -0700 Subject: m32r: fix build warning about putc We were getting build warning: arch/m32r/boot/compressed/m32r_sio.c:11:13: warning: conflicting types for built-in function 'putc' Here putc is used as a static function so lets just rename it to avoid the conflict with the builtin putc. 
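For reference, a minimal repro of this class of clash (any translation unit, no headers needed; the exact diagnostic wording varies with the GCC version):

/* cc -c putc-clash.c
 * warning: conflicting types for built-in function 'putc'
 *
 * GCC treats 'putc' as a builtin with the libc prototype int putc(int, FILE *),
 * so a private helper with a different signature trips the warning. */
static void putc(char c)
{
	(void)c;
}

/* Renaming the helper, as the patch above does, avoids the clash entirely. */
static void m32r_putc(char c)
{
	(void)c;
}
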
Link: http://lkml.kernel.org/r/1466977046-24724-1-git-send-email-sudipm.mukherjee@gmail.com Signed-off-by: Sudip Mukherjee Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/m32r/boot/compressed/m32r_sio.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) (limited to 'arch') diff --git a/arch/m32r/boot/compressed/m32r_sio.c b/arch/m32r/boot/compressed/m32r_sio.c index 01d877c6868f..cf3023dced49 100644 --- a/arch/m32r/boot/compressed/m32r_sio.c +++ b/arch/m32r/boot/compressed/m32r_sio.c @@ -8,12 +8,13 @@ #include -static void putc(char c); +static void m32r_putc(char c); static int puts(const char *s) { char c; - while ((c = *s++)) putc(c); + while ((c = *s++)) + m32r_putc(c); return 0; } @@ -41,7 +42,7 @@ static int puts(const char *s) #define BOOT_SIO0TXB PLD_ESIO0TXB #endif -static void putc(char c) +static void m32r_putc(char c) { while ((*BOOT_SIO0STS & 0x3) != 0x3) cpu_relax(); @@ -61,7 +62,7 @@ static void putc(char c) #define SIO0TXB (volatile unsigned short *)(0x00efd000 + 30) #endif -static void putc(char c) +static void m32r_putc(char c) { while ((*SIO0STS & 0x1) == 0) cpu_relax(); -- cgit v1.2.3 From af2cf278ef4f9289f88504c3e03cb12f76027575 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 14 Jul 2016 13:22:49 -0700 Subject: x86/mm/hotplug: Don't remove PGD entries in remove_pagetable() So when memory hotplug removes a piece of physical memory from pagetable mappings, it also frees the underlying PGD entry. This complicates PGD management, so don't do this. We can keep the PGD mapped and the PUD table all clear - it's only a single 4K page per 512 GB of memory hotplugged. Signed-off-by: Ingo Molnar Signed-off-by: Andy Lutomirski Cc: Andrew Morton Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Josh Poimboeuf Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Rik van Riel Cc: Thomas Gleixner Cc: Waiman Long Cc: linux-mm@kvack.org Link: http://lkml.kernel.org/r/064ff6c7275734537f969e876f6cd0baa954d2cc.1468527351.git.luto@kernel.org Signed-off-by: Ingo Molnar --- arch/x86/mm/init_64.c | 27 --------------------------- 1 file changed, 27 deletions(-) (limited to 'arch') diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index bb88fbc0a288..e14f87057c3f 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c @@ -702,27 +702,6 @@ static void __meminit free_pmd_table(pmd_t *pmd_start, pud_t *pud) spin_unlock(&init_mm.page_table_lock); } -/* Return true if pgd is changed, otherwise return false. 
*/ -static bool __meminit free_pud_table(pud_t *pud_start, pgd_t *pgd) -{ - pud_t *pud; - int i; - - for (i = 0; i < PTRS_PER_PUD; i++) { - pud = pud_start + i; - if (!pud_none(*pud)) - return false; - } - - /* free a pud table */ - free_pagetable(pgd_page(*pgd), 0); - spin_lock(&init_mm.page_table_lock); - pgd_clear(pgd); - spin_unlock(&init_mm.page_table_lock); - - return true; -} - static void __meminit remove_pte_table(pte_t *pte_start, unsigned long addr, unsigned long end, bool direct) @@ -913,7 +892,6 @@ remove_pagetable(unsigned long start, unsigned long end, bool direct) unsigned long addr; pgd_t *pgd; pud_t *pud; - bool pgd_changed = false; for (addr = start; addr < end; addr = next) { next = pgd_addr_end(addr, end); @@ -924,13 +902,8 @@ remove_pagetable(unsigned long start, unsigned long end, bool direct) pud = (pud_t *)pgd_page_vaddr(*pgd); remove_pud_table(pud, addr, next, direct); - if (free_pud_table(pud, pgd)) - pgd_changed = true; } - if (pgd_changed) - sync_global_pgds(start, end - 1, 1); - flush_tlb_all(); } -- cgit v1.2.3 From 360cb4d15567a7eca07a5f3ade6de308bbfb4e70 Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Thu, 14 Jul 2016 13:22:50 -0700 Subject: x86/mm/cpa: In populate_pgd(), don't set the PGD entry until it's populated This avoids pointless races in which another CPU or task might see a partially populated global PGD entry. These races should normally be harmless, but, if another CPU propagates the entry via vmalloc_fault() and then populate_pgd() fails (due to memory allocation failure, for example), this prevents a use-after-free of the PGD entry. Signed-off-by: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Josh Poimboeuf Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/bf99df27eac6835f687005364bd1fbd89130946c.1468527351.git.luto@kernel.org Signed-off-by: Ingo Molnar --- arch/x86/mm/pageattr.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c index 75142159b0a5..26aa487ae4ef 100644 --- a/arch/x86/mm/pageattr.c +++ b/arch/x86/mm/pageattr.c @@ -1104,8 +1104,6 @@ static int populate_pgd(struct cpa_data *cpa, unsigned long addr) pud = (pud_t *)get_zeroed_page(GFP_KERNEL | __GFP_NOTRACK); if (!pud) return -1; - - set_pgd(pgd_entry, __pgd(__pa(pud) | _KERNPG_TABLE)); } pgprot_val(pgprot) &= ~pgprot_val(cpa->mask_clr); @@ -1113,11 +1111,16 @@ static int populate_pgd(struct cpa_data *cpa, unsigned long addr) ret = populate_pud(cpa, addr, pgd_entry, pgprot); if (ret < 0) { - unmap_pgd_range(cpa->pgd, addr, + if (pud) + free_page((unsigned long)pud); + unmap_pud_range(pgd_entry, addr, addr + (cpa->numpages << PAGE_SHIFT)); return ret; } + if (pud) + set_pgd(pgd_entry, __pgd(__pa(pud) | _KERNPG_TABLE)); + cpa->numpages = ret; return 0; } -- cgit v1.2.3 From d92fc69ccac4c0a20679fdbdc81b2010685a6f33 Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Thu, 14 Jul 2016 13:22:51 -0700 Subject: x86/mm: Remove kernel_unmap_pages_in_pgd() and efi_cleanup_page_tables() kernel_unmap_pages_in_pgd() is dangerous: if a PGD entry in init_mm.pgd were to be cleared, callers would need to ensure that the pgd entry hadn't been propagated to any other pgd. Its only caller was efi_cleanup_page_tables(), and that, in turn, was unused, so just delete both functions. This leaves a couple of other helpers unused, so delete them, too. 
Signed-off-by: Andy Lutomirski Reviewed-by: Matt Fleming Acked-by: Borislav Petkov Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Josh Poimboeuf Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-efi@vger.kernel.org Link: http://lkml.kernel.org/r/77ff20fdde3b75cd393be5559ad8218870520248.1468527351.git.luto@kernel.org Signed-off-by: Ingo Molnar --- arch/x86/include/asm/efi.h | 1 - arch/x86/include/asm/pgtable_types.h | 2 -- arch/x86/mm/pageattr.c | 28 ---------------------------- arch/x86/platform/efi/efi.c | 2 -- arch/x86/platform/efi/efi_32.c | 3 --- arch/x86/platform/efi/efi_64.c | 5 ----- 6 files changed, 41 deletions(-) (limited to 'arch') diff --git a/arch/x86/include/asm/efi.h b/arch/x86/include/asm/efi.h index 78d1e7467eae..45ea38df86d4 100644 --- a/arch/x86/include/asm/efi.h +++ b/arch/x86/include/asm/efi.h @@ -125,7 +125,6 @@ extern void __init efi_map_region_fixed(efi_memory_desc_t *md); extern void efi_sync_low_kernel_mappings(void); extern int __init efi_alloc_page_tables(void); extern int __init efi_setup_page_tables(unsigned long pa_memmap, unsigned num_pages); -extern void __init efi_cleanup_page_tables(unsigned long pa_memmap, unsigned num_pages); extern void __init old_map_region(efi_memory_desc_t *md); extern void __init runtime_code_page_mkexec(void); extern void __init efi_runtime_update_mappings(void); diff --git a/arch/x86/include/asm/pgtable_types.h b/arch/x86/include/asm/pgtable_types.h index d14d0a55322a..f1218f512f62 100644 --- a/arch/x86/include/asm/pgtable_types.h +++ b/arch/x86/include/asm/pgtable_types.h @@ -481,8 +481,6 @@ extern pmd_t *lookup_pmd_address(unsigned long address); extern phys_addr_t slow_virt_to_phys(void *__address); extern int kernel_map_pages_in_pgd(pgd_t *pgd, u64 pfn, unsigned long address, unsigned numpages, unsigned long page_flags); -void kernel_unmap_pages_in_pgd(pgd_t *root, unsigned long address, - unsigned numpages); #endif /* !__ASSEMBLY__ */ #endif /* _ASM_X86_PGTABLE_DEFS_H */ diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c index 26aa487ae4ef..26c93c6e04a0 100644 --- a/arch/x86/mm/pageattr.c +++ b/arch/x86/mm/pageattr.c @@ -746,18 +746,6 @@ static bool try_to_free_pmd_page(pmd_t *pmd) return true; } -static bool try_to_free_pud_page(pud_t *pud) -{ - int i; - - for (i = 0; i < PTRS_PER_PUD; i++) - if (!pud_none(pud[i])) - return false; - - free_page((unsigned long)pud); - return true; -} - static bool unmap_pte_range(pmd_t *pmd, unsigned long start, unsigned long end) { pte_t *pte = pte_offset_kernel(pmd, start); @@ -871,16 +859,6 @@ static void unmap_pud_range(pgd_t *pgd, unsigned long start, unsigned long end) */ } -static void unmap_pgd_range(pgd_t *root, unsigned long addr, unsigned long end) -{ - pgd_t *pgd_entry = root + pgd_index(addr); - - unmap_pud_range(pgd_entry, addr, end); - - if (try_to_free_pud_page((pud_t *)pgd_page_vaddr(*pgd_entry))) - pgd_clear(pgd_entry); -} - static int alloc_pte_page(pmd_t *pmd) { pte_t *pte = (pte_t *)get_zeroed_page(GFP_KERNEL | __GFP_NOTRACK); @@ -1994,12 +1972,6 @@ out: return retval; } -void kernel_unmap_pages_in_pgd(pgd_t *root, unsigned long address, - unsigned numpages) -{ - unmap_pgd_range(root, address, address + (numpages << PAGE_SHIFT)); -} - /* * The testcases use internal knowledge of the implementation that shouldn't * be exposed to the rest of the kernel. Include these directly here. 
diff --git a/arch/x86/platform/efi/efi.c b/arch/x86/platform/efi/efi.c index f93545e7dc54..62986e5fbdba 100644 --- a/arch/x86/platform/efi/efi.c +++ b/arch/x86/platform/efi/efi.c @@ -978,8 +978,6 @@ static void __init __efi_enter_virtual_mode(void) * EFI mixed mode we need all of memory to be accessible when * we pass parameters to the EFI runtime services in the * thunking code. - * - * efi_cleanup_page_tables(__pa(new_memmap), 1 << pg_shift); */ free_pages((unsigned long)new_memmap, pg_shift); diff --git a/arch/x86/platform/efi/efi_32.c b/arch/x86/platform/efi/efi_32.c index 338402b91d2e..cef39b097649 100644 --- a/arch/x86/platform/efi/efi_32.c +++ b/arch/x86/platform/efi/efi_32.c @@ -49,9 +49,6 @@ int __init efi_setup_page_tables(unsigned long pa_memmap, unsigned num_pages) { return 0; } -void __init efi_cleanup_page_tables(unsigned long pa_memmap, unsigned num_pages) -{ -} void __init efi_map_region(efi_memory_desc_t *md) { diff --git a/arch/x86/platform/efi/efi_64.c b/arch/x86/platform/efi/efi_64.c index b226b3f497f1..d288dcea1ffe 100644 --- a/arch/x86/platform/efi/efi_64.c +++ b/arch/x86/platform/efi/efi_64.c @@ -285,11 +285,6 @@ int __init efi_setup_page_tables(unsigned long pa_memmap, unsigned num_pages) return 0; } -void __init efi_cleanup_page_tables(unsigned long pa_memmap, unsigned num_pages) -{ - kernel_unmap_pages_in_pgd(efi_pgd, pa_memmap, num_pages); -} - static void __init __map_region(efi_memory_desc_t *md, u64 va) { unsigned long flags = _PAGE_RW; -- cgit v1.2.3 From 9a2e9da3e003112399f2863b7b6b911043c01895 Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Thu, 14 Jul 2016 13:22:52 -0700 Subject: x86/dumpstack: Try harder to get a call trace on stack overflow If we overflow the stack, print_context_stack() will abort. Detect this case and rewind back into the valid part of the stack so that we can trace it. Signed-off-by: Andy Lutomirski Reviewed-by: Josh Poimboeuf Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/ee1690eb2715ccc5dc187fde94effa4ca0ccbbcd.1468527351.git.luto@kernel.org Signed-off-by: Ingo Molnar --- arch/x86/kernel/dumpstack.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c index ef8017ca5ba9..cc88e25d73e9 100644 --- a/arch/x86/kernel/dumpstack.c +++ b/arch/x86/kernel/dumpstack.c @@ -87,7 +87,7 @@ static inline int valid_stack_ptr(struct task_struct *task, else return 0; } - return p > t && p < t + THREAD_SIZE - size; + return p >= t && p < t + THREAD_SIZE - size; } unsigned long @@ -98,6 +98,14 @@ print_context_stack(struct task_struct *task, { struct stack_frame *frame = (struct stack_frame *)bp; + /* + * If we overflowed the stack into a guard page, jump back to the + * bottom of the usable stack. + */ + if ((unsigned long)task_stack_page(task) - (unsigned long)stack < + PAGE_SIZE) + stack = (unsigned long *)task_stack_page(task); + while (valid_stack_ptr(task, stack, sizeof(*stack), end)) { unsigned long addr; -- cgit v1.2.3 From 98f30b1207932b6553ea605c99393d8afca12324 Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Thu, 14 Jul 2016 13:22:53 -0700 Subject: x86/dumpstack/64: Handle faults when printing the "Stack: " part of an OOPS If we overflow the stack into a guard page, we'll recursively fault when trying to dump the contents of the guard page. Use probe_kernel_address() so we can recover if this happens. 
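A condensed sketch of the pattern being applied here (kernel context assumed; probe_kernel_address() copies the word out and returns 0 on success, or an error if the access would fault, e.g. on a guard page):

#include <linux/kernel.h>
#include <linux/uaccess.h>

/* Walk up to 'n' words of a possibly-overflowed stack without faulting. */
static void show_stack_words_safely(unsigned long *stack, int n)
{
	int i;

	for (i = 0; i < n; i++, stack++) {
		unsigned long word;

		if (probe_kernel_address(stack, word))
			break;		/* unreadable: stop instead of recursing into a fault */
		pr_cont(" %016lx", word);
	}
	pr_cont("\n");
}
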
Signed-off-by: Andy Lutomirski Reviewed-by: Josh Poimboeuf Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/e626d47a55d7b04dcb1b4d33faa95e8505b217c8.1468527351.git.luto@kernel.org Signed-off-by: Ingo Molnar --- arch/x86/kernel/dumpstack_64.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/dumpstack_64.c b/arch/x86/kernel/dumpstack_64.c index d558a8a49016..2552a1eadfed 100644 --- a/arch/x86/kernel/dumpstack_64.c +++ b/arch/x86/kernel/dumpstack_64.c @@ -272,6 +272,8 @@ show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs, stack = sp; for (i = 0; i < kstack_depth_to_print; i++) { + unsigned long word; + if (stack >= irq_stack && stack <= irq_stack_end) { if (stack == irq_stack_end) { stack = (unsigned long *) (irq_stack_end[-1]); @@ -281,12 +283,18 @@ show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs, if (kstack_end(stack)) break; } + + if (probe_kernel_address(stack, word)) + break; + if ((i % STACKSLOTS_PER_LINE) == 0) { if (i != 0) pr_cont("\n"); - printk("%s %016lx", log_lvl, *stack++); + printk("%s %016lx", log_lvl, word); } else - pr_cont(" %016lx", *stack++); + pr_cont(" %016lx", word); + + stack++; touch_nmi_watchdog(); } preempt_enable(); -- cgit v1.2.3 From 46aea3873401836abb7f01200e7946e7d518b359 Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Thu, 14 Jul 2016 13:22:54 -0700 Subject: x86/mm/64: In vmalloc_fault(), use CR3 instead of current->active_mm If we get a vmalloc fault while current->active_mm->pgd doesn't match CR3, we'll crash without this change. I've seen this failure mode on heavily instrumented kernels with virtually mapped stacks. Signed-off-by: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Josh Poimboeuf Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/4650d7674185f165ed8fdf9ac4c5c35c5c179ba8.1468527351.git.luto@kernel.org Signed-off-by: Ingo Molnar --- arch/x86/mm/fault.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index 7d1fa7cd2374..ca44e2e7fd00 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c @@ -439,7 +439,7 @@ static noinline int vmalloc_fault(unsigned long address) * happen within a race in page table update. In the later * case just flush: */ - pgd = pgd_offset(current->active_mm, address); + pgd = (pgd_t *)__va(read_cr3()) + pgd_index(address); pgd_ref = pgd_offset_k(address); if (pgd_none(*pgd_ref)) return -1; -- cgit v1.2.3 From 2deb4be28077638591fe5fc593b7f8aabc140f42 Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Thu, 14 Jul 2016 13:22:55 -0700 Subject: x86/dumpstack: When OOPSing, rewind the stack before do_exit() If we call do_exit() with a clean stack, we greatly reduce the risk of recursive oopses due to stack overflow in do_exit, and we allow do_exit to work even if we OOPS from an IST stack. The latter gives us a much better chance of surviving long enough after we detect a stack overflow to write out our logs. Signed-off-by: Andy Lutomirski Reviewed-by: Josh Poimboeuf Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. 
Peter Anvin Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/32f73ceb372ec61889598da5e5b145889b9f2e19.1468527351.git.luto@kernel.org Signed-off-by: Ingo Molnar --- arch/x86/entry/entry_32.S | 11 +++++++++++ arch/x86/entry/entry_64.S | 11 +++++++++++ arch/x86/kernel/dumpstack.c | 10 +++++++++- 3 files changed, 31 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S index 983e5d3a0d27..0b56666e6039 100644 --- a/arch/x86/entry/entry_32.S +++ b/arch/x86/entry/entry_32.S @@ -1153,3 +1153,14 @@ ENTRY(async_page_fault) jmp error_code END(async_page_fault) #endif + +ENTRY(rewind_stack_do_exit) + /* Prevent any naive code from trying to unwind to our caller. */ + xorl %ebp, %ebp + + movl PER_CPU_VAR(cpu_current_top_of_stack), %esi + leal -TOP_OF_KERNEL_STACK_PADDING-PTREGS_SIZE(%esi), %esp + + call do_exit +1: jmp 1b +END(rewind_stack_do_exit) diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S index 9ee0da1807ed..b846875aeea6 100644 --- a/arch/x86/entry/entry_64.S +++ b/arch/x86/entry/entry_64.S @@ -1423,3 +1423,14 @@ ENTRY(ignore_sysret) mov $-ENOSYS, %eax sysret END(ignore_sysret) + +ENTRY(rewind_stack_do_exit) + /* Prevent any naive code from trying to unwind to our caller. */ + xorl %ebp, %ebp + + movq PER_CPU_VAR(cpu_current_top_of_stack), %rax + leaq -TOP_OF_KERNEL_STACK_PADDING-PTREGS_SIZE(%rax), %rsp + + call do_exit +1: jmp 1b +END(rewind_stack_do_exit) diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c index cc88e25d73e9..de8242d8bb61 100644 --- a/arch/x86/kernel/dumpstack.c +++ b/arch/x86/kernel/dumpstack.c @@ -234,6 +234,8 @@ unsigned long oops_begin(void) EXPORT_SYMBOL_GPL(oops_begin); NOKPROBE_SYMBOL(oops_begin); +void __noreturn rewind_stack_do_exit(int signr); + void oops_end(unsigned long flags, struct pt_regs *regs, int signr) { if (regs && kexec_should_crash(current)) @@ -255,7 +257,13 @@ void oops_end(unsigned long flags, struct pt_regs *regs, int signr) panic("Fatal exception in interrupt"); if (panic_on_oops) panic("Fatal exception"); - do_exit(signr); + + /* + * We're not going to return, but we might be on an IST stack or + * have very little stack space left. Rewind the stack and kill + * the task. + */ + rewind_stack_do_exit(signr); } NOKPROBE_SYMBOL(oops_end); -- cgit v1.2.3 From dfa9a942fd7951c8f333cf3f377dde51ebd21685 Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Thu, 14 Jul 2016 13:22:56 -0700 Subject: x86/uaccess: Move thread_info::uaccess_err and thread_info::sig_on_uaccess_err to thread_struct struct thread_info is a legacy mess. To prepare for its partial removal, move the uaccess control fields out -- they're straightforward. Signed-off-by: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. 
Peter Anvin Cc: Josh Poimboeuf Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/d0ac4d01c8e4d4d756264604e47445d5acc7900e.1468527351.git.luto@kernel.org Signed-off-by: Ingo Molnar --- arch/x86/entry/vsyscall/vsyscall_64.c | 6 +++--- arch/x86/include/asm/processor.h | 3 +++ arch/x86/include/asm/thread_info.h | 2 -- arch/x86/include/asm/uaccess.h | 4 ++-- arch/x86/mm/extable.c | 2 +- arch/x86/mm/fault.c | 2 +- 6 files changed, 10 insertions(+), 9 deletions(-) (limited to 'arch') diff --git a/arch/x86/entry/vsyscall/vsyscall_64.c b/arch/x86/entry/vsyscall/vsyscall_64.c index 174c2549939d..3aba2b043050 100644 --- a/arch/x86/entry/vsyscall/vsyscall_64.c +++ b/arch/x86/entry/vsyscall/vsyscall_64.c @@ -221,8 +221,8 @@ bool emulate_vsyscall(struct pt_regs *regs, unsigned long address) * With a real vsyscall, page faults cause SIGSEGV. We want to * preserve that behavior to make writing exploits harder. */ - prev_sig_on_uaccess_error = current_thread_info()->sig_on_uaccess_error; - current_thread_info()->sig_on_uaccess_error = 1; + prev_sig_on_uaccess_error = current->thread.sig_on_uaccess_error; + current->thread.sig_on_uaccess_error = 1; ret = -EFAULT; switch (vsyscall_nr) { @@ -243,7 +243,7 @@ bool emulate_vsyscall(struct pt_regs *regs, unsigned long address) break; } - current_thread_info()->sig_on_uaccess_error = prev_sig_on_uaccess_error; + current->thread.sig_on_uaccess_error = prev_sig_on_uaccess_error; check_fault: if (ret == -EFAULT) { diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index 62c6cc3cc5d3..f53ae57bd985 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -419,6 +419,9 @@ struct thread_struct { /* Max allowed port in the bitmap, in bytes: */ unsigned io_bitmap_max; + unsigned int sig_on_uaccess_error:1; + unsigned int uaccess_err:1; /* uaccess failed */ + /* Floating point and extended processor state */ struct fpu fpu; /* diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h index 30c133ac05cd..7c47bb659ecd 100644 --- a/arch/x86/include/asm/thread_info.h +++ b/arch/x86/include/asm/thread_info.h @@ -58,8 +58,6 @@ struct thread_info { __u32 status; /* thread synchronous flags */ __u32 cpu; /* current CPU */ mm_segment_t addr_limit; - unsigned int sig_on_uaccess_error:1; - unsigned int uaccess_err:1; /* uaccess failed */ }; #define INIT_THREAD_INFO(tsk) \ diff --git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h index d40ec723f799..8f66e5655c23 100644 --- a/arch/x86/include/asm/uaccess.h +++ b/arch/x86/include/asm/uaccess.h @@ -487,13 +487,13 @@ struct __large_struct { unsigned long buf[100]; }; * uaccess_try and catch */ #define uaccess_try do { \ - current_thread_info()->uaccess_err = 0; \ + current->thread.uaccess_err = 0; \ __uaccess_begin(); \ barrier(); #define uaccess_catch(err) \ __uaccess_end(); \ - (err) |= (current_thread_info()->uaccess_err ? -EFAULT : 0); \ + (err) |= (current->thread.uaccess_err ? 
-EFAULT : 0); \ } while (0) /** diff --git a/arch/x86/mm/extable.c b/arch/x86/mm/extable.c index 4bb53b89f3c5..0f90cc218d04 100644 --- a/arch/x86/mm/extable.c +++ b/arch/x86/mm/extable.c @@ -37,7 +37,7 @@ bool ex_handler_ext(const struct exception_table_entry *fixup, struct pt_regs *regs, int trapnr) { /* Special hack for uaccess_err */ - current_thread_info()->uaccess_err = 1; + current->thread.uaccess_err = 1; regs->ip = ex_fixup_addr(fixup); return true; } diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index ca44e2e7fd00..69be03d4aca6 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c @@ -737,7 +737,7 @@ no_context(struct pt_regs *regs, unsigned long error_code, * In this case we need to make sure we're not recursively * faulting through the emulate_vsyscall() logic. */ - if (current_thread_info()->sig_on_uaccess_error && signal) { + if (current->thread.sig_on_uaccess_error && signal) { tsk->thread.trap_nr = X86_TRAP_PF; tsk->thread.error_code = error_code | PF_USER; tsk->thread.cr2 = address; -- cgit v1.2.3 From 2a53ccbc0de1b1950aeedd24680f7eca65c86ff5 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Fri, 15 Jul 2016 10:21:11 +0200 Subject: x86/dumpstack: Rename thread_struct::sig_on_uaccess_error to sig_on_uaccess_err Rename it to match the thread_struct::uaccess_err pattern and also because it was too long. Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Josh Poimboeuf Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- arch/x86/entry/vsyscall/vsyscall_64.c | 10 +++++----- arch/x86/include/asm/processor.h | 2 +- arch/x86/mm/fault.c | 2 +- 3 files changed, 7 insertions(+), 7 deletions(-) (limited to 'arch') diff --git a/arch/x86/entry/vsyscall/vsyscall_64.c b/arch/x86/entry/vsyscall/vsyscall_64.c index 3aba2b043050..75fc719b7f31 100644 --- a/arch/x86/entry/vsyscall/vsyscall_64.c +++ b/arch/x86/entry/vsyscall/vsyscall_64.c @@ -96,7 +96,7 @@ static bool write_ok_or_segv(unsigned long ptr, size_t size) { /* * XXX: if access_ok, get_user, and put_user handled - * sig_on_uaccess_error, this could go away. + * sig_on_uaccess_err, this could go away. */ if (!access_ok(VERIFY_WRITE, (void __user *)ptr, size)) { @@ -125,7 +125,7 @@ bool emulate_vsyscall(struct pt_regs *regs, unsigned long address) struct task_struct *tsk; unsigned long caller; int vsyscall_nr, syscall_nr, tmp; - int prev_sig_on_uaccess_error; + int prev_sig_on_uaccess_err; long ret; /* @@ -221,8 +221,8 @@ bool emulate_vsyscall(struct pt_regs *regs, unsigned long address) * With a real vsyscall, page faults cause SIGSEGV. We want to * preserve that behavior to make writing exploits harder. 
*/ - prev_sig_on_uaccess_error = current->thread.sig_on_uaccess_error; - current->thread.sig_on_uaccess_error = 1; + prev_sig_on_uaccess_err = current->thread.sig_on_uaccess_err; + current->thread.sig_on_uaccess_err = 1; ret = -EFAULT; switch (vsyscall_nr) { @@ -243,7 +243,7 @@ bool emulate_vsyscall(struct pt_regs *regs, unsigned long address) break; } - current->thread.sig_on_uaccess_error = prev_sig_on_uaccess_error; + current->thread.sig_on_uaccess_err = prev_sig_on_uaccess_err; check_fault: if (ret == -EFAULT) { diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index f53ae57bd985..cbdfe5f76347 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -419,7 +419,7 @@ struct thread_struct { /* Max allowed port in the bitmap, in bytes: */ unsigned io_bitmap_max; - unsigned int sig_on_uaccess_error:1; + unsigned int sig_on_uaccess_err:1; unsigned int uaccess_err:1; /* uaccess failed */ /* Floating point and extended processor state */ diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index 69be03d4aca6..d22161ab941d 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c @@ -737,7 +737,7 @@ no_context(struct pt_regs *regs, unsigned long error_code, * In this case we need to make sure we're not recursively * faulting through the emulate_vsyscall() logic. */ - if (current->thread.sig_on_uaccess_error && signal) { + if (current->thread.sig_on_uaccess_err && signal) { tsk->thread.trap_nr = X86_TRAP_PF; tsk->thread.error_code = error_code | PF_USER; tsk->thread.cr2 = address; -- cgit v1.2.3 From 13d4ea097d18b419ad2a2b696063d44bf59acec0 Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Thu, 14 Jul 2016 13:22:57 -0700 Subject: x86/uaccess: Move thread_info::addr_limit to thread_struct struct thread_info is a legacy mess. To prepare for its partial removal, move thread_info::addr_limit out. As an added benefit, this way is simpler. Signed-off-by: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. 
Peter Anvin Cc: Josh Poimboeuf Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/15bee834d09402b47ac86f2feccdf6529f9bc5b0.1468527351.git.luto@kernel.org Signed-off-by: Ingo Molnar --- arch/x86/include/asm/checksum_32.h | 3 +-- arch/x86/include/asm/processor.h | 17 ++++++++++------- arch/x86/include/asm/thread_info.h | 7 ------- arch/x86/include/asm/uaccess.h | 6 +++--- arch/x86/kernel/asm-offsets.c | 4 +++- arch/x86/lib/copy_user_64.S | 8 ++++---- arch/x86/lib/csum-wrappers_64.c | 1 + arch/x86/lib/getuser.S | 20 ++++++++++---------- arch/x86/lib/putuser.S | 10 +++++----- arch/x86/lib/usercopy_64.c | 2 +- drivers/pnp/isapnp/proc.c | 2 +- lib/bitmap.c | 2 +- 12 files changed, 40 insertions(+), 42 deletions(-) (limited to 'arch') diff --git a/arch/x86/include/asm/checksum_32.h b/arch/x86/include/asm/checksum_32.h index 532f85e6651f..7b53743ed267 100644 --- a/arch/x86/include/asm/checksum_32.h +++ b/arch/x86/include/asm/checksum_32.h @@ -2,8 +2,7 @@ #define _ASM_X86_CHECKSUM_32_H #include - -#include +#include /* * computes the checksum of a memory block at buff, length len, diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index cbdfe5f76347..89314ed74fee 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -371,6 +371,10 @@ extern unsigned int xstate_size; struct perf_event; +typedef struct { + unsigned long seg; +} mm_segment_t; + struct thread_struct { /* Cached TLS descriptors: */ struct desc_struct tls_array[GDT_ENTRY_TLS_ENTRIES]; @@ -419,6 +423,8 @@ struct thread_struct { /* Max allowed port in the bitmap, in bytes: */ unsigned io_bitmap_max; + mm_segment_t addr_limit; + unsigned int sig_on_uaccess_err:1; unsigned int uaccess_err:1; /* uaccess failed */ @@ -493,11 +499,6 @@ static inline void load_sp0(struct tss_struct *tss, #define set_iopl_mask native_set_iopl_mask #endif /* CONFIG_PARAVIRT */ -typedef struct { - unsigned long seg; -} mm_segment_t; - - /* Free all resources held by a thread. 
*/ extern void release_thread(struct task_struct *); @@ -719,6 +720,7 @@ static inline void spin_lock_prefetch(const void *x) .sp0 = TOP_OF_INIT_STACK, \ .sysenter_cs = __KERNEL_CS, \ .io_bitmap_ptr = NULL, \ + .addr_limit = KERNEL_DS, \ } extern unsigned long thread_saved_pc(struct task_struct *tsk); @@ -768,8 +770,9 @@ extern unsigned long thread_saved_pc(struct task_struct *tsk); #define STACK_TOP TASK_SIZE #define STACK_TOP_MAX TASK_SIZE_MAX -#define INIT_THREAD { \ - .sp0 = TOP_OF_INIT_STACK \ +#define INIT_THREAD { \ + .sp0 = TOP_OF_INIT_STACK, \ + .addr_limit = KERNEL_DS, \ } /* diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h index 7c47bb659ecd..89bff044a6f5 100644 --- a/arch/x86/include/asm/thread_info.h +++ b/arch/x86/include/asm/thread_info.h @@ -57,7 +57,6 @@ struct thread_info { __u32 flags; /* low level flags */ __u32 status; /* thread synchronous flags */ __u32 cpu; /* current CPU */ - mm_segment_t addr_limit; }; #define INIT_THREAD_INFO(tsk) \ @@ -65,7 +64,6 @@ struct thread_info { .task = &tsk, \ .flags = 0, \ .cpu = 0, \ - .addr_limit = KERNEL_DS, \ } #define init_thread_info (init_thread_union.thread_info) @@ -184,11 +182,6 @@ static inline unsigned long current_stack_pointer(void) # define cpu_current_top_of_stack (cpu_tss + TSS_sp0) #endif -/* Load thread_info address into "reg" */ -#define GET_THREAD_INFO(reg) \ - _ASM_MOV PER_CPU_VAR(cpu_current_top_of_stack),reg ; \ - _ASM_SUB $(THREAD_SIZE),reg ; - /* * ASM operand which evaluates to a 'thread_info' address of * the current task, if it is known that "reg" is exactly "off" diff --git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h index 8f66e5655c23..c03bfb68c503 100644 --- a/arch/x86/include/asm/uaccess.h +++ b/arch/x86/include/asm/uaccess.h @@ -29,12 +29,12 @@ #define USER_DS MAKE_MM_SEG(TASK_SIZE_MAX) #define get_ds() (KERNEL_DS) -#define get_fs() (current_thread_info()->addr_limit) -#define set_fs(x) (current_thread_info()->addr_limit = (x)) +#define get_fs() (current->thread.addr_limit) +#define set_fs(x) (current->thread.addr_limit = (x)) #define segment_eq(a, b) ((a).seg == (b).seg) -#define user_addr_max() (current_thread_info()->addr_limit.seg) +#define user_addr_max() (current->thread.addr_limit.seg) #define __addr_ok(addr) \ ((unsigned long __force)(addr) < user_addr_max()) diff --git a/arch/x86/kernel/asm-offsets.c b/arch/x86/kernel/asm-offsets.c index 674134e9f5e5..2bd5c6ff7ee7 100644 --- a/arch/x86/kernel/asm-offsets.c +++ b/arch/x86/kernel/asm-offsets.c @@ -31,7 +31,9 @@ void common(void) { BLANK(); OFFSET(TI_flags, thread_info, flags); OFFSET(TI_status, thread_info, status); - OFFSET(TI_addr_limit, thread_info, addr_limit); + + BLANK(); + OFFSET(TASK_addr_limit, task_struct, thread.addr_limit); BLANK(); OFFSET(crypto_tfm_ctx_offset, crypto_tfm, __crt_ctx); diff --git a/arch/x86/lib/copy_user_64.S b/arch/x86/lib/copy_user_64.S index 2b0ef26da0bd..bf603ebbfd8e 100644 --- a/arch/x86/lib/copy_user_64.S +++ b/arch/x86/lib/copy_user_64.S @@ -17,11 +17,11 @@ /* Standard copy_to_user with segment limit checking */ ENTRY(_copy_to_user) - GET_THREAD_INFO(%rax) + mov PER_CPU_VAR(current_task), %rax movq %rdi,%rcx addq %rdx,%rcx jc bad_to_user - cmpq TI_addr_limit(%rax),%rcx + cmpq TASK_addr_limit(%rax),%rcx ja bad_to_user ALTERNATIVE_2 "jmp copy_user_generic_unrolled", \ "jmp copy_user_generic_string", \ @@ -32,11 +32,11 @@ ENDPROC(_copy_to_user) /* Standard copy_from_user with segment limit checking */ ENTRY(_copy_from_user) - GET_THREAD_INFO(%rax) + mov 
PER_CPU_VAR(current_task), %rax movq %rsi,%rcx addq %rdx,%rcx jc bad_from_user - cmpq TI_addr_limit(%rax),%rcx + cmpq TASK_addr_limit(%rax),%rcx ja bad_from_user ALTERNATIVE_2 "jmp copy_user_generic_unrolled", \ "jmp copy_user_generic_string", \ diff --git a/arch/x86/lib/csum-wrappers_64.c b/arch/x86/lib/csum-wrappers_64.c index 28a6654f0d08..b6fcb9a9ddbc 100644 --- a/arch/x86/lib/csum-wrappers_64.c +++ b/arch/x86/lib/csum-wrappers_64.c @@ -6,6 +6,7 @@ */ #include #include +#include #include /** diff --git a/arch/x86/lib/getuser.S b/arch/x86/lib/getuser.S index 46668cda4ffd..0ef5128c2de8 100644 --- a/arch/x86/lib/getuser.S +++ b/arch/x86/lib/getuser.S @@ -35,8 +35,8 @@ .text ENTRY(__get_user_1) - GET_THREAD_INFO(%_ASM_DX) - cmp TI_addr_limit(%_ASM_DX),%_ASM_AX + mov PER_CPU_VAR(current_task), %_ASM_DX + cmp TASK_addr_limit(%_ASM_DX),%_ASM_AX jae bad_get_user ASM_STAC 1: movzbl (%_ASM_AX),%edx @@ -48,8 +48,8 @@ ENDPROC(__get_user_1) ENTRY(__get_user_2) add $1,%_ASM_AX jc bad_get_user - GET_THREAD_INFO(%_ASM_DX) - cmp TI_addr_limit(%_ASM_DX),%_ASM_AX + mov PER_CPU_VAR(current_task), %_ASM_DX + cmp TASK_addr_limit(%_ASM_DX),%_ASM_AX jae bad_get_user ASM_STAC 2: movzwl -1(%_ASM_AX),%edx @@ -61,8 +61,8 @@ ENDPROC(__get_user_2) ENTRY(__get_user_4) add $3,%_ASM_AX jc bad_get_user - GET_THREAD_INFO(%_ASM_DX) - cmp TI_addr_limit(%_ASM_DX),%_ASM_AX + mov PER_CPU_VAR(current_task), %_ASM_DX + cmp TASK_addr_limit(%_ASM_DX),%_ASM_AX jae bad_get_user ASM_STAC 3: movl -3(%_ASM_AX),%edx @@ -75,8 +75,8 @@ ENTRY(__get_user_8) #ifdef CONFIG_X86_64 add $7,%_ASM_AX jc bad_get_user - GET_THREAD_INFO(%_ASM_DX) - cmp TI_addr_limit(%_ASM_DX),%_ASM_AX + mov PER_CPU_VAR(current_task), %_ASM_DX + cmp TASK_addr_limit(%_ASM_DX),%_ASM_AX jae bad_get_user ASM_STAC 4: movq -7(%_ASM_AX),%rdx @@ -86,8 +86,8 @@ ENTRY(__get_user_8) #else add $7,%_ASM_AX jc bad_get_user_8 - GET_THREAD_INFO(%_ASM_DX) - cmp TI_addr_limit(%_ASM_DX),%_ASM_AX + mov PER_CPU_VAR(current_task), %_ASM_DX + cmp TASK_addr_limit(%_ASM_DX),%_ASM_AX jae bad_get_user_8 ASM_STAC 4: movl -7(%_ASM_AX),%edx diff --git a/arch/x86/lib/putuser.S b/arch/x86/lib/putuser.S index e0817a12d323..c891ece81e5b 100644 --- a/arch/x86/lib/putuser.S +++ b/arch/x86/lib/putuser.S @@ -29,14 +29,14 @@ * as they get called from within inline assembly. 
*/ -#define ENTER GET_THREAD_INFO(%_ASM_BX) +#define ENTER mov PER_CPU_VAR(current_task), %_ASM_BX #define EXIT ASM_CLAC ; \ ret .text ENTRY(__put_user_1) ENTER - cmp TI_addr_limit(%_ASM_BX),%_ASM_CX + cmp TASK_addr_limit(%_ASM_BX),%_ASM_CX jae bad_put_user ASM_STAC 1: movb %al,(%_ASM_CX) @@ -46,7 +46,7 @@ ENDPROC(__put_user_1) ENTRY(__put_user_2) ENTER - mov TI_addr_limit(%_ASM_BX),%_ASM_BX + mov TASK_addr_limit(%_ASM_BX),%_ASM_BX sub $1,%_ASM_BX cmp %_ASM_BX,%_ASM_CX jae bad_put_user @@ -58,7 +58,7 @@ ENDPROC(__put_user_2) ENTRY(__put_user_4) ENTER - mov TI_addr_limit(%_ASM_BX),%_ASM_BX + mov TASK_addr_limit(%_ASM_BX),%_ASM_BX sub $3,%_ASM_BX cmp %_ASM_BX,%_ASM_CX jae bad_put_user @@ -70,7 +70,7 @@ ENDPROC(__put_user_4) ENTRY(__put_user_8) ENTER - mov TI_addr_limit(%_ASM_BX),%_ASM_BX + mov TASK_addr_limit(%_ASM_BX),%_ASM_BX sub $7,%_ASM_BX cmp %_ASM_BX,%_ASM_CX jae bad_put_user diff --git a/arch/x86/lib/usercopy_64.c b/arch/x86/lib/usercopy_64.c index 0a42327a59d7..9f760cdcaf40 100644 --- a/arch/x86/lib/usercopy_64.c +++ b/arch/x86/lib/usercopy_64.c @@ -6,7 +6,7 @@ * Copyright 2002 Andi Kleen */ #include -#include +#include /* * Zero Userspace diff --git a/drivers/pnp/isapnp/proc.c b/drivers/pnp/isapnp/proc.c index 5edee645d890..262285e48a09 100644 --- a/drivers/pnp/isapnp/proc.c +++ b/drivers/pnp/isapnp/proc.c @@ -21,7 +21,7 @@ #include #include #include -#include +#include extern struct pnp_protocol isapnp_protocol; diff --git a/lib/bitmap.c b/lib/bitmap.c index c66da508cbf7..eca88087fa8a 100644 --- a/lib/bitmap.c +++ b/lib/bitmap.c @@ -14,9 +14,9 @@ #include #include #include +#include #include -#include /* * bitmaps provide an array of bits, implemented using an an -- cgit v1.2.3 From fb59831b496a5bb7d0a06c7e702d88d1757edfca Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Thu, 14 Jul 2016 13:22:58 -0700 Subject: x86/smp: Remove stack_smp_processor_id() It serves no purpose -- raw_smp_processor_id() works fine. This change will be needed to move thread_info off the stack. Signed-off-by: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. 
Peter Anvin Cc: Josh Poimboeuf Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/a2bf4f07fbc30fb32f9f7f3f8f94ad3580823847.1468527351.git.luto@kernel.org Signed-off-by: Ingo Molnar --- arch/x86/include/asm/cpu.h | 1 - arch/x86/include/asm/smp.h | 6 ------ arch/x86/kernel/cpu/common.c | 2 +- 3 files changed, 1 insertion(+), 8 deletions(-) (limited to 'arch') diff --git a/arch/x86/include/asm/cpu.h b/arch/x86/include/asm/cpu.h index 678637ad7476..59d34c521d96 100644 --- a/arch/x86/include/asm/cpu.h +++ b/arch/x86/include/asm/cpu.h @@ -17,7 +17,6 @@ static inline void prefill_possible_map(void) {} #define cpu_physical_id(cpu) boot_cpu_physical_apicid #define safe_smp_processor_id() 0 -#define stack_smp_processor_id() 0 #endif /* CONFIG_SMP */ diff --git a/arch/x86/include/asm/smp.h b/arch/x86/include/asm/smp.h index 66b057306f40..0576b6157f3a 100644 --- a/arch/x86/include/asm/smp.h +++ b/arch/x86/include/asm/smp.h @@ -172,12 +172,6 @@ extern int safe_smp_processor_id(void); #elif defined(CONFIG_X86_64_SMP) #define raw_smp_processor_id() (this_cpu_read(cpu_number)) -#define stack_smp_processor_id() \ -({ \ - struct thread_info *ti; \ - __asm__("andq %%rsp,%0; ":"=r" (ti) : "0" (CURRENT_MASK)); \ - ti->cpu; \ -}) #define safe_smp_processor_id() smp_processor_id() #endif diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 0fe6953f421c..d22a7b9c4f0e 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -1452,7 +1452,7 @@ void cpu_init(void) struct task_struct *me; struct tss_struct *t; unsigned long v; - int cpu = stack_smp_processor_id(); + int cpu = raw_smp_processor_id(); int i; wait_for_master_cpu(cpu); -- cgit v1.2.3 From eb43e8f85fffc1ba535e0362a872101dfe48abe3 Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Thu, 14 Jul 2016 13:22:59 -0700 Subject: x86/smp: Remove unnecessary initialization of thread_info::cpu It's statically initialized to zero -- no need to dynamically initialize it to zero as well. Signed-off-by: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Josh Poimboeuf Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/6cf6314dce3051371a913ee19d1b88e29c68c560.1468527351.git.luto@kernel.org Signed-off-by: Ingo Molnar --- arch/x86/kernel/smpboot.c | 1 - 1 file changed, 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index fafe8b923cac..0e91dbeca2fd 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -1285,7 +1285,6 @@ void __init native_smp_prepare_cpus(unsigned int max_cpus) cpumask_copy(cpu_callin_mask, cpumask_of(0)); mb(); - current_thread_info()->cpu = 0; /* needed? */ for_each_possible_cpu(i) { zalloc_cpumask_var(&per_cpu(cpu_sibling_map, i), GFP_KERNEL); zalloc_cpumask_var(&per_cpu(cpu_core_map, i), GFP_KERNEL); -- cgit v1.2.3 From 3ebfd81f7fb3e81a754e37283b7f38c62244641a Mon Sep 17 00:00:00 2001 From: "H.J. Lu" Date: Thu, 14 Jul 2016 12:31:53 -0700 Subject: x86/syscalls: Add compat_sys_preadv64v2/compat_sys_pwritev64v2 Don't use the same syscall numbers for 2 different syscalls: 534 x32 preadv compat_sys_preadv64 535 x32 pwritev compat_sys_pwritev64 534 x32 preadv2 compat_sys_preadv2 535 x32 pwritev2 compat_sys_pwritev2 Add compat_sys_preadv64v2() and compat_sys_pwritev64v2() so that 64-bit offset is passed in one 64-bit register on x32, similar to compat_sys_preadv64() and compat_sys_pwritev64(). Signed-off-by: H.J. 
Lu Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Christoph Hellwig Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Josh Poimboeuf Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/CAMe9rOovCMf-RQfx_n1U_Tu_DX1BYkjtFr%3DQ4-_PFVSj9BCzUA@mail.gmail.com Signed-off-by: Ingo Molnar --- arch/x86/entry/syscalls/syscall_64.tbl | 4 ++-- arch/x86/include/asm/unistd.h | 2 ++ fs/read_write.c | 18 ++++++++++++++++++ 3 files changed, 22 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/x86/entry/syscalls/syscall_64.tbl b/arch/x86/entry/syscalls/syscall_64.tbl index 555263e385c9..e9ce9c7c39b4 100644 --- a/arch/x86/entry/syscalls/syscall_64.tbl +++ b/arch/x86/entry/syscalls/syscall_64.tbl @@ -374,5 +374,5 @@ 543 x32 io_setup compat_sys_io_setup 544 x32 io_submit compat_sys_io_submit 545 x32 execveat compat_sys_execveat/ptregs -534 x32 preadv2 compat_sys_preadv2 -535 x32 pwritev2 compat_sys_pwritev2 +546 x32 preadv2 compat_sys_preadv64v2 +547 x32 pwritev2 compat_sys_pwritev64v2 diff --git a/arch/x86/include/asm/unistd.h b/arch/x86/include/asm/unistd.h index 2b19caa4081c..32712a925f26 100644 --- a/arch/x86/include/asm/unistd.h +++ b/arch/x86/include/asm/unistd.h @@ -26,6 +26,8 @@ # define __ARCH_WANT_COMPAT_SYS_GETDENTS64 # define __ARCH_WANT_COMPAT_SYS_PREADV64 # define __ARCH_WANT_COMPAT_SYS_PWRITEV64 +# define __ARCH_WANT_COMPAT_SYS_PREADV64V2 +# define __ARCH_WANT_COMPAT_SYS_PWRITEV64V2 # endif diff --git a/fs/read_write.c b/fs/read_write.c index 933b53a375b4..66215a7b17cf 100644 --- a/fs/read_write.c +++ b/fs/read_write.c @@ -1168,6 +1168,15 @@ COMPAT_SYSCALL_DEFINE5(preadv, compat_ulong_t, fd, return do_compat_preadv64(fd, vec, vlen, pos, 0); } +#ifdef __ARCH_WANT_COMPAT_SYS_PREADV64V2 +COMPAT_SYSCALL_DEFINE5(preadv64v2, unsigned long, fd, + const struct compat_iovec __user *,vec, + unsigned long, vlen, loff_t, pos, int, flags) +{ + return do_compat_preadv64(fd, vec, vlen, pos, flags); +} +#endif + COMPAT_SYSCALL_DEFINE6(preadv2, compat_ulong_t, fd, const struct compat_iovec __user *,vec, compat_ulong_t, vlen, u32, pos_low, u32, pos_high, @@ -1265,6 +1274,15 @@ COMPAT_SYSCALL_DEFINE5(pwritev, compat_ulong_t, fd, return do_compat_pwritev64(fd, vec, vlen, pos, 0); } +#ifdef __ARCH_WANT_COMPAT_SYS_PWRITEV64V2 +COMPAT_SYSCALL_DEFINE5(pwritev64v2, unsigned long, fd, + const struct compat_iovec __user *,vec, + unsigned long, vlen, loff_t, pos, int, flags) +{ + return do_compat_pwritev64(fd, vec, vlen, pos, flags); +} +#endif + COMPAT_SYSCALL_DEFINE6(pwritev2, compat_ulong_t, fd, const struct compat_iovec __user *,vec, compat_ulong_t, vlen, u32, pos_low, u32, pos_high, int, flags) -- cgit v1.2.3 From c48ec42d6eae08f55685ab660f0743ed33b9f22a Mon Sep 17 00:00:00 2001 From: Wei Jiangang Date: Fri, 15 Jul 2016 16:12:10 +0800 Subject: x86/tsc: Remove the unused check_tsc_disabled() check_tsc_disabled() was introduced by commit: c73deb6aecda ("perf/x86: Add ability to calculate TSC from perf sample timestamps") The only caller was arch_perf_update_userpage(), which had been refactored by commit: d8b11a0cbd1c ("perf/x86: Clean up cap_user_time* setting") ... so no need keep and export it any more. 
Signed-off-by: Wei Jiangang Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: a.p.zijlstra@chello.nl Cc: adrian.hunter@intel.com Cc: bp@suse.de Link: http://lkml.kernel.org/r/1468570330-25810-1-git-send-email-weijg.fnst@cn.fujitsu.com Signed-off-by: Ingo Molnar --- arch/x86/include/asm/tsc.h | 1 - arch/x86/kernel/tsc.c | 6 ------ 2 files changed, 7 deletions(-) (limited to 'arch') diff --git a/arch/x86/include/asm/tsc.h b/arch/x86/include/asm/tsc.h index a30591e1567c..33b6365c22fe 100644 --- a/arch/x86/include/asm/tsc.h +++ b/arch/x86/include/asm/tsc.h @@ -35,7 +35,6 @@ extern void tsc_init(void); extern void mark_tsc_unstable(char *reason); extern int unsynchronized_tsc(void); extern int check_tsc_unstable(void); -extern int check_tsc_disabled(void); extern unsigned long native_calibrate_cpu(void); extern unsigned long native_calibrate_tsc(void); extern unsigned long long native_sched_clock_from_tsc(u64 tsc); diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c index 2a952fcb1516..a804b5ab32d0 100644 --- a/arch/x86/kernel/tsc.c +++ b/arch/x86/kernel/tsc.c @@ -335,12 +335,6 @@ int check_tsc_unstable(void) } EXPORT_SYMBOL_GPL(check_tsc_unstable); -int check_tsc_disabled(void) -{ - return tsc_disabled; -} -EXPORT_SYMBOL_GPL(check_tsc_disabled); - #ifdef CONFIG_X86_TSC int __init notsc_setup(char *str) { -- cgit v1.2.3 From 102bb9fef68a21f357dc813d4792666c8295bc35 Mon Sep 17 00:00:00 2001 From: Wei Jiangang Date: Thu, 14 Jul 2016 10:24:06 +0800 Subject: x86/apic: Remove the unused struct apic::apic_id_mask field The only user verify_local_APIC() had been removed by commit: 4399c03c6780 ("x86/apic: Remove verify_local_APIC()") ... so there is no need to keep it. Signed-off-by: Wei Jiangang Cc: Borislav Petkov Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: boris.ostrovsky@oracle.com Cc: bsd@redhat.com Cc: david.vrabel@citrix.com Cc: jgross@suse.com Cc: konrad.wilk@oracle.com Cc: xen-devel@lists.xenproject.org Link: http://lkml.kernel.org/r/1468463046-20849-1-git-send-email-weijg.fnst@cn.fujitsu.com Signed-off-by: Ingo Molnar --- arch/x86/include/asm/apic.h | 1 - arch/x86/kernel/apic/apic_flat_64.c | 2 -- arch/x86/kernel/apic/apic_noop.c | 1 - arch/x86/kernel/apic/apic_numachip.c | 2 -- arch/x86/kernel/apic/bigsmp_32.c | 1 - arch/x86/kernel/apic/probe_32.c | 1 - arch/x86/kernel/apic/x2apic_cluster.c | 1 - arch/x86/kernel/apic/x2apic_phys.c | 1 - arch/x86/kernel/apic/x2apic_uv_x.c | 1 - arch/x86/xen/apic.c | 1 - 10 files changed, 12 deletions(-) (limited to 'arch') diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h index bc27611fa58f..f5befd4945f2 100644 --- a/arch/x86/include/asm/apic.h +++ b/arch/x86/include/asm/apic.h @@ -300,7 +300,6 @@ struct apic { unsigned int (*get_apic_id)(unsigned long x); unsigned long (*set_apic_id)(unsigned int id); - unsigned long apic_id_mask; int (*cpu_mask_to_apicid_and)(const struct cpumask *cpumask, const struct cpumask *andmask, diff --git a/arch/x86/kernel/apic/apic_flat_64.c b/arch/x86/kernel/apic/apic_flat_64.c index 76f89e2b245a..048747778d37 100644 --- a/arch/x86/kernel/apic/apic_flat_64.c +++ b/arch/x86/kernel/apic/apic_flat_64.c @@ -181,7 +181,6 @@ static struct apic apic_flat = { .get_apic_id = flat_get_apic_id, .set_apic_id = set_apic_id, - .apic_id_mask = 0xFFu << 24, .cpu_mask_to_apicid_and = flat_cpu_mask_to_apicid_and, @@ -278,7 +277,6 @@ static struct apic apic_physflat = { .get_apic_id = flat_get_apic_id, .set_apic_id = set_apic_id, - .apic_id_mask = 0xFFu << 24, 
.cpu_mask_to_apicid_and = default_cpu_mask_to_apicid_and, diff --git a/arch/x86/kernel/apic/apic_noop.c b/arch/x86/kernel/apic/apic_noop.c index 13d19ed58514..2cebf59092d8 100644 --- a/arch/x86/kernel/apic/apic_noop.c +++ b/arch/x86/kernel/apic/apic_noop.c @@ -141,7 +141,6 @@ struct apic apic_noop = { .get_apic_id = noop_get_apic_id, .set_apic_id = NULL, - .apic_id_mask = 0x0F << 24, .cpu_mask_to_apicid_and = flat_cpu_mask_to_apicid_and, diff --git a/arch/x86/kernel/apic/apic_numachip.c b/arch/x86/kernel/apic/apic_numachip.c index ab5c2c685a3c..714d4fda0d52 100644 --- a/arch/x86/kernel/apic/apic_numachip.c +++ b/arch/x86/kernel/apic/apic_numachip.c @@ -269,7 +269,6 @@ static const struct apic apic_numachip1 __refconst = { .get_apic_id = numachip1_get_apic_id, .set_apic_id = numachip1_set_apic_id, - .apic_id_mask = 0xffU << 24, .cpu_mask_to_apicid_and = default_cpu_mask_to_apicid_and, @@ -321,7 +320,6 @@ static const struct apic apic_numachip2 __refconst = { .get_apic_id = numachip2_get_apic_id, .set_apic_id = numachip2_set_apic_id, - .apic_id_mask = 0xffU << 24, .cpu_mask_to_apicid_and = default_cpu_mask_to_apicid_and, diff --git a/arch/x86/kernel/apic/bigsmp_32.c b/arch/x86/kernel/apic/bigsmp_32.c index cf9bd896c12d..06dbaa458bfe 100644 --- a/arch/x86/kernel/apic/bigsmp_32.c +++ b/arch/x86/kernel/apic/bigsmp_32.c @@ -171,7 +171,6 @@ static struct apic apic_bigsmp = { .get_apic_id = bigsmp_get_apic_id, .set_apic_id = NULL, - .apic_id_mask = 0xFF << 24, .cpu_mask_to_apicid_and = default_cpu_mask_to_apicid_and, diff --git a/arch/x86/kernel/apic/probe_32.c b/arch/x86/kernel/apic/probe_32.c index f316e34abb42..93edfa01b408 100644 --- a/arch/x86/kernel/apic/probe_32.c +++ b/arch/x86/kernel/apic/probe_32.c @@ -101,7 +101,6 @@ static struct apic apic_default = { .get_apic_id = default_get_apic_id, .set_apic_id = NULL, - .apic_id_mask = 0x0F << 24, .cpu_mask_to_apicid_and = flat_cpu_mask_to_apicid_and, diff --git a/arch/x86/kernel/apic/x2apic_cluster.c b/arch/x86/kernel/apic/x2apic_cluster.c index aca8b75c1552..24170d0809ba 100644 --- a/arch/x86/kernel/apic/x2apic_cluster.c +++ b/arch/x86/kernel/apic/x2apic_cluster.c @@ -270,7 +270,6 @@ static struct apic apic_x2apic_cluster = { .get_apic_id = x2apic_get_apic_id, .set_apic_id = x2apic_set_apic_id, - .apic_id_mask = 0xFFFFFFFFu, .cpu_mask_to_apicid_and = x2apic_cpu_mask_to_apicid_and, diff --git a/arch/x86/kernel/apic/x2apic_phys.c b/arch/x86/kernel/apic/x2apic_phys.c index a1242e2c12e6..4f13f54f1b1f 100644 --- a/arch/x86/kernel/apic/x2apic_phys.c +++ b/arch/x86/kernel/apic/x2apic_phys.c @@ -126,7 +126,6 @@ static struct apic apic_x2apic_phys = { .get_apic_id = x2apic_get_apic_id, .set_apic_id = x2apic_set_apic_id, - .apic_id_mask = 0xFFFFFFFFu, .cpu_mask_to_apicid_and = default_cpu_mask_to_apicid_and, diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c index 29003154fafd..5a58c917179c 100644 --- a/arch/x86/kernel/apic/x2apic_uv_x.c +++ b/arch/x86/kernel/apic/x2apic_uv_x.c @@ -582,7 +582,6 @@ static struct apic __refdata apic_x2apic_uv_x = { .get_apic_id = x2apic_get_apic_id, .set_apic_id = set_apic_id, - .apic_id_mask = 0xFFFFFFFFu, .cpu_mask_to_apicid_and = uv_cpu_mask_to_apicid_and, diff --git a/arch/x86/xen/apic.c b/arch/x86/xen/apic.c index db52a7fafcc2..44c88ad1841a 100644 --- a/arch/x86/xen/apic.c +++ b/arch/x86/xen/apic.c @@ -177,7 +177,6 @@ static struct apic xen_pv_apic = { .get_apic_id = xen_get_apic_id, .set_apic_id = xen_set_apic_id, /* Can be NULL on 32-bit. 
*/ - .apic_id_mask = 0xFF << 24, /* Used by verify_local_APIC. Match with what xen_get_apic_id does. */ .cpu_mask_to_apicid_and = flat_cpu_mask_to_apicid_and, -- cgit v1.2.3 From 6bd80f372371a7b3f5ff13e4e8a560066299c001 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Mon, 6 Jun 2016 09:43:00 +0200 Subject: m68k/defconfig: Update defconfigs for v4.7-rc2 Signed-off-by: Geert Uytterhoeven --- arch/m68k/configs/amiga_defconfig | 4 ++++ arch/m68k/configs/apollo_defconfig | 4 ++++ arch/m68k/configs/atari_defconfig | 4 ++++ arch/m68k/configs/bvme6000_defconfig | 4 ++++ arch/m68k/configs/hp300_defconfig | 4 ++++ arch/m68k/configs/mac_defconfig | 4 ++++ arch/m68k/configs/multi_defconfig | 4 ++++ arch/m68k/configs/mvme147_defconfig | 4 ++++ arch/m68k/configs/mvme16x_defconfig | 4 ++++ arch/m68k/configs/q40_defconfig | 4 ++++ arch/m68k/configs/sun3_defconfig | 4 ++++ arch/m68k/configs/sun3x_defconfig | 4 ++++ 12 files changed, 48 insertions(+) (limited to 'arch') diff --git a/arch/m68k/configs/amiga_defconfig b/arch/m68k/configs/amiga_defconfig index 3ee6976f6088..8f5b6f7dd136 100644 --- a/arch/m68k/configs/amiga_defconfig +++ b/arch/m68k/configs/amiga_defconfig @@ -9,6 +9,7 @@ CONFIG_LOG_BUF_SHIFT=16 # CONFIG_PID_NS is not set # CONFIG_NET_NS is not set CONFIG_BLK_DEV_INITRD=y +CONFIG_CC_OPTIMIZE_FOR_SIZE=y CONFIG_USERFAULTFD=y CONFIG_SLAB=y CONFIG_MODULES=y @@ -359,6 +360,7 @@ CONFIG_MACVTAP=m CONFIG_IPVLAN=m CONFIG_VXLAN=m CONFIG_GENEVE=m +CONFIG_GTP=m CONFIG_MACSEC=m CONFIG_NETCONSOLE=m CONFIG_NETCONSOLE_DYNAMIC=y @@ -553,7 +555,9 @@ CONFIG_TEST_STRING_HELPERS=m CONFIG_TEST_KSTRTOX=m CONFIG_TEST_PRINTF=m CONFIG_TEST_BITMAP=m +CONFIG_TEST_UUID=m CONFIG_TEST_RHASHTABLE=m +CONFIG_TEST_HASH=m CONFIG_TEST_LKM=m CONFIG_TEST_USER_COPY=m CONFIG_TEST_BPF=m diff --git a/arch/m68k/configs/apollo_defconfig b/arch/m68k/configs/apollo_defconfig index e96787ffcbce..31bded9c83d4 100644 --- a/arch/m68k/configs/apollo_defconfig +++ b/arch/m68k/configs/apollo_defconfig @@ -9,6 +9,7 @@ CONFIG_LOG_BUF_SHIFT=16 # CONFIG_PID_NS is not set # CONFIG_NET_NS is not set CONFIG_BLK_DEV_INITRD=y +CONFIG_CC_OPTIMIZE_FOR_SIZE=y CONFIG_USERFAULTFD=y CONFIG_SLAB=y CONFIG_MODULES=y @@ -341,6 +342,7 @@ CONFIG_MACVTAP=m CONFIG_IPVLAN=m CONFIG_VXLAN=m CONFIG_GENEVE=m +CONFIG_GTP=m CONFIG_MACSEC=m CONFIG_NETCONSOLE=m CONFIG_NETCONSOLE_DYNAMIC=y @@ -512,7 +514,9 @@ CONFIG_TEST_STRING_HELPERS=m CONFIG_TEST_KSTRTOX=m CONFIG_TEST_PRINTF=m CONFIG_TEST_BITMAP=m +CONFIG_TEST_UUID=m CONFIG_TEST_RHASHTABLE=m +CONFIG_TEST_HASH=m CONFIG_TEST_LKM=m CONFIG_TEST_USER_COPY=m CONFIG_TEST_BPF=m diff --git a/arch/m68k/configs/atari_defconfig b/arch/m68k/configs/atari_defconfig index 083fe6beac14..0d7739e04ae2 100644 --- a/arch/m68k/configs/atari_defconfig +++ b/arch/m68k/configs/atari_defconfig @@ -9,6 +9,7 @@ CONFIG_LOG_BUF_SHIFT=16 # CONFIG_PID_NS is not set # CONFIG_NET_NS is not set CONFIG_BLK_DEV_INITRD=y +CONFIG_CC_OPTIMIZE_FOR_SIZE=y CONFIG_USERFAULTFD=y CONFIG_SLAB=y CONFIG_MODULES=y @@ -350,6 +351,7 @@ CONFIG_MACVTAP=m CONFIG_IPVLAN=m CONFIG_VXLAN=m CONFIG_GENEVE=m +CONFIG_GTP=m CONFIG_MACSEC=m CONFIG_NETCONSOLE=m CONFIG_NETCONSOLE_DYNAMIC=y @@ -533,7 +535,9 @@ CONFIG_TEST_STRING_HELPERS=m CONFIG_TEST_KSTRTOX=m CONFIG_TEST_PRINTF=m CONFIG_TEST_BITMAP=m +CONFIG_TEST_UUID=m CONFIG_TEST_RHASHTABLE=m +CONFIG_TEST_HASH=m CONFIG_TEST_LKM=m CONFIG_TEST_USER_COPY=m CONFIG_TEST_BPF=m diff --git a/arch/m68k/configs/bvme6000_defconfig b/arch/m68k/configs/bvme6000_defconfig index 475130c06dcb..2cbb5c465fec 100644 --- 
a/arch/m68k/configs/bvme6000_defconfig +++ b/arch/m68k/configs/bvme6000_defconfig @@ -9,6 +9,7 @@ CONFIG_LOG_BUF_SHIFT=16 # CONFIG_PID_NS is not set # CONFIG_NET_NS is not set CONFIG_BLK_DEV_INITRD=y +CONFIG_CC_OPTIMIZE_FOR_SIZE=y CONFIG_USERFAULTFD=y CONFIG_SLAB=y CONFIG_MODULES=y @@ -340,6 +341,7 @@ CONFIG_MACVTAP=m CONFIG_IPVLAN=m CONFIG_VXLAN=m CONFIG_GENEVE=m +CONFIG_GTP=m CONFIG_MACSEC=m CONFIG_NETCONSOLE=m CONFIG_NETCONSOLE_DYNAMIC=y @@ -504,7 +506,9 @@ CONFIG_TEST_STRING_HELPERS=m CONFIG_TEST_KSTRTOX=m CONFIG_TEST_PRINTF=m CONFIG_TEST_BITMAP=m +CONFIG_TEST_UUID=m CONFIG_TEST_RHASHTABLE=m +CONFIG_TEST_HASH=m CONFIG_TEST_LKM=m CONFIG_TEST_USER_COPY=m CONFIG_TEST_BPF=m diff --git a/arch/m68k/configs/hp300_defconfig b/arch/m68k/configs/hp300_defconfig index 4339658c200f..96102a42c156 100644 --- a/arch/m68k/configs/hp300_defconfig +++ b/arch/m68k/configs/hp300_defconfig @@ -9,6 +9,7 @@ CONFIG_LOG_BUF_SHIFT=16 # CONFIG_PID_NS is not set # CONFIG_NET_NS is not set CONFIG_BLK_DEV_INITRD=y +CONFIG_CC_OPTIMIZE_FOR_SIZE=y CONFIG_USERFAULTFD=y CONFIG_SLAB=y CONFIG_MODULES=y @@ -341,6 +342,7 @@ CONFIG_MACVTAP=m CONFIG_IPVLAN=m CONFIG_VXLAN=m CONFIG_GENEVE=m +CONFIG_GTP=m CONFIG_MACSEC=m CONFIG_NETCONSOLE=m CONFIG_NETCONSOLE_DYNAMIC=y @@ -514,7 +516,9 @@ CONFIG_TEST_STRING_HELPERS=m CONFIG_TEST_KSTRTOX=m CONFIG_TEST_PRINTF=m CONFIG_TEST_BITMAP=m +CONFIG_TEST_UUID=m CONFIG_TEST_RHASHTABLE=m +CONFIG_TEST_HASH=m CONFIG_TEST_LKM=m CONFIG_TEST_USER_COPY=m CONFIG_TEST_BPF=m diff --git a/arch/m68k/configs/mac_defconfig b/arch/m68k/configs/mac_defconfig index 831cc8c3a2e2..97d88f7dc5a7 100644 --- a/arch/m68k/configs/mac_defconfig +++ b/arch/m68k/configs/mac_defconfig @@ -9,6 +9,7 @@ CONFIG_LOG_BUF_SHIFT=16 # CONFIG_PID_NS is not set # CONFIG_NET_NS is not set CONFIG_BLK_DEV_INITRD=y +CONFIG_CC_OPTIMIZE_FOR_SIZE=y CONFIG_USERFAULTFD=y CONFIG_SLAB=y CONFIG_MODULES=y @@ -357,6 +358,7 @@ CONFIG_MACVTAP=m CONFIG_IPVLAN=m CONFIG_VXLAN=m CONFIG_GENEVE=m +CONFIG_GTP=m CONFIG_MACSEC=m CONFIG_NETCONSOLE=m CONFIG_NETCONSOLE_DYNAMIC=y @@ -536,7 +538,9 @@ CONFIG_TEST_STRING_HELPERS=m CONFIG_TEST_KSTRTOX=m CONFIG_TEST_PRINTF=m CONFIG_TEST_BITMAP=m +CONFIG_TEST_UUID=m CONFIG_TEST_RHASHTABLE=m +CONFIG_TEST_HASH=m CONFIG_TEST_LKM=m CONFIG_TEST_USER_COPY=m CONFIG_TEST_BPF=m diff --git a/arch/m68k/configs/multi_defconfig b/arch/m68k/configs/multi_defconfig index 6377afeb522b..be25ef208f0f 100644 --- a/arch/m68k/configs/multi_defconfig +++ b/arch/m68k/configs/multi_defconfig @@ -9,6 +9,7 @@ CONFIG_LOG_BUF_SHIFT=16 # CONFIG_PID_NS is not set # CONFIG_NET_NS is not set CONFIG_BLK_DEV_INITRD=y +CONFIG_CC_OPTIMIZE_FOR_SIZE=y CONFIG_USERFAULTFD=y CONFIG_SLAB=y CONFIG_MODULES=y @@ -390,6 +391,7 @@ CONFIG_MACVTAP=m CONFIG_IPVLAN=m CONFIG_VXLAN=m CONFIG_GENEVE=m +CONFIG_GTP=m CONFIG_MACSEC=m CONFIG_NETCONSOLE=m CONFIG_NETCONSOLE_DYNAMIC=y @@ -616,7 +618,9 @@ CONFIG_TEST_STRING_HELPERS=m CONFIG_TEST_KSTRTOX=m CONFIG_TEST_PRINTF=m CONFIG_TEST_BITMAP=m +CONFIG_TEST_UUID=m CONFIG_TEST_RHASHTABLE=m +CONFIG_TEST_HASH=m CONFIG_TEST_LKM=m CONFIG_TEST_USER_COPY=m CONFIG_TEST_BPF=m diff --git a/arch/m68k/configs/mvme147_defconfig b/arch/m68k/configs/mvme147_defconfig index 4304b3d56262..a008344360c9 100644 --- a/arch/m68k/configs/mvme147_defconfig +++ b/arch/m68k/configs/mvme147_defconfig @@ -9,6 +9,7 @@ CONFIG_LOG_BUF_SHIFT=16 # CONFIG_PID_NS is not set # CONFIG_NET_NS is not set CONFIG_BLK_DEV_INITRD=y +CONFIG_CC_OPTIMIZE_FOR_SIZE=y CONFIG_USERFAULTFD=y CONFIG_SLAB=y CONFIG_MODULES=y @@ -339,6 +340,7 @@ CONFIG_MACVTAP=m CONFIG_IPVLAN=m 
CONFIG_VXLAN=m CONFIG_GENEVE=m +CONFIG_GTP=m CONFIG_MACSEC=m CONFIG_NETCONSOLE=m CONFIG_NETCONSOLE_DYNAMIC=y @@ -504,7 +506,9 @@ CONFIG_TEST_STRING_HELPERS=m CONFIG_TEST_KSTRTOX=m CONFIG_TEST_PRINTF=m CONFIG_TEST_BITMAP=m +CONFIG_TEST_UUID=m CONFIG_TEST_RHASHTABLE=m +CONFIG_TEST_HASH=m CONFIG_TEST_LKM=m CONFIG_TEST_USER_COPY=m CONFIG_TEST_BPF=m diff --git a/arch/m68k/configs/mvme16x_defconfig b/arch/m68k/configs/mvme16x_defconfig index 074bda4094ff..6735a25f36d4 100644 --- a/arch/m68k/configs/mvme16x_defconfig +++ b/arch/m68k/configs/mvme16x_defconfig @@ -9,6 +9,7 @@ CONFIG_LOG_BUF_SHIFT=16 # CONFIG_PID_NS is not set # CONFIG_NET_NS is not set CONFIG_BLK_DEV_INITRD=y +CONFIG_CC_OPTIMIZE_FOR_SIZE=y CONFIG_USERFAULTFD=y CONFIG_SLAB=y CONFIG_MODULES=y @@ -340,6 +341,7 @@ CONFIG_MACVTAP=m CONFIG_IPVLAN=m CONFIG_VXLAN=m CONFIG_GENEVE=m +CONFIG_GTP=m CONFIG_MACSEC=m CONFIG_NETCONSOLE=m CONFIG_NETCONSOLE_DYNAMIC=y @@ -504,7 +506,9 @@ CONFIG_TEST_STRING_HELPERS=m CONFIG_TEST_KSTRTOX=m CONFIG_TEST_PRINTF=m CONFIG_TEST_BITMAP=m +CONFIG_TEST_UUID=m CONFIG_TEST_RHASHTABLE=m +CONFIG_TEST_HASH=m CONFIG_TEST_LKM=m CONFIG_TEST_USER_COPY=m CONFIG_TEST_BPF=m diff --git a/arch/m68k/configs/q40_defconfig b/arch/m68k/configs/q40_defconfig index 07b9fa8d7f2e..780c6e9f6cf9 100644 --- a/arch/m68k/configs/q40_defconfig +++ b/arch/m68k/configs/q40_defconfig @@ -9,6 +9,7 @@ CONFIG_LOG_BUF_SHIFT=16 # CONFIG_PID_NS is not set # CONFIG_NET_NS is not set CONFIG_BLK_DEV_INITRD=y +CONFIG_CC_OPTIMIZE_FOR_SIZE=y CONFIG_USERFAULTFD=y CONFIG_SLAB=y CONFIG_MODULES=y @@ -346,6 +347,7 @@ CONFIG_MACVTAP=m CONFIG_IPVLAN=m CONFIG_VXLAN=m CONFIG_GENEVE=m +CONFIG_GTP=m CONFIG_MACSEC=m CONFIG_NETCONSOLE=m CONFIG_NETCONSOLE_DYNAMIC=y @@ -527,7 +529,9 @@ CONFIG_TEST_STRING_HELPERS=m CONFIG_TEST_KSTRTOX=m CONFIG_TEST_PRINTF=m CONFIG_TEST_BITMAP=m +CONFIG_TEST_UUID=m CONFIG_TEST_RHASHTABLE=m +CONFIG_TEST_HASH=m CONFIG_TEST_LKM=m CONFIG_TEST_USER_COPY=m CONFIG_TEST_BPF=m diff --git a/arch/m68k/configs/sun3_defconfig b/arch/m68k/configs/sun3_defconfig index 36e6fae02d45..44693cf361e5 100644 --- a/arch/m68k/configs/sun3_defconfig +++ b/arch/m68k/configs/sun3_defconfig @@ -9,6 +9,7 @@ CONFIG_LOG_BUF_SHIFT=16 # CONFIG_PID_NS is not set # CONFIG_NET_NS is not set CONFIG_BLK_DEV_INITRD=y +CONFIG_CC_OPTIMIZE_FOR_SIZE=y CONFIG_USERFAULTFD=y CONFIG_SLAB=y CONFIG_MODULES=y @@ -337,6 +338,7 @@ CONFIG_MACVTAP=m CONFIG_IPVLAN=m CONFIG_VXLAN=m CONFIG_GENEVE=m +CONFIG_GTP=m CONFIG_MACSEC=m CONFIG_NETCONSOLE=m CONFIG_NETCONSOLE_DYNAMIC=y @@ -506,7 +508,9 @@ CONFIG_TEST_STRING_HELPERS=m CONFIG_TEST_KSTRTOX=m CONFIG_TEST_PRINTF=m CONFIG_TEST_BITMAP=m +CONFIG_TEST_UUID=m CONFIG_TEST_RHASHTABLE=m +CONFIG_TEST_HASH=m CONFIG_TEST_LKM=m CONFIG_TEST_USER_COPY=m CONFIG_TEST_BPF=m diff --git a/arch/m68k/configs/sun3x_defconfig b/arch/m68k/configs/sun3x_defconfig index 903acf929511..ef0071d61158 100644 --- a/arch/m68k/configs/sun3x_defconfig +++ b/arch/m68k/configs/sun3x_defconfig @@ -9,6 +9,7 @@ CONFIG_LOG_BUF_SHIFT=16 # CONFIG_PID_NS is not set # CONFIG_NET_NS is not set CONFIG_BLK_DEV_INITRD=y +CONFIG_CC_OPTIMIZE_FOR_SIZE=y CONFIG_USERFAULTFD=y CONFIG_SLAB=y CONFIG_MODULES=y @@ -337,6 +338,7 @@ CONFIG_MACVTAP=m CONFIG_IPVLAN=m CONFIG_VXLAN=m CONFIG_GENEVE=m +CONFIG_GTP=m CONFIG_MACSEC=m CONFIG_NETCONSOLE=m CONFIG_NETCONSOLE_DYNAMIC=y @@ -506,7 +508,9 @@ CONFIG_TEST_STRING_HELPERS=m CONFIG_TEST_KSTRTOX=m CONFIG_TEST_PRINTF=m CONFIG_TEST_BITMAP=m +CONFIG_TEST_UUID=m CONFIG_TEST_RHASHTABLE=m +CONFIG_TEST_HASH=m CONFIG_TEST_LKM=m CONFIG_TEST_USER_COPY=m 
CONFIG_TEST_BPF=m -- cgit v1.2.3 From 6f6ef07f412c5bfc37cde57e94b1fec789471907 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Wed, 20 Jul 2016 11:30:34 +0300 Subject: x86/insn: perf tools: Fix vcvtph2ps instruction decoding vcvtph2ps does not have an immediate operand, so remove the erroneous 'Ib' from its opcode map entry. Add vcvtph2ps to the perf tools new instructions test to verify it. Signed-off-by: Adrian Hunter Acked-by: Ingo Molnar Acked-by: Masami Hiramatsu Cc: Andy Lutomirski Cc: Dan Williams Cc: H. Peter Anvin Cc: Jiri Olsa Cc: Thomas Gleixner Cc: X86 ML Link: http://lkml.kernel.org/r/1469003437-32706-2-git-send-email-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- arch/x86/lib/x86-opcode-map.txt | 2 +- tools/perf/arch/x86/tests/insn-x86-dat-32.c | 10 ++++++---- tools/perf/arch/x86/tests/insn-x86-dat-64.c | 10 ++++++---- tools/perf/arch/x86/tests/insn-x86-dat-src.c | 4 ++++ tools/perf/util/intel-pt-decoder/x86-opcode-map.txt | 2 +- 5 files changed, 18 insertions(+), 10 deletions(-) (limited to 'arch') diff --git a/arch/x86/lib/x86-opcode-map.txt b/arch/x86/lib/x86-opcode-map.txt index d388de72eaca..28082de46f0d 100644 --- a/arch/x86/lib/x86-opcode-map.txt +++ b/arch/x86/lib/x86-opcode-map.txt @@ -629,7 +629,7 @@ AVXcode: 2 10: pblendvb Vdq,Wdq (66) 11: 12: -13: vcvtph2ps Vx,Wx,Ib (66),(v) +13: vcvtph2ps Vx,Wx (66),(v) 14: blendvps Vdq,Wdq (66) 15: blendvpd Vdq,Wdq (66) 16: vpermps Vqq,Hqq,Wqq (66),(v) diff --git a/tools/perf/arch/x86/tests/insn-x86-dat-32.c b/tools/perf/arch/x86/tests/insn-x86-dat-32.c index 3b491cfe204e..ca08e6eb988f 100644 --- a/tools/perf/arch/x86/tests/insn-x86-dat-32.c +++ b/tools/perf/arch/x86/tests/insn-x86-dat-32.c @@ -6,6 +6,8 @@ {{0x0f, 0x31, }, 2, 0, "", "", "0f 31 \trdtsc ",}, +{{0xc4, 0xe2, 0x7d, 0x13, 0xeb, }, 5, 0, "", "", +"c4 e2 7d 13 eb \tvcvtph2ps %xmm3,%ymm5",}, {{0xf3, 0x0f, 0x1b, 0x00, }, 4, 0, "", "", "f3 0f 1b 00 \tbndmk (%eax),%bnd0",}, {{0xf3, 0x0f, 0x1b, 0x05, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "", @@ -309,19 +311,19 @@ {{0x0f, 0x1b, 0x84, 0x08, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "", "0f 1b 84 08 78 56 34 12 \tbndstx %bnd0,0x12345678(%eax,%ecx,1)",}, {{0xf2, 0xe8, 0xfc, 0xff, 0xff, 0xff, }, 6, 0xfffffffc, "call", "unconditional", -"f2 e8 fc ff ff ff \tbnd call 3c3 ",}, +"f2 e8 fc ff ff ff \tbnd call 3c8 ",}, {{0xf2, 0xff, 0x10, }, 3, 0, "call", "indirect", "f2 ff 10 \tbnd call *(%eax)",}, {{0xf2, 0xc3, }, 2, 0, "ret", "indirect", "f2 c3 \tbnd ret ",}, {{0xf2, 0xe9, 0xfc, 0xff, 0xff, 0xff, }, 6, 0xfffffffc, "jmp", "unconditional", -"f2 e9 fc ff ff ff \tbnd jmp 3ce ",}, +"f2 e9 fc ff ff ff \tbnd jmp 3d3 ",}, {{0xf2, 0xe9, 0xfc, 0xff, 0xff, 0xff, }, 6, 0xfffffffc, "jmp", "unconditional", -"f2 e9 fc ff ff ff \tbnd jmp 3d4 ",}, +"f2 e9 fc ff ff ff \tbnd jmp 3d9 ",}, {{0xf2, 0xff, 0x21, }, 3, 0, "jmp", "indirect", "f2 ff 21 \tbnd jmp *(%ecx)",}, {{0xf2, 0x0f, 0x85, 0xfc, 0xff, 0xff, 0xff, }, 7, 0xfffffffc, "jcc", "conditional", -"f2 0f 85 fc ff ff ff \tbnd jne 3de ",}, +"f2 0f 85 fc ff ff ff \tbnd jne 3e3 ",}, {{0x0f, 0x3a, 0xcc, 0xc1, 0x00, }, 5, 0, "", "", "0f 3a cc c1 00 \tsha1rnds4 $0x0,%xmm1,%xmm0",}, {{0x0f, 0x3a, 0xcc, 0xd7, 0x91, }, 5, 0, "", "", diff --git a/tools/perf/arch/x86/tests/insn-x86-dat-64.c b/tools/perf/arch/x86/tests/insn-x86-dat-64.c index 4fe7cce179c4..262d9d25a6fa 100644 --- a/tools/perf/arch/x86/tests/insn-x86-dat-64.c +++ b/tools/perf/arch/x86/tests/insn-x86-dat-64.c @@ -6,6 +6,8 @@ {{0x0f, 0x31, }, 2, 0, "", "", "0f 31 \trdtsc ",}, +{{0xc4, 0xe2, 0x7d, 0x13, 0xeb, }, 5, 0, "", "", +"c4 
e2 7d 13 eb \tvcvtph2ps %xmm3,%ymm5",}, {{0xf3, 0x0f, 0x1b, 0x00, }, 4, 0, "", "", "f3 0f 1b 00 \tbndmk (%rax),%bnd0",}, {{0xf3, 0x41, 0x0f, 0x1b, 0x00, }, 5, 0, "", "", @@ -325,19 +327,19 @@ {{0x0f, 0x1b, 0x84, 0x08, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "", "0f 1b 84 08 78 56 34 12 \tbndstx %bnd0,0x12345678(%rax,%rcx,1)",}, {{0xf2, 0xe8, 0x00, 0x00, 0x00, 0x00, }, 6, 0, "call", "unconditional", -"f2 e8 00 00 00 00 \tbnd callq 3f6 ",}, +"f2 e8 00 00 00 00 \tbnd callq 3fb ",}, {{0x67, 0xf2, 0xff, 0x10, }, 4, 0, "call", "indirect", "67 f2 ff 10 \tbnd callq *(%eax)",}, {{0xf2, 0xc3, }, 2, 0, "ret", "indirect", "f2 c3 \tbnd retq ",}, {{0xf2, 0xe9, 0x00, 0x00, 0x00, 0x00, }, 6, 0, "jmp", "unconditional", -"f2 e9 00 00 00 00 \tbnd jmpq 402 ",}, +"f2 e9 00 00 00 00 \tbnd jmpq 407 ",}, {{0xf2, 0xe9, 0x00, 0x00, 0x00, 0x00, }, 6, 0, "jmp", "unconditional", -"f2 e9 00 00 00 00 \tbnd jmpq 408 ",}, +"f2 e9 00 00 00 00 \tbnd jmpq 40d ",}, {{0x67, 0xf2, 0xff, 0x21, }, 4, 0, "jmp", "indirect", "67 f2 ff 21 \tbnd jmpq *(%ecx)",}, {{0xf2, 0x0f, 0x85, 0x00, 0x00, 0x00, 0x00, }, 7, 0, "jcc", "conditional", -"f2 0f 85 00 00 00 00 \tbnd jne 413 ",}, +"f2 0f 85 00 00 00 00 \tbnd jne 418 ",}, {{0x0f, 0x3a, 0xcc, 0xc1, 0x00, }, 5, 0, "", "", "0f 3a cc c1 00 \tsha1rnds4 $0x0,%xmm1,%xmm0",}, {{0x0f, 0x3a, 0xcc, 0xd7, 0x91, }, 5, 0, "", "", diff --git a/tools/perf/arch/x86/tests/insn-x86-dat-src.c b/tools/perf/arch/x86/tests/insn-x86-dat-src.c index 41b1b1c62660..3cd677513e9e 100644 --- a/tools/perf/arch/x86/tests/insn-x86-dat-src.c +++ b/tools/perf/arch/x86/tests/insn-x86-dat-src.c @@ -19,6 +19,10 @@ int main(void) /* Following line is a marker for the awk script - do not change */ asm volatile("rdtsc"); /* Start here */ + /* Test fix for vcvtph2ps in x86-opcode-map.txt */ + + asm volatile("vcvtph2ps %xmm3,%ymm5"); + #ifdef __x86_64__ /* bndmk m64, bnd */ diff --git a/tools/perf/util/intel-pt-decoder/x86-opcode-map.txt b/tools/perf/util/intel-pt-decoder/x86-opcode-map.txt index d388de72eaca..28082de46f0d 100644 --- a/tools/perf/util/intel-pt-decoder/x86-opcode-map.txt +++ b/tools/perf/util/intel-pt-decoder/x86-opcode-map.txt @@ -629,7 +629,7 @@ AVXcode: 2 10: pblendvb Vdq,Wdq (66) 11: 12: -13: vcvtph2ps Vx,Wx,Ib (66),(v) +13: vcvtph2ps Vx,Wx (66),(v) 14: blendvps Vdq,Wdq (66) 15: blendvpd Vdq,Wdq (66) 16: vpermps Vqq,Hqq,Wqq (66),(v) -- cgit v1.2.3 From edce21216a8887bf06ba85ee49a00695e44c4341 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 21 Jul 2016 09:53:52 +0200 Subject: x86/boot: Reorganize and clean up the BIOS area reservation code So the reserve_ebda_region() code has accumulated a number of problems over the years that make it really difficult to read and understand: - The calculation of 'lowmem' and 'ebda_addr' is an unnecessarily interleaved mess of first lowmem, then ebda_addr, then lowmem tweaks... - 'lowmem' here means 'super low mem' - i.e. 16-bit addressable memory. In other parts of the x86 code 'lowmem' means 32-bit addressable memory... This makes it super confusing to read. - It does not help at all that we have various memory range markers, half of which are 'start of range', half of which are 'end of range' - but this crucial property is not obvious in the naming at all ... gave me a headache trying to understand all this. - Also, the 'ebda_addr' name sucks: it highlights that it's an address (which is obvious, all values here are addresses!), while it does not highlight that it's the _start_ of the EBDA region ... 
- 'BIOS_LOWMEM_KILOBYTES' says a lot of things, except that this is the only value that is a pointer to a value, not a memory range address! - The function name itself is a misnomer: it says 'reserve_ebda_region()' while its main purpose is to reserve all the firmware ROM typically between 640K and 1MB, while the 'EBDA' part is only a small part of that ... - Likewise, the paravirt quirk flag name 'ebda_search' is misleading as well: this too should be about whether to reserve firmware areas in the paravirt case. - In fact thinking about this as 'end of RAM' is confusing: what this function *really* wants to reserve is firmware data and code areas! Once the thinking is inverted from a mixed 'ram' and 'reserved firmware area' notion to a pure 'reserved area' notion everything becomes a lot clearer. To improve all this rewrite the whole code (without changing the logic): - Firstly invert the naming from 'lowmem end' to 'BIOS reserved area start' and propagate this concept through all the variable names and constants. BIOS_RAM_SIZE_KB_PTR // was: BIOS_LOWMEM_KILOBYTES BIOS_START_MIN // was: INSANE_CUTOFF ebda_start // was: ebda_addr bios_start // was: lowmem BIOS_START_MAX // was: LOWMEM_CAP - Then clean up the name of the function itself by renaming it to reserve_bios_regions() and renaming the ::ebda_search paravirt flag to ::reserve_bios_regions. - Fix up all the comments (fix typos), harmonize and simplify their formulation and remove comments that become unnecessary due to the much better naming all around. Signed-off-by: Ingo Molnar --- arch/x86/include/asm/bios_ebda.h | 2 +- arch/x86/include/asm/x86_init.h | 4 +- arch/x86/kernel/ebda.c | 124 +++++++++++++++++++++++++------------- arch/x86/kernel/head32.c | 2 +- arch/x86/kernel/head64.c | 2 +- arch/x86/kernel/platform-quirks.c | 4 +- 6 files changed, 88 insertions(+), 50 deletions(-) (limited to 'arch') diff --git a/arch/x86/include/asm/bios_ebda.h b/arch/x86/include/asm/bios_ebda.h index 2b00c776f223..4b7b8e71607e 100644 --- a/arch/x86/include/asm/bios_ebda.h +++ b/arch/x86/include/asm/bios_ebda.h @@ -17,7 +17,7 @@ static inline unsigned int get_bios_ebda(void) return address; /* 0 means none */ } -void reserve_ebda_region(void); +void reserve_bios_regions(void); #ifdef CONFIG_X86_CHECK_BIOS_CORRUPTION /* diff --git a/arch/x86/include/asm/x86_init.h b/arch/x86/include/asm/x86_init.h index 4dcdf74dfed8..c519c052700a 100644 --- a/arch/x86/include/asm/x86_init.h +++ b/arch/x86/include/asm/x86_init.h @@ -168,14 +168,14 @@ struct x86_legacy_devices { * struct x86_legacy_features - legacy x86 features * * @rtc: this device has a CMOS real-time clock present - * @ebda_search: it's safe to search for the EBDA signature in the hardware's + * @reserve_bios_regions: it's safe to search for the EBDA signature in the hardware's * low RAM * @devices: legacy x86 devices, refer to struct x86_legacy_devices * documentation for further details. */ struct x86_legacy_features { int rtc; - int ebda_search; + int reserve_bios_regions; struct x86_legacy_devices devices; }; diff --git a/arch/x86/kernel/ebda.c b/arch/x86/kernel/ebda.c index afe65dffee80..6219eef20e2e 100644 --- a/arch/x86/kernel/ebda.c +++ b/arch/x86/kernel/ebda.c @@ -6,66 +6,104 @@ #include /* + * This function reserves all conventional PC system BIOS related + * firmware memory areas (some of which are data, some of which + * are code), that must not be used by the kernel as available + * RAM. 
+ * * The BIOS places the EBDA/XBDA at the top of conventional * memory, and usually decreases the reported amount of - * conventional memory (int 0x12) too. This also contains a - * workaround for Dell systems that neglect to reserve EBDA. - * The same workaround also avoids a problem with the AMD768MPX - * chipset: reserve a page before VGA to prevent PCI prefetch - * into it (errata #56). Usually the page is reserved anyways, - * unless you have no PS/2 mouse plugged in. + * conventional memory (int 0x12) too. + * + * This means that as a first approximation on most systems we can + * guess the reserved BIOS area by looking at the low BIOS RAM size + * value and assume that everything above that value (up to 1MB) is + * reserved. + * + * But life in firmware country is not that simple: + * + * - This code also contains a quirk for Dell systems that neglect + * to reserve the EBDA area in the 'RAM size' value ... + * + * - The same quirk also avoids a problem with the AMD768MPX + * chipset: reserve a page before VGA to prevent PCI prefetch + * into it (errata #56). (Usually the page is reserved anyways, + * unless you have no PS/2 mouse plugged in.) + * + * - Plus paravirt systems don't have a reliable value in the + * 'BIOS RAM size' pointer we can rely on, so we must quirk + * them too. + * + * Due to those various problems this function is deliberately + * very conservative and tries to err on the side of reserving + * too much, to not risk reserving too little. + * + * Losing a small amount of memory in the bottom megabyte is + * rarely a problem, as long as we have enough memory to install + * the SMP bootup trampoline which *must* be in this area. * - * This functions is deliberately very conservative. Losing - * memory in the bottom megabyte is rarely a problem, as long - * as we have enough memory to install the trampoline. Using - * memory that is in use by the BIOS or by some DMA device - * the BIOS didn't shut down *is* a big problem. + * Using memory that is in use by the BIOS or by some DMA device + * the BIOS didn't shut down *is* a big problem to the kernel, + * obviously. */ -#define BIOS_LOWMEM_KILOBYTES 0x413 -#define LOWMEM_CAP 0x9f000U /* Absolute maximum */ -#define INSANE_CUTOFF 0x20000U /* Less than this = insane */ +#define BIOS_RAM_SIZE_KB_PTR 0x413 -void __init reserve_ebda_region(void) +#define BIOS_START_MIN 0x20000U /* 128K, less than this is insane */ +#define BIOS_START_MAX 0x9f000U /* 640K, absolute maximum */ + +void __init reserve_bios_regions(void) { - unsigned int lowmem, ebda_addr; + unsigned int bios_start, ebda_start; /* - * To determine the position of the EBDA and the - * end of conventional memory, we need to look at - * the BIOS data area. In a paravirtual environment - * that area is absent. We'll just have to assume - * that the paravirt case can handle memory setup - * correctly, without our help. + * NOTE: In a paravirtual environment the BIOS reserved + * area is absent. We'll just have to assume that the + * paravirt case can handle memory setup correctly, + * without our help. 
*/ - if (!x86_platform.legacy.ebda_search) + if (!x86_platform.legacy.reserve_bios_regions) return; - /* end of low (conventional) memory */ - lowmem = *(unsigned short *)__va(BIOS_LOWMEM_KILOBYTES); - lowmem <<= 10; - - /* start of EBDA area */ - ebda_addr = get_bios_ebda(); + /* Get the start address of the EBDA page: */ + ebda_start = get_bios_ebda(); /* - * Note: some old Dells seem to need 4k EBDA without - * reporting so, so just consider the memory above 0x9f000 - * to be off limits (bugzilla 2990). + * Quirk: some old Dells seem to have a 4k EBDA without + * reporting so in their BIOS RAM size value, so just + * consider the memory above 640K to be off limits + * (bugzilla 2990). + * + * We detect this case by filtering for nonsensical EBDA + * addresses below 128K, where we can assume that they + * are bogus and bump it up to a fixed 640K value: */ + if (ebda_start < BIOS_START_MIN) + ebda_start = BIOS_START_MAX; - /* If the EBDA address is below 128K, assume it is bogus */ - if (ebda_addr < INSANE_CUTOFF) - ebda_addr = LOWMEM_CAP; + /* + * BIOS RAM size is encoded in kilobytes, convert it + * to bytes to get a first guess at where the BIOS + * firmware area starts: + */ + bios_start = *(unsigned short *)__va(BIOS_RAM_SIZE_KB_PTR); + bios_start <<= 10; - /* If lowmem is less than 128K, assume it is bogus */ - if (lowmem < INSANE_CUTOFF) - lowmem = LOWMEM_CAP; + /* + * If bios_start is less than 128K, assume it is bogus + * and bump it up to 640K: + */ + if (bios_start < BIOS_START_MIN) + bios_start = BIOS_START_MAX; - /* Use the lower of the lowmem and EBDA markers as the cutoff */ - lowmem = min(lowmem, ebda_addr); - lowmem = min(lowmem, LOWMEM_CAP); /* Absolute cap */ + /* + * Use the lower of the bios_start and ebda_start + * as the starting point, but don't allow it to + * go beyond 640K: + */ + bios_start = min(bios_start, ebda_start); + bios_start = min(bios_start, BIOS_START_MAX); - /* reserve all memory between lowmem and the 1MB mark */ - memblock_reserve(lowmem, 0x100000 - lowmem); + /* Reserve all memory between bios_start and the 1MB mark: */ + memblock_reserve(bios_start, 0x100000 - bios_start); } diff --git a/arch/x86/kernel/head32.c b/arch/x86/kernel/head32.c index d784bb547a9d..2dda0bc4576e 100644 --- a/arch/x86/kernel/head32.c +++ b/arch/x86/kernel/head32.c @@ -26,7 +26,7 @@ static void __init i386_default_early_setup(void) x86_init.resources.reserve_resources = i386_reserve_resources; x86_init.mpparse.setup_ioapic_ids = setup_ioapic_ids_from_mpc; - reserve_ebda_region(); + reserve_bios_regions(); } asmlinkage __visible void __init i386_start_kernel(void) diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c index b72fb0b71dd1..99d48e7d2974 100644 --- a/arch/x86/kernel/head64.c +++ b/arch/x86/kernel/head64.c @@ -183,7 +183,7 @@ void __init x86_64_start_reservations(char *real_mode_data) copy_bootdata(__va(real_mode_data)); x86_early_init_platform_quirks(); - reserve_ebda_region(); + reserve_bios_regions(); switch (boot_params.hdr.hardware_subarch) { case X86_SUBARCH_INTEL_MID: diff --git a/arch/x86/kernel/platform-quirks.c b/arch/x86/kernel/platform-quirks.c index b2f8a33b36ff..24a50301f150 100644 --- a/arch/x86/kernel/platform-quirks.c +++ b/arch/x86/kernel/platform-quirks.c @@ -7,12 +7,12 @@ void __init x86_early_init_platform_quirks(void) { x86_platform.legacy.rtc = 1; - x86_platform.legacy.ebda_search = 0; + x86_platform.legacy.reserve_bios_regions = 0; x86_platform.legacy.devices.pnpbios = 1; switch (boot_params.hdr.hardware_subarch) { case 
X86_SUBARCH_PC: - x86_platform.legacy.ebda_search = 1; + x86_platform.legacy.reserve_bios_regions = 1; break; case X86_SUBARCH_XEN: case X86_SUBARCH_LGUEST: -- cgit v1.2.3 From 25af37f4e1e0a747824e3713b80d6b97dad28b7c Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Wed, 20 Jul 2016 11:30:35 +0300 Subject: x86/insn: Add AVX-512 support to the instruction decoder Add support for Intel's AVX-512 instructions to the instruction decoder. AVX-512 instructions are documented in Intel Architecture Instruction Set Extensions Programming Reference (February 2016). AVX-512 instructions are identified by a EVEX prefix which, for the purpose of instruction decoding, can be treated as though it were a 4-byte VEX prefix. Existing instructions which can now accept an EVEX prefix need not be further annotated in the op code map (x86-opcode-map.txt). In the case of new instructions, the op code map is updated accordingly. Also add associated Mask Instructions that are used to manipulate mask registers used in AVX-512 instructions. The 'perf tools' instruction decoder is updated in a subsequent patch. And a representative set of instructions is added to the perf tools new instructions test in a subsequent patch. Signed-off-by: Adrian Hunter Acked-by: Ingo Molnar Acked-by: Masami Hiramatsu Cc: Andy Lutomirski Cc: Dan Williams Cc: H. Peter Anvin Cc: Jiri Olsa Cc: Thomas Gleixner Cc: X86 ML Link: http://lkml.kernel.org/r/1469003437-32706-3-git-send-email-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- arch/x86/include/asm/inat.h | 17 ++- arch/x86/include/asm/insn.h | 12 +- arch/x86/lib/insn.c | 18 ++- arch/x86/lib/x86-opcode-map.txt | 263 +++++++++++++++++++++++------------ arch/x86/tools/gen-insn-attr-x86.awk | 11 +- 5 files changed, 220 insertions(+), 101 deletions(-) (limited to 'arch') diff --git a/arch/x86/include/asm/inat.h b/arch/x86/include/asm/inat.h index 74a2e312e8a2..02aff0867211 100644 --- a/arch/x86/include/asm/inat.h +++ b/arch/x86/include/asm/inat.h @@ -48,6 +48,7 @@ /* AVX VEX prefixes */ #define INAT_PFX_VEX2 13 /* 2-bytes VEX prefix */ #define INAT_PFX_VEX3 14 /* 3-bytes VEX prefix */ +#define INAT_PFX_EVEX 15 /* EVEX prefix */ #define INAT_LSTPFX_MAX 3 #define INAT_LGCPFX_MAX 11 @@ -89,6 +90,7 @@ #define INAT_VARIANT (1 << (INAT_FLAG_OFFS + 4)) #define INAT_VEXOK (1 << (INAT_FLAG_OFFS + 5)) #define INAT_VEXONLY (1 << (INAT_FLAG_OFFS + 6)) +#define INAT_EVEXONLY (1 << (INAT_FLAG_OFFS + 7)) /* Attribute making macros for attribute tables */ #define INAT_MAKE_PREFIX(pfx) (pfx << INAT_PFX_OFFS) #define INAT_MAKE_ESCAPE(esc) (esc << INAT_ESC_OFFS) @@ -141,7 +143,13 @@ static inline int inat_last_prefix_id(insn_attr_t attr) static inline int inat_is_vex_prefix(insn_attr_t attr) { attr &= INAT_PFX_MASK; - return attr == INAT_PFX_VEX2 || attr == INAT_PFX_VEX3; + return attr == INAT_PFX_VEX2 || attr == INAT_PFX_VEX3 || + attr == INAT_PFX_EVEX; +} + +static inline int inat_is_evex_prefix(insn_attr_t attr) +{ + return (attr & INAT_PFX_MASK) == INAT_PFX_EVEX; } static inline int inat_is_vex3_prefix(insn_attr_t attr) @@ -216,6 +224,11 @@ static inline int inat_accept_vex(insn_attr_t attr) static inline int inat_must_vex(insn_attr_t attr) { - return attr & INAT_VEXONLY; + return attr & (INAT_VEXONLY | INAT_EVEXONLY); +} + +static inline int inat_must_evex(insn_attr_t attr) +{ + return attr & INAT_EVEXONLY; } #endif diff --git a/arch/x86/include/asm/insn.h b/arch/x86/include/asm/insn.h index e7814b74caf8..b3e32b010ab1 100644 --- a/arch/x86/include/asm/insn.h +++ 
b/arch/x86/include/asm/insn.h @@ -91,6 +91,7 @@ struct insn { #define X86_VEX_B(vex) ((vex) & 0x20) /* VEX3 Byte1 */ #define X86_VEX_L(vex) ((vex) & 0x04) /* VEX3 Byte2, VEX2 Byte1 */ /* VEX bit fields */ +#define X86_EVEX_M(vex) ((vex) & 0x03) /* EVEX Byte1 */ #define X86_VEX3_M(vex) ((vex) & 0x1f) /* VEX3 Byte1 */ #define X86_VEX2_M 1 /* VEX2.M always 1 */ #define X86_VEX_V(vex) (((vex) & 0x78) >> 3) /* VEX3 Byte2, VEX2 Byte1 */ @@ -133,6 +134,13 @@ static inline int insn_is_avx(struct insn *insn) return (insn->vex_prefix.value != 0); } +static inline int insn_is_evex(struct insn *insn) +{ + if (!insn->prefixes.got) + insn_get_prefixes(insn); + return (insn->vex_prefix.nbytes == 4); +} + /* Ensure this instruction is decoded completely */ static inline int insn_complete(struct insn *insn) { @@ -144,8 +152,10 @@ static inline insn_byte_t insn_vex_m_bits(struct insn *insn) { if (insn->vex_prefix.nbytes == 2) /* 2 bytes VEX */ return X86_VEX2_M; - else + else if (insn->vex_prefix.nbytes == 3) /* 3 bytes VEX */ return X86_VEX3_M(insn->vex_prefix.bytes[1]); + else /* EVEX */ + return X86_EVEX_M(insn->vex_prefix.bytes[1]); } static inline insn_byte_t insn_vex_p_bits(struct insn *insn) diff --git a/arch/x86/lib/insn.c b/arch/x86/lib/insn.c index 1a416935bac9..1088eb8f3a5f 100644 --- a/arch/x86/lib/insn.c +++ b/arch/x86/lib/insn.c @@ -155,14 +155,24 @@ found: /* * In 32-bits mode, if the [7:6] bits (mod bits of * ModRM) on the second byte are not 11b, it is - * LDS or LES. + * LDS or LES or BOUND. */ if (X86_MODRM_MOD(b2) != 3) goto vex_end; } insn->vex_prefix.bytes[0] = b; insn->vex_prefix.bytes[1] = b2; - if (inat_is_vex3_prefix(attr)) { + if (inat_is_evex_prefix(attr)) { + b2 = peek_nbyte_next(insn_byte_t, insn, 2); + insn->vex_prefix.bytes[2] = b2; + b2 = peek_nbyte_next(insn_byte_t, insn, 3); + insn->vex_prefix.bytes[3] = b2; + insn->vex_prefix.nbytes = 4; + insn->next_byte += 4; + if (insn->x86_64 && X86_VEX_W(b2)) + /* VEX.W overrides opnd_size */ + insn->opnd_bytes = 8; + } else if (inat_is_vex3_prefix(attr)) { b2 = peek_nbyte_next(insn_byte_t, insn, 2); insn->vex_prefix.bytes[2] = b2; insn->vex_prefix.nbytes = 3; @@ -221,7 +231,9 @@ void insn_get_opcode(struct insn *insn) m = insn_vex_m_bits(insn); p = insn_vex_p_bits(insn); insn->attr = inat_get_avx_attribute(op, m, p); - if (!inat_accept_vex(insn->attr) && !inat_is_group(insn->attr)) + if ((inat_must_evex(insn->attr) && !insn_is_evex(insn)) || + (!inat_accept_vex(insn->attr) && + !inat_is_group(insn->attr))) insn->attr = 0; /* This instruction is bad */ goto end; /* VEX has only 1 byte for opcode */ } diff --git a/arch/x86/lib/x86-opcode-map.txt b/arch/x86/lib/x86-opcode-map.txt index 28082de46f0d..ec378cd7b71e 100644 --- a/arch/x86/lib/x86-opcode-map.txt +++ b/arch/x86/lib/x86-opcode-map.txt @@ -13,12 +13,17 @@ # opcode: escape # escaped-name # EndTable # +# mnemonics that begin with lowercase 'v' accept a VEX or EVEX prefix +# mnemonics that begin with lowercase 'k' accept a VEX prefix +# # # GrpTable: GrpXXX # reg: mnemonic [operand1[,operand2...]] [(extra1)[,(extra2)...] [| 2nd-mnemonic ...] # EndTable # # AVX Superscripts +# (ev): this opcode requires EVEX prefix. +# (evo): this opcode is changed by EVEX prefix (EVEX opcode) # (v): this opcode requires VEX prefix. # (v1): this opcode only supports 128bit VEX. 
# @@ -137,7 +142,7 @@ AVXcode: # 0x60 - 0x6f 60: PUSHA/PUSHAD (i64) 61: POPA/POPAD (i64) -62: BOUND Gv,Ma (i64) +62: BOUND Gv,Ma (i64) | EVEX (Prefix) 63: ARPL Ew,Gw (i64) | MOVSXD Gv,Ev (o64) 64: SEG=FS (Prefix) 65: SEG=GS (Prefix) @@ -399,17 +404,17 @@ AVXcode: 1 3f: # 0x0f 0x40-0x4f 40: CMOVO Gv,Ev -41: CMOVNO Gv,Ev -42: CMOVB/C/NAE Gv,Ev +41: CMOVNO Gv,Ev | kandw/q Vk,Hk,Uk | kandb/d Vk,Hk,Uk (66) +42: CMOVB/C/NAE Gv,Ev | kandnw/q Vk,Hk,Uk | kandnb/d Vk,Hk,Uk (66) 43: CMOVAE/NB/NC Gv,Ev -44: CMOVE/Z Gv,Ev -45: CMOVNE/NZ Gv,Ev -46: CMOVBE/NA Gv,Ev -47: CMOVA/NBE Gv,Ev +44: CMOVE/Z Gv,Ev | knotw/q Vk,Uk | knotb/d Vk,Uk (66) +45: CMOVNE/NZ Gv,Ev | korw/q Vk,Hk,Uk | korb/d Vk,Hk,Uk (66) +46: CMOVBE/NA Gv,Ev | kxnorw/q Vk,Hk,Uk | kxnorb/d Vk,Hk,Uk (66) +47: CMOVA/NBE Gv,Ev | kxorw/q Vk,Hk,Uk | kxorb/d Vk,Hk,Uk (66) 48: CMOVS Gv,Ev 49: CMOVNS Gv,Ev -4a: CMOVP/PE Gv,Ev -4b: CMOVNP/PO Gv,Ev +4a: CMOVP/PE Gv,Ev | kaddw/q Vk,Hk,Uk | kaddb/d Vk,Hk,Uk (66) +4b: CMOVNP/PO Gv,Ev | kunpckbw Vk,Hk,Uk (66) | kunpckwd/dq Vk,Hk,Uk 4c: CMOVL/NGE Gv,Ev 4d: CMOVNL/GE Gv,Ev 4e: CMOVLE/NG Gv,Ev @@ -426,7 +431,7 @@ AVXcode: 1 58: vaddps Vps,Hps,Wps | vaddpd Vpd,Hpd,Wpd (66) | vaddss Vss,Hss,Wss (F3),(v1) | vaddsd Vsd,Hsd,Wsd (F2),(v1) 59: vmulps Vps,Hps,Wps | vmulpd Vpd,Hpd,Wpd (66) | vmulss Vss,Hss,Wss (F3),(v1) | vmulsd Vsd,Hsd,Wsd (F2),(v1) 5a: vcvtps2pd Vpd,Wps | vcvtpd2ps Vps,Wpd (66) | vcvtss2sd Vsd,Hx,Wss (F3),(v1) | vcvtsd2ss Vss,Hx,Wsd (F2),(v1) -5b: vcvtdq2ps Vps,Wdq | vcvtps2dq Vdq,Wps (66) | vcvttps2dq Vdq,Wps (F3) +5b: vcvtdq2ps Vps,Wdq | vcvtqq2ps Vps,Wqq (evo) | vcvtps2dq Vdq,Wps (66) | vcvttps2dq Vdq,Wps (F3) 5c: vsubps Vps,Hps,Wps | vsubpd Vpd,Hpd,Wpd (66) | vsubss Vss,Hss,Wss (F3),(v1) | vsubsd Vsd,Hsd,Wsd (F2),(v1) 5d: vminps Vps,Hps,Wps | vminpd Vpd,Hpd,Wpd (66) | vminss Vss,Hss,Wss (F3),(v1) | vminsd Vsd,Hsd,Wsd (F2),(v1) 5e: vdivps Vps,Hps,Wps | vdivpd Vpd,Hpd,Wpd (66) | vdivss Vss,Hss,Wss (F3),(v1) | vdivsd Vsd,Hsd,Wsd (F2),(v1) @@ -447,7 +452,7 @@ AVXcode: 1 6c: vpunpcklqdq Vx,Hx,Wx (66),(v1) 6d: vpunpckhqdq Vx,Hx,Wx (66),(v1) 6e: movd/q Pd,Ey | vmovd/q Vy,Ey (66),(v1) -6f: movq Pq,Qq | vmovdqa Vx,Wx (66) | vmovdqu Vx,Wx (F3) +6f: movq Pq,Qq | vmovdqa Vx,Wx (66) | vmovdqa32/64 Vx,Wx (66),(evo) | vmovdqu Vx,Wx (F3) | vmovdqu32/64 Vx,Wx (F3),(evo) | vmovdqu8/16 Vx,Wx (F2),(ev) # 0x0f 0x70-0x7f 70: pshufw Pq,Qq,Ib | vpshufd Vx,Wx,Ib (66),(v1) | vpshufhw Vx,Wx,Ib (F3),(v1) | vpshuflw Vx,Wx,Ib (F2),(v1) 71: Grp12 (1A) @@ -458,14 +463,14 @@ AVXcode: 1 76: pcmpeqd Pq,Qq | vpcmpeqd Vx,Hx,Wx (66),(v1) # Note: Remove (v), because vzeroall and vzeroupper becomes emms without VEX. 
77: emms | vzeroupper | vzeroall -78: VMREAD Ey,Gy -79: VMWRITE Gy,Ey -7a: -7b: +78: VMREAD Ey,Gy | vcvttps2udq/pd2udq Vx,Wpd (evo) | vcvttsd2usi Gv,Wx (F2),(ev) | vcvttss2usi Gv,Wx (F3),(ev) | vcvttps2uqq/pd2uqq Vx,Wx (66),(ev) +79: VMWRITE Gy,Ey | vcvtps2udq/pd2udq Vx,Wpd (evo) | vcvtsd2usi Gv,Wx (F2),(ev) | vcvtss2usi Gv,Wx (F3),(ev) | vcvtps2uqq/pd2uqq Vx,Wx (66),(ev) +7a: vcvtudq2pd/uqq2pd Vpd,Wx (F3),(ev) | vcvtudq2ps/uqq2ps Vpd,Wx (F2),(ev) | vcvttps2qq/pd2qq Vx,Wx (66),(ev) +7b: vcvtusi2sd Vpd,Hpd,Ev (F2),(ev) | vcvtusi2ss Vps,Hps,Ev (F3),(ev) | vcvtps2qq/pd2qq Vx,Wx (66),(ev) 7c: vhaddpd Vpd,Hpd,Wpd (66) | vhaddps Vps,Hps,Wps (F2) 7d: vhsubpd Vpd,Hpd,Wpd (66) | vhsubps Vps,Hps,Wps (F2) 7e: movd/q Ey,Pd | vmovd/q Ey,Vy (66),(v1) | vmovq Vq,Wq (F3),(v1) -7f: movq Qq,Pq | vmovdqa Wx,Vx (66) | vmovdqu Wx,Vx (F3) +7f: movq Qq,Pq | vmovdqa Wx,Vx (66) | vmovdqa32/64 Wx,Vx (66),(evo) | vmovdqu Wx,Vx (F3) | vmovdqu32/64 Wx,Vx (F3),(evo) | vmovdqu8/16 Wx,Vx (F2),(ev) # 0x0f 0x80-0x8f # Note: "forced64" is Intel CPU behavior (see comment about CALL insn). 80: JO Jz (f64) @@ -485,16 +490,16 @@ AVXcode: 1 8e: JLE/JNG Jz (f64) 8f: JNLE/JG Jz (f64) # 0x0f 0x90-0x9f -90: SETO Eb -91: SETNO Eb -92: SETB/C/NAE Eb -93: SETAE/NB/NC Eb +90: SETO Eb | kmovw/q Vk,Wk | kmovb/d Vk,Wk (66) +91: SETNO Eb | kmovw/q Mv,Vk | kmovb/d Mv,Vk (66) +92: SETB/C/NAE Eb | kmovw Vk,Rv | kmovb Vk,Rv (66) | kmovq/d Vk,Rv (F2) +93: SETAE/NB/NC Eb | kmovw Gv,Uk | kmovb Gv,Uk (66) | kmovq/d Gv,Uk (F2) 94: SETE/Z Eb 95: SETNE/NZ Eb 96: SETBE/NA Eb 97: SETA/NBE Eb -98: SETS Eb -99: SETNS Eb +98: SETS Eb | kortestw/q Vk,Uk | kortestb/d Vk,Uk (66) +99: SETNS Eb | ktestw/q Vk,Uk | ktestb/d Vk,Uk (66) 9a: SETP/PE Eb 9b: SETNP/PO Eb 9c: SETL/NGE Eb @@ -564,11 +569,11 @@ d7: pmovmskb Gd,Nq | vpmovmskb Gd,Ux (66),(v1) d8: psubusb Pq,Qq | vpsubusb Vx,Hx,Wx (66),(v1) d9: psubusw Pq,Qq | vpsubusw Vx,Hx,Wx (66),(v1) da: pminub Pq,Qq | vpminub Vx,Hx,Wx (66),(v1) -db: pand Pq,Qq | vpand Vx,Hx,Wx (66),(v1) +db: pand Pq,Qq | vpand Vx,Hx,Wx (66),(v1) | vpandd/q Vx,Hx,Wx (66),(evo) dc: paddusb Pq,Qq | vpaddusb Vx,Hx,Wx (66),(v1) dd: paddusw Pq,Qq | vpaddusw Vx,Hx,Wx (66),(v1) de: pmaxub Pq,Qq | vpmaxub Vx,Hx,Wx (66),(v1) -df: pandn Pq,Qq | vpandn Vx,Hx,Wx (66),(v1) +df: pandn Pq,Qq | vpandn Vx,Hx,Wx (66),(v1) | vpandnd/q Vx,Hx,Wx (66),(evo) # 0x0f 0xe0-0xef e0: pavgb Pq,Qq | vpavgb Vx,Hx,Wx (66),(v1) e1: psraw Pq,Qq | vpsraw Vx,Hx,Wx (66),(v1) @@ -576,16 +581,16 @@ e2: psrad Pq,Qq | vpsrad Vx,Hx,Wx (66),(v1) e3: pavgw Pq,Qq | vpavgw Vx,Hx,Wx (66),(v1) e4: pmulhuw Pq,Qq | vpmulhuw Vx,Hx,Wx (66),(v1) e5: pmulhw Pq,Qq | vpmulhw Vx,Hx,Wx (66),(v1) -e6: vcvttpd2dq Vx,Wpd (66) | vcvtdq2pd Vx,Wdq (F3) | vcvtpd2dq Vx,Wpd (F2) +e6: vcvttpd2dq Vx,Wpd (66) | vcvtdq2pd Vx,Wdq (F3) | vcvtdq2pd/qq2pd Vx,Wdq (F3),(evo) | vcvtpd2dq Vx,Wpd (F2) e7: movntq Mq,Pq | vmovntdq Mx,Vx (66) e8: psubsb Pq,Qq | vpsubsb Vx,Hx,Wx (66),(v1) e9: psubsw Pq,Qq | vpsubsw Vx,Hx,Wx (66),(v1) ea: pminsw Pq,Qq | vpminsw Vx,Hx,Wx (66),(v1) -eb: por Pq,Qq | vpor Vx,Hx,Wx (66),(v1) +eb: por Pq,Qq | vpor Vx,Hx,Wx (66),(v1) | vpord/q Vx,Hx,Wx (66),(evo) ec: paddsb Pq,Qq | vpaddsb Vx,Hx,Wx (66),(v1) ed: paddsw Pq,Qq | vpaddsw Vx,Hx,Wx (66),(v1) ee: pmaxsw Pq,Qq | vpmaxsw Vx,Hx,Wx (66),(v1) -ef: pxor Pq,Qq | vpxor Vx,Hx,Wx (66),(v1) +ef: pxor Pq,Qq | vpxor Vx,Hx,Wx (66),(v1) | vpxord/q Vx,Hx,Wx (66),(evo) # 0x0f 0xf0-0xff f0: vlddqu Vx,Mx (F2) f1: psllw Pq,Qq | vpsllw Vx,Hx,Wx (66),(v1) @@ -626,81 +631,105 @@ AVXcode: 2 0e: vtestps Vx,Wx (66),(v) 0f: vtestpd Vx,Wx (66),(v) # 0x0f 0x38 
0x10-0x1f -10: pblendvb Vdq,Wdq (66) -11: -12: -13: vcvtph2ps Vx,Wx (66),(v) -14: blendvps Vdq,Wdq (66) -15: blendvpd Vdq,Wdq (66) -16: vpermps Vqq,Hqq,Wqq (66),(v) +10: pblendvb Vdq,Wdq (66) | vpsrlvw Vx,Hx,Wx (66),(evo) | vpmovuswb Wx,Vx (F3),(ev) +11: vpmovusdb Wx,Vd (F3),(ev) | vpsravw Vx,Hx,Wx (66),(ev) +12: vpmovusqb Wx,Vq (F3),(ev) | vpsllvw Vx,Hx,Wx (66),(ev) +13: vcvtph2ps Vx,Wx (66),(v) | vpmovusdw Wx,Vd (F3),(ev) +14: blendvps Vdq,Wdq (66) | vpmovusqw Wx,Vq (F3),(ev) | vprorvd/q Vx,Hx,Wx (66),(evo) +15: blendvpd Vdq,Wdq (66) | vpmovusqd Wx,Vq (F3),(ev) | vprolvd/q Vx,Hx,Wx (66),(evo) +16: vpermps Vqq,Hqq,Wqq (66),(v) | vpermps/d Vqq,Hqq,Wqq (66),(evo) 17: vptest Vx,Wx (66) 18: vbroadcastss Vx,Wd (66),(v) -19: vbroadcastsd Vqq,Wq (66),(v) -1a: vbroadcastf128 Vqq,Mdq (66),(v) -1b: +19: vbroadcastsd Vqq,Wq (66),(v) | vbroadcastf32x2 Vqq,Wq (66),(evo) +1a: vbroadcastf128 Vqq,Mdq (66),(v) | vbroadcastf32x4/64x2 Vqq,Wq (66),(evo) +1b: vbroadcastf32x8/64x4 Vqq,Mdq (66),(ev) 1c: pabsb Pq,Qq | vpabsb Vx,Wx (66),(v1) 1d: pabsw Pq,Qq | vpabsw Vx,Wx (66),(v1) 1e: pabsd Pq,Qq | vpabsd Vx,Wx (66),(v1) -1f: +1f: vpabsq Vx,Wx (66),(ev) # 0x0f 0x38 0x20-0x2f -20: vpmovsxbw Vx,Ux/Mq (66),(v1) -21: vpmovsxbd Vx,Ux/Md (66),(v1) -22: vpmovsxbq Vx,Ux/Mw (66),(v1) -23: vpmovsxwd Vx,Ux/Mq (66),(v1) -24: vpmovsxwq Vx,Ux/Md (66),(v1) -25: vpmovsxdq Vx,Ux/Mq (66),(v1) -26: -27: -28: vpmuldq Vx,Hx,Wx (66),(v1) -29: vpcmpeqq Vx,Hx,Wx (66),(v1) -2a: vmovntdqa Vx,Mx (66),(v1) +20: vpmovsxbw Vx,Ux/Mq (66),(v1) | vpmovswb Wx,Vx (F3),(ev) +21: vpmovsxbd Vx,Ux/Md (66),(v1) | vpmovsdb Wx,Vd (F3),(ev) +22: vpmovsxbq Vx,Ux/Mw (66),(v1) | vpmovsqb Wx,Vq (F3),(ev) +23: vpmovsxwd Vx,Ux/Mq (66),(v1) | vpmovsdw Wx,Vd (F3),(ev) +24: vpmovsxwq Vx,Ux/Md (66),(v1) | vpmovsqw Wx,Vq (F3),(ev) +25: vpmovsxdq Vx,Ux/Mq (66),(v1) | vpmovsqd Wx,Vq (F3),(ev) +26: vptestmb/w Vk,Hx,Wx (66),(ev) | vptestnmb/w Vk,Hx,Wx (F3),(ev) +27: vptestmd/q Vk,Hx,Wx (66),(ev) | vptestnmd/q Vk,Hx,Wx (F3),(ev) +28: vpmuldq Vx,Hx,Wx (66),(v1) | vpmovm2b/w Vx,Uk (F3),(ev) +29: vpcmpeqq Vx,Hx,Wx (66),(v1) | vpmovb2m/w2m Vk,Ux (F3),(ev) +2a: vmovntdqa Vx,Mx (66),(v1) | vpbroadcastmb2q Vx,Uk (F3),(ev) 2b: vpackusdw Vx,Hx,Wx (66),(v1) -2c: vmaskmovps Vx,Hx,Mx (66),(v) -2d: vmaskmovpd Vx,Hx,Mx (66),(v) +2c: vmaskmovps Vx,Hx,Mx (66),(v) | vscalefps/d Vx,Hx,Wx (66),(evo) +2d: vmaskmovpd Vx,Hx,Mx (66),(v) | vscalefss/d Vx,Hx,Wx (66),(evo) 2e: vmaskmovps Mx,Hx,Vx (66),(v) 2f: vmaskmovpd Mx,Hx,Vx (66),(v) # 0x0f 0x38 0x30-0x3f -30: vpmovzxbw Vx,Ux/Mq (66),(v1) -31: vpmovzxbd Vx,Ux/Md (66),(v1) -32: vpmovzxbq Vx,Ux/Mw (66),(v1) -33: vpmovzxwd Vx,Ux/Mq (66),(v1) -34: vpmovzxwq Vx,Ux/Md (66),(v1) -35: vpmovzxdq Vx,Ux/Mq (66),(v1) -36: vpermd Vqq,Hqq,Wqq (66),(v) +30: vpmovzxbw Vx,Ux/Mq (66),(v1) | vpmovwb Wx,Vx (F3),(ev) +31: vpmovzxbd Vx,Ux/Md (66),(v1) | vpmovdb Wx,Vd (F3),(ev) +32: vpmovzxbq Vx,Ux/Mw (66),(v1) | vpmovqb Wx,Vq (F3),(ev) +33: vpmovzxwd Vx,Ux/Mq (66),(v1) | vpmovdw Wx,Vd (F3),(ev) +34: vpmovzxwq Vx,Ux/Md (66),(v1) | vpmovqw Wx,Vq (F3),(ev) +35: vpmovzxdq Vx,Ux/Mq (66),(v1) | vpmovqd Wx,Vq (F3),(ev) +36: vpermd Vqq,Hqq,Wqq (66),(v) | vpermd/q Vqq,Hqq,Wqq (66),(evo) 37: vpcmpgtq Vx,Hx,Wx (66),(v1) -38: vpminsb Vx,Hx,Wx (66),(v1) -39: vpminsd Vx,Hx,Wx (66),(v1) -3a: vpminuw Vx,Hx,Wx (66),(v1) -3b: vpminud Vx,Hx,Wx (66),(v1) +38: vpminsb Vx,Hx,Wx (66),(v1) | vpmovm2d/q Vx,Uk (F3),(ev) +39: vpminsd Vx,Hx,Wx (66),(v1) | vpminsd/q Vx,Hx,Wx (66),(evo) | vpmovd2m/q2m Vk,Ux (F3),(ev) +3a: vpminuw Vx,Hx,Wx (66),(v1) | vpbroadcastmw2d Vx,Uk (F3),(ev) +3b: 
vpminud Vx,Hx,Wx (66),(v1) | vpminud/q Vx,Hx,Wx (66),(evo) 3c: vpmaxsb Vx,Hx,Wx (66),(v1) -3d: vpmaxsd Vx,Hx,Wx (66),(v1) +3d: vpmaxsd Vx,Hx,Wx (66),(v1) | vpmaxsd/q Vx,Hx,Wx (66),(evo) 3e: vpmaxuw Vx,Hx,Wx (66),(v1) -3f: vpmaxud Vx,Hx,Wx (66),(v1) +3f: vpmaxud Vx,Hx,Wx (66),(v1) | vpmaxud/q Vx,Hx,Wx (66),(evo) # 0x0f 0x38 0x40-0x8f -40: vpmulld Vx,Hx,Wx (66),(v1) +40: vpmulld Vx,Hx,Wx (66),(v1) | vpmulld/q Vx,Hx,Wx (66),(evo) 41: vphminposuw Vdq,Wdq (66),(v1) -42: -43: -44: +42: vgetexpps/d Vx,Wx (66),(ev) +43: vgetexpss/d Vx,Hx,Wx (66),(ev) +44: vplzcntd/q Vx,Wx (66),(ev) 45: vpsrlvd/q Vx,Hx,Wx (66),(v) -46: vpsravd Vx,Hx,Wx (66),(v) +46: vpsravd Vx,Hx,Wx (66),(v) | vpsravd/q Vx,Hx,Wx (66),(evo) 47: vpsllvd/q Vx,Hx,Wx (66),(v) -# Skip 0x48-0x57 +# Skip 0x48-0x4b +4c: vrcp14ps/d Vpd,Wpd (66),(ev) +4d: vrcp14ss/d Vsd,Hpd,Wsd (66),(ev) +4e: vrsqrt14ps/d Vpd,Wpd (66),(ev) +4f: vrsqrt14ss/d Vsd,Hsd,Wsd (66),(ev) +# Skip 0x50-0x57 58: vpbroadcastd Vx,Wx (66),(v) -59: vpbroadcastq Vx,Wx (66),(v) -5a: vbroadcasti128 Vqq,Mdq (66),(v) -# Skip 0x5b-0x77 +59: vpbroadcastq Vx,Wx (66),(v) | vbroadcasti32x2 Vx,Wx (66),(evo) +5a: vbroadcasti128 Vqq,Mdq (66),(v) | vbroadcasti32x4/64x2 Vx,Wx (66),(evo) +5b: vbroadcasti32x8/64x4 Vqq,Mdq (66),(ev) +# Skip 0x5c-0x63 +64: vpblendmd/q Vx,Hx,Wx (66),(ev) +65: vblendmps/d Vx,Hx,Wx (66),(ev) +66: vpblendmb/w Vx,Hx,Wx (66),(ev) +# Skip 0x67-0x74 +75: vpermi2b/w Vx,Hx,Wx (66),(ev) +76: vpermi2d/q Vx,Hx,Wx (66),(ev) +77: vpermi2ps/d Vx,Hx,Wx (66),(ev) 78: vpbroadcastb Vx,Wx (66),(v) 79: vpbroadcastw Vx,Wx (66),(v) -# Skip 0x7a-0x7f +7a: vpbroadcastb Vx,Rv (66),(ev) +7b: vpbroadcastw Vx,Rv (66),(ev) +7c: vpbroadcastd/q Vx,Rv (66),(ev) +7d: vpermt2b/w Vx,Hx,Wx (66),(ev) +7e: vpermt2d/q Vx,Hx,Wx (66),(ev) +7f: vpermt2ps/d Vx,Hx,Wx (66),(ev) 80: INVEPT Gy,Mdq (66) 81: INVPID Gy,Mdq (66) 82: INVPCID Gy,Mdq (66) +83: vpmultishiftqb Vx,Hx,Wx (66),(ev) +88: vexpandps/d Vpd,Wpd (66),(ev) +89: vpexpandd/q Vx,Wx (66),(ev) +8a: vcompressps/d Wx,Vx (66),(ev) +8b: vpcompressd/q Wx,Vx (66),(ev) 8c: vpmaskmovd/q Vx,Hx,Mx (66),(v) +8d: vpermb/w Vx,Hx,Wx (66),(ev) 8e: vpmaskmovd/q Mx,Vx,Hx (66),(v) # 0x0f 0x38 0x90-0xbf (FMA) -90: vgatherdd/q Vx,Hx,Wx (66),(v) -91: vgatherqd/q Vx,Hx,Wx (66),(v) +90: vgatherdd/q Vx,Hx,Wx (66),(v) | vpgatherdd/q Vx,Wx (66),(evo) +91: vgatherqd/q Vx,Hx,Wx (66),(v) | vpgatherqd/q Vx,Wx (66),(evo) 92: vgatherdps/d Vx,Hx,Wx (66),(v) 93: vgatherqps/d Vx,Hx,Wx (66),(v) 94: @@ -715,6 +744,10 @@ AVXcode: 2 9d: vfnmadd132ss/d Vx,Hx,Wx (66),(v),(v1) 9e: vfnmsub132ps/d Vx,Hx,Wx (66),(v) 9f: vfnmsub132ss/d Vx,Hx,Wx (66),(v),(v1) +a0: vpscatterdd/q Wx,Vx (66),(ev) +a1: vpscatterqd/q Wx,Vx (66),(ev) +a2: vscatterdps/d Wx,Vx (66),(ev) +a3: vscatterqps/d Wx,Vx (66),(ev) a6: vfmaddsub213ps/d Vx,Hx,Wx (66),(v) a7: vfmsubadd213ps/d Vx,Hx,Wx (66),(v) a8: vfmadd213ps/d Vx,Hx,Wx (66),(v) @@ -725,6 +758,8 @@ ac: vfnmadd213ps/d Vx,Hx,Wx (66),(v) ad: vfnmadd213ss/d Vx,Hx,Wx (66),(v),(v1) ae: vfnmsub213ps/d Vx,Hx,Wx (66),(v) af: vfnmsub213ss/d Vx,Hx,Wx (66),(v),(v1) +b4: vpmadd52luq Vx,Hx,Wx (66),(ev) +b5: vpmadd52huq Vx,Hx,Wx (66),(ev) b6: vfmaddsub231ps/d Vx,Hx,Wx (66),(v) b7: vfmsubadd231ps/d Vx,Hx,Wx (66),(v) b8: vfmadd231ps/d Vx,Hx,Wx (66),(v) @@ -736,12 +771,15 @@ bd: vfnmadd231ss/d Vx,Hx,Wx (66),(v),(v1) be: vfnmsub231ps/d Vx,Hx,Wx (66),(v) bf: vfnmsub231ss/d Vx,Hx,Wx (66),(v),(v1) # 0x0f 0x38 0xc0-0xff -c8: sha1nexte Vdq,Wdq +c4: vpconflictd/q Vx,Wx (66),(ev) +c6: Grp18 (1A) +c7: Grp19 (1A) +c8: sha1nexte Vdq,Wdq | vexp2ps/d Vx,Wx (66),(ev) c9: sha1msg1 Vdq,Wdq -ca: 
sha1msg2 Vdq,Wdq -cb: sha256rnds2 Vdq,Wdq -cc: sha256msg1 Vdq,Wdq -cd: sha256msg2 Vdq,Wdq +ca: sha1msg2 Vdq,Wdq | vrcp28ps/d Vx,Wx (66),(ev) +cb: sha256rnds2 Vdq,Wdq | vrcp28ss/d Vx,Hx,Wx (66),(ev) +cc: sha256msg1 Vdq,Wdq | vrsqrt28ps/d Vx,Wx (66),(ev) +cd: sha256msg2 Vdq,Wdq | vrsqrt28ss/d Vx,Hx,Wx (66),(ev) db: VAESIMC Vdq,Wdq (66),(v1) dc: VAESENC Vdq,Hdq,Wdq (66),(v1) dd: VAESENCLAST Vdq,Hdq,Wdq (66),(v1) @@ -763,15 +801,15 @@ AVXcode: 3 00: vpermq Vqq,Wqq,Ib (66),(v) 01: vpermpd Vqq,Wqq,Ib (66),(v) 02: vpblendd Vx,Hx,Wx,Ib (66),(v) -03: +03: valignd/q Vx,Hx,Wx,Ib (66),(ev) 04: vpermilps Vx,Wx,Ib (66),(v) 05: vpermilpd Vx,Wx,Ib (66),(v) 06: vperm2f128 Vqq,Hqq,Wqq,Ib (66),(v) 07: -08: vroundps Vx,Wx,Ib (66) -09: vroundpd Vx,Wx,Ib (66) -0a: vroundss Vss,Wss,Ib (66),(v1) -0b: vroundsd Vsd,Wsd,Ib (66),(v1) +08: vroundps Vx,Wx,Ib (66) | vrndscaleps Vx,Wx,Ib (66),(evo) +09: vroundpd Vx,Wx,Ib (66) | vrndscalepd Vx,Wx,Ib (66),(evo) +0a: vroundss Vss,Wss,Ib (66),(v1) | vrndscaless Vx,Hx,Wx,Ib (66),(evo) +0b: vroundsd Vsd,Wsd,Ib (66),(v1) | vrndscalesd Vx,Hx,Wx,Ib (66),(evo) 0c: vblendps Vx,Hx,Wx,Ib (66) 0d: vblendpd Vx,Hx,Wx,Ib (66) 0e: vpblendw Vx,Hx,Wx,Ib (66),(v1) @@ -780,26 +818,51 @@ AVXcode: 3 15: vpextrw Rd/Mw,Vdq,Ib (66),(v1) 16: vpextrd/q Ey,Vdq,Ib (66),(v1) 17: vextractps Ed,Vdq,Ib (66),(v1) -18: vinsertf128 Vqq,Hqq,Wqq,Ib (66),(v) -19: vextractf128 Wdq,Vqq,Ib (66),(v) +18: vinsertf128 Vqq,Hqq,Wqq,Ib (66),(v) | vinsertf32x4/64x2 Vqq,Hqq,Wqq,Ib (66),(evo) +19: vextractf128 Wdq,Vqq,Ib (66),(v) | vextractf32x4/64x2 Wdq,Vqq,Ib (66),(evo) +1a: vinsertf32x8/64x4 Vqq,Hqq,Wqq,Ib (66),(ev) +1b: vextractf32x8/64x4 Wdq,Vqq,Ib (66),(ev) 1d: vcvtps2ph Wx,Vx,Ib (66),(v) +1e: vpcmpud/q Vk,Hd,Wd,Ib (66),(ev) +1f: vpcmpd/q Vk,Hd,Wd,Ib (66),(ev) 20: vpinsrb Vdq,Hdq,Ry/Mb,Ib (66),(v1) 21: vinsertps Vdq,Hdq,Udq/Md,Ib (66),(v1) 22: vpinsrd/q Vdq,Hdq,Ey,Ib (66),(v1) -38: vinserti128 Vqq,Hqq,Wqq,Ib (66),(v) -39: vextracti128 Wdq,Vqq,Ib (66),(v) +23: vshuff32x4/64x2 Vx,Hx,Wx,Ib (66),(ev) +25: vpternlogd/q Vx,Hx,Wx,Ib (66),(ev) +26: vgetmantps/d Vx,Wx,Ib (66),(ev) +27: vgetmantss/d Vx,Hx,Wx,Ib (66),(ev) +30: kshiftrb/w Vk,Uk,Ib (66),(v) +31: kshiftrd/q Vk,Uk,Ib (66),(v) +32: kshiftlb/w Vk,Uk,Ib (66),(v) +33: kshiftld/q Vk,Uk,Ib (66),(v) +38: vinserti128 Vqq,Hqq,Wqq,Ib (66),(v) | vinserti32x4/64x2 Vqq,Hqq,Wqq,Ib (66),(evo) +39: vextracti128 Wdq,Vqq,Ib (66),(v) | vextracti32x4/64x2 Wdq,Vqq,Ib (66),(evo) +3a: vinserti32x8/64x4 Vqq,Hqq,Wqq,Ib (66),(ev) +3b: vextracti32x8/64x4 Wdq,Vqq,Ib (66),(ev) +3e: vpcmpub/w Vk,Hk,Wx,Ib (66),(ev) +3f: vpcmpb/w Vk,Hk,Wx,Ib (66),(ev) 40: vdpps Vx,Hx,Wx,Ib (66) 41: vdppd Vdq,Hdq,Wdq,Ib (66),(v1) -42: vmpsadbw Vx,Hx,Wx,Ib (66),(v1) +42: vmpsadbw Vx,Hx,Wx,Ib (66),(v1) | vdbpsadbw Vx,Hx,Wx,Ib (66),(evo) +43: vshufi32x4/64x2 Vx,Hx,Wx,Ib (66),(ev) 44: vpclmulqdq Vdq,Hdq,Wdq,Ib (66),(v1) 46: vperm2i128 Vqq,Hqq,Wqq,Ib (66),(v) 4a: vblendvps Vx,Hx,Wx,Lx (66),(v) 4b: vblendvpd Vx,Hx,Wx,Lx (66),(v) 4c: vpblendvb Vx,Hx,Wx,Lx (66),(v1) +50: vrangeps/d Vx,Hx,Wx,Ib (66),(ev) +51: vrangess/d Vx,Hx,Wx,Ib (66),(ev) +54: vfixupimmps/d Vx,Hx,Wx,Ib (66),(ev) +55: vfixupimmss/d Vx,Hx,Wx,Ib (66),(ev) +56: vreduceps/d Vx,Wx,Ib (66),(ev) +57: vreducess/d Vx,Hx,Wx,Ib (66),(ev) 60: vpcmpestrm Vdq,Wdq,Ib (66),(v1) 61: vpcmpestri Vdq,Wdq,Ib (66),(v1) 62: vpcmpistrm Vdq,Wdq,Ib (66),(v1) 63: vpcmpistri Vdq,Wdq,Ib (66),(v1) +66: vfpclassps/d Vk,Wx,Ib (66),(ev) +67: vfpclassss/d Vk,Wx,Ib (66),(ev) cc: sha1rnds4 Vdq,Wdq,Ib df: VAESKEYGEN Vdq,Wdq,Ib (66),(v1) f0: RORX Gy,Ey,Ib (F2),(v) @@ -927,8 +990,10 @@ 
GrpTable: Grp12 EndTable GrpTable: Grp13 +0: vprord/q Hx,Wx,Ib (66),(ev) +1: vprold/q Hx,Wx,Ib (66),(ev) 2: psrld Nq,Ib (11B) | vpsrld Hx,Ux,Ib (66),(11B),(v1) -4: psrad Nq,Ib (11B) | vpsrad Hx,Ux,Ib (66),(11B),(v1) +4: psrad Nq,Ib (11B) | vpsrad Hx,Ux,Ib (66),(11B),(v1) | vpsrad/q Hx,Ux,Ib (66),(evo) 6: pslld Nq,Ib (11B) | vpslld Hx,Ux,Ib (66),(11B),(v1) EndTable @@ -963,6 +1028,20 @@ GrpTable: Grp17 3: BLSI By,Ey (v) EndTable +GrpTable: Grp18 +1: vgatherpf0dps/d Wx (66),(ev) +2: vgatherpf1dps/d Wx (66),(ev) +5: vscatterpf0dps/d Wx (66),(ev) +6: vscatterpf1dps/d Wx (66),(ev) +EndTable + +GrpTable: Grp19 +1: vgatherpf0qps/d Wx (66),(ev) +2: vgatherpf1qps/d Wx (66),(ev) +5: vscatterpf0qps/d Wx (66),(ev) +6: vscatterpf1qps/d Wx (66),(ev) +EndTable + # AMD's Prefetch Group GrpTable: GrpP 0: PREFETCH diff --git a/arch/x86/tools/gen-insn-attr-x86.awk b/arch/x86/tools/gen-insn-attr-x86.awk index 093a892026f9..a3d2c62fd805 100644 --- a/arch/x86/tools/gen-insn-attr-x86.awk +++ b/arch/x86/tools/gen-insn-attr-x86.awk @@ -72,12 +72,14 @@ BEGIN { lprefix_expr = "\\((66|F2|F3)\\)" max_lprefix = 4 - # All opcodes starting with lower-case 'v' or with (v1) superscript + # All opcodes starting with lower-case 'v', 'k' or with (v1) superscript # accepts VEX prefix - vexok_opcode_expr = "^v.*" + vexok_opcode_expr = "^[vk].*" vexok_expr = "\\(v1\\)" # All opcodes with (v) superscript supports *only* VEX prefix vexonly_expr = "\\(v\\)" + # All opcodes with (ev) superscript supports *only* EVEX prefix + evexonly_expr = "\\(ev\\)" prefix_expr = "\\(Prefix\\)" prefix_num["Operand-Size"] = "INAT_PFX_OPNDSZ" @@ -95,6 +97,7 @@ BEGIN { prefix_num["Address-Size"] = "INAT_PFX_ADDRSZ" prefix_num["VEX+1byte"] = "INAT_PFX_VEX2" prefix_num["VEX+2byte"] = "INAT_PFX_VEX3" + prefix_num["EVEX"] = "INAT_PFX_EVEX" clear_vars() } @@ -319,7 +322,9 @@ function convert_operands(count,opnd, i,j,imm,mod) flags = add_flags(flags, "INAT_MODRM") # check VEX codes - if (match(ext, vexonly_expr)) + if (match(ext, evexonly_expr)) + flags = add_flags(flags, "INAT_VEXOK | INAT_EVEXONLY") + else if (match(ext, vexonly_expr)) flags = add_flags(flags, "INAT_VEXOK | INAT_VEXONLY") else if (match(ext, vexok_expr) || match(opcode, vexok_opcode_expr)) flags = add_flags(flags, "INAT_VEXOK") -- cgit v1.2.3 From ec3ed4a2104b8d1ab8da2db5b1221b2ba8a7a6e1 Mon Sep 17 00:00:00 2001 From: Dave Hansen Date: Wed, 20 Jul 2016 12:45:51 -0700 Subject: x86/fpu: Do not BUG_ON() in early FPU code I don't think it is really possible to have a system where CPUID enumerates support for XSAVE but that it does not have FP/SSE (they are "legacy" features and always present). But, I did manage to hit this case in qemu when I enabled its somewhat shaky XSAVE support. The bummer is that the FPU is set up before we parse the command-line or have *any* console support including earlyprintk. That turned what should have been an easy thing to debug in to a bit more of an odyssey. So a BUG() here is worthless. All it does it guarantee that if/when we hit this case we have an empty console. So, remove the BUG() and try to limp along by disabling XSAVE and trying to continue. Add a comment on why we are doing this, and also add a common "out_disable" path for leaving fpu__init_system_xstate(). Signed-off-by: Dave Hansen Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Dave Hansen Cc: Denys Vlasenko Cc: Fenghua Yu Cc: H. 
Peter Anvin Cc: Josh Poimboeuf Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Quentin Casasnovas Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/20160720194551.63BB2B58@viggo.jf.intel.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/fpu/xstate.c | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c index 3169bcaf9391..680049aa4593 100644 --- a/arch/x86/kernel/fpu/xstate.c +++ b/arch/x86/kernel/fpu/xstate.c @@ -714,8 +714,13 @@ void __init fpu__init_system_xstate(void) xfeatures_mask = eax + ((u64)edx << 32); if ((xfeatures_mask & XFEATURE_MASK_FPSSE) != XFEATURE_MASK_FPSSE) { + /* + * This indicates that something really unexpected happened + * with the enumeration. Disable XSAVE and try to continue + * booting without it. This is too early to BUG(). + */ pr_err("x86/fpu: FP/SSE not present amongst the CPU's xstate features: 0x%llx.\n", xfeatures_mask); - BUG(); + goto out_disable; } xfeatures_mask &= fpu__get_supported_xfeatures_mask(); @@ -723,11 +728,8 @@ void __init fpu__init_system_xstate(void) /* Enable xstate instructions to be able to continue with initialization: */ fpu__init_cpu_xstate(); err = init_xstate_size(); - if (err) { - /* something went wrong, boot without any XSAVE support */ - fpu__init_disable_system_xstate(); - return; - } + if (err) + goto out_disable; /* * Update info used for ptrace frames; use standard-format size and no @@ -744,6 +746,11 @@ void __init fpu__init_system_xstate(void) xfeatures_mask, fpu_kernel_xstate_size, boot_cpu_has(X86_FEATURE_XSAVES) ? "compacted" : "standard"); + return; + +out_disable: + /* something went wrong, try to boot without any XSAVE support */ + fpu__init_disable_system_xstate(); } /* -- cgit v1.2.3 From 30f027398b329c75c8f23a3c13be240b50866fdc Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Thu, 21 Jul 2016 14:16:51 -0700 Subject: x86/boot: Clarify what x86_legacy_features.reserve_bios_regions does It doesn't just control probing for the EBDA -- it controls whether we detect and reserve the <1MB BIOS regions in general. Signed-off-by: Andy Lutomirski Cc: Andrew Morton Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Josh Poimboeuf Cc: Kees Cook Cc: Linus Torvalds Cc: Luis R. Rodriguez Cc: Mario Limonciello Cc: Matthew Garrett Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Toshi Kani Link: http://lkml.kernel.org/r/55bd591115498440d461857a7b64f349a5d911f3.1469135598.git.luto@kernel.org Signed-off-by: Ingo Molnar --- arch/x86/include/asm/x86_init.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/x86/include/asm/x86_init.h b/arch/x86/include/asm/x86_init.h index c519c052700a..66c15a01667f 100644 --- a/arch/x86/include/asm/x86_init.h +++ b/arch/x86/include/asm/x86_init.h @@ -168,8 +168,9 @@ struct x86_legacy_devices { * struct x86_legacy_features - legacy x86 features * * @rtc: this device has a CMOS real-time clock present - * @reserve_bios_regions: it's safe to search for the EBDA signature in the hardware's - * low RAM + * @reserve_bios_regions: boot code will search for the EBDA address and the + * start of the 640k - 1M BIOS region. If false, the platform must + * ensure that its memory map correctly reserves sub-1MB regions as needed. * @devices: legacy x86 devices, refer to struct x86_legacy_devices * documentation for further details. 
*/ -- cgit v1.2.3 From 6a79296cb15d947bcb4558011fe066e5d8252b35 Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Thu, 21 Jul 2016 14:16:52 -0700 Subject: x86/boot: Simplify EBDA-vs-BIOS reservation logic Both the intent and the effect of reserve_bios_regions() is simple: reserve the range from the apparent BIOS start (suitably filtered) through 1MB and, if the EBDA start address is sensible, extend that reservation downward to cover the EBDA as well. The code is overcomplicated, though, and contains head-scratchers like: if (ebda_start < BIOS_START_MIN) ebda_start = BIOS_START_MAX; That snipped is trying to say "if ebda_start < BIOS_START_MIN, ignore it". Simplify it: reorder the code so that it makes sense. This should have no functional effect under any circumstances. Signed-off-by: Andy Lutomirski Cc: Andrew Morton Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Josh Poimboeuf Cc: Kees Cook Cc: Linus Torvalds Cc: Luis R. Rodriguez Cc: Mario Limonciello Cc: Matthew Garrett Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Toshi Kani Link: http://lkml.kernel.org/r/ef89c0c761be20ead8bd9a3275743e6259b6092a.1469135598.git.luto@kernel.org Signed-off-by: Ingo Molnar --- arch/x86/kernel/ebda.c | 34 +++++++++++----------------------- 1 file changed, 11 insertions(+), 23 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/ebda.c b/arch/x86/kernel/ebda.c index 6219eef20e2e..4312f8ae71b7 100644 --- a/arch/x86/kernel/ebda.c +++ b/arch/x86/kernel/ebda.c @@ -65,22 +65,6 @@ void __init reserve_bios_regions(void) if (!x86_platform.legacy.reserve_bios_regions) return; - /* Get the start address of the EBDA page: */ - ebda_start = get_bios_ebda(); - - /* - * Quirk: some old Dells seem to have a 4k EBDA without - * reporting so in their BIOS RAM size value, so just - * consider the memory above 640K to be off limits - * (bugzilla 2990). - * - * We detect this case by filtering for nonsensical EBDA - * addresses below 128K, where we can assume that they - * are bogus and bump it up to a fixed 640K value: - */ - if (ebda_start < BIOS_START_MIN) - ebda_start = BIOS_START_MAX; - /* * BIOS RAM size is encoded in kilobytes, convert it * to bytes to get a first guess at where the BIOS @@ -91,18 +75,22 @@ void __init reserve_bios_regions(void) /* * If bios_start is less than 128K, assume it is bogus - * and bump it up to 640K: + * and bump it up to 640K. Similarly, if bios_start is above 640K, + * don't trust it. */ - if (bios_start < BIOS_START_MIN) + if (bios_start < BIOS_START_MIN || bios_start > BIOS_START_MAX) bios_start = BIOS_START_MAX; + /* Get the start address of the EBDA page: */ + ebda_start = get_bios_ebda(); + /* - * Use the lower of the bios_start and ebda_start - * as the starting point, but don't allow it to - * go beyond 640K: + * If the EBDA start address is sane and is below the BIOS region, + * then also reserve everything from the EBDA start address up to + * the BIOS region. 
*/ - bios_start = min(bios_start, ebda_start); - bios_start = min(bios_start, BIOS_START_MAX); + if (ebda_start >= BIOS_START_MIN && ebda_start < bios_start) + bios_start = ebda_start; /* Reserve all memory between bios_start and the 1MB mark: */ memblock_reserve(bios_start, 0x100000 - bios_start); -- cgit v1.2.3 From 530dd8d4b9daf77e3e5d145a26210d91ced954c7 Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Fri, 22 Jul 2016 21:58:08 -0700 Subject: x86/mm/cpa: Fix populate_pgd(): Stop trying to deallocate failed PUDs Valdis Kletnieks bisected a boot failure back to this recent commit: 360cb4d15567 ("x86/mm/cpa: In populate_pgd(), don't set the PGD entry until it's populated") I broke the case where a PUD table got allocated -- populate_pud() would wander off a pgd_none entry and get lost. I'm not sure how this survived my testing. Fix the original issue in a much simpler way. The problem was that, if we allocated a PUD table, failed to populate it, and freed it, another CPU could potentially keep using the PGD entry we installed (either by copying it via vmalloc_fault or by speculatively caching it). There's a straightforward fix: simply leave the top-level entry in place if this happens. This can't waste any significant amount of memory -- there are at most 256 entries like this systemwide and, as a practical matter, if we hit this failure path repeatedly, we're likely to reuse the same page anyway. For context, this is a reversion with this hunk added in: if (ret < 0) { + /* + * Leave the PUD page in place in case some other CPU or thread + * already found it, but remove any useless entries we just + * added to it. + */ - unmap_pgd_range(cpa->pgd, addr, + unmap_pud_range(pgd_entry, addr, addr + (cpa->numpages << PAGE_SHIFT)); return ret; } This effectively open-codes what the now-deleted unmap_pgd_range() function used to do except that unmap_pgd_range() used to try to free the page as well. Reported-by: Valdis Kletnieks Signed-off-by: Andy Lutomirski Cc: Andrew Morton Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Josh Poimboeuf Cc: Linus Torvalds Cc: Luis R. 
Rodriguez Cc: Mike Krinkin Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Toshi Kani Link: http://lkml.kernel.org/r/21cbc2822aa18aa812c0215f4231dbf5f65afa7f.1469249789.git.luto@kernel.org Signed-off-by: Ingo Molnar --- arch/x86/mm/pageattr.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) (limited to 'arch') diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c index 26c93c6e04a0..2bc6ea153f76 100644 --- a/arch/x86/mm/pageattr.c +++ b/arch/x86/mm/pageattr.c @@ -1082,6 +1082,8 @@ static int populate_pgd(struct cpa_data *cpa, unsigned long addr) pud = (pud_t *)get_zeroed_page(GFP_KERNEL | __GFP_NOTRACK); if (!pud) return -1; + + set_pgd(pgd_entry, __pgd(__pa(pud) | _KERNPG_TABLE)); } pgprot_val(pgprot) &= ~pgprot_val(cpa->mask_clr); @@ -1089,16 +1091,11 @@ static int populate_pgd(struct cpa_data *cpa, unsigned long addr) ret = populate_pud(cpa, addr, pgd_entry, pgprot); if (ret < 0) { - if (pud) - free_page((unsigned long)pud); unmap_pud_range(pgd_entry, addr, addr + (cpa->numpages << PAGE_SHIFT)); return ret; } - if (pud) - set_pgd(pgd_entry, __pgd(__pa(pud) | _KERNPG_TABLE)); - cpa->numpages = ret; return 0; } -- cgit v1.2.3 From 55920d31f1e3fea06702c74271dd56c4fc9b70ca Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Sat, 23 Jul 2016 09:59:28 -0700 Subject: x86/mm/cpa: Add missing comment in populate_pdg() In commit: 21cbc2822aa1 ("x86/mm/cpa: Unbreak populate_pgd(): stop trying to deallocate failed PUDs") I intended to add this comment, but I failed at using git. Signed-off-by: Andy Lutomirski Cc: Borislav Petkov Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/242baf8612394f4e31216f96d13c4d2e9b90d1b7.1469293159.git.luto@kernel.org Signed-off-by: Ingo Molnar --- arch/x86/mm/pageattr.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'arch') diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c index 2bc6ea153f76..47870a534877 100644 --- a/arch/x86/mm/pageattr.c +++ b/arch/x86/mm/pageattr.c @@ -1091,6 +1091,11 @@ static int populate_pgd(struct cpa_data *cpa, unsigned long addr) ret = populate_pud(cpa, addr, pgd_entry, pgprot); if (ret < 0) { + /* + * Leave the PUD page in place in case some other CPU or thread + * already found it, but remove any useless entries we just + * added to it. + */ unmap_pud_range(pgd_entry, addr, addr + (cpa->numpages << PAGE_SHIFT)); return ret; -- cgit v1.2.3 From d51306f1a3bc0e3a7b86d8f2b2dedf34b356d3dd Mon Sep 17 00:00:00 2001 From: Stephen Rothwell Date: Sat, 23 Jul 2016 14:35:40 +1000 Subject: x86: Make the vdso2c compiler use the host architecture headers To be clear: this is a ppc64le hosted, x86_64 target cross build. Signed-off-by: Stephen Rothwell Acked-by: Andy Lutomirski Cc: H. 
Peter Anvin Cc: Josh Poimboeuf Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/20160723150845.3af8e452@canb.auug.org.au Signed-off-by: Arnaldo Carvalho de Melo --- arch/x86/entry/vdso/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/entry/vdso/Makefile b/arch/x86/entry/vdso/Makefile index 253b72eaade6..25e88c030c47 100644 --- a/arch/x86/entry/vdso/Makefile +++ b/arch/x86/entry/vdso/Makefile @@ -55,7 +55,7 @@ VDSO_LDFLAGS_vdso.lds = -m64 -Wl,-soname=linux-vdso.so.1 \ $(obj)/vdso64.so.dbg: $(src)/vdso.lds $(vobjs) FORCE $(call if_changed,vdso) -HOST_EXTRACFLAGS += -I$(srctree)/tools/include -I$(srctree)/include/uapi -I$(srctree)/arch/x86/include/uapi +HOST_EXTRACFLAGS += -I$(srctree)/tools/include -I$(srctree)/include/uapi -I$(srctree)/arch/$(SUBARCH)/include/uapi hostprogs-y += vdso2c quiet_cmd_vdso2c = VDSO2C $@ -- cgit v1.2.3
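
For readers who want to experiment with the reserve_bios_regions() logic outside the kernel, below is a minimal userspace sketch of the decision flow as it stands after the two EBDA patches above. The BIOS_START_MIN/BIOS_START_MAX constants and the two sanity checks mirror the final arch/x86/kernel/ebda.c; the bios_ram_size_kb_stub() and get_bios_ebda_stub() helpers and the closing printf are illustrative stand-ins (assumptions for this sketch, not kernel interfaces) for reading the 0x413 BIOS RAM-size word, get_bios_ebda() and memblock_reserve().

/*
 * Userspace sketch of the reserve_bios_regions() decision logic after
 * "x86/boot: Simplify EBDA-vs-BIOS reservation logic". The stubbed BIOS
 * values and the printf standing in for memblock_reserve() are
 * illustrative assumptions, not kernel code.
 */
#include <stdio.h>

#define BIOS_START_MIN	0x20000U	/* 128K, below this is considered bogus */
#define BIOS_START_MAX	0x9f000U	/* 640K, absolute maximum */

/* Stand-ins for the 0x413 BIOS RAM-size word (in KB) and get_bios_ebda(): */
static unsigned int bios_ram_size_kb_stub(void)	{ return 639; }
static unsigned int get_bios_ebda_stub(void)	{ return 0x9d800; }

int main(void)
{
	unsigned int bios_start, ebda_start;

	/* BIOS RAM size is encoded in kilobytes; convert to a byte address: */
	bios_start = bios_ram_size_kb_stub() << 10;

	/* Distrust values outside the sane 128K..640K window: */
	if (bios_start < BIOS_START_MIN || bios_start > BIOS_START_MAX)
		bios_start = BIOS_START_MAX;

	/* A sane EBDA start below the BIOS area extends the reservation down: */
	ebda_start = get_bios_ebda_stub();
	if (ebda_start >= BIOS_START_MIN && ebda_start < bios_start)
		bios_start = ebda_start;

	/* The kernel would now memblock_reserve(bios_start, 1MB - bios_start): */
	printf("reserve [0x%x, 0x100000)\n", bios_start);
	return 0;
}

With the stub values above this prints reserve [0x9d800, 0x100000): the RAM-size word is sanity-checked once, and a sane EBDA address can only lower the start of the reserved window, never raise it, which is the point of the reordering in the last EBDA patch.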