From 35d35aae817706800a4913711d563a99e1dc380a Mon Sep 17 00:00:00 2001 From: Tushar Behera Date: Thu, 6 Mar 2014 11:34:43 +0530 Subject: dt-bindings: clock: Move at91.h to dt-bindigs/clock Most of the clock related dt-binding header files are located in dt-bindings/clock folder. It would be good to keep all the similar header files at a single location. Signed-off-by: Tushar Behera CC: Rob Landley CC: Andrew Victor CC: Jean-Christophe Plagniol-Villard Acked-by: Boris BREZILLON [nicolas.ferre@atmel.com: add new at91sam9261 & at91sam9rl] Signed-off-by: Nicolas Ferre --- Documentation/devicetree/bindings/clock/at91-clock.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'Documentation') diff --git a/Documentation/devicetree/bindings/clock/at91-clock.txt b/Documentation/devicetree/bindings/clock/at91-clock.txt index cd5e23912888..6794cdc96d8f 100644 --- a/Documentation/devicetree/bindings/clock/at91-clock.txt +++ b/Documentation/devicetree/bindings/clock/at91-clock.txt @@ -62,7 +62,7 @@ Required properties for PMC node: - interrupt-controller : tell that the PMC is an interrupt controller. - #interrupt-cells : must be set to 1. The first cell encodes the interrupt id, and reflect the bit position in the PMC_ER/DR/SR registers. - You can use the dt macros defined in dt-bindings/clk/at91.h. + You can use the dt macros defined in dt-bindings/clock/at91.h. 0 (AT91_PMC_MOSCS) -> main oscillator ready 1 (AT91_PMC_LOCKA) -> PLL A ready 2 (AT91_PMC_LOCKB) -> PLL B ready -- cgit v1.2.3 From c06cbcb6052edd8f4ee00d65f874b26404ac8f96 Mon Sep 17 00:00:00 2001 From: Ben Hutchings Date: Tue, 22 Apr 2014 17:29:42 +0100 Subject: net: Update my email address Signed-off-by: Ben Hutchings Signed-off-by: David S. Miller --- Documentation/networking/scaling.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'Documentation') diff --git a/Documentation/networking/scaling.txt b/Documentation/networking/scaling.txt index ca6977f5b2ed..99ca40e8e810 100644 --- a/Documentation/networking/scaling.txt +++ b/Documentation/networking/scaling.txt @@ -429,7 +429,7 @@ RPS and RFS were introduced in kernel 2.6.35. XPS was incorporated into (therbert@google.com) Accelerated RFS was introduced in 2.6.35. Original patches were -submitted by Ben Hutchings (bhutchings@solarflare.com) +submitted by Ben Hutchings (bwh@kernel.org) Authors: Tom Herbert (therbert@google.com) -- cgit v1.2.3 From 88154c96ee2dab84ae78ad41562b4a3a23d83788 Mon Sep 17 00:00:00 2001 From: Heiko Stübner Date: Fri, 25 Apr 2014 10:06:13 +0200 Subject: arc_emac: add clock handling This adds ability for the arc_emac to really handle its supplying clock. To get the needed clock-frequency either a real clock or the previous clock-frequency property must be provided. Signed-off-by: Heiko Stuebner Tested-by: Max Schwarz Signed-off-by: David S. Miller --- Documentation/devicetree/bindings/net/arc_emac.txt | 12 +++++- drivers/net/ethernet/arc/emac.h | 2 + drivers/net/ethernet/arc/emac_main.c | 46 ++++++++++++++++------ 3 files changed, 47 insertions(+), 13 deletions(-) (limited to 'Documentation') diff --git a/Documentation/devicetree/bindings/net/arc_emac.txt b/Documentation/devicetree/bindings/net/arc_emac.txt index 7fbb027218a1..a1d71eb43b20 100644 --- a/Documentation/devicetree/bindings/net/arc_emac.txt +++ b/Documentation/devicetree/bindings/net/arc_emac.txt @@ -4,11 +4,15 @@ Required properties: - compatible: Should be "snps,arc-emac" - reg: Address and length of the register set for the device - interrupts: Should contain the EMAC interrupts -- clock-frequency: CPU frequency. It is needed to calculate and set polling -period of EMAC. - max-speed: see ethernet.txt file in the same directory. - phy: see ethernet.txt file in the same directory. +Clock handling: +The clock frequency is needed to calculate and set polling period of EMAC. +It must be provided by one of: +- clock-frequency: CPU frequency. +- clocks: reference to the clock supplying the EMAC. + Child nodes of the driver are the individual PHY devices connected to the MDIO bus. They must have a "reg" property given the PHY address on the MDIO bus. @@ -19,7 +23,11 @@ Examples: reg = <0xc0fc2000 0x3c>; interrupts = <6>; mac-address = [ 00 11 22 33 44 55 ]; + clock-frequency = <80000000>; + /* or */ + clocks = <&emac_clock>; + max-speed = <100>; phy = <&phy0>; diff --git a/drivers/net/ethernet/arc/emac.h b/drivers/net/ethernet/arc/emac.h index 928fac6dd10a..53f85bf71526 100644 --- a/drivers/net/ethernet/arc/emac.h +++ b/drivers/net/ethernet/arc/emac.h @@ -11,6 +11,7 @@ #include #include #include +#include /* STATUS and ENABLE Register bit masks */ #define TXINT_MASK (1<<0) /* Transmit interrupt */ @@ -131,6 +132,7 @@ struct arc_emac_priv { struct mii_bus *bus; void __iomem *regs; + struct clk *clk; struct napi_struct napi; struct net_device_stats stats; diff --git a/drivers/net/ethernet/arc/emac_main.c b/drivers/net/ethernet/arc/emac_main.c index 9747ddaf6ad2..d647a7d115ac 100644 --- a/drivers/net/ethernet/arc/emac_main.c +++ b/drivers/net/ethernet/arc/emac_main.c @@ -649,13 +649,6 @@ static int arc_emac_probe(struct platform_device *pdev) return -ENODEV; } - /* Get CPU clock frequency from device tree */ - if (of_property_read_u32(pdev->dev.of_node, "clock-frequency", - &clock_frequency)) { - dev_err(&pdev->dev, "failed to retrieve from device tree\n"); - return -EINVAL; - } - /* Get IRQ from device tree */ irq = irq_of_parse_and_map(pdev->dev.of_node, 0); if (!irq) { @@ -687,13 +680,32 @@ static int arc_emac_probe(struct platform_device *pdev) } dev_dbg(&pdev->dev, "Registers base address is 0x%p\n", priv->regs); + priv->clk = of_clk_get(pdev->dev.of_node, 0); + if (IS_ERR(priv->clk)) { + /* Get CPU clock frequency from device tree */ + if (of_property_read_u32(pdev->dev.of_node, "clock-frequency", + &clock_frequency)) { + dev_err(&pdev->dev, "failed to retrieve from device tree\n"); + err = -EINVAL; + goto out_netdev; + } + } else { + err = clk_prepare_enable(priv->clk); + if (err) { + dev_err(&pdev->dev, "failed to enable clock\n"); + goto out_clkget; + } + + clock_frequency = clk_get_rate(priv->clk); + } + id = arc_reg_get(priv, R_ID); /* Check for EMAC revision 5 or 7, magic number */ if (!(id == 0x0005fd02 || id == 0x0007fd02)) { dev_err(&pdev->dev, "ARC EMAC not detected, id=0x%x\n", id); err = -ENODEV; - goto out_netdev; + goto out_clken; } dev_info(&pdev->dev, "ARC EMAC detected with id: 0x%x\n", id); @@ -708,7 +720,7 @@ static int arc_emac_probe(struct platform_device *pdev) ndev->name, ndev); if (err) { dev_err(&pdev->dev, "could not allocate IRQ\n"); - goto out_netdev; + goto out_clken; } /* Get MAC address from device tree */ @@ -729,7 +741,7 @@ static int arc_emac_probe(struct platform_device *pdev) if (!priv->rxbd) { dev_err(&pdev->dev, "failed to allocate data buffers\n"); err = -ENOMEM; - goto out_netdev; + goto out_clken; } priv->txbd = priv->rxbd + RX_BD_NUM; @@ -741,7 +753,7 @@ static int arc_emac_probe(struct platform_device *pdev) err = arc_mdio_probe(pdev, priv); if (err) { dev_err(&pdev->dev, "failed to probe MII bus\n"); - goto out_netdev; + goto out_clken; } priv->phy_dev = of_phy_connect(ndev, phy_node, arc_emac_adjust_link, 0, @@ -771,6 +783,12 @@ out_netif_api: priv->phy_dev = NULL; out_mdio: arc_mdio_remove(priv); +out_clken: + if (!IS_ERR(priv->clk)) + clk_disable_unprepare(priv->clk); +out_clkget: + if (!IS_ERR(priv->clk)) + clk_put(priv->clk); out_netdev: free_netdev(ndev); return err; @@ -786,6 +804,12 @@ static int arc_emac_remove(struct platform_device *pdev) arc_mdio_remove(priv); unregister_netdev(ndev); netif_napi_del(&priv->napi); + + if (!IS_ERR(priv->clk)) { + clk_disable_unprepare(priv->clk); + clk_put(priv->clk); + } + free_netdev(ndev); return 0; -- cgit v1.2.3 From cf7eb979116c2568e8bc3b6a7269c7a359864ace Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sun, 13 Apr 2014 20:44:46 +0200 Subject: ARM: common: edma: Fix xbar mapping This is another great example of trainwreck engineering: commit 2646a0e529 (ARM: edma: Add EDMA crossbar event mux support) added support for using EDMA on peripherals which have no direct EDMA event mapping. The code compiles and does not explode in your face, but that's it. 1) Reading an u16 array from an u32 device tree array simply does not work. Even if the function is named "edma_of_read_u32_to_s16_array". It merily calls of_property_read_u16_array. So the resulting 16bit array will have every other entry = 0. 2) The DT entry for the xbar registers related to xbar has length 0x10 instead of the real length: 0xfd0 - 0xf90 = 0x40. Not a real problem as it does not cross a page boundary, but wrong nevertheless. 3) But none of this matters as the mapping never happens: After reading nonsense edma_of_read_u32_to_s16_array() invalidates the first array entry pair, so nobody can ever notice the braindamage by immediate explosion. Seems the QA criteria for this code was solely not to explode when someone adds edma-xbar-event-map entries to the DT. Goal achieved, congratulations! Not really helpful if someone wants to use edma on a device which requires a xbar mapping. Fix the issues by: - annotating the device tree entry with "/bits/ 16" as documented in the of_property_read_u16_array kernel doc - make the size of the xbar register mapping correct - invalidating the end of the array and not the start This convoluted mess wants to be completely rewritten as there is no point to keep the xbar_chan array memory and the iomapping of the xbar regs around forever. Marking the xbar mapped channels as used should be done right there. But that's a different issue and this patch is small enough to make it work and allows a simple backport for stable. Cc: stable@vger.kernel.org # v3.12+ Signed-off-by: Thomas Gleixner Signed-off-by: Sekhar Nori --- Documentation/devicetree/bindings/dma/ti-edma.txt | 4 +- arch/arm/boot/dts/am33xx.dtsi | 2 +- arch/arm/common/edma.c | 48 +++++++---------------- 3 files changed, 18 insertions(+), 36 deletions(-) (limited to 'Documentation') diff --git a/Documentation/devicetree/bindings/dma/ti-edma.txt b/Documentation/devicetree/bindings/dma/ti-edma.txt index 9fbbdb783a72..68ff2137bae7 100644 --- a/Documentation/devicetree/bindings/dma/ti-edma.txt +++ b/Documentation/devicetree/bindings/dma/ti-edma.txt @@ -29,6 +29,6 @@ edma: edma@49000000 { dma-channels = <64>; ti,edma-regions = <4>; ti,edma-slots = <256>; - ti,edma-xbar-event-map = <1 12 - 2 13>; + ti,edma-xbar-event-map = /bits/ 16 <1 12 + 2 13>; }; diff --git a/arch/arm/boot/dts/am33xx.dtsi b/arch/arm/boot/dts/am33xx.dtsi index 9770e35f2536..a23af78586d4 100644 --- a/arch/arm/boot/dts/am33xx.dtsi +++ b/arch/arm/boot/dts/am33xx.dtsi @@ -144,7 +144,7 @@ compatible = "ti,edma3"; ti,hwmods = "tpcc", "tptc0", "tptc1", "tptc2"; reg = <0x49000000 0x10000>, - <0x44e10f90 0x10>; + <0x44e10f90 0x40>; interrupts = <12 13 14>; #dma-cells = <1>; dma-channels = <64>; diff --git a/arch/arm/common/edma.c b/arch/arm/common/edma.c index 41bca32409fc..5339009b3c0c 100644 --- a/arch/arm/common/edma.c +++ b/arch/arm/common/edma.c @@ -1423,55 +1423,38 @@ EXPORT_SYMBOL(edma_clear_event); #if IS_ENABLED(CONFIG_OF) && IS_ENABLED(CONFIG_DMADEVICES) -static int edma_of_read_u32_to_s16_array(const struct device_node *np, - const char *propname, s16 *out_values, - size_t sz) +static int edma_xbar_event_map(struct device *dev, struct device_node *node, + struct edma_soc_info *pdata, size_t sz) { - int ret; - - ret = of_property_read_u16_array(np, propname, out_values, sz); - if (ret) - return ret; - - /* Terminate it */ - *out_values++ = -1; - *out_values++ = -1; - - return 0; -} - -static int edma_xbar_event_map(struct device *dev, - struct device_node *node, - struct edma_soc_info *pdata, int len) -{ - int ret, i; + const char pname[] = "ti,edma-xbar-event-map"; struct resource res; void __iomem *xbar; - const s16 (*xbar_chans)[2]; + s16 (*xbar_chans)[2]; + size_t nelm = sz / sizeof(s16); u32 shift, offset, mux; + int ret, i; - xbar_chans = devm_kzalloc(dev, - len/sizeof(s16) + 2*sizeof(s16), - GFP_KERNEL); + xbar_chans = devm_kzalloc(dev, (nelm + 2) * sizeof(s16), GFP_KERNEL); if (!xbar_chans) return -ENOMEM; ret = of_address_to_resource(node, 1, &res); if (ret) - return -EIO; + return -ENOMEM; xbar = devm_ioremap(dev, res.start, resource_size(&res)); if (!xbar) return -ENOMEM; - ret = edma_of_read_u32_to_s16_array(node, - "ti,edma-xbar-event-map", - (s16 *)xbar_chans, - len/sizeof(u32)); + ret = of_property_read_u16_array(node, pname, (u16 *)xbar_chans, nelm); if (ret) return -EIO; - for (i = 0; xbar_chans[i][0] != -1; i++) { + /* Invalidate last entry for the other user of this mess */ + nelm >>= 1; + xbar_chans[nelm][0] = xbar_chans[nelm][1] = -1; + + for (i = 0; i < nelm; i++) { shift = (xbar_chans[i][1] & 0x03) << 3; offset = xbar_chans[i][1] & 0xfffffffc; mux = readl(xbar + offset); @@ -1480,8 +1463,7 @@ static int edma_xbar_event_map(struct device *dev, writel(mux, (xbar + offset)); } - pdata->xbar_chans = xbar_chans; - + pdata->xbar_chans = (const s16 (*)[2]) xbar_chans; return 0; } -- cgit v1.2.3 From 3891a04aafd668686239349ea58f3314ea2af86b Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Tue, 29 Apr 2014 16:46:09 -0700 Subject: x86-64, espfix: Don't leak bits 31:16 of %esp returning to 16-bit stack The IRET instruction, when returning to a 16-bit segment, only restores the bottom 16 bits of the user space stack pointer. This causes some 16-bit software to break, but it also leaks kernel state to user space. We have a software workaround for that ("espfix") for the 32-bit kernel, but it relies on a nonzero stack segment base which is not available in 64-bit mode. In checkin: b3b42ac2cbae x86-64, modify_ldt: Ban 16-bit segments on 64-bit kernels we "solved" this by forbidding 16-bit segments on 64-bit kernels, with the logic that 16-bit support is crippled on 64-bit kernels anyway (no V86 support), but it turns out that people are doing stuff like running old Win16 binaries under Wine and expect it to work. This works around this by creating percpu "ministacks", each of which is mapped 2^16 times 64K apart. When we detect that the return SS is on the LDT, we copy the IRET frame to the ministack and use the relevant alias to return to userspace. The ministacks are mapped readonly, so if IRET faults we promote #GP to #DF which is an IST vector and thus has its own stack; we then do the fixup in the #DF handler. (Making #GP an IST exception would make the msr_safe functions unsafe in NMI/MC context, and quite possibly have other effects.) Special thanks to: - Andy Lutomirski, for the suggestion of using very small stack slots and copy (as opposed to map) the IRET frame there, and for the suggestion to mark them readonly and let the fault promote to #DF. - Konrad Wilk for paravirt fixup and testing. - Borislav Petkov for testing help and useful comments. Reported-by: Brian Gerst Signed-off-by: H. Peter Anvin Link: http://lkml.kernel.org/r/1398816946-3351-1-git-send-email-hpa@linux.intel.com Cc: Konrad Rzeszutek Wilk Cc: Borislav Petkov Cc: Andrew Lutomriski Cc: Linus Torvalds Cc: Dirk Hohndel Cc: Arjan van de Ven Cc: comex Cc: Alexander van Heukelum Cc: Boris Ostrovsky Cc: # consider after upstream merge --- Documentation/x86/x86_64/mm.txt | 2 + arch/x86/include/asm/pgtable_64_types.h | 2 + arch/x86/include/asm/setup.h | 3 + arch/x86/kernel/Makefile | 1 + arch/x86/kernel/entry_64.S | 73 ++++++++++- arch/x86/kernel/espfix_64.c | 208 ++++++++++++++++++++++++++++++++ arch/x86/kernel/ldt.c | 11 -- arch/x86/kernel/smpboot.c | 7 ++ arch/x86/mm/dump_pagetables.c | 44 +++++-- init/main.c | 4 + 10 files changed, 329 insertions(+), 26 deletions(-) create mode 100644 arch/x86/kernel/espfix_64.c (limited to 'Documentation') diff --git a/Documentation/x86/x86_64/mm.txt b/Documentation/x86/x86_64/mm.txt index c584a51add15..afe68ddbe6a4 100644 --- a/Documentation/x86/x86_64/mm.txt +++ b/Documentation/x86/x86_64/mm.txt @@ -12,6 +12,8 @@ ffffc90000000000 - ffffe8ffffffffff (=45 bits) vmalloc/ioremap space ffffe90000000000 - ffffe9ffffffffff (=40 bits) hole ffffea0000000000 - ffffeaffffffffff (=40 bits) virtual memory map (1TB) ... unused hole ... +ffffff0000000000 - ffffff7fffffffff (=39 bits) %esp fixup stacks +... unused hole ... ffffffff80000000 - ffffffffa0000000 (=512 MB) kernel text mapping, from phys 0 ffffffffa0000000 - ffffffffff5fffff (=1525 MB) module mapping space ffffffffff600000 - ffffffffffdfffff (=8 MB) vsyscalls diff --git a/arch/x86/include/asm/pgtable_64_types.h b/arch/x86/include/asm/pgtable_64_types.h index c883bf726398..7166e25ecb57 100644 --- a/arch/x86/include/asm/pgtable_64_types.h +++ b/arch/x86/include/asm/pgtable_64_types.h @@ -61,6 +61,8 @@ typedef struct { pteval_t pte; } pte_t; #define MODULES_VADDR (__START_KERNEL_map + KERNEL_IMAGE_SIZE) #define MODULES_END _AC(0xffffffffff000000, UL) #define MODULES_LEN (MODULES_END - MODULES_VADDR) +#define ESPFIX_PGD_ENTRY _AC(-2, UL) +#define ESPFIX_BASE_ADDR (ESPFIX_PGD_ENTRY << PGDIR_SHIFT) #define EARLY_DYNAMIC_PAGE_TABLES 64 diff --git a/arch/x86/include/asm/setup.h b/arch/x86/include/asm/setup.h index 9264f04a4c55..9e3be3329a7e 100644 --- a/arch/x86/include/asm/setup.h +++ b/arch/x86/include/asm/setup.h @@ -57,6 +57,9 @@ extern void x86_ce4100_early_setup(void); static inline void x86_ce4100_early_setup(void) { } #endif +extern void init_espfix_bsp(void); +extern void init_espfix_ap(void); + #ifndef _SETUP /* diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index f4d96000d33a..1cc3789d99d9 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@ -29,6 +29,7 @@ obj-$(CONFIG_X86_64) += sys_x86_64.o x8664_ksyms_64.o obj-y += syscall_$(BITS).o vsyscall_gtod.o obj-$(CONFIG_X86_64) += vsyscall_64.o obj-$(CONFIG_X86_64) += vsyscall_emu_64.o +obj-$(CONFIG_X86_64) += espfix_64.o obj-$(CONFIG_SYSFS) += ksysfs.o obj-y += bootflag.o e820.o obj-y += pci-dma.o quirks.o topology.o kdebugfs.o diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S index 1e96c3628bf2..bffaa986cafc 100644 --- a/arch/x86/kernel/entry_64.S +++ b/arch/x86/kernel/entry_64.S @@ -58,6 +58,7 @@ #include #include #include +#include #include /* Avoid __ASSEMBLER__'ifying just for this. */ @@ -1040,8 +1041,16 @@ restore_args: RESTORE_ARGS 1,8,1 irq_return: + /* + * Are we returning to a stack segment from the LDT? Note: in + * 64-bit mode SS:RSP on the exception stack is always valid. + */ + testb $4,(SS-RIP)(%rsp) + jnz irq_return_ldt + +irq_return_iret: INTERRUPT_RETURN - _ASM_EXTABLE(irq_return, bad_iret) + _ASM_EXTABLE(irq_return_iret, bad_iret) #ifdef CONFIG_PARAVIRT ENTRY(native_iret) @@ -1049,6 +1058,30 @@ ENTRY(native_iret) _ASM_EXTABLE(native_iret, bad_iret) #endif +irq_return_ldt: + pushq_cfi %rax + pushq_cfi %rdi + SWAPGS + movq PER_CPU_VAR(espfix_waddr),%rdi + movq %rax,(0*8)(%rdi) /* RAX */ + movq (2*8)(%rsp),%rax /* RIP */ + movq %rax,(1*8)(%rdi) + movq (3*8)(%rsp),%rax /* CS */ + movq %rax,(2*8)(%rdi) + movq (4*8)(%rsp),%rax /* RFLAGS */ + movq %rax,(3*8)(%rdi) + movq (6*8)(%rsp),%rax /* SS */ + movq %rax,(5*8)(%rdi) + movq (5*8)(%rsp),%rax /* RSP */ + movq %rax,(4*8)(%rdi) + andl $0xffff0000,%eax + popq_cfi %rdi + orq PER_CPU_VAR(espfix_stack),%rax + SWAPGS + movq %rax,%rsp + popq_cfi %rax + jmp irq_return_iret + .section .fixup,"ax" bad_iret: /* @@ -1110,9 +1143,41 @@ ENTRY(retint_kernel) call preempt_schedule_irq jmp exit_intr #endif - CFI_ENDPROC END(common_interrupt) + + /* + * If IRET takes a fault on the espfix stack, then we + * end up promoting it to a doublefault. In that case, + * modify the stack to make it look like we just entered + * the #GP handler from user space, similar to bad_iret. + */ + ALIGN +__do_double_fault: + XCPT_FRAME 1 RDI+8 + movq RSP(%rdi),%rax /* Trap on the espfix stack? */ + sarq $PGDIR_SHIFT,%rax + cmpl $ESPFIX_PGD_ENTRY,%eax + jne do_double_fault /* No, just deliver the fault */ + cmpl $__KERNEL_CS,CS(%rdi) + jne do_double_fault + movq RIP(%rdi),%rax + cmpq $irq_return_iret,%rax +#ifdef CONFIG_PARAVIRT + je 1f + cmpq $native_iret,%rax +#endif + jne do_double_fault /* This shouldn't happen... */ +1: + movq PER_CPU_VAR(kernel_stack),%rax + subq $(6*8-KERNEL_STACK_OFFSET),%rax /* Reset to original stack */ + movq %rax,RSP(%rdi) + movq $0,(%rax) /* Missing (lost) #GP error code */ + movq $general_protection,RIP(%rdi) + retq + CFI_ENDPROC +END(__do_double_fault) + /* * End of kprobes section */ @@ -1314,7 +1379,7 @@ zeroentry overflow do_overflow zeroentry bounds do_bounds zeroentry invalid_op do_invalid_op zeroentry device_not_available do_device_not_available -paranoiderrorentry double_fault do_double_fault +paranoiderrorentry double_fault __do_double_fault zeroentry coprocessor_segment_overrun do_coprocessor_segment_overrun errorentry invalid_TSS do_invalid_TSS errorentry segment_not_present do_segment_not_present @@ -1601,7 +1666,7 @@ error_sti: */ error_kernelspace: incl %ebx - leaq irq_return(%rip),%rcx + leaq irq_return_iret(%rip),%rcx cmpq %rcx,RIP+8(%rsp) je error_swapgs movl %ecx,%eax /* zero extend */ diff --git a/arch/x86/kernel/espfix_64.c b/arch/x86/kernel/espfix_64.c new file mode 100644 index 000000000000..8a64da36310f --- /dev/null +++ b/arch/x86/kernel/espfix_64.c @@ -0,0 +1,208 @@ +/* ----------------------------------------------------------------------- * + * + * Copyright 2014 Intel Corporation; author: H. Peter Anvin + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * ----------------------------------------------------------------------- */ + +/* + * The IRET instruction, when returning to a 16-bit segment, only + * restores the bottom 16 bits of the user space stack pointer. This + * causes some 16-bit software to break, but it also leaks kernel state + * to user space. + * + * This works around this by creating percpu "ministacks", each of which + * is mapped 2^16 times 64K apart. When we detect that the return SS is + * on the LDT, we copy the IRET frame to the ministack and use the + * relevant alias to return to userspace. The ministacks are mapped + * readonly, so if the IRET fault we promote #GP to #DF which is an IST + * vector and thus has its own stack; we then do the fixup in the #DF + * handler. + * + * This file sets up the ministacks and the related page tables. The + * actual ministack invocation is in entry_64.S. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/* + * Note: we only need 6*8 = 48 bytes for the espfix stack, but round + * it up to a cache line to avoid unnecessary sharing. + */ +#define ESPFIX_STACK_SIZE (8*8UL) +#define ESPFIX_STACKS_PER_PAGE (PAGE_SIZE/ESPFIX_STACK_SIZE) + +/* There is address space for how many espfix pages? */ +#define ESPFIX_PAGE_SPACE (1UL << (PGDIR_SHIFT-PAGE_SHIFT-16)) + +#define ESPFIX_MAX_CPUS (ESPFIX_STACKS_PER_PAGE * ESPFIX_PAGE_SPACE) +#if CONFIG_NR_CPUS > ESPFIX_MAX_CPUS +# error "Need more than one PGD for the ESPFIX hack" +#endif + +#define PGALLOC_GFP (GFP_KERNEL | __GFP_NOTRACK | __GFP_REPEAT | __GFP_ZERO) + +/* This contains the *bottom* address of the espfix stack */ +DEFINE_PER_CPU_READ_MOSTLY(unsigned long, espfix_stack); +DEFINE_PER_CPU_READ_MOSTLY(unsigned long, espfix_waddr); + +/* Initialization mutex - should this be a spinlock? */ +static DEFINE_MUTEX(espfix_init_mutex); + +/* Page allocation bitmap - each page serves ESPFIX_STACKS_PER_PAGE CPUs */ +#define ESPFIX_MAX_PAGES DIV_ROUND_UP(CONFIG_NR_CPUS, ESPFIX_STACKS_PER_PAGE) +static void *espfix_pages[ESPFIX_MAX_PAGES]; + +static __page_aligned_bss pud_t espfix_pud_page[PTRS_PER_PUD] + __aligned(PAGE_SIZE); + +static unsigned int page_random, slot_random; + +/* + * This returns the bottom address of the espfix stack for a specific CPU. + * The math allows for a non-power-of-two ESPFIX_STACK_SIZE, in which case + * we have to account for some amount of padding at the end of each page. + */ +static inline unsigned long espfix_base_addr(unsigned int cpu) +{ + unsigned long page, slot; + unsigned long addr; + + page = (cpu / ESPFIX_STACKS_PER_PAGE) ^ page_random; + slot = (cpu + slot_random) % ESPFIX_STACKS_PER_PAGE; + addr = (page << PAGE_SHIFT) + (slot * ESPFIX_STACK_SIZE); + addr = (addr & 0xffffUL) | ((addr & ~0xffffUL) << 16); + addr += ESPFIX_BASE_ADDR; + return addr; +} + +#define PTE_STRIDE (65536/PAGE_SIZE) +#define ESPFIX_PTE_CLONES (PTRS_PER_PTE/PTE_STRIDE) +#define ESPFIX_PMD_CLONES PTRS_PER_PMD +#define ESPFIX_PUD_CLONES (65536/(ESPFIX_PTE_CLONES*ESPFIX_PMD_CLONES)) + +#define PGTABLE_PROT ((_KERNPG_TABLE & ~_PAGE_RW) | _PAGE_NX) + +static void init_espfix_random(void) +{ + unsigned long rand; + + /* + * This is run before the entropy pools are initialized, + * but this is hopefully better than nothing. + */ + if (!arch_get_random_long(&rand)) { + /* The constant is an arbitrary large prime */ + rdtscll(rand); + rand *= 0xc345c6b72fd16123UL; + } + + slot_random = rand % ESPFIX_STACKS_PER_PAGE; + page_random = (rand / ESPFIX_STACKS_PER_PAGE) + & (ESPFIX_PAGE_SPACE - 1); +} + +void __init init_espfix_bsp(void) +{ + pgd_t *pgd_p; + pteval_t ptemask; + + ptemask = __supported_pte_mask; + + /* Install the espfix pud into the kernel page directory */ + pgd_p = &init_level4_pgt[pgd_index(ESPFIX_BASE_ADDR)]; + pgd_populate(&init_mm, pgd_p, (pud_t *)espfix_pud_page); + + /* Randomize the locations */ + init_espfix_random(); + + /* The rest is the same as for any other processor */ + init_espfix_ap(); +} + +void init_espfix_ap(void) +{ + unsigned int cpu, page; + unsigned long addr; + pud_t pud, *pud_p; + pmd_t pmd, *pmd_p; + pte_t pte, *pte_p; + int n; + void *stack_page; + pteval_t ptemask; + + /* We only have to do this once... */ + if (likely(this_cpu_read(espfix_stack))) + return; /* Already initialized */ + + cpu = smp_processor_id(); + addr = espfix_base_addr(cpu); + page = cpu/ESPFIX_STACKS_PER_PAGE; + + /* Did another CPU already set this up? */ + stack_page = ACCESS_ONCE(espfix_pages[page]); + if (likely(stack_page)) + goto done; + + mutex_lock(&espfix_init_mutex); + + /* Did we race on the lock? */ + stack_page = ACCESS_ONCE(espfix_pages[page]); + if (stack_page) + goto unlock_done; + + ptemask = __supported_pte_mask; + + pud_p = &espfix_pud_page[pud_index(addr)]; + pud = *pud_p; + if (!pud_present(pud)) { + pmd_p = (pmd_t *)__get_free_page(PGALLOC_GFP); + pud = __pud(__pa(pmd_p) | (PGTABLE_PROT & ptemask)); + paravirt_alloc_pud(&init_mm, __pa(pmd_p) >> PAGE_SHIFT); + for (n = 0; n < ESPFIX_PUD_CLONES; n++) + set_pud(&pud_p[n], pud); + } + + pmd_p = pmd_offset(&pud, addr); + pmd = *pmd_p; + if (!pmd_present(pmd)) { + pte_p = (pte_t *)__get_free_page(PGALLOC_GFP); + pmd = __pmd(__pa(pte_p) | (PGTABLE_PROT & ptemask)); + paravirt_alloc_pmd(&init_mm, __pa(pte_p) >> PAGE_SHIFT); + for (n = 0; n < ESPFIX_PMD_CLONES; n++) + set_pmd(&pmd_p[n], pmd); + } + + pte_p = pte_offset_kernel(&pmd, addr); + stack_page = (void *)__get_free_page(GFP_KERNEL); + pte = __pte(__pa(stack_page) | (__PAGE_KERNEL_RO & ptemask)); + paravirt_alloc_pte(&init_mm, __pa(stack_page) >> PAGE_SHIFT); + for (n = 0; n < ESPFIX_PTE_CLONES; n++) + set_pte(&pte_p[n*PTE_STRIDE], pte); + + /* Job is done for this CPU and any CPU which shares this page */ + ACCESS_ONCE(espfix_pages[page]) = stack_page; + +unlock_done: + mutex_unlock(&espfix_init_mutex); +done: + this_cpu_write(espfix_stack, addr); + this_cpu_write(espfix_waddr, (unsigned long)stack_page + + (addr & ~PAGE_MASK)); +} diff --git a/arch/x86/kernel/ldt.c b/arch/x86/kernel/ldt.c index af1d14a9ebda..ebc987398923 100644 --- a/arch/x86/kernel/ldt.c +++ b/arch/x86/kernel/ldt.c @@ -229,17 +229,6 @@ static int write_ldt(void __user *ptr, unsigned long bytecount, int oldmode) } } - /* - * On x86-64 we do not support 16-bit segments due to - * IRET leaking the high bits of the kernel stack address. - */ -#ifdef CONFIG_X86_64 - if (!ldt_info.seg_32bit) { - error = -EINVAL; - goto out_unlock; - } -#endif - fill_ldt(&ldt, &ldt_info); if (oldmode) ldt.avl = 0; diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 34826934d4a7..61a5350850fb 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -243,6 +243,13 @@ static void notrace start_secondary(void *unused) */ check_tsc_sync_target(); + /* + * Enable the espfix hack for this CPU + */ +#ifdef CONFIG_X86_64 + init_espfix_ap(); +#endif + /* * We need to hold vector_lock so there the set of online cpus * does not change while we are assigning vectors to cpus. Holding diff --git a/arch/x86/mm/dump_pagetables.c b/arch/x86/mm/dump_pagetables.c index 20621d753d5f..167ffcac16ed 100644 --- a/arch/x86/mm/dump_pagetables.c +++ b/arch/x86/mm/dump_pagetables.c @@ -30,12 +30,14 @@ struct pg_state { unsigned long start_address; unsigned long current_address; const struct addr_marker *marker; + unsigned long lines; bool to_dmesg; }; struct addr_marker { unsigned long start_address; const char *name; + unsigned long max_lines; }; /* indices for address_markers; keep sync'd w/ address_markers below */ @@ -46,6 +48,7 @@ enum address_markers_idx { LOW_KERNEL_NR, VMALLOC_START_NR, VMEMMAP_START_NR, + ESPFIX_START_NR, HIGH_KERNEL_NR, MODULES_VADDR_NR, MODULES_END_NR, @@ -68,6 +71,7 @@ static struct addr_marker address_markers[] = { { PAGE_OFFSET, "Low Kernel Mapping" }, { VMALLOC_START, "vmalloc() Area" }, { VMEMMAP_START, "Vmemmap" }, + { ESPFIX_BASE_ADDR, "ESPfix Area", 16 }, { __START_KERNEL_map, "High Kernel Mapping" }, { MODULES_VADDR, "Modules" }, { MODULES_END, "End Modules" }, @@ -182,7 +186,7 @@ static void note_page(struct seq_file *m, struct pg_state *st, pgprot_t new_prot, int level) { pgprotval_t prot, cur; - static const char units[] = "KMGTPE"; + static const char units[] = "BKMGTPE"; /* * If we have a "break" in the series, we need to flush the state that @@ -197,6 +201,7 @@ static void note_page(struct seq_file *m, struct pg_state *st, st->current_prot = new_prot; st->level = level; st->marker = address_markers; + st->lines = 0; pt_dump_seq_printf(m, st->to_dmesg, "---[ %s ]---\n", st->marker->name); } else if (prot != cur || level != st->level || @@ -208,17 +213,24 @@ static void note_page(struct seq_file *m, struct pg_state *st, /* * Now print the actual finished series */ - pt_dump_seq_printf(m, st->to_dmesg, "0x%0*lx-0x%0*lx ", - width, st->start_address, - width, st->current_address); - - delta = (st->current_address - st->start_address) >> 10; - while (!(delta & 1023) && unit[1]) { - delta >>= 10; - unit++; + if (!st->marker->max_lines || + st->lines < st->marker->max_lines) { + pt_dump_seq_printf(m, st->to_dmesg, + "0x%0*lx-0x%0*lx ", + width, st->start_address, + width, st->current_address); + + delta = st->current_address - st->start_address; + while (!(delta & 1023) && unit[1]) { + delta >>= 10; + unit++; + } + pt_dump_cont_printf(m, st->to_dmesg, "%9lu%c ", + delta, *unit); + printk_prot(m, st->current_prot, st->level, + st->to_dmesg); } - pt_dump_cont_printf(m, st->to_dmesg, "%9lu%c ", delta, *unit); - printk_prot(m, st->current_prot, st->level, st->to_dmesg); + st->lines++; /* * We print markers for special areas of address space, @@ -226,7 +238,17 @@ static void note_page(struct seq_file *m, struct pg_state *st, * This helps in the interpretation. */ if (st->current_address >= st->marker[1].start_address) { + if (st->marker->max_lines && + st->lines > st->marker->max_lines) { + unsigned long nskip = + st->lines - st->marker->max_lines; + pt_dump_seq_printf(m, st->to_dmesg, + "... %lu entr%s skipped ... \n", + nskip, + nskip == 1 ? "y" : "ies"); + } st->marker++; + st->lines = 0; pt_dump_seq_printf(m, st->to_dmesg, "---[ %s ]---\n", st->marker->name); } diff --git a/init/main.c b/init/main.c index 9c7fd4c9249f..70fc00e7db06 100644 --- a/init/main.c +++ b/init/main.c @@ -616,6 +616,10 @@ asmlinkage void __init start_kernel(void) #ifdef CONFIG_X86 if (efi_enabled(EFI_RUNTIME_SERVICES)) efi_enter_virtual_mode(); +#endif +#ifdef CONFIG_X86_64 + /* Should be run before the first non-init thread is created */ + init_espfix_bsp(); #endif thread_info_cache_init(); cred_init(); -- cgit v1.2.3 From 36189cc3cd57ab0f1cd75241f93fe01de928ac06 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Mon, 5 May 2014 09:36:43 -0700 Subject: Input: elantech - fix touchpad initialization on Gigabyte U2442 The hw_version 3 Elantech touchpad on the Gigabyte U2442 does not accept 0x0b as initialization value for r10, this stand-alone version of the driver: http://planet76.com/drivers/elantech/psmouse-elantech-v6.tar.bz2 Uses 0x03 which does work, so this means not setting bit 3 of r10 which sets: "Enable Real H/W Resolution In Absolute mode" Which will result in half the x and y resolution we get with that bit set, so simply not setting it everywhere is not a solution. We've been unable to find a way to identify touchpads where setting the bit will fail, so this patch uses a dmi based blacklist for this. https://bugzilla.kernel.org/show_bug.cgi?id=61151 Cc: stable@vger.kernel.org Reported-by: Philipp Wolfer Tested-by: Philipp Wolfer Signed-off-by: Hans de Goede Signed-off-by: Dmitry Torokhov --- Documentation/input/elantech.txt | 5 ++++- drivers/input/mouse/elantech.c | 26 +++++++++++++++++++++++++- drivers/input/mouse/elantech.h | 1 + 3 files changed, 30 insertions(+), 2 deletions(-) (limited to 'Documentation') diff --git a/Documentation/input/elantech.txt b/Documentation/input/elantech.txt index 5602eb71ad5d..e1ae127ed099 100644 --- a/Documentation/input/elantech.txt +++ b/Documentation/input/elantech.txt @@ -504,9 +504,12 @@ byte 5: * reg_10 bit 7 6 5 4 3 2 1 0 - 0 0 0 0 0 0 0 A + 0 0 0 0 R F T A A: 1 = enable absolute tracking + T: 1 = enable two finger mode auto correct + F: 1 = disable ABS Position Filter + R: 1 = enable real hardware resolution 6.2 Native absolute mode 6 byte packet format ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ diff --git a/drivers/input/mouse/elantech.c b/drivers/input/mouse/elantech.c index 088d3541c7d3..b96e978a37b7 100644 --- a/drivers/input/mouse/elantech.c +++ b/drivers/input/mouse/elantech.c @@ -11,6 +11,7 @@ */ #include +#include #include #include #include @@ -831,7 +832,11 @@ static int elantech_set_absolute_mode(struct psmouse *psmouse) break; case 3: - etd->reg_10 = 0x0b; + if (etd->set_hw_resolution) + etd->reg_10 = 0x0b; + else + etd->reg_10 = 0x03; + if (elantech_write_reg(psmouse, 0x10, etd->reg_10)) rc = -1; @@ -1330,6 +1335,22 @@ static int elantech_reconnect(struct psmouse *psmouse) return 0; } +/* + * Some hw_version 3 models go into error state when we try to set bit 3 of r10 + */ +static const struct dmi_system_id no_hw_res_dmi_table[] = { +#if defined(CONFIG_DMI) && defined(CONFIG_X86) + { + /* Gigabyte U2442 */ + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "GIGABYTE"), + DMI_MATCH(DMI_PRODUCT_NAME, "U2442"), + }, + }, +#endif + { } +}; + /* * determine hardware version and set some properties according to it. */ @@ -1390,6 +1411,9 @@ static int elantech_set_properties(struct elantech_data *etd) */ etd->crc_enabled = ((etd->fw_version & 0x4000) == 0x4000); + /* Enable real hardware resolution on hw_version 3 ? */ + etd->set_hw_resolution = !dmi_check_system(no_hw_res_dmi_table); + return 0; } diff --git a/drivers/input/mouse/elantech.h b/drivers/input/mouse/elantech.h index 036a04abaef7..9e0e2a1f340d 100644 --- a/drivers/input/mouse/elantech.h +++ b/drivers/input/mouse/elantech.h @@ -130,6 +130,7 @@ struct elantech_data { bool jumpy_cursor; bool reports_pressure; bool crc_enabled; + bool set_hw_resolution; unsigned char hw_version; unsigned int fw_version; unsigned int single_finger_reports; -- cgit v1.2.3 From 7a5091d58419b4e5222abce58a40c072786ea1d6 Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Sun, 11 May 2014 20:25:20 -0700 Subject: x86, rdrand: When nordrand is specified, disable RDSEED as well One can logically expect that when the user has specified "nordrand", the user doesn't want any use of the CPU random number generator, neither RDRAND nor RDSEED, so disable both. Reported-by: Stephan Mueller Cc: Theodore Ts'o Link: http://lkml.kernel.org/r/21542339.0lFnPSyGRS@myon.chronox.de Signed-off-by: H. Peter Anvin --- Documentation/kernel-parameters.txt | 8 ++++---- arch/x86/kernel/cpu/rdrand.c | 1 + 2 files changed, 5 insertions(+), 4 deletions(-) (limited to 'Documentation') diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index 43842177b771..30a8ad0dae53 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -2218,10 +2218,10 @@ bytes respectively. Such letter suffixes can also be entirely omitted. noreplace-smp [X86-32,SMP] Don't replace SMP instructions with UP alternatives - nordrand [X86] Disable the direct use of the RDRAND - instruction even if it is supported by the - processor. RDRAND is still available to user - space applications. + nordrand [X86] Disable kernel use of the RDRAND and + RDSEED instructions even if they are supported + by the processor. RDRAND and RDSEED are still + available to user space applications. noresume [SWSUSP] Disables resume and restores original swap space. diff --git a/arch/x86/kernel/cpu/rdrand.c b/arch/x86/kernel/cpu/rdrand.c index 384df5105fbc..136ac74dee82 100644 --- a/arch/x86/kernel/cpu/rdrand.c +++ b/arch/x86/kernel/cpu/rdrand.c @@ -27,6 +27,7 @@ static int __init x86_rdrand_setup(char *s) { setup_clear_cpu_cap(X86_FEATURE_RDRAND); + setup_clear_cpu_cap(X86_FEATURE_RDSEED); return 1; } __setup("nordrand", x86_rdrand_setup); -- cgit v1.2.3