diff options
author | Ingo Molnar <mingo@kernel.org> | 2018-03-31 07:30:17 +0200 |
---|---|---|
committer | Ingo Molnar <mingo@kernel.org> | 2018-03-31 07:30:17 +0200 |
commit | 169310f71fc820fe153ec04c6a111e444a68d6d5 (patch) | |
tree | a586d0f08548159ceb8527ff59f8414579f70c1f /arch | |
parent | b3c39758c8a6972f02b43f83dba7fe7a352371b9 (diff) | |
parent | c2a9838452a4d71f76103c18c926468a9ea05713 (diff) | |
download | linux-169310f71fc820fe153ec04c6a111e444a68d6d5.tar.bz2 |
Merge branch 'linus' into locking/core, to pick up fixes
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Diffstat (limited to 'arch')
78 files changed, 612 insertions, 469 deletions
diff --git a/arch/arm/Kconfig.debug b/arch/arm/Kconfig.debug index 78a647080ebc..199ebc1c4538 100644 --- a/arch/arm/Kconfig.debug +++ b/arch/arm/Kconfig.debug @@ -22,6 +22,7 @@ config ARM_PTDUMP_DEBUGFS config DEBUG_WX bool "Warn on W+X mappings at boot" + depends on MMU select ARM_PTDUMP_CORE ---help--- Generate a warning if any W+X mappings are found at boot. diff --git a/arch/arm/boot/deflate_xip_data.sh b/arch/arm/boot/deflate_xip_data.sh index 1189598a25eb..5e7d758ebdd6 100755 --- a/arch/arm/boot/deflate_xip_data.sh +++ b/arch/arm/boot/deflate_xip_data.sh @@ -30,7 +30,7 @@ esac sym_val() { # extract hex value for symbol in $1 - local val=$($NM "$VMLINUX" | sed -n "/ $1$/{s/ .*$//p;q}") + local val=$($NM "$VMLINUX" 2>/dev/null | sed -n "/ $1\$/{s/ .*$//p;q}") [ "$val" ] || { echo "can't find $1 in $VMLINUX" 1>&2; exit 1; } # convert from hex to decimal echo $((0x$val)) @@ -48,12 +48,12 @@ data_end=$(($_edata_loc - $base_offset)) file_end=$(stat -c "%s" "$XIPIMAGE") if [ "$file_end" != "$data_end" ]; then printf "end of xipImage doesn't match with _edata_loc (%#x vs %#x)\n" \ - $(($file_end + $base_offset)) $_edata_loc 2>&1 + $(($file_end + $base_offset)) $_edata_loc 1>&2 exit 1; fi # be ready to clean up -trap 'rm -f "$XIPIMAGE.tmp"' 0 1 2 3 +trap 'rm -f "$XIPIMAGE.tmp"; exit 1' 1 2 3 # substitute the data section by a compressed version $DD if="$XIPIMAGE" count=$data_start iflag=count_bytes of="$XIPIMAGE.tmp" diff --git a/arch/arm/boot/dts/aspeed-g4.dtsi b/arch/arm/boot/dts/aspeed-g4.dtsi index b0d8431a3700..ae2b8c952e80 100644 --- a/arch/arm/boot/dts/aspeed-g4.dtsi +++ b/arch/arm/boot/dts/aspeed-g4.dtsi @@ -42,6 +42,11 @@ }; }; + memory@40000000 { + device_type = "memory"; + reg = <0x40000000 0>; + }; + ahb { compatible = "simple-bus"; #address-cells = <1>; diff --git a/arch/arm/boot/dts/aspeed-g5.dtsi b/arch/arm/boot/dts/aspeed-g5.dtsi index 40de3b66c33f..2477ebc11d9d 100644 --- a/arch/arm/boot/dts/aspeed-g5.dtsi +++ b/arch/arm/boot/dts/aspeed-g5.dtsi @@ -42,6 +42,11 @@ }; }; + memory@80000000 { + device_type = "memory"; + reg = <0x80000000 0>; + }; + ahb { compatible = "simple-bus"; #address-cells = <1>; diff --git a/arch/arm/boot/dts/imx7d-sdb.dts b/arch/arm/boot/dts/imx7d-sdb.dts index a7a5dc7b2700..e7d2db839d70 100644 --- a/arch/arm/boot/dts/imx7d-sdb.dts +++ b/arch/arm/boot/dts/imx7d-sdb.dts @@ -82,7 +82,7 @@ enable-active-high; }; - reg_usb_otg2_vbus: regulator-usb-otg1-vbus { + reg_usb_otg2_vbus: regulator-usb-otg2-vbus { compatible = "regulator-fixed"; regulator-name = "usb_otg2_vbus"; regulator-min-microvolt = <5000000>; diff --git a/arch/arm/boot/dts/rk3288.dtsi b/arch/arm/boot/dts/rk3288.dtsi index 6102e4e7f35c..354aff45c1af 100644 --- a/arch/arm/boot/dts/rk3288.dtsi +++ b/arch/arm/boot/dts/rk3288.dtsi @@ -927,6 +927,7 @@ i2s: i2s@ff890000 { compatible = "rockchip,rk3288-i2s", "rockchip,rk3066-i2s"; reg = <0x0 0xff890000 0x0 0x10000>; + #sound-dai-cells = <0>; interrupts = <GIC_SPI 53 IRQ_TYPE_LEVEL_HIGH>; #address-cells = <1>; #size-cells = <0>; @@ -1176,6 +1177,7 @@ compatible = "rockchip,rk3288-dw-hdmi"; reg = <0x0 0xff980000 0x0 0x20000>; reg-io-width = <4>; + #sound-dai-cells = <0>; rockchip,grf = <&grf>; interrupts = <GIC_SPI 103 IRQ_TYPE_LEVEL_HIGH>; clocks = <&cru PCLK_HDMI_CTRL>, <&cru SCLK_HDMI_HDCP>, <&cru SCLK_HDMI_CEC>; diff --git a/arch/arm/boot/dts/sun6i-a31s-sinovoip-bpi-m2.dts b/arch/arm/boot/dts/sun6i-a31s-sinovoip-bpi-m2.dts index 51e6f1d21c32..b2758dd8ce43 100644 --- a/arch/arm/boot/dts/sun6i-a31s-sinovoip-bpi-m2.dts +++ b/arch/arm/boot/dts/sun6i-a31s-sinovoip-bpi-m2.dts @@ -42,7 +42,6 @@ /dts-v1/; #include "sun6i-a31s.dtsi" -#include "sunxi-common-regulators.dtsi" #include <dt-bindings/gpio/gpio.h> / { @@ -99,6 +98,7 @@ pinctrl-0 = <&gmac_pins_rgmii_a>, <&gmac_phy_reset_pin_bpi_m2>; phy = <&phy1>; phy-mode = "rgmii"; + phy-supply = <®_dldo1>; snps,reset-gpio = <&pio 0 21 GPIO_ACTIVE_HIGH>; /* PA21 */ snps,reset-active-low; snps,reset-delays-us = <0 10000 30000>; @@ -118,7 +118,7 @@ &mmc0 { pinctrl-names = "default"; pinctrl-0 = <&mmc0_pins_a>, <&mmc0_cd_pin_bpi_m2>; - vmmc-supply = <®_vcc3v0>; + vmmc-supply = <®_dcdc1>; bus-width = <4>; cd-gpios = <&pio 0 4 GPIO_ACTIVE_HIGH>; /* PA4 */ cd-inverted; @@ -132,7 +132,7 @@ &mmc2 { pinctrl-names = "default"; pinctrl-0 = <&mmc2_pins_a>; - vmmc-supply = <®_vcc3v0>; + vmmc-supply = <®_aldo1>; mmc-pwrseq = <&mmc2_pwrseq>; bus-width = <4>; non-removable; @@ -163,6 +163,8 @@ reg = <0x68>; interrupt-parent = <&nmi_intc>; interrupts = <0 IRQ_TYPE_LEVEL_LOW>; + eldoin-supply = <®_dcdc1>; + x-powers,drive-vbus-en; }; }; @@ -193,7 +195,28 @@ #include "axp22x.dtsi" +®_aldo1 { + regulator-min-microvolt = <3300000>; + regulator-max-microvolt = <3300000>; + regulator-name = "vcc-wifi"; +}; + +®_aldo2 { + regulator-always-on; + regulator-min-microvolt = <2500000>; + regulator-max-microvolt = <2500000>; + regulator-name = "vcc-gmac"; +}; + +®_aldo3 { + regulator-always-on; + regulator-min-microvolt = <3000000>; + regulator-max-microvolt = <3000000>; + regulator-name = "avcc"; +}; + ®_dc5ldo { + regulator-always-on; regulator-min-microvolt = <700000>; regulator-max-microvolt = <1320000>; regulator-name = "vdd-cpus"; @@ -233,6 +256,40 @@ regulator-name = "vcc-dram"; }; +®_dldo1 { + regulator-min-microvolt = <3000000>; + regulator-max-microvolt = <3000000>; + regulator-name = "vcc-mac"; +}; + +®_dldo2 { + regulator-min-microvolt = <2800000>; + regulator-max-microvolt = <2800000>; + regulator-name = "avdd-csi"; +}; + +®_dldo3 { + regulator-always-on; + regulator-min-microvolt = <3300000>; + regulator-max-microvolt = <3300000>; + regulator-name = "vcc-pb"; +}; + +®_eldo1 { + regulator-min-microvolt = <1800000>; + regulator-max-microvolt = <1800000>; + regulator-name = "vdd-csi"; + status = "okay"; +}; + +®_ldo_io1 { + regulator-always-on; + regulator-min-microvolt = <1800000>; + regulator-max-microvolt = <1800000>; + regulator-name = "vcc-pm-cpus"; + status = "okay"; +}; + &uart0 { pinctrl-names = "default"; pinctrl-0 = <&uart0_pins_a>; diff --git a/arch/arm/include/asm/vdso.h b/arch/arm/include/asm/vdso.h index 9c99e817535e..5b85889f82ee 100644 --- a/arch/arm/include/asm/vdso.h +++ b/arch/arm/include/asm/vdso.h @@ -12,8 +12,6 @@ struct mm_struct; void arm_install_vdso(struct mm_struct *mm, unsigned long addr); -extern char vdso_start, vdso_end; - extern unsigned int vdso_total_pages; #else /* CONFIG_VDSO */ diff --git a/arch/arm/kernel/vdso.c b/arch/arm/kernel/vdso.c index a4d6dc0f2427..f4dd7f9663c1 100644 --- a/arch/arm/kernel/vdso.c +++ b/arch/arm/kernel/vdso.c @@ -39,6 +39,8 @@ static struct page **vdso_text_pagelist; +extern char vdso_start[], vdso_end[]; + /* Total number of pages needed for the data and text portions of the VDSO. */ unsigned int vdso_total_pages __ro_after_init; @@ -197,13 +199,13 @@ static int __init vdso_init(void) unsigned int text_pages; int i; - if (memcmp(&vdso_start, "\177ELF", 4)) { + if (memcmp(vdso_start, "\177ELF", 4)) { pr_err("VDSO is not a valid ELF object!\n"); return -ENOEXEC; } - text_pages = (&vdso_end - &vdso_start) >> PAGE_SHIFT; - pr_debug("vdso: %i text pages at base %p\n", text_pages, &vdso_start); + text_pages = (vdso_end - vdso_start) >> PAGE_SHIFT; + pr_debug("vdso: %i text pages at base %p\n", text_pages, vdso_start); /* Allocate the VDSO text pagelist */ vdso_text_pagelist = kcalloc(text_pages, sizeof(struct page *), @@ -218,7 +220,7 @@ static int __init vdso_init(void) for (i = 0; i < text_pages; i++) { struct page *page; - page = virt_to_page(&vdso_start + i * PAGE_SIZE); + page = virt_to_page(vdso_start + i * PAGE_SIZE); vdso_text_pagelist[i] = page; } @@ -229,7 +231,7 @@ static int __init vdso_init(void) cntvct_ok = cntvct_functional(); - patch_vdso(&vdso_start); + patch_vdso(vdso_start); return 0; } diff --git a/arch/arm/mach-davinci/board-omapl138-hawk.c b/arch/arm/mach-davinci/board-omapl138-hawk.c index a3e78074be70..62eb7d668890 100644 --- a/arch/arm/mach-davinci/board-omapl138-hawk.c +++ b/arch/arm/mach-davinci/board-omapl138-hawk.c @@ -127,8 +127,8 @@ static struct gpiod_lookup_table mmc_gpios_table = { .dev_id = "da830-mmc.0", .table = { /* CD: gpio3_12: gpio60: chip 1 contains gpio range 32-63*/ - GPIO_LOOKUP("davinci_gpio.1", 28, "cd", GPIO_ACTIVE_LOW), - GPIO_LOOKUP("davinci_gpio.1", 29, "wp", GPIO_ACTIVE_LOW), + GPIO_LOOKUP("davinci_gpio.0", 28, "cd", GPIO_ACTIVE_LOW), + GPIO_LOOKUP("davinci_gpio.0", 29, "wp", GPIO_ACTIVE_LOW), }, }; diff --git a/arch/arm/mach-ux500/cpu-db8500.c b/arch/arm/mach-ux500/cpu-db8500.c index 7e5d7a083707..36cd23c8be9b 100644 --- a/arch/arm/mach-ux500/cpu-db8500.c +++ b/arch/arm/mach-ux500/cpu-db8500.c @@ -133,6 +133,9 @@ static void __init u8500_init_machine(void) if (of_machine_is_compatible("st-ericsson,u8540")) of_platform_populate(NULL, u8500_local_bus_nodes, u8540_auxdata_lookup, NULL); + else + of_platform_populate(NULL, u8500_local_bus_nodes, + NULL, NULL); } static const char * stericsson_dt_platform_compat[] = { diff --git a/arch/arm/plat-omap/dmtimer.c b/arch/arm/plat-omap/dmtimer.c index d443e481c3e9..8805a59bae53 100644 --- a/arch/arm/plat-omap/dmtimer.c +++ b/arch/arm/plat-omap/dmtimer.c @@ -888,11 +888,8 @@ static int omap_dm_timer_probe(struct platform_device *pdev) timer->irq = irq->start; timer->pdev = pdev; - /* Skip pm_runtime_enable for OMAP1 */ - if (!(timer->capability & OMAP_TIMER_NEEDS_RESET)) { - pm_runtime_enable(dev); - pm_runtime_irq_safe(dev); - } + pm_runtime_enable(dev); + pm_runtime_irq_safe(dev); if (!timer->reserved) { ret = pm_runtime_get_sync(dev); diff --git a/arch/arm/plat-omap/include/plat/sram.h b/arch/arm/plat-omap/include/plat/sram.h index fb061cf0d736..30a07730807a 100644 --- a/arch/arm/plat-omap/include/plat/sram.h +++ b/arch/arm/plat-omap/include/plat/sram.h @@ -5,13 +5,4 @@ void omap_map_sram(unsigned long start, unsigned long size, unsigned long skip, int cached); void omap_sram_reset(void); -extern void *omap_sram_push_address(unsigned long size); - -/* Macro to push a function to the internal SRAM, using the fncpy API */ -#define omap_sram_push(funcp, size) ({ \ - typeof(&(funcp)) _res = NULL; \ - void *_sram_address = omap_sram_push_address(size); \ - if (_sram_address) \ - _res = fncpy(_sram_address, &(funcp), size); \ - _res; \ -}) +extern void *omap_sram_push(void *funcp, unsigned long size); diff --git a/arch/arm/plat-omap/sram.c b/arch/arm/plat-omap/sram.c index a5bc92d7e476..921840acf65c 100644 --- a/arch/arm/plat-omap/sram.c +++ b/arch/arm/plat-omap/sram.c @@ -23,6 +23,7 @@ #include <asm/fncpy.h> #include <asm/tlb.h> #include <asm/cacheflush.h> +#include <asm/set_memory.h> #include <asm/mach/map.h> @@ -42,7 +43,7 @@ static void __iomem *omap_sram_ceil; * Note that fncpy requires the returned address to be aligned * to an 8-byte boundary. */ -void *omap_sram_push_address(unsigned long size) +static void *omap_sram_push_address(unsigned long size) { unsigned long available, new_ceil = (unsigned long)omap_sram_ceil; @@ -60,6 +61,30 @@ void *omap_sram_push_address(unsigned long size) return (void *)omap_sram_ceil; } +void *omap_sram_push(void *funcp, unsigned long size) +{ + void *sram; + unsigned long base; + int pages; + void *dst = NULL; + + sram = omap_sram_push_address(size); + if (!sram) + return NULL; + + base = (unsigned long)sram & PAGE_MASK; + pages = PAGE_ALIGN(size) / PAGE_SIZE; + + set_memory_rw(base, pages); + + dst = fncpy(sram, funcp, size); + + set_memory_ro(base, pages); + set_memory_x(base, pages); + + return dst; +} + /* * The SRAM context is lost during off-idle and stack * needs to be reset. @@ -75,6 +100,9 @@ void omap_sram_reset(void) void __init omap_map_sram(unsigned long start, unsigned long size, unsigned long skip, int cached) { + unsigned long base; + int pages; + if (size == 0) return; @@ -95,4 +123,10 @@ void __init omap_map_sram(unsigned long start, unsigned long size, */ memset_io(omap_sram_base + omap_sram_skip, 0, omap_sram_size - omap_sram_skip); + + base = (unsigned long)omap_sram_base; + pages = PAGE_ALIGN(omap_sram_size) / PAGE_SIZE; + + set_memory_ro(base, pages); + set_memory_x(base, pages); } diff --git a/arch/arm/vfp/vfpmodule.c b/arch/arm/vfp/vfpmodule.c index 03c6a3c72f9c..4c375e11ae95 100644 --- a/arch/arm/vfp/vfpmodule.c +++ b/arch/arm/vfp/vfpmodule.c @@ -648,7 +648,7 @@ int vfp_restore_user_hwstate(struct user_vfp __user *ufp, */ static int vfp_dying_cpu(unsigned int cpu) { - vfp_force_reload(cpu, current_thread_info()); + vfp_current_hw_state[cpu] = NULL; return 0; } diff --git a/arch/arm64/boot/dts/rockchip/rk3399-gru.dtsi b/arch/arm64/boot/dts/rockchip/rk3399-gru.dtsi index 03f195025390..204bdb9857b9 100644 --- a/arch/arm64/boot/dts/rockchip/rk3399-gru.dtsi +++ b/arch/arm64/boot/dts/rockchip/rk3399-gru.dtsi @@ -406,8 +406,9 @@ wlan_pd_n: wlan-pd-n { compatible = "regulator-fixed"; regulator-name = "wlan_pd_n"; + pinctrl-names = "default"; + pinctrl-0 = <&wlan_module_reset_l>; - /* Note the wlan_module_reset_l pinctrl */ enable-active-high; gpio = <&gpio1 11 GPIO_ACTIVE_HIGH>; @@ -983,12 +984,6 @@ ap_i2c_audio: &i2c8 { pinctrl-0 = < &ap_pwroff /* AP will auto-assert this when in S3 */ &clk_32k /* This pin is always 32k on gru boards */ - - /* - * We want this driven low ASAP; firmware should help us, but - * we can help ourselves too. - */ - &wlan_module_reset_l >; pcfg_output_low: pcfg-output-low { @@ -1168,12 +1163,7 @@ ap_i2c_audio: &i2c8 { }; wlan_module_reset_l: wlan-module-reset-l { - /* - * We want this driven low ASAP (As {Soon,Strongly} As - * Possible), to avoid leakage through the powered-down - * WiFi. - */ - rockchip,pins = <1 11 RK_FUNC_GPIO &pcfg_output_low>; + rockchip,pins = <1 11 RK_FUNC_GPIO &pcfg_pull_none>; }; bt_host_wake_l: bt-host-wake-l { diff --git a/arch/arm64/boot/dts/rockchip/rk3399.dtsi b/arch/arm64/boot/dts/rockchip/rk3399.dtsi index 2605118d4b4c..0b81ca1d07e7 100644 --- a/arch/arm64/boot/dts/rockchip/rk3399.dtsi +++ b/arch/arm64/boot/dts/rockchip/rk3399.dtsi @@ -411,8 +411,8 @@ reg = <0x0 0xfe800000 0x0 0x100000>; interrupts = <GIC_SPI 105 IRQ_TYPE_LEVEL_HIGH 0>; dr_mode = "otg"; - phys = <&u2phy0_otg>, <&tcphy0_usb3>; - phy-names = "usb2-phy", "usb3-phy"; + phys = <&u2phy0_otg>; + phy-names = "usb2-phy"; phy_type = "utmi_wide"; snps,dis_enblslpm_quirk; snps,dis-u2-freeclk-exists-quirk; @@ -444,8 +444,8 @@ reg = <0x0 0xfe900000 0x0 0x100000>; interrupts = <GIC_SPI 110 IRQ_TYPE_LEVEL_HIGH 0>; dr_mode = "otg"; - phys = <&u2phy1_otg>, <&tcphy1_usb3>; - phy-names = "usb2-phy", "usb3-phy"; + phys = <&u2phy1_otg>; + phy-names = "usb2-phy"; phy_type = "utmi_wide"; snps,dis_enblslpm_quirk; snps,dis-u2-freeclk-exists-quirk; diff --git a/arch/arm64/kvm/guest.c b/arch/arm64/kvm/guest.c index d7e3299a7734..959e50d2588c 100644 --- a/arch/arm64/kvm/guest.c +++ b/arch/arm64/kvm/guest.c @@ -363,8 +363,6 @@ int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu, { int ret = 0; - vcpu_load(vcpu); - trace_kvm_set_guest_debug(vcpu, dbg->control); if (dbg->control & ~KVM_GUESTDBG_VALID_MASK) { @@ -386,7 +384,6 @@ int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu, } out: - vcpu_put(vcpu); return ret; } diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c index 8c704f1e53c2..2dbb2c9f1ec1 100644 --- a/arch/arm64/mm/mmu.c +++ b/arch/arm64/mm/mmu.c @@ -972,3 +972,13 @@ int pmd_clear_huge(pmd_t *pmdp) pmd_clear(pmdp); return 1; } + +int pud_free_pmd_page(pud_t *pud) +{ + return pud_none(*pud); +} + +int pmd_free_pte_page(pmd_t *pmd) +{ + return pmd_none(*pmd); +} diff --git a/arch/h8300/include/asm/byteorder.h b/arch/h8300/include/asm/byteorder.h index ecff2d1ca5a3..6eaa7ad5fc2c 100644 --- a/arch/h8300/include/asm/byteorder.h +++ b/arch/h8300/include/asm/byteorder.h @@ -2,7 +2,6 @@ #ifndef __H8300_BYTEORDER_H__ #define __H8300_BYTEORDER_H__ -#define __BIG_ENDIAN __ORDER_BIG_ENDIAN__ #include <linux/byteorder/big_endian.h> #endif diff --git a/arch/microblaze/Kconfig b/arch/microblaze/Kconfig index 4f798aa671dd..3817a3e2146c 100644 --- a/arch/microblaze/Kconfig +++ b/arch/microblaze/Kconfig @@ -24,6 +24,7 @@ config MICROBLAZE select HAVE_FTRACE_MCOUNT_RECORD select HAVE_FUNCTION_GRAPH_TRACER select HAVE_FUNCTION_TRACER + select NO_BOOTMEM select HAVE_MEMBLOCK select HAVE_MEMBLOCK_NODE_MAP select HAVE_OPROFILE diff --git a/arch/microblaze/Kconfig.platform b/arch/microblaze/Kconfig.platform index 6996f397c16c..f7f1739c11b9 100644 --- a/arch/microblaze/Kconfig.platform +++ b/arch/microblaze/Kconfig.platform @@ -8,7 +8,6 @@ menu "Platform options" config OPT_LIB_FUNCTION bool "Optimalized lib function" - depends on CPU_LITTLE_ENDIAN default y help Allows turn on optimalized library function (memcpy and memmove). @@ -21,6 +20,7 @@ config OPT_LIB_FUNCTION config OPT_LIB_ASM bool "Optimalized lib function ASM" depends on OPT_LIB_FUNCTION && (XILINX_MICROBLAZE0_USE_BARREL = 1) + depends on CPU_BIG_ENDIAN default n help Allows turn on optimalized library function (memcpy and memmove). diff --git a/arch/microblaze/include/asm/setup.h b/arch/microblaze/include/asm/setup.h index be84a4d3917f..7c968c1d1729 100644 --- a/arch/microblaze/include/asm/setup.h +++ b/arch/microblaze/include/asm/setup.h @@ -44,7 +44,6 @@ void machine_shutdown(void); void machine_halt(void); void machine_power_off(void); -extern void *alloc_maybe_bootmem(size_t size, gfp_t mask); extern void *zalloc_maybe_bootmem(size_t size, gfp_t mask); # endif /* __ASSEMBLY__ */ diff --git a/arch/microblaze/lib/fastcopy.S b/arch/microblaze/lib/fastcopy.S index 62021d7e249e..fdc48bb065d8 100644 --- a/arch/microblaze/lib/fastcopy.S +++ b/arch/microblaze/lib/fastcopy.S @@ -29,10 +29,6 @@ * between mem locations with size of xfer spec'd in bytes */ -#ifdef __MICROBLAZEEL__ -#error Microblaze LE not support ASM optimized lib func. Disable OPT_LIB_ASM. -#endif - #include <linux/linkage.h> .text .globl memcpy diff --git a/arch/microblaze/mm/init.c b/arch/microblaze/mm/init.c index 434639f9a3a6..df6de7ccdc2e 100644 --- a/arch/microblaze/mm/init.c +++ b/arch/microblaze/mm/init.c @@ -32,9 +32,6 @@ int mem_init_done; #ifndef CONFIG_MMU unsigned int __page_offset; EXPORT_SYMBOL(__page_offset); - -#else -static int init_bootmem_done; #endif /* CONFIG_MMU */ char *klimit = _end; @@ -117,7 +114,6 @@ static void __init paging_init(void) void __init setup_memory(void) { - unsigned long map_size; struct memblock_region *reg; #ifndef CONFIG_MMU @@ -174,17 +170,6 @@ void __init setup_memory(void) pr_info("%s: max_low_pfn: %#lx\n", __func__, max_low_pfn); pr_info("%s: max_pfn: %#lx\n", __func__, max_pfn); - /* - * Find an area to use for the bootmem bitmap. - * We look for the first area which is at least - * 128kB in length (128kB is enough for a bitmap - * for 4GB of memory, using 4kB pages), plus 1 page - * (in case the address isn't page-aligned). - */ - map_size = init_bootmem_node(NODE_DATA(0), - PFN_UP(TOPHYS((u32)klimit)), min_low_pfn, max_low_pfn); - memblock_reserve(PFN_UP(TOPHYS((u32)klimit)) << PAGE_SHIFT, map_size); - /* Add active regions with valid PFNs */ for_each_memblock(memory, reg) { unsigned long start_pfn, end_pfn; @@ -196,32 +181,9 @@ void __init setup_memory(void) &memblock.memory, 0); } - /* free bootmem is whole main memory */ - free_bootmem_with_active_regions(0, max_low_pfn); - - /* reserve allocate blocks */ - for_each_memblock(reserved, reg) { - unsigned long top = reg->base + reg->size - 1; - - pr_debug("reserved - 0x%08x-0x%08x, %lx, %lx\n", - (u32) reg->base, (u32) reg->size, top, - memory_start + lowmem_size - 1); - - if (top <= (memory_start + lowmem_size - 1)) { - reserve_bootmem(reg->base, reg->size, BOOTMEM_DEFAULT); - } else if (reg->base < (memory_start + lowmem_size - 1)) { - unsigned long trunc_size = memory_start + lowmem_size - - reg->base; - reserve_bootmem(reg->base, trunc_size, BOOTMEM_DEFAULT); - } - } - /* XXX need to clip this if using highmem? */ sparse_memory_present_with_active_regions(0); -#ifdef CONFIG_MMU - init_bootmem_done = 1; -#endif paging_init(); } @@ -398,30 +360,16 @@ asmlinkage void __init mmu_init(void) /* This is only called until mem_init is done. */ void __init *early_get_page(void) { - void *p; - if (init_bootmem_done) { - p = alloc_bootmem_pages(PAGE_SIZE); - } else { - /* - * Mem start + kernel_tlb -> here is limit - * because of mem mapping from head.S - */ - p = __va(memblock_alloc_base(PAGE_SIZE, PAGE_SIZE, - memory_start + kernel_tlb)); - } - return p; + /* + * Mem start + kernel_tlb -> here is limit + * because of mem mapping from head.S + */ + return __va(memblock_alloc_base(PAGE_SIZE, PAGE_SIZE, + memory_start + kernel_tlb)); } #endif /* CONFIG_MMU */ -void * __ref alloc_maybe_bootmem(size_t size, gfp_t mask) -{ - if (mem_init_done) - return kmalloc(size, mask); - else - return alloc_bootmem(size); -} - void * __ref zalloc_maybe_bootmem(size_t size, gfp_t mask) { void *p; diff --git a/arch/mips/lantiq/Kconfig b/arch/mips/lantiq/Kconfig index 692ae85a3e3d..8e3a1fc2bc39 100644 --- a/arch/mips/lantiq/Kconfig +++ b/arch/mips/lantiq/Kconfig @@ -13,6 +13,8 @@ choice config SOC_AMAZON_SE bool "Amazon SE" select SOC_TYPE_XWAY + select MFD_SYSCON + select MFD_CORE config SOC_XWAY bool "XWAY" diff --git a/arch/mips/lantiq/xway/sysctrl.c b/arch/mips/lantiq/xway/sysctrl.c index 52500d3b7004..e0af39b33e28 100644 --- a/arch/mips/lantiq/xway/sysctrl.c +++ b/arch/mips/lantiq/xway/sysctrl.c @@ -549,9 +549,9 @@ void __init ltq_soc_init(void) clkdev_add_static(ltq_ar9_cpu_hz(), ltq_ar9_fpi_hz(), ltq_ar9_fpi_hz(), CLOCK_250M); clkdev_add_pmu("1f203018.usb2-phy", "phy", 1, 0, PMU_USB0_P); - clkdev_add_pmu("1e101000.usb", "otg", 1, 0, PMU_USB0); + clkdev_add_pmu("1e101000.usb", "otg", 1, 0, PMU_USB0 | PMU_AHBM); clkdev_add_pmu("1f203034.usb2-phy", "phy", 1, 0, PMU_USB1_P); - clkdev_add_pmu("1e106000.usb", "otg", 1, 0, PMU_USB1); + clkdev_add_pmu("1e106000.usb", "otg", 1, 0, PMU_USB1 | PMU_AHBM); clkdev_add_pmu("1e180000.etop", "switch", 1, 0, PMU_SWITCH); clkdev_add_pmu("1e103000.sdio", NULL, 1, 0, PMU_SDIO); clkdev_add_pmu("1e103100.deu", NULL, 1, 0, PMU_DEU); @@ -560,7 +560,7 @@ void __init ltq_soc_init(void) } else { clkdev_add_static(ltq_danube_cpu_hz(), ltq_danube_fpi_hz(), ltq_danube_fpi_hz(), ltq_danube_pp32_hz()); - clkdev_add_pmu("1f203018.usb2-phy", "ctrl", 1, 0, PMU_USB0); + clkdev_add_pmu("1e101000.usb", "otg", 1, 0, PMU_USB0 | PMU_AHBM); clkdev_add_pmu("1f203018.usb2-phy", "phy", 1, 0, PMU_USB0_P); clkdev_add_pmu("1e103000.sdio", NULL, 1, 0, PMU_SDIO); clkdev_add_pmu("1e103100.deu", NULL, 1, 0, PMU_DEU); diff --git a/arch/mips/ralink/mt7621.c b/arch/mips/ralink/mt7621.c index 1b274742077d..d2718de60b9b 100644 --- a/arch/mips/ralink/mt7621.c +++ b/arch/mips/ralink/mt7621.c @@ -170,6 +170,28 @@ void prom_soc_init(struct ralink_soc_info *soc_info) u32 n1; u32 rev; + /* Early detection of CMP support */ + mips_cm_probe(); + mips_cpc_probe(); + + if (mips_cps_numiocu(0)) { + /* + * mips_cm_probe() wipes out bootloader + * config for CM regions and we have to configure them + * again. This SoC cannot talk to pamlbus devices + * witout proper iocu region set up. + * + * FIXME: it would be better to do this with values + * from DT, but we need this very early because + * without this we cannot talk to pretty much anything + * including serial. + */ + write_gcr_reg0_base(MT7621_PALMBUS_BASE); + write_gcr_reg0_mask(~MT7621_PALMBUS_SIZE | + CM_GCR_REGn_MASK_CMTGT_IOCU0); + __sync(); + } + n0 = __raw_readl(sysc + SYSC_REG_CHIP_NAME0); n1 = __raw_readl(sysc + SYSC_REG_CHIP_NAME1); @@ -194,26 +216,6 @@ void prom_soc_init(struct ralink_soc_info *soc_info) rt2880_pinmux_data = mt7621_pinmux_data; - /* Early detection of CMP support */ - mips_cm_probe(); - mips_cpc_probe(); - - if (mips_cps_numiocu(0)) { - /* - * mips_cm_probe() wipes out bootloader - * config for CM regions and we have to configure them - * again. This SoC cannot talk to pamlbus devices - * witout proper iocu region set up. - * - * FIXME: it would be better to do this with values - * from DT, but we need this very early because - * without this we cannot talk to pretty much anything - * including serial. - */ - write_gcr_reg0_base(MT7621_PALMBUS_BASE); - write_gcr_reg0_mask(~MT7621_PALMBUS_SIZE | - CM_GCR_REGn_MASK_CMTGT_IOCU0); - } if (!register_cps_smp_ops()) return; diff --git a/arch/mips/ralink/reset.c b/arch/mips/ralink/reset.c index 64543d66e76b..e9531fea23a2 100644 --- a/arch/mips/ralink/reset.c +++ b/arch/mips/ralink/reset.c @@ -96,16 +96,9 @@ static void ralink_restart(char *command) unreachable(); } -static void ralink_halt(void) -{ - local_irq_disable(); - unreachable(); -} - static int __init mips_reboot_setup(void) { _machine_restart = ralink_restart; - _machine_halt = ralink_halt; return 0; } diff --git a/arch/parisc/kernel/cache.c b/arch/parisc/kernel/cache.c index 79089778725b..e3b45546d589 100644 --- a/arch/parisc/kernel/cache.c +++ b/arch/parisc/kernel/cache.c @@ -543,7 +543,8 @@ void flush_cache_mm(struct mm_struct *mm) rp3440, etc. So, avoid it if the mm isn't too big. */ if ((!IS_ENABLED(CONFIG_SMP) || !arch_irqs_disabled()) && mm_total_size(mm) >= parisc_cache_flush_threshold) { - flush_tlb_all(); + if (mm->context) + flush_tlb_all(); flush_cache_all(); return; } @@ -571,6 +572,8 @@ void flush_cache_mm(struct mm_struct *mm) pfn = pte_pfn(*ptep); if (!pfn_valid(pfn)) continue; + if (unlikely(mm->context)) + flush_tlb_page(vma, addr); __flush_cache_page(vma, addr, PFN_PHYS(pfn)); } } @@ -579,26 +582,46 @@ void flush_cache_mm(struct mm_struct *mm) void flush_cache_range(struct vm_area_struct *vma, unsigned long start, unsigned long end) { + pgd_t *pgd; + unsigned long addr; + if ((!IS_ENABLED(CONFIG_SMP) || !arch_irqs_disabled()) && end - start >= parisc_cache_flush_threshold) { - flush_tlb_range(vma, start, end); + if (vma->vm_mm->context) + flush_tlb_range(vma, start, end); flush_cache_all(); return; } - flush_user_dcache_range_asm(start, end); - if (vma->vm_flags & VM_EXEC) - flush_user_icache_range_asm(start, end); - flush_tlb_range(vma, start, end); + if (vma->vm_mm->context == mfsp(3)) { + flush_user_dcache_range_asm(start, end); + if (vma->vm_flags & VM_EXEC) + flush_user_icache_range_asm(start, end); + flush_tlb_range(vma, start, end); + return; + } + + pgd = vma->vm_mm->pgd; + for (addr = vma->vm_start; addr < vma->vm_end; addr += PAGE_SIZE) { + unsigned long pfn; + pte_t *ptep = get_ptep(pgd, addr); + if (!ptep) + continue; + pfn = pte_pfn(*ptep); + if (pfn_valid(pfn)) { + if (unlikely(vma->vm_mm->context)) + flush_tlb_page(vma, addr); + __flush_cache_page(vma, addr, PFN_PHYS(pfn)); + } + } } void flush_cache_page(struct vm_area_struct *vma, unsigned long vmaddr, unsigned long pfn) { - BUG_ON(!vma->vm_mm->context); - if (pfn_valid(pfn)) { - flush_tlb_page(vma, vmaddr); + if (likely(vma->vm_mm->context)) + flush_tlb_page(vma, vmaddr); __flush_cache_page(vma, vmaddr, PFN_PHYS(pfn)); } } diff --git a/arch/powerpc/include/asm/book3s/64/mmu.h b/arch/powerpc/include/asm/book3s/64/mmu.h index 0abeb0e2d616..37671feb2bf6 100644 --- a/arch/powerpc/include/asm/book3s/64/mmu.h +++ b/arch/powerpc/include/asm/book3s/64/mmu.h @@ -87,6 +87,9 @@ typedef struct { /* Number of bits in the mm_cpumask */ atomic_t active_cpus; + /* Number of users of the external (Nest) MMU */ + atomic_t copros; + /* NPU NMMU context */ struct npu_context *npu_context; diff --git a/arch/powerpc/include/asm/book3s/64/tlbflush-radix.h b/arch/powerpc/include/asm/book3s/64/tlbflush-radix.h index 8eea90f80e45..19b45ba6caf9 100644 --- a/arch/powerpc/include/asm/book3s/64/tlbflush-radix.h +++ b/arch/powerpc/include/asm/book3s/64/tlbflush-radix.h @@ -47,9 +47,6 @@ extern void radix__flush_tlb_page_psize(struct mm_struct *mm, unsigned long vmad #endif extern void radix__flush_tlb_pwc(struct mmu_gather *tlb, unsigned long addr); extern void radix__flush_tlb_collapsed_pmd(struct mm_struct *mm, unsigned long addr); -extern void radix__flush_tlb_lpid_va(unsigned long lpid, unsigned long gpa, - unsigned long page_size); -extern void radix__flush_tlb_lpid(unsigned long lpid); extern void radix__flush_tlb_all(void); extern void radix__flush_tlb_pte_p9_dd1(unsigned long old_pte, struct mm_struct *mm, unsigned long address); diff --git a/arch/powerpc/include/asm/cputable.h b/arch/powerpc/include/asm/cputable.h index a2c5c95882cf..2e2bacbdf6ed 100644 --- a/arch/powerpc/include/asm/cputable.h +++ b/arch/powerpc/include/asm/cputable.h @@ -203,6 +203,7 @@ static inline void cpu_feature_keys_init(void) { } #define CPU_FTR_DAWR LONG_ASM_CONST(0x0400000000000000) #define CPU_FTR_DABRX LONG_ASM_CONST(0x0800000000000000) #define CPU_FTR_PMAO_BUG LONG_ASM_CONST(0x1000000000000000) +#define CPU_FTR_P9_TLBIE_BUG LONG_ASM_CONST(0x2000000000000000) #define CPU_FTR_POWER9_DD1 LONG_ASM_CONST(0x4000000000000000) #define CPU_FTR_POWER9_DD2_1 LONG_ASM_CONST(0x8000000000000000) @@ -465,7 +466,7 @@ static inline void cpu_feature_keys_init(void) { } CPU_FTR_CFAR | CPU_FTR_HVMODE | CPU_FTR_VMX_COPY | \ CPU_FTR_DBELL | CPU_FTR_HAS_PPR | CPU_FTR_DAWR | \ CPU_FTR_ARCH_207S | CPU_FTR_TM_COMP | CPU_FTR_ARCH_300 | \ - CPU_FTR_PKEY) + CPU_FTR_PKEY | CPU_FTR_P9_TLBIE_BUG) #define CPU_FTRS_POWER9_DD1 ((CPU_FTRS_POWER9 | CPU_FTR_POWER9_DD1) & \ (~CPU_FTR_SAO)) #define CPU_FTRS_POWER9_DD2_0 CPU_FTRS_POWER9 diff --git a/arch/powerpc/include/asm/mmu_context.h b/arch/powerpc/include/asm/mmu_context.h index 051b3d63afe3..3a15b6db9501 100644 --- a/arch/powerpc/include/asm/mmu_context.h +++ b/arch/powerpc/include/asm/mmu_context.h @@ -92,15 +92,23 @@ static inline void dec_mm_active_cpus(struct mm_struct *mm) static inline void mm_context_add_copro(struct mm_struct *mm) { /* - * On hash, should only be called once over the lifetime of - * the context, as we can't decrement the active cpus count - * and flush properly for the time being. + * If any copro is in use, increment the active CPU count + * in order to force TLB invalidations to be global as to + * propagate to the Nest MMU. */ - inc_mm_active_cpus(mm); + if (atomic_inc_return(&mm->context.copros) == 1) + inc_mm_active_cpus(mm); } static inline void mm_context_remove_copro(struct mm_struct *mm) { + int c; + + c = atomic_dec_if_positive(&mm->context.copros); + + /* Detect imbalance between add and remove */ + WARN_ON(c < 0); + /* * Need to broadcast a global flush of the full mm before * decrementing active_cpus count, as the next TLBI may be @@ -111,7 +119,7 @@ static inline void mm_context_remove_copro(struct mm_struct *mm) * for the time being. Invalidations will remain global if * used on hash. */ - if (radix_enabled()) { + if (c == 0 && radix_enabled()) { flush_all_mm(mm); dec_mm_active_cpus(mm); } diff --git a/arch/powerpc/kernel/dt_cpu_ftrs.c b/arch/powerpc/kernel/dt_cpu_ftrs.c index 945e2c29ad2d..8ca5d5b74618 100644 --- a/arch/powerpc/kernel/dt_cpu_ftrs.c +++ b/arch/powerpc/kernel/dt_cpu_ftrs.c @@ -709,6 +709,9 @@ static __init void cpufeatures_cpu_quirks(void) cur_cpu_spec->cpu_features |= CPU_FTR_POWER9_DD1; else if ((version & 0xffffefff) == 0x004e0201) cur_cpu_spec->cpu_features |= CPU_FTR_POWER9_DD2_1; + + if ((version & 0xffff0000) == 0x004e0000) + cur_cpu_spec->cpu_features |= CPU_FTR_P9_TLBIE_BUG; } static void __init cpufeatures_setup_finished(void) @@ -720,6 +723,9 @@ static void __init cpufeatures_setup_finished(void) cur_cpu_spec->cpu_features |= CPU_FTR_HVMODE; } + /* Make sure powerpc_base_platform is non-NULL */ + powerpc_base_platform = cur_cpu_spec->platform; + system_registers.lpcr = mfspr(SPRN_LPCR); system_registers.hfscr = mfspr(SPRN_HFSCR); system_registers.fscr = mfspr(SPRN_FSCR); diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index 3ac87e53b3da..1ecfd8ffb098 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -706,7 +706,7 @@ EXC_COMMON_BEGIN(bad_addr_slb) ld r3, PACA_EXSLB+EX_DAR(r13) std r3, _DAR(r1) beq cr6, 2f - li r10, 0x480 /* fix trap number for I-SLB miss */ + li r10, 0x481 /* fix trap number for I-SLB miss */ std r10, _TRAP(r1) 2: bl save_nvgprs addi r3, r1, STACK_FRAME_OVERHEAD diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c index f88038847790..061aa0f47bb1 100644 --- a/arch/powerpc/kernel/irq.c +++ b/arch/powerpc/kernel/irq.c @@ -476,6 +476,14 @@ void force_external_irq_replay(void) */ WARN_ON(!arch_irqs_disabled()); + /* + * Interrupts must always be hard disabled before irq_happened is + * modified (to prevent lost update in case of interrupt between + * load and store). + */ + __hard_irq_disable(); + local_paca->irq_happened |= PACA_IRQ_HARD_DIS; + /* Indicate in the PACA that we have an interrupt to replay */ local_paca->irq_happened |= PACA_IRQ_EE; } diff --git a/arch/powerpc/kvm/book3s_64_mmu_radix.c b/arch/powerpc/kvm/book3s_64_mmu_radix.c index 5cb4e4687107..5d9bafe9a371 100644 --- a/arch/powerpc/kvm/book3s_64_mmu_radix.c +++ b/arch/powerpc/kvm/book3s_64_mmu_radix.c @@ -157,6 +157,9 @@ static void kvmppc_radix_tlbie_page(struct kvm *kvm, unsigned long addr, asm volatile("ptesync": : :"memory"); asm volatile(PPC_TLBIE_5(%0, %1, 0, 0, 1) : : "r" (addr), "r" (kvm->arch.lpid) : "memory"); + if (cpu_has_feature(CPU_FTR_P9_TLBIE_BUG)) + asm volatile(PPC_TLBIE_5(%0, %1, 0, 0, 1) + : : "r" (addr), "r" (kvm->arch.lpid) : "memory"); asm volatile("ptesync": : :"memory"); } diff --git a/arch/powerpc/kvm/book3s_hv_rm_mmu.c b/arch/powerpc/kvm/book3s_hv_rm_mmu.c index 8888e625a999..e1c083fbe434 100644 --- a/arch/powerpc/kvm/book3s_hv_rm_mmu.c +++ b/arch/powerpc/kvm/book3s_hv_rm_mmu.c @@ -473,6 +473,17 @@ static void do_tlbies(struct kvm *kvm, unsigned long *rbvalues, trace_tlbie(kvm->arch.lpid, 0, rbvalues[i], kvm->arch.lpid, 0, 0, 0); } + + if (cpu_has_feature(CPU_FTR_P9_TLBIE_BUG)) { + /* + * Need the extra ptesync to make sure we don't + * re-order the tlbie + */ + asm volatile("ptesync": : :"memory"); + asm volatile(PPC_TLBIE_5(%0,%1,0,0,0) : : + "r" (rbvalues[0]), "r" (kvm->arch.lpid)); + } + asm volatile("eieio; tlbsync; ptesync" : : : "memory"); kvm->arch.tlbie_lock = 0; } else { diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S index f31f357b8c5a..d33264697a31 100644 --- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S +++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S @@ -320,7 +320,6 @@ kvm_novcpu_exit: stw r12, STACK_SLOT_TRAP(r1) bl kvmhv_commence_exit nop - lwz r12, STACK_SLOT_TRAP(r1) b kvmhv_switch_to_host /* @@ -1220,6 +1219,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300) secondary_too_late: li r12, 0 + stw r12, STACK_SLOT_TRAP(r1) cmpdi r4, 0 beq 11f stw r12, VCPU_TRAP(r4) @@ -1558,12 +1558,12 @@ mc_cont: 3: stw r5,VCPU_SLB_MAX(r9) guest_bypass: + stw r12, STACK_SLOT_TRAP(r1) mr r3, r12 /* Increment exit count, poke other threads to exit */ bl kvmhv_commence_exit nop ld r9, HSTATE_KVM_VCPU(r13) - lwz r12, VCPU_TRAP(r9) /* Stop others sending VCPU interrupts to this physical CPU */ li r0, -1 @@ -1898,6 +1898,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_POWER9_DD1) * POWER7/POWER8 guest -> host partition switch code. * We don't have to lock against tlbies but we do * have to coordinate the hardware threads. + * Here STACK_SLOT_TRAP(r1) contains the trap number. */ kvmhv_switch_to_host: /* Secondary threads wait for primary to do partition switch */ @@ -1950,12 +1951,12 @@ BEGIN_FTR_SECTION END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S) /* If HMI, call kvmppc_realmode_hmi_handler() */ + lwz r12, STACK_SLOT_TRAP(r1) cmpwi r12, BOOK3S_INTERRUPT_HMI bne 27f bl kvmppc_realmode_hmi_handler nop cmpdi r3, 0 - li r12, BOOK3S_INTERRUPT_HMI /* * At this point kvmppc_realmode_hmi_handler may have resync-ed * the TB, and if it has, we must not subtract the guest timebase @@ -2008,10 +2009,8 @@ BEGIN_FTR_SECTION lwz r8, KVM_SPLIT_DO_RESTORE(r3) cmpwi r8, 0 beq 47f - stw r12, STACK_SLOT_TRAP(r1) bl kvmhv_p9_restore_lpcr nop - lwz r12, STACK_SLOT_TRAP(r1) b 48f 47: END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300) @@ -2049,6 +2048,7 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_TYPE_RADIX) li r0, KVM_GUEST_MODE_NONE stb r0, HSTATE_IN_GUEST(r13) + lwz r12, STACK_SLOT_TRAP(r1) /* return trap # in r12 */ ld r0, SFS+PPC_LR_STKOFF(r1) addi r1, r1, SFS mtlr r0 diff --git a/arch/powerpc/mm/hash_native_64.c b/arch/powerpc/mm/hash_native_64.c index a0675e91ad7d..656933c85925 100644 --- a/arch/powerpc/mm/hash_native_64.c +++ b/arch/powerpc/mm/hash_native_64.c @@ -201,6 +201,15 @@ static inline unsigned long ___tlbie(unsigned long vpn, int psize, return va; } +static inline void fixup_tlbie(unsigned long vpn, int psize, int apsize, int ssize) +{ + if (cpu_has_feature(CPU_FTR_P9_TLBIE_BUG)) { + /* Need the extra ptesync to ensure we don't reorder tlbie*/ + asm volatile("ptesync": : :"memory"); + ___tlbie(vpn, psize, apsize, ssize); + } +} + static inline void __tlbie(unsigned long vpn, int psize, int apsize, int ssize) { unsigned long rb; @@ -278,6 +287,7 @@ static inline void tlbie(unsigned long vpn, int psize, int apsize, asm volatile("ptesync": : :"memory"); } else { __tlbie(vpn, psize, apsize, ssize); + fixup_tlbie(vpn, psize, apsize, ssize); asm volatile("eieio; tlbsync; ptesync": : :"memory"); } if (lock_tlbie && !use_local) @@ -771,7 +781,7 @@ static void native_hpte_clear(void) */ static void native_flush_hash_range(unsigned long number, int local) { - unsigned long vpn; + unsigned long vpn = 0; unsigned long hash, index, hidx, shift, slot; struct hash_pte *hptep; unsigned long hpte_v; @@ -843,6 +853,10 @@ static void native_flush_hash_range(unsigned long number, int local) __tlbie(vpn, psize, psize, ssize); } pte_iterate_hashed_end(); } + /* + * Just do one more with the last used values. + */ + fixup_tlbie(vpn, psize, psize, ssize); asm volatile("eieio; tlbsync; ptesync":::"memory"); if (lock_tlbie) diff --git a/arch/powerpc/mm/mmu_context_book3s64.c b/arch/powerpc/mm/mmu_context_book3s64.c index 929d9ef7083f..3f980baade4c 100644 --- a/arch/powerpc/mm/mmu_context_book3s64.c +++ b/arch/powerpc/mm/mmu_context_book3s64.c @@ -173,6 +173,7 @@ int init_new_context(struct task_struct *tsk, struct mm_struct *mm) mm_iommu_init(mm); #endif atomic_set(&mm->context.active_cpus, 0); + atomic_set(&mm->context.copros, 0); return 0; } diff --git a/arch/powerpc/mm/pgtable_64.c b/arch/powerpc/mm/pgtable_64.c index 28c980eb4422..adf469f312f2 100644 --- a/arch/powerpc/mm/pgtable_64.c +++ b/arch/powerpc/mm/pgtable_64.c @@ -481,6 +481,7 @@ void mmu_partition_table_set_entry(unsigned int lpid, unsigned long dw0, "r" (TLBIEL_INVAL_SET_LPID), "r" (lpid)); trace_tlbie(lpid, 0, TLBIEL_INVAL_SET_LPID, lpid, 2, 0, 0); } + /* do we need fixup here ?*/ asm volatile("eieio; tlbsync; ptesync" : : : "memory"); } EXPORT_SYMBOL_GPL(mmu_partition_table_set_entry); diff --git a/arch/powerpc/mm/tlb-radix.c b/arch/powerpc/mm/tlb-radix.c index 71d1b19ad1c0..a07f5372a4bf 100644 --- a/arch/powerpc/mm/tlb-radix.c +++ b/arch/powerpc/mm/tlb-radix.c @@ -119,6 +119,49 @@ static inline void __tlbie_pid(unsigned long pid, unsigned long ric) trace_tlbie(0, 0, rb, rs, ric, prs, r); } +static inline void __tlbiel_va(unsigned long va, unsigned long pid, + unsigned long ap, unsigned long ric) +{ + unsigned long rb,rs,prs,r; + + rb = va & ~(PPC_BITMASK(52, 63)); + rb |= ap << PPC_BITLSHIFT(58); + rs = pid << PPC_BITLSHIFT(31); + prs = 1; /* process scoped */ + r = 1; /* raidx format */ + + asm volatile(PPC_TLBIEL(%0, %4, %3, %2, %1) + : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory"); + trace_tlbie(0, 1, rb, rs, ric, prs, r); +} + +static inline void __tlbie_va(unsigned long va, unsigned long pid, + unsigned long ap, unsigned long ric) +{ + unsigned long rb,rs,prs,r; + + rb = va & ~(PPC_BITMASK(52, 63)); + rb |= ap << PPC_BITLSHIFT(58); + rs = pid << PPC_BITLSHIFT(31); + prs = 1; /* process scoped */ + r = 1; /* raidx format */ + + asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1) + : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory"); + trace_tlbie(0, 0, rb, rs, ric, prs, r); +} + +static inline void fixup_tlbie(void) +{ + unsigned long pid = 0; + unsigned long va = ((1UL << 52) - 1); + + if (cpu_has_feature(CPU_FTR_P9_TLBIE_BUG)) { + asm volatile("ptesync": : :"memory"); + __tlbie_va(va, pid, mmu_get_ap(MMU_PAGE_64K), RIC_FLUSH_TLB); + } +} + /* * We use 128 set in radix mode and 256 set in hpt mode. */ @@ -151,24 +194,25 @@ static inline void _tlbiel_pid(unsigned long pid, unsigned long ric) static inline void _tlbie_pid(unsigned long pid, unsigned long ric) { asm volatile("ptesync": : :"memory"); - __tlbie_pid(pid, ric); - asm volatile("eieio; tlbsync; ptesync": : :"memory"); -} -static inline void __tlbiel_va(unsigned long va, unsigned long pid, - unsigned long ap, unsigned long ric) -{ - unsigned long rb,rs,prs,r; - - rb = va & ~(PPC_BITMASK(52, 63)); - rb |= ap << PPC_BITLSHIFT(58); - rs = pid << PPC_BITLSHIFT(31); - prs = 1; /* process scoped */ - r = 1; /* raidx format */ - - asm volatile(PPC_TLBIEL(%0, %4, %3, %2, %1) - : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory"); - trace_tlbie(0, 1, rb, rs, ric, prs, r); + /* + * Workaround the fact that the "ric" argument to __tlbie_pid + * must be a compile-time contraint to match the "i" constraint + * in the asm statement. + */ + switch (ric) { + case RIC_FLUSH_TLB: + __tlbie_pid(pid, RIC_FLUSH_TLB); + break; + case RIC_FLUSH_PWC: + __tlbie_pid(pid, RIC_FLUSH_PWC); + break; + case RIC_FLUSH_ALL: + default: + __tlbie_pid(pid, RIC_FLUSH_ALL); + } + fixup_tlbie(); + asm volatile("eieio; tlbsync; ptesync": : :"memory"); } static inline void __tlbiel_va_range(unsigned long start, unsigned long end, @@ -203,22 +247,6 @@ static inline void _tlbiel_va_range(unsigned long start, unsigned long end, asm volatile("ptesync": : :"memory"); } -static inline void __tlbie_va(unsigned long va, unsigned long pid, - unsigned long ap, unsigned long ric) -{ - unsigned long rb,rs,prs,r; - - rb = va & ~(PPC_BITMASK(52, 63)); - rb |= ap << PPC_BITLSHIFT(58); - rs = pid << PPC_BITLSHIFT(31); - prs = 1; /* process scoped */ - r = 1; /* raidx format */ - - asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1) - : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory"); - trace_tlbie(0, 0, rb, rs, ric, prs, r); -} - static inline void __tlbie_va_range(unsigned long start, unsigned long end, unsigned long pid, unsigned long page_size, unsigned long psize) @@ -237,6 +265,7 @@ static inline void _tlbie_va(unsigned long va, unsigned long pid, asm volatile("ptesync": : :"memory"); __tlbie_va(va, pid, ap, ric); + fixup_tlbie(); asm volatile("eieio; tlbsync; ptesync": : :"memory"); } @@ -248,6 +277,7 @@ static inline void _tlbie_va_range(unsigned long start, unsigned long end, if (also_pwc) __tlbie_pid(pid, RIC_FLUSH_PWC); __tlbie_va_range(start, end, pid, page_size, psize); + fixup_tlbie(); asm volatile("eieio; tlbsync; ptesync": : :"memory"); } @@ -311,6 +341,16 @@ void radix__local_flush_tlb_page(struct vm_area_struct *vma, unsigned long vmadd } EXPORT_SYMBOL(radix__local_flush_tlb_page); +static bool mm_needs_flush_escalation(struct mm_struct *mm) +{ + /* + * P9 nest MMU has issues with the page walk cache + * caching PTEs and not flushing them properly when + * RIC = 0 for a PID/LPID invalidate + */ + return atomic_read(&mm->context.copros) != 0; +} + #ifdef CONFIG_SMP void radix__flush_tlb_mm(struct mm_struct *mm) { @@ -321,9 +361,12 @@ void radix__flush_tlb_mm(struct mm_struct *mm) return; preempt_disable(); - if (!mm_is_thread_local(mm)) - _tlbie_pid(pid, RIC_FLUSH_TLB); - else + if (!mm_is_thread_local(mm)) { + if (mm_needs_flush_escalation(mm)) + _tlbie_pid(pid, RIC_FLUSH_ALL); + else + _tlbie_pid(pid, RIC_FLUSH_TLB); + } else _tlbiel_pid(pid, RIC_FLUSH_TLB); preempt_enable(); } @@ -435,10 +478,14 @@ void radix__flush_tlb_range(struct vm_area_struct *vma, unsigned long start, } if (full) { - if (local) + if (local) { _tlbiel_pid(pid, RIC_FLUSH_TLB); - else - _tlbie_pid(pid, RIC_FLUSH_TLB); + } else { + if (mm_needs_flush_escalation(mm)) + _tlbie_pid(pid, RIC_FLUSH_ALL); + else + _tlbie_pid(pid, RIC_FLUSH_TLB); + } } else { bool hflush = false; unsigned long hstart, hend; @@ -465,6 +512,7 @@ void radix__flush_tlb_range(struct vm_area_struct *vma, unsigned long start, if (hflush) __tlbie_va_range(hstart, hend, pid, HPAGE_PMD_SIZE, MMU_PAGE_2M); + fixup_tlbie(); asm volatile("eieio; tlbsync; ptesync": : :"memory"); } } @@ -548,6 +596,9 @@ static inline void __radix__flush_tlb_range_psize(struct mm_struct *mm, } if (full) { + if (!local && mm_needs_flush_escalation(mm)) + also_pwc = true; + if (local) _tlbiel_pid(pid, also_pwc ? RIC_FLUSH_ALL : RIC_FLUSH_TLB); else @@ -603,46 +654,6 @@ void radix__flush_tlb_collapsed_pmd(struct mm_struct *mm, unsigned long addr) } #endif /* CONFIG_TRANSPARENT_HUGEPAGE */ -void radix__flush_tlb_lpid_va(unsigned long lpid, unsigned long gpa, - unsigned long page_size) -{ - unsigned long rb,rs,prs,r; - unsigned long ap; - unsigned long ric = RIC_FLUSH_TLB; - - ap = mmu_get_ap(radix_get_mmu_psize(page_size)); - rb = gpa & ~(PPC_BITMASK(52, 63)); - rb |= ap << PPC_BITLSHIFT(58); - rs = lpid & ((1UL << 32) - 1); - prs = 0; /* process scoped */ - r = 1; /* raidx format */ - - asm volatile("ptesync": : :"memory"); - asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1) - : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory"); - asm volatile("eieio; tlbsync; ptesync": : :"memory"); - trace_tlbie(lpid, 0, rb, rs, ric, prs, r); -} -EXPORT_SYMBOL(radix__flush_tlb_lpid_va); - -void radix__flush_tlb_lpid(unsigned long lpid) -{ - unsigned long rb,rs,prs,r; - unsigned long ric = RIC_FLUSH_ALL; - - rb = 0x2 << PPC_BITLSHIFT(53); /* IS = 2 */ - rs = lpid & ((1UL << 32) - 1); - prs = 0; /* partition scoped */ - r = 1; /* raidx format */ - - asm volatile("ptesync": : :"memory"); - asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1) - : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory"); - asm volatile("eieio; tlbsync; ptesync": : :"memory"); - trace_tlbie(lpid, 0, rb, rs, ric, prs, r); -} -EXPORT_SYMBOL(radix__flush_tlb_lpid); - void radix__flush_pmd_tlb_range(struct vm_area_struct *vma, unsigned long start, unsigned long end) { diff --git a/arch/sparc/mm/tlb.c b/arch/sparc/mm/tlb.c index 847ddffbf38a..b5cfab711651 100644 --- a/arch/sparc/mm/tlb.c +++ b/arch/sparc/mm/tlb.c @@ -163,13 +163,10 @@ static void tlb_batch_pmd_scan(struct mm_struct *mm, unsigned long vaddr, pte_unmap(pte); } -void set_pmd_at(struct mm_struct *mm, unsigned long addr, - pmd_t *pmdp, pmd_t pmd) -{ - pmd_t orig = *pmdp; - - *pmdp = pmd; +static void __set_pmd_acct(struct mm_struct *mm, unsigned long addr, + pmd_t orig, pmd_t pmd) +{ if (mm == &init_mm) return; @@ -219,6 +216,15 @@ void set_pmd_at(struct mm_struct *mm, unsigned long addr, } } +void set_pmd_at(struct mm_struct *mm, unsigned long addr, + pmd_t *pmdp, pmd_t pmd) +{ + pmd_t orig = *pmdp; + + *pmdp = pmd; + __set_pmd_acct(mm, addr, orig, pmd); +} + static inline pmd_t pmdp_establish(struct vm_area_struct *vma, unsigned long address, pmd_t *pmdp, pmd_t pmd) { @@ -227,6 +233,7 @@ static inline pmd_t pmdp_establish(struct vm_area_struct *vma, do { old = *pmdp; } while (cmpxchg64(&pmdp->pmd, old.pmd, pmd.pmd) != old.pmd); + __set_pmd_acct(vma->vm_mm, address, old, pmd); return old; } diff --git a/arch/x86/Kconfig.cpu b/arch/x86/Kconfig.cpu index 8b8d2297d486..638411f22267 100644 --- a/arch/x86/Kconfig.cpu +++ b/arch/x86/Kconfig.cpu @@ -315,19 +315,6 @@ config X86_L1_CACHE_SHIFT default "4" if MELAN || M486 || MGEODEGX1 default "5" if MWINCHIP3D || MWINCHIPC6 || MCRUSOE || MEFFICEON || MCYRIXIII || MK6 || MPENTIUMIII || MPENTIUMII || M686 || M586MMX || M586TSC || M586 || MVIAC3_2 || MGEODE_LX -config X86_PPRO_FENCE - bool "PentiumPro memory ordering errata workaround" - depends on M686 || M586MMX || M586TSC || M586 || M486 || MGEODEGX1 - ---help--- - Old PentiumPro multiprocessor systems had errata that could cause - memory operations to violate the x86 ordering standard in rare cases. - Enabling this option will attempt to work around some (but not all) - occurrences of this problem, at the cost of much heavier spinlock and - memory barrier operations. - - If unsure, say n here. Even distro kernels should think twice before - enabling this: there are few systems, and an unlikely bug. - config X86_F00F_BUG def_bool y depends on M586MMX || M586TSC || M586 || M486 diff --git a/arch/x86/Makefile b/arch/x86/Makefile index 498c1b812300..1c4d012550ec 100644 --- a/arch/x86/Makefile +++ b/arch/x86/Makefile @@ -223,6 +223,15 @@ KBUILD_CFLAGS += $(cfi) $(cfi-sigframe) $(cfi-sections) $(asinstr) $(avx_instr) LDFLAGS := -m elf_$(UTS_MACHINE) +# +# The 64-bit kernel must be aligned to 2MB. Pass -z max-page-size=0x200000 to +# the linker to force 2MB page size regardless of the default page size used +# by the linker. +# +ifdef CONFIG_X86_64 +LDFLAGS += $(call ld-option, -z max-page-size=0x200000) +endif + # Speed up the build KBUILD_CFLAGS += -pipe # Workaround for a gcc prelease that unfortunately was shipped in a suse release diff --git a/arch/x86/boot/compressed/misc.c b/arch/x86/boot/compressed/misc.c index 98761a1576ce..252fee320816 100644 --- a/arch/x86/boot/compressed/misc.c +++ b/arch/x86/boot/compressed/misc.c @@ -309,6 +309,10 @@ static void parse_elf(void *output) switch (phdr->p_type) { case PT_LOAD: +#ifdef CONFIG_X86_64 + if ((phdr->p_align % 0x200000) != 0) + error("Alignment of LOAD segment isn't multiple of 2MB"); +#endif #ifdef CONFIG_RELOCATABLE dest = output; dest += (phdr->p_paddr - LOAD_PHYSICAL_ADDR); diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S index 805f52703ee3..18ed349b4f83 100644 --- a/arch/x86/entry/entry_64.S +++ b/arch/x86/entry/entry_64.S @@ -1138,7 +1138,7 @@ apicinterrupt3 HYPERV_REENLIGHTENMENT_VECTOR \ #endif /* CONFIG_HYPERV */ idtentry debug do_debug has_error_code=0 paranoid=1 shift_ist=DEBUG_STACK -idtentry int3 do_int3 has_error_code=0 paranoid=1 shift_ist=DEBUG_STACK +idtentry int3 do_int3 has_error_code=0 idtentry stack_segment do_stack_segment has_error_code=1 #ifdef CONFIG_XEN diff --git a/arch/x86/entry/vdso/vdso32/vclock_gettime.c b/arch/x86/entry/vdso/vdso32/vclock_gettime.c index 7780bbfb06ef..9242b28418d5 100644 --- a/arch/x86/entry/vdso/vdso32/vclock_gettime.c +++ b/arch/x86/entry/vdso/vdso32/vclock_gettime.c @@ -5,8 +5,6 @@ #undef CONFIG_OPTIMIZE_INLINING #endif -#undef CONFIG_X86_PPRO_FENCE - #ifdef CONFIG_X86_64 /* diff --git a/arch/x86/entry/vsyscall/vsyscall_64.c b/arch/x86/entry/vsyscall/vsyscall_64.c index 8560ef68a9d6..317be365bce3 100644 --- a/arch/x86/entry/vsyscall/vsyscall_64.c +++ b/arch/x86/entry/vsyscall/vsyscall_64.c @@ -347,7 +347,7 @@ void __init set_vsyscall_pgtable_user_bits(pgd_t *root) set_pgd(pgd, __pgd(pgd_val(*pgd) | _PAGE_USER)); p4d = p4d_offset(pgd, VSYSCALL_ADDR); #if CONFIG_PGTABLE_LEVELS >= 5 - p4d->p4d |= _PAGE_USER; + set_p4d(p4d, __p4d(p4d_val(*p4d) | _PAGE_USER)); #endif pud = pud_offset(p4d, VSYSCALL_ADDR); set_pud(pud, __pud(pud_val(*pud) | _PAGE_USER)); diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c index 140d33288e78..88797c80b3e0 100644 --- a/arch/x86/events/core.c +++ b/arch/x86/events/core.c @@ -2118,7 +2118,8 @@ static int x86_pmu_event_init(struct perf_event *event) event->destroy(event); } - if (READ_ONCE(x86_pmu.attr_rdpmc)) + if (READ_ONCE(x86_pmu.attr_rdpmc) && + !(event->hw.flags & PERF_X86_EVENT_LARGE_PEBS)) event->hw.flags |= PERF_X86_EVENT_RDPMC_ALLOWED; return err; diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c index 56457cb73448..1e41d7508d99 100644 --- a/arch/x86/events/intel/core.c +++ b/arch/x86/events/intel/core.c @@ -2952,9 +2952,9 @@ static void intel_pebs_aliases_skl(struct perf_event *event) return intel_pebs_aliases_precdist(event); } -static unsigned long intel_pmu_free_running_flags(struct perf_event *event) +static unsigned long intel_pmu_large_pebs_flags(struct perf_event *event) { - unsigned long flags = x86_pmu.free_running_flags; + unsigned long flags = x86_pmu.large_pebs_flags; if (event->attr.use_clockid) flags &= ~PERF_SAMPLE_TIME; @@ -2976,8 +2976,8 @@ static int intel_pmu_hw_config(struct perf_event *event) if (!event->attr.freq) { event->hw.flags |= PERF_X86_EVENT_AUTO_RELOAD; if (!(event->attr.sample_type & - ~intel_pmu_free_running_flags(event))) - event->hw.flags |= PERF_X86_EVENT_FREERUNNING; + ~intel_pmu_large_pebs_flags(event))) + event->hw.flags |= PERF_X86_EVENT_LARGE_PEBS; } if (x86_pmu.pebs_aliases) x86_pmu.pebs_aliases(event); @@ -3194,7 +3194,7 @@ static unsigned bdw_limit_period(struct perf_event *event, unsigned left) X86_CONFIG(.event=0xc0, .umask=0x01)) { if (left < 128) left = 128; - left &= ~0x3fu; + left &= ~0x3fULL; } return left; } @@ -3460,7 +3460,7 @@ static __initconst const struct x86_pmu core_pmu = { .event_map = intel_pmu_event_map, .max_events = ARRAY_SIZE(intel_perfmon_event_map), .apic = 1, - .free_running_flags = PEBS_FREERUNNING_FLAGS, + .large_pebs_flags = LARGE_PEBS_FLAGS, /* * Intel PMCs cannot be accessed sanely above 32-bit width, @@ -3502,7 +3502,7 @@ static __initconst const struct x86_pmu intel_pmu = { .event_map = intel_pmu_event_map, .max_events = ARRAY_SIZE(intel_perfmon_event_map), .apic = 1, - .free_running_flags = PEBS_FREERUNNING_FLAGS, + .large_pebs_flags = LARGE_PEBS_FLAGS, /* * Intel PMCs cannot be accessed sanely above 32 bit width, * so we install an artificial 1<<31 period regardless of diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c index 18c25ab28557..d8015235ba76 100644 --- a/arch/x86/events/intel/ds.c +++ b/arch/x86/events/intel/ds.c @@ -935,7 +935,7 @@ void intel_pmu_pebs_add(struct perf_event *event) bool needed_cb = pebs_needs_sched_cb(cpuc); cpuc->n_pebs++; - if (hwc->flags & PERF_X86_EVENT_FREERUNNING) + if (hwc->flags & PERF_X86_EVENT_LARGE_PEBS) cpuc->n_large_pebs++; pebs_update_state(needed_cb, cpuc, event->ctx->pmu); @@ -975,7 +975,7 @@ void intel_pmu_pebs_del(struct perf_event *event) bool needed_cb = pebs_needs_sched_cb(cpuc); cpuc->n_pebs--; - if (hwc->flags & PERF_X86_EVENT_FREERUNNING) + if (hwc->flags & PERF_X86_EVENT_LARGE_PEBS) cpuc->n_large_pebs--; pebs_update_state(needed_cb, cpuc, event->ctx->pmu); @@ -1530,7 +1530,7 @@ void __init intel_ds_init(void) x86_pmu.pebs_record_size = sizeof(struct pebs_record_skl); x86_pmu.drain_pebs = intel_pmu_drain_pebs_nhm; - x86_pmu.free_running_flags |= PERF_SAMPLE_TIME; + x86_pmu.large_pebs_flags |= PERF_SAMPLE_TIME; break; default: diff --git a/arch/x86/events/intel/uncore_snbep.c b/arch/x86/events/intel/uncore_snbep.c index 22ec65bc033a..c98b943e58b4 100644 --- a/arch/x86/events/intel/uncore_snbep.c +++ b/arch/x86/events/intel/uncore_snbep.c @@ -3343,6 +3343,7 @@ static struct extra_reg skx_uncore_cha_extra_regs[] = { SNBEP_CBO_EVENT_EXTRA_REG(0x9134, 0xffff, 0x4), SNBEP_CBO_EVENT_EXTRA_REG(0x35, 0xff, 0x8), SNBEP_CBO_EVENT_EXTRA_REG(0x36, 0xff, 0x8), + SNBEP_CBO_EVENT_EXTRA_REG(0x38, 0xff, 0x3), EVENT_EXTRA_END }; @@ -3562,24 +3563,27 @@ static struct intel_uncore_type *skx_msr_uncores[] = { NULL, }; +/* + * To determine the number of CHAs, it should read bits 27:0 in the CAPID6 + * register which located at Device 30, Function 3, Offset 0x9C. PCI ID 0x2083. + */ +#define SKX_CAPID6 0x9c +#define SKX_CHA_BIT_MASK GENMASK(27, 0) + static int skx_count_chabox(void) { - struct pci_dev *chabox_dev = NULL; - int bus, count = 0; + struct pci_dev *dev = NULL; + u32 val = 0; - while (1) { - chabox_dev = pci_get_device(PCI_VENDOR_ID_INTEL, 0x208d, chabox_dev); - if (!chabox_dev) - break; - if (count == 0) - bus = chabox_dev->bus->number; - if (bus != chabox_dev->bus->number) - break; - count++; - } + dev = pci_get_device(PCI_VENDOR_ID_INTEL, 0x2083, dev); + if (!dev) + goto out; - pci_dev_put(chabox_dev); - return count; + pci_read_config_dword(dev, SKX_CAPID6, &val); + val &= SKX_CHA_BIT_MASK; +out: + pci_dev_put(dev); + return hweight32(val); } void skx_uncore_cpu_init(void) diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h index 78f91ec1056e..39cd0615f04f 100644 --- a/arch/x86/events/perf_event.h +++ b/arch/x86/events/perf_event.h @@ -69,7 +69,7 @@ struct event_constraint { #define PERF_X86_EVENT_RDPMC_ALLOWED 0x0100 /* grant rdpmc permission */ #define PERF_X86_EVENT_EXCL_ACCT 0x0200 /* accounted EXCL event */ #define PERF_X86_EVENT_AUTO_RELOAD 0x0400 /* use PEBS auto-reload */ -#define PERF_X86_EVENT_FREERUNNING 0x0800 /* use freerunning PEBS */ +#define PERF_X86_EVENT_LARGE_PEBS 0x0800 /* use large PEBS */ struct amd_nb { @@ -88,7 +88,7 @@ struct amd_nb { * REGS_USER can be handled for events limited to ring 3. * */ -#define PEBS_FREERUNNING_FLAGS \ +#define LARGE_PEBS_FLAGS \ (PERF_SAMPLE_IP | PERF_SAMPLE_TID | PERF_SAMPLE_ADDR | \ PERF_SAMPLE_ID | PERF_SAMPLE_CPU | PERF_SAMPLE_STREAM_ID | \ PERF_SAMPLE_DATA_SRC | PERF_SAMPLE_IDENTIFIER | \ @@ -608,7 +608,7 @@ struct x86_pmu { struct event_constraint *pebs_constraints; void (*pebs_aliases)(struct perf_event *event); int max_pebs_events; - unsigned long free_running_flags; + unsigned long large_pebs_flags; /* * Intel LBR diff --git a/arch/x86/include/asm/barrier.h b/arch/x86/include/asm/barrier.h index e1259f043ae9..042b5e892ed1 100644 --- a/arch/x86/include/asm/barrier.h +++ b/arch/x86/include/asm/barrier.h @@ -52,11 +52,7 @@ static inline unsigned long array_index_mask_nospec(unsigned long index, #define barrier_nospec() alternative_2("", "mfence", X86_FEATURE_MFENCE_RDTSC, \ "lfence", X86_FEATURE_LFENCE_RDTSC) -#ifdef CONFIG_X86_PPRO_FENCE -#define dma_rmb() rmb() -#else #define dma_rmb() barrier() -#endif #define dma_wmb() barrier() #ifdef CONFIG_X86_32 @@ -68,30 +64,6 @@ static inline unsigned long array_index_mask_nospec(unsigned long index, #define __smp_wmb() barrier() #define __smp_store_mb(var, value) do { (void)xchg(&var, value); } while (0) -#if defined(CONFIG_X86_PPRO_FENCE) - -/* - * For this option x86 doesn't have a strong TSO memory - * model and we should fall back to full barriers. - */ - -#define __smp_store_release(p, v) \ -do { \ - compiletime_assert_atomic_type(*p); \ - __smp_mb(); \ - WRITE_ONCE(*p, v); \ -} while (0) - -#define __smp_load_acquire(p) \ -({ \ - typeof(*p) ___p1 = READ_ONCE(*p); \ - compiletime_assert_atomic_type(*p); \ - __smp_mb(); \ - ___p1; \ -}) - -#else /* regular x86 TSO memory ordering */ - #define __smp_store_release(p, v) \ do { \ compiletime_assert_atomic_type(*p); \ @@ -107,8 +79,6 @@ do { \ ___p1; \ }) -#endif - /* Atomic operations are already serializing on x86 */ #define __smp_mb__before_atomic() barrier() #define __smp_mb__after_atomic() barrier() diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index f41079da38c5..d554c11e01ff 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -316,6 +316,7 @@ #define X86_FEATURE_VPCLMULQDQ (16*32+10) /* Carry-Less Multiplication Double Quadword */ #define X86_FEATURE_AVX512_VNNI (16*32+11) /* Vector Neural Network Instructions */ #define X86_FEATURE_AVX512_BITALG (16*32+12) /* Support for VPOPCNT[B,W] and VPSHUF-BITQMB instructions */ +#define X86_FEATURE_TME (16*32+13) /* Intel Total Memory Encryption */ #define X86_FEATURE_AVX512_VPOPCNTDQ (16*32+14) /* POPCNT for vectors of DW/QW */ #define X86_FEATURE_LA57 (16*32+16) /* 5-level page tables */ #define X86_FEATURE_RDPID (16*32+22) /* RDPID instruction */ @@ -328,6 +329,7 @@ /* Intel-defined CPU features, CPUID level 0x00000007:0 (EDX), word 18 */ #define X86_FEATURE_AVX512_4VNNIW (18*32+ 2) /* AVX-512 Neural Network Instructions */ #define X86_FEATURE_AVX512_4FMAPS (18*32+ 3) /* AVX-512 Multiply Accumulation Single precision */ +#define X86_FEATURE_PCONFIG (18*32+18) /* Intel PCONFIG */ #define X86_FEATURE_SPEC_CTRL (18*32+26) /* "" Speculation Control (IBRS + IBPB) */ #define X86_FEATURE_INTEL_STIBP (18*32+27) /* "" Single Thread Indirect Branch Predictors */ #define X86_FEATURE_ARCH_CAPABILITIES (18*32+29) /* IA32_ARCH_CAPABILITIES MSR (Intel) */ diff --git a/arch/x86/include/asm/io.h b/arch/x86/include/asm/io.h index 95e948627fd0..f6e5b9375d8c 100644 --- a/arch/x86/include/asm/io.h +++ b/arch/x86/include/asm/io.h @@ -232,21 +232,6 @@ extern void set_iounmap_nonlazy(void); */ #define __ISA_IO_base ((char __iomem *)(PAGE_OFFSET)) -/* - * Cache management - * - * This needed for two cases - * 1. Out of order aware processors - * 2. Accidentally out of order processors (PPro errata #51) - */ - -static inline void flush_write_buffers(void) -{ -#if defined(CONFIG_X86_PPRO_FENCE) - asm volatile("lock; addl $0,0(%%esp)": : :"memory"); -#endif -} - #endif /* __KERNEL__ */ extern void native_io_delay(void); diff --git a/arch/x86/include/asm/microcode.h b/arch/x86/include/asm/microcode.h index 7fb1047d61c7..6cf0e4cb7b97 100644 --- a/arch/x86/include/asm/microcode.h +++ b/arch/x86/include/asm/microcode.h @@ -39,6 +39,7 @@ struct device; enum ucode_state { UCODE_OK = 0, + UCODE_NEW, UCODE_UPDATED, UCODE_NFOUND, UCODE_ERROR, diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h index d0dabeae0505..f928ad9b143f 100644 --- a/arch/x86/include/asm/nospec-branch.h +++ b/arch/x86/include/asm/nospec-branch.h @@ -183,7 +183,10 @@ * otherwise we'll run out of registers. We don't care about CET * here, anyway. */ -# define CALL_NOSPEC ALTERNATIVE("call *%[thunk_target]\n", \ +# define CALL_NOSPEC \ + ALTERNATIVE( \ + ANNOTATE_RETPOLINE_SAFE \ + "call *%[thunk_target]\n", \ " jmp 904f;\n" \ " .align 16\n" \ "901: call 903f;\n" \ diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h index 8b6780751132..5db8b0b10766 100644 --- a/arch/x86/include/asm/vmx.h +++ b/arch/x86/include/asm/vmx.h @@ -352,6 +352,7 @@ enum vmcs_field { #define INTR_TYPE_NMI_INTR (2 << 8) /* NMI */ #define INTR_TYPE_HARD_EXCEPTION (3 << 8) /* processor exception */ #define INTR_TYPE_SOFT_INTR (4 << 8) /* software interrupt */ +#define INTR_TYPE_PRIV_SW_EXCEPTION (5 << 8) /* ICE breakpoint - undocumented */ #define INTR_TYPE_SOFT_EXCEPTION (6 << 8) /* software exception */ /* GUEST_INTERRUPTIBILITY_INFO flags. */ diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index 4aa9fd379390..c3af167d0a70 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c @@ -105,7 +105,7 @@ static void probe_xeon_phi_r3mwait(struct cpuinfo_x86 *c) /* * Early microcode releases for the Spectre v2 mitigation were broken. * Information taken from; - * - https://newsroom.intel.com/wp-content/uploads/sites/11/2018/01/microcode-update-guidance.pdf + * - https://newsroom.intel.com/wp-content/uploads/sites/11/2018/03/microcode-update-guidance.pdf * - https://kb.vmware.com/s/article/52345 * - Microcode revisions observed in the wild * - Release note from 20180108 microcode release @@ -123,7 +123,6 @@ static const struct sku_microcode spectre_bad_microcodes[] = { { INTEL_FAM6_KABYLAKE_MOBILE, 0x09, 0x80 }, { INTEL_FAM6_SKYLAKE_X, 0x03, 0x0100013e }, { INTEL_FAM6_SKYLAKE_X, 0x04, 0x0200003c }, - { INTEL_FAM6_SKYLAKE_DESKTOP, 0x03, 0xc2 }, { INTEL_FAM6_BROADWELL_CORE, 0x04, 0x28 }, { INTEL_FAM6_BROADWELL_GT3E, 0x01, 0x1b }, { INTEL_FAM6_BROADWELL_XEON_D, 0x02, 0x14 }, diff --git a/arch/x86/kernel/cpu/microcode/amd.c b/arch/x86/kernel/cpu/microcode/amd.c index a998e1a7d46f..48179928ff38 100644 --- a/arch/x86/kernel/cpu/microcode/amd.c +++ b/arch/x86/kernel/cpu/microcode/amd.c @@ -339,7 +339,7 @@ int __init save_microcode_in_initrd_amd(unsigned int cpuid_1_eax) return -EINVAL; ret = load_microcode_amd(true, x86_family(cpuid_1_eax), desc.data, desc.size); - if (ret != UCODE_OK) + if (ret > UCODE_UPDATED) return -EINVAL; return 0; @@ -683,27 +683,35 @@ static enum ucode_state __load_microcode_amd(u8 family, const u8 *data, static enum ucode_state load_microcode_amd(bool save, u8 family, const u8 *data, size_t size) { + struct ucode_patch *p; enum ucode_state ret; /* free old equiv table */ free_equiv_cpu_table(); ret = __load_microcode_amd(family, data, size); - - if (ret != UCODE_OK) + if (ret != UCODE_OK) { cleanup(); + return ret; + } -#ifdef CONFIG_X86_32 - /* save BSP's matching patch for early load */ - if (save) { - struct ucode_patch *p = find_patch(0); - if (p) { - memset(amd_ucode_patch, 0, PATCH_MAX_SIZE); - memcpy(amd_ucode_patch, p->data, min_t(u32, ksize(p->data), - PATCH_MAX_SIZE)); - } + p = find_patch(0); + if (!p) { + return ret; + } else { + if (boot_cpu_data.microcode == p->patch_id) + return ret; + + ret = UCODE_NEW; } -#endif + + /* save BSP's matching patch for early load */ + if (!save) + return ret; + + memset(amd_ucode_patch, 0, PATCH_MAX_SIZE); + memcpy(amd_ucode_patch, p->data, min_t(u32, ksize(p->data), PATCH_MAX_SIZE)); + return ret; } diff --git a/arch/x86/kernel/cpu/microcode/core.c b/arch/x86/kernel/cpu/microcode/core.c index 70ecbc8099c9..10c4fc2c91f8 100644 --- a/arch/x86/kernel/cpu/microcode/core.c +++ b/arch/x86/kernel/cpu/microcode/core.c @@ -517,7 +517,29 @@ static int check_online_cpus(void) return -EINVAL; } -static atomic_t late_cpus; +static atomic_t late_cpus_in; +static atomic_t late_cpus_out; + +static int __wait_for_cpus(atomic_t *t, long long timeout) +{ + int all_cpus = num_online_cpus(); + + atomic_inc(t); + + while (atomic_read(t) < all_cpus) { + if (timeout < SPINUNIT) { + pr_err("Timeout while waiting for CPUs rendezvous, remaining: %d\n", + all_cpus - atomic_read(t)); + return 1; + } + + ndelay(SPINUNIT); + timeout -= SPINUNIT; + + touch_nmi_watchdog(); + } + return 0; +} /* * Returns: @@ -527,30 +549,16 @@ static atomic_t late_cpus; */ static int __reload_late(void *info) { - unsigned int timeout = NSEC_PER_SEC; - int all_cpus = num_online_cpus(); int cpu = smp_processor_id(); enum ucode_state err; int ret = 0; - atomic_dec(&late_cpus); - /* * Wait for all CPUs to arrive. A load will not be attempted unless all * CPUs show up. * */ - while (atomic_read(&late_cpus)) { - if (timeout < SPINUNIT) { - pr_err("Timeout while waiting for CPUs rendezvous, remaining: %d\n", - atomic_read(&late_cpus)); - return -1; - } - - ndelay(SPINUNIT); - timeout -= SPINUNIT; - - touch_nmi_watchdog(); - } + if (__wait_for_cpus(&late_cpus_in, NSEC_PER_SEC)) + return -1; spin_lock(&update_lock); apply_microcode_local(&err); @@ -558,15 +566,22 @@ static int __reload_late(void *info) if (err > UCODE_NFOUND) { pr_warn("Error reloading microcode on CPU %d\n", cpu); - ret = -1; - } else if (err == UCODE_UPDATED) { + return -1; + /* siblings return UCODE_OK because their engine got updated already */ + } else if (err == UCODE_UPDATED || err == UCODE_OK) { ret = 1; + } else { + return ret; } - atomic_inc(&late_cpus); - - while (atomic_read(&late_cpus) != all_cpus) - cpu_relax(); + /* + * Increase the wait timeout to a safe value here since we're + * serializing the microcode update and that could take a while on a + * large number of CPUs. And that is fine as the *actual* timeout will + * be determined by the last CPU finished updating and thus cut short. + */ + if (__wait_for_cpus(&late_cpus_out, NSEC_PER_SEC * num_online_cpus())) + panic("Timeout during microcode update!\n"); return ret; } @@ -579,12 +594,11 @@ static int microcode_reload_late(void) { int ret; - atomic_set(&late_cpus, num_online_cpus()); + atomic_set(&late_cpus_in, 0); + atomic_set(&late_cpus_out, 0); ret = stop_machine_cpuslocked(__reload_late, NULL, cpu_online_mask); - if (ret < 0) - return ret; - else if (ret > 0) + if (ret > 0) microcode_check(); return ret; @@ -607,7 +621,7 @@ static ssize_t reload_store(struct device *dev, return size; tmp_ret = microcode_ops->request_microcode_fw(bsp, µcode_pdev->dev, true); - if (tmp_ret != UCODE_OK) + if (tmp_ret != UCODE_NEW) return size; get_online_cpus(); @@ -691,10 +705,8 @@ static enum ucode_state microcode_init_cpu(int cpu, bool refresh_fw) if (system_state != SYSTEM_RUNNING) return UCODE_NFOUND; - ustate = microcode_ops->request_microcode_fw(cpu, µcode_pdev->dev, - refresh_fw); - - if (ustate == UCODE_OK) { + ustate = microcode_ops->request_microcode_fw(cpu, µcode_pdev->dev, refresh_fw); + if (ustate == UCODE_NEW) { pr_debug("CPU%d updated upon init\n", cpu); apply_microcode_on_target(cpu); } diff --git a/arch/x86/kernel/cpu/microcode/intel.c b/arch/x86/kernel/cpu/microcode/intel.c index 2aded9db1d42..32b8e5724f96 100644 --- a/arch/x86/kernel/cpu/microcode/intel.c +++ b/arch/x86/kernel/cpu/microcode/intel.c @@ -862,6 +862,7 @@ static enum ucode_state generic_load_microcode(int cpu, void *data, size_t size, unsigned int leftover = size; unsigned int curr_mc_size = 0, new_mc_size = 0; unsigned int csig, cpf; + enum ucode_state ret = UCODE_OK; while (leftover) { struct microcode_header_intel mc_header; @@ -903,6 +904,7 @@ static enum ucode_state generic_load_microcode(int cpu, void *data, size_t size, new_mc = mc; new_mc_size = mc_size; mc = NULL; /* trigger new vmalloc */ + ret = UCODE_NEW; } ucode_ptr += mc_size; @@ -932,7 +934,7 @@ static enum ucode_state generic_load_microcode(int cpu, void *data, size_t size, pr_debug("CPU%d found a matching microcode update with version 0x%x (current=0x%x)\n", cpu, new_rev, uci->cpu_sig.rev); - return UCODE_OK; + return ret; } static int get_ucode_fw(void *to, const void *from, size_t n) diff --git a/arch/x86/kernel/idt.c b/arch/x86/kernel/idt.c index 56d99be3706a..50bee5fe1140 100644 --- a/arch/x86/kernel/idt.c +++ b/arch/x86/kernel/idt.c @@ -160,7 +160,6 @@ static const __initconst struct idt_data early_pf_idts[] = { */ static const __initconst struct idt_data dbg_idts[] = { INTG(X86_TRAP_DB, debug), - INTG(X86_TRAP_BP, int3), }; #endif @@ -183,7 +182,6 @@ gate_desc debug_idt_table[IDT_ENTRIES] __page_aligned_bss; static const __initconst struct idt_data ist_idts[] = { ISTG(X86_TRAP_DB, debug, DEBUG_STACK), ISTG(X86_TRAP_NMI, nmi, NMI_STACK), - SISTG(X86_TRAP_BP, int3, DEBUG_STACK), ISTG(X86_TRAP_DF, double_fault, DOUBLEFAULT_STACK), #ifdef CONFIG_X86_MCE ISTG(X86_TRAP_MC, &machine_check, MCE_STACK), diff --git a/arch/x86/kernel/pci-nommu.c b/arch/x86/kernel/pci-nommu.c index 618285e475c6..ac7ea3a8242f 100644 --- a/arch/x86/kernel/pci-nommu.c +++ b/arch/x86/kernel/pci-nommu.c @@ -37,7 +37,6 @@ static dma_addr_t nommu_map_page(struct device *dev, struct page *page, WARN_ON(size == 0); if (!check_addr("map_single", dev, bus, size)) return NOMMU_MAPPING_ERROR; - flush_write_buffers(); return bus; } @@ -72,25 +71,9 @@ static int nommu_map_sg(struct device *hwdev, struct scatterlist *sg, return 0; s->dma_length = s->length; } - flush_write_buffers(); return nents; } -static void nommu_sync_single_for_device(struct device *dev, - dma_addr_t addr, size_t size, - enum dma_data_direction dir) -{ - flush_write_buffers(); -} - - -static void nommu_sync_sg_for_device(struct device *dev, - struct scatterlist *sg, int nelems, - enum dma_data_direction dir) -{ - flush_write_buffers(); -} - static int nommu_mapping_error(struct device *dev, dma_addr_t dma_addr) { return dma_addr == NOMMU_MAPPING_ERROR; @@ -101,8 +84,6 @@ const struct dma_map_ops nommu_dma_ops = { .free = dma_generic_free_coherent, .map_sg = nommu_map_sg, .map_page = nommu_map_page, - .sync_single_for_device = nommu_sync_single_for_device, - .sync_sg_for_device = nommu_sync_sg_for_device, .is_phys = 1, .mapping_error = nommu_mapping_error, .dma_supported = x86_dma_supported, diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index 3d9b2308e7fa..03f3d7695dac 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -577,7 +577,6 @@ do_general_protection(struct pt_regs *regs, long error_code) } NOKPROBE_SYMBOL(do_general_protection); -/* May run on IST stack. */ dotraplinkage void notrace do_int3(struct pt_regs *regs, long error_code) { #ifdef CONFIG_DYNAMIC_FTRACE @@ -592,6 +591,13 @@ dotraplinkage void notrace do_int3(struct pt_regs *regs, long error_code) if (poke_int3_handler(regs)) return; + /* + * Use ist_enter despite the fact that we don't use an IST stack. + * We can be called from a kprobe in non-CONTEXT_KERNEL kernel + * mode or even during context tracking state changes. + * + * This means that we can't schedule. That's okay. + */ ist_enter(regs); RCU_LOCKDEP_WARN(!rcu_is_watching(), "entry code didn't wake RCU"); #ifdef CONFIG_KGDB_LOW_LEVEL_TRAP @@ -609,15 +615,10 @@ dotraplinkage void notrace do_int3(struct pt_regs *regs, long error_code) SIGTRAP) == NOTIFY_STOP) goto exit; - /* - * Let others (NMI) know that the debug stack is in use - * as we may switch to the interrupt stack. - */ - debug_stack_usage_inc(); cond_local_irq_enable(regs); do_trap(X86_TRAP_BP, SIGTRAP, "int3", regs, error_code, NULL); cond_local_irq_disable(regs); - debug_stack_usage_dec(); + exit: ist_exit(regs); } diff --git a/arch/x86/kernel/vm86_32.c b/arch/x86/kernel/vm86_32.c index 5edb27f1a2c4..9d0b5af7db91 100644 --- a/arch/x86/kernel/vm86_32.c +++ b/arch/x86/kernel/vm86_32.c @@ -727,7 +727,8 @@ void handle_vm86_fault(struct kernel_vm86_regs *regs, long error_code) return; check_vip: - if (VEFLAGS & X86_EFLAGS_VIP) { + if ((VEFLAGS & (X86_EFLAGS_VIP | X86_EFLAGS_VIF)) == + (X86_EFLAGS_VIP | X86_EFLAGS_VIF)) { save_v86_state(regs, VM86_STI); return; } diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index f551962ac294..763bb3bade63 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -2770,8 +2770,10 @@ static int set_spte(struct kvm_vcpu *vcpu, u64 *sptep, else pte_access &= ~ACC_WRITE_MASK; + if (!kvm_is_mmio_pfn(pfn)) + spte |= shadow_me_mask; + spte |= (u64)pfn << PAGE_SHIFT; - spte |= shadow_me_mask; if (pte_access & ACC_WRITE_MASK) { diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 051dab74e4e9..2d87603f9179 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -1045,6 +1045,13 @@ static inline bool is_machine_check(u32 intr_info) (INTR_TYPE_HARD_EXCEPTION | MC_VECTOR | INTR_INFO_VALID_MASK); } +/* Undocumented: icebp/int1 */ +static inline bool is_icebp(u32 intr_info) +{ + return (intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VALID_MASK)) + == (INTR_TYPE_PRIV_SW_EXCEPTION | INTR_INFO_VALID_MASK); +} + static inline bool cpu_has_vmx_msr_bitmap(void) { return vmcs_config.cpu_based_exec_ctrl & CPU_BASED_USE_MSR_BITMAPS; @@ -6179,7 +6186,7 @@ static int handle_exception(struct kvm_vcpu *vcpu) (KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP))) { vcpu->arch.dr6 &= ~15; vcpu->arch.dr6 |= dr6 | DR6_RTM; - if (!(dr6 & ~DR6_RESERVED)) /* icebp */ + if (is_icebp(intr_info)) skip_emulated_instruction(vcpu); kvm_queue_exception(vcpu, DB_VECTOR); diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index c88573d90f3e..25a30b5d6582 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c @@ -330,7 +330,7 @@ static noinline int vmalloc_fault(unsigned long address) if (!pmd_k) return -1; - if (pmd_huge(*pmd_k)) + if (pmd_large(*pmd_k)) return 0; pte_k = pte_offset_kernel(pmd_k, address); @@ -475,7 +475,7 @@ static noinline int vmalloc_fault(unsigned long address) if (pud_none(*pud) || pud_pfn(*pud) != pud_pfn(*pud_ref)) BUG(); - if (pud_huge(*pud)) + if (pud_large(*pud)) return 0; pmd = pmd_offset(pud, address); @@ -486,7 +486,7 @@ static noinline int vmalloc_fault(unsigned long address) if (pmd_none(*pmd) || pmd_pfn(*pmd) != pmd_pfn(*pmd_ref)) BUG(); - if (pmd_huge(*pmd)) + if (pmd_large(*pmd)) return 0; pte_ref = pte_offset_kernel(pmd_ref, address); diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index 8b72923f1d35..af11a2890235 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c @@ -800,17 +800,11 @@ int arch_add_memory(int nid, u64 start, u64 size, struct vmem_altmap *altmap, #define PAGE_INUSE 0xFD -static void __meminit free_pagetable(struct page *page, int order, - struct vmem_altmap *altmap) +static void __meminit free_pagetable(struct page *page, int order) { unsigned long magic; unsigned int nr_pages = 1 << order; - if (altmap) { - vmem_altmap_free(altmap, nr_pages); - return; - } - /* bootmem page has reserved flag */ if (PageReserved(page)) { __ClearPageReserved(page); @@ -826,9 +820,17 @@ static void __meminit free_pagetable(struct page *page, int order, free_pages((unsigned long)page_address(page), order); } -static void __meminit free_pte_table(pte_t *pte_start, pmd_t *pmd, +static void __meminit free_hugepage_table(struct page *page, struct vmem_altmap *altmap) { + if (altmap) + vmem_altmap_free(altmap, PMD_SIZE / PAGE_SIZE); + else + free_pagetable(page, get_order(PMD_SIZE)); +} + +static void __meminit free_pte_table(pte_t *pte_start, pmd_t *pmd) +{ pte_t *pte; int i; @@ -839,14 +841,13 @@ static void __meminit free_pte_table(pte_t *pte_start, pmd_t *pmd, } /* free a pte talbe */ - free_pagetable(pmd_page(*pmd), 0, altmap); + free_pagetable(pmd_page(*pmd), 0); spin_lock(&init_mm.page_table_lock); pmd_clear(pmd); spin_unlock(&init_mm.page_table_lock); } -static void __meminit free_pmd_table(pmd_t *pmd_start, pud_t *pud, - struct vmem_altmap *altmap) +static void __meminit free_pmd_table(pmd_t *pmd_start, pud_t *pud) { pmd_t *pmd; int i; @@ -858,14 +859,13 @@ static void __meminit free_pmd_table(pmd_t *pmd_start, pud_t *pud, } /* free a pmd talbe */ - free_pagetable(pud_page(*pud), 0, altmap); + free_pagetable(pud_page(*pud), 0); spin_lock(&init_mm.page_table_lock); pud_clear(pud); spin_unlock(&init_mm.page_table_lock); } -static void __meminit free_pud_table(pud_t *pud_start, p4d_t *p4d, - struct vmem_altmap *altmap) +static void __meminit free_pud_table(pud_t *pud_start, p4d_t *p4d) { pud_t *pud; int i; @@ -877,7 +877,7 @@ static void __meminit free_pud_table(pud_t *pud_start, p4d_t *p4d, } /* free a pud talbe */ - free_pagetable(p4d_page(*p4d), 0, altmap); + free_pagetable(p4d_page(*p4d), 0); spin_lock(&init_mm.page_table_lock); p4d_clear(p4d); spin_unlock(&init_mm.page_table_lock); @@ -885,7 +885,7 @@ static void __meminit free_pud_table(pud_t *pud_start, p4d_t *p4d, static void __meminit remove_pte_table(pte_t *pte_start, unsigned long addr, unsigned long end, - struct vmem_altmap *altmap, bool direct) + bool direct) { unsigned long next, pages = 0; pte_t *pte; @@ -916,7 +916,7 @@ remove_pte_table(pte_t *pte_start, unsigned long addr, unsigned long end, * freed when offlining, or simplely not in use. */ if (!direct) - free_pagetable(pte_page(*pte), 0, altmap); + free_pagetable(pte_page(*pte), 0); spin_lock(&init_mm.page_table_lock); pte_clear(&init_mm, addr, pte); @@ -939,7 +939,7 @@ remove_pte_table(pte_t *pte_start, unsigned long addr, unsigned long end, page_addr = page_address(pte_page(*pte)); if (!memchr_inv(page_addr, PAGE_INUSE, PAGE_SIZE)) { - free_pagetable(pte_page(*pte), 0, altmap); + free_pagetable(pte_page(*pte), 0); spin_lock(&init_mm.page_table_lock); pte_clear(&init_mm, addr, pte); @@ -974,9 +974,8 @@ remove_pmd_table(pmd_t *pmd_start, unsigned long addr, unsigned long end, if (IS_ALIGNED(addr, PMD_SIZE) && IS_ALIGNED(next, PMD_SIZE)) { if (!direct) - free_pagetable(pmd_page(*pmd), - get_order(PMD_SIZE), - altmap); + free_hugepage_table(pmd_page(*pmd), + altmap); spin_lock(&init_mm.page_table_lock); pmd_clear(pmd); @@ -989,9 +988,8 @@ remove_pmd_table(pmd_t *pmd_start, unsigned long addr, unsigned long end, page_addr = page_address(pmd_page(*pmd)); if (!memchr_inv(page_addr, PAGE_INUSE, PMD_SIZE)) { - free_pagetable(pmd_page(*pmd), - get_order(PMD_SIZE), - altmap); + free_hugepage_table(pmd_page(*pmd), + altmap); spin_lock(&init_mm.page_table_lock); pmd_clear(pmd); @@ -1003,8 +1001,8 @@ remove_pmd_table(pmd_t *pmd_start, unsigned long addr, unsigned long end, } pte_base = (pte_t *)pmd_page_vaddr(*pmd); - remove_pte_table(pte_base, addr, next, altmap, direct); - free_pte_table(pte_base, pmd, altmap); + remove_pte_table(pte_base, addr, next, direct); + free_pte_table(pte_base, pmd); } /* Call free_pmd_table() in remove_pud_table(). */ @@ -1033,8 +1031,7 @@ remove_pud_table(pud_t *pud_start, unsigned long addr, unsigned long end, IS_ALIGNED(next, PUD_SIZE)) { if (!direct) free_pagetable(pud_page(*pud), - get_order(PUD_SIZE), - altmap); + get_order(PUD_SIZE)); spin_lock(&init_mm.page_table_lock); pud_clear(pud); @@ -1048,8 +1045,7 @@ remove_pud_table(pud_t *pud_start, unsigned long addr, unsigned long end, if (!memchr_inv(page_addr, PAGE_INUSE, PUD_SIZE)) { free_pagetable(pud_page(*pud), - get_order(PUD_SIZE), - altmap); + get_order(PUD_SIZE)); spin_lock(&init_mm.page_table_lock); pud_clear(pud); @@ -1062,7 +1058,7 @@ remove_pud_table(pud_t *pud_start, unsigned long addr, unsigned long end, pmd_base = pmd_offset(pud, 0); remove_pmd_table(pmd_base, addr, next, direct, altmap); - free_pmd_table(pmd_base, pud, altmap); + free_pmd_table(pmd_base, pud); } if (direct) @@ -1094,7 +1090,7 @@ remove_p4d_table(p4d_t *p4d_start, unsigned long addr, unsigned long end, * to adapt for boot-time switching between 4 and 5 level page tables. */ if (CONFIG_PGTABLE_LEVELS == 5) - free_pud_table(pud_base, p4d, altmap); + free_pud_table(pud_base, p4d); } if (direct) diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c index 004abf9ebf12..34cda7e0551b 100644 --- a/arch/x86/mm/pgtable.c +++ b/arch/x86/mm/pgtable.c @@ -702,4 +702,52 @@ int pmd_clear_huge(pmd_t *pmd) return 0; } + +/** + * pud_free_pmd_page - Clear pud entry and free pmd page. + * @pud: Pointer to a PUD. + * + * Context: The pud range has been unmaped and TLB purged. + * Return: 1 if clearing the entry succeeded. 0 otherwise. + */ +int pud_free_pmd_page(pud_t *pud) +{ + pmd_t *pmd; + int i; + + if (pud_none(*pud)) + return 1; + + pmd = (pmd_t *)pud_page_vaddr(*pud); + + for (i = 0; i < PTRS_PER_PMD; i++) + if (!pmd_free_pte_page(&pmd[i])) + return 0; + + pud_clear(pud); + free_page((unsigned long)pmd); + + return 1; +} + +/** + * pmd_free_pte_page - Clear pmd entry and free pte page. + * @pmd: Pointer to a PMD. + * + * Context: The pmd range has been unmaped and TLB purged. + * Return: 1 if clearing the entry succeeded. 0 otherwise. + */ +int pmd_free_pte_page(pmd_t *pmd) +{ + pte_t *pte; + + if (pmd_none(*pmd)) + return 1; + + pte = (pte_t *)pmd_page_vaddr(*pmd); + pmd_clear(pmd); + free_page((unsigned long)pte); + + return 1; +} #endif /* CONFIG_HAVE_ARCH_HUGE_VMAP */ diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c index 45e4eb5bcbb2..ce5b2ebd5701 100644 --- a/arch/x86/net/bpf_jit_comp.c +++ b/arch/x86/net/bpf_jit_comp.c @@ -1188,7 +1188,7 @@ skip_init_addrs: * may converge on the last pass. In such case do one more * pass to emit the final image */ - for (pass = 0; pass < 10 || image; pass++) { + for (pass = 0; pass < 20 || image; pass++) { proglen = do_jit(prog, addrs, image, oldproglen, &ctx); if (proglen <= 0) { image = NULL; @@ -1215,6 +1215,7 @@ skip_init_addrs: } } oldproglen = proglen; + cond_resched(); } if (bpf_jit_enable > 1) diff --git a/arch/x86/platform/efi/efi_64.c b/arch/x86/platform/efi/efi_64.c index c310a8284358..f9cfbc0d1f33 100644 --- a/arch/x86/platform/efi/efi_64.c +++ b/arch/x86/platform/efi/efi_64.c @@ -227,7 +227,7 @@ int __init efi_alloc_page_tables(void) if (!pud) { if (CONFIG_PGTABLE_LEVELS > 4) free_page((unsigned long) pgd_page_vaddr(*pgd)); - free_page((unsigned long)efi_pgd); + free_pages((unsigned long)efi_pgd, PGD_ALLOCATION_ORDER); return -ENOMEM; } diff --git a/arch/x86/um/asm/barrier.h b/arch/x86/um/asm/barrier.h index b7d73400ea29..f31e5d903161 100644 --- a/arch/x86/um/asm/barrier.h +++ b/arch/x86/um/asm/barrier.h @@ -30,11 +30,7 @@ #endif /* CONFIG_X86_32 */ -#ifdef CONFIG_X86_PPRO_FENCE -#define dma_rmb() rmb() -#else /* CONFIG_X86_PPRO_FENCE */ #define dma_rmb() barrier() -#endif /* CONFIG_X86_PPRO_FENCE */ #define dma_wmb() barrier() #include <asm-generic/barrier.h> |