Diffstat (limited to 'arch')
718 files changed, 28008 insertions, 9762 deletions
diff --git a/arch/alpha/kernel/perf_event.c b/arch/alpha/kernel/perf_event.c index 4341ccf5c0c4..e7a59d927d78 100644 --- a/arch/alpha/kernel/perf_event.c +++ b/arch/alpha/kernel/perf_event.c @@ -824,7 +824,7 @@ static void alpha_perf_event_irq_handler(unsigned long la_ptr, if (unlikely(la_ptr >= alpha_pmu->num_pmcs)) { /* This should never occur! */ irq_err_count++; - pr_warning("PMI: silly index %ld\n", la_ptr); + pr_warn("PMI: silly index %ld\n", la_ptr); wrperfmon(PERFMON_CMD_ENABLE, cpuc->idx_mask); return; } @@ -847,7 +847,7 @@ static void alpha_perf_event_irq_handler(unsigned long la_ptr, if (unlikely(!event)) { /* This should never occur! */ irq_err_count++; - pr_warning("PMI: No event at index %d!\n", idx); + pr_warn("PMI: No event at index %d!\n", idx); wrperfmon(PERFMON_CMD_ENABLE, cpuc->idx_mask); return; } diff --git a/arch/arc/boot/dts/hsdk.dts b/arch/arc/boot/dts/hsdk.dts index bfc7f5f5d6f2..9acbeba832c0 100644 --- a/arch/arc/boot/dts/hsdk.dts +++ b/arch/arc/boot/dts/hsdk.dts @@ -65,6 +65,14 @@ clock-frequency = <33333333>; }; + reg_5v0: regulator-5v0 { + compatible = "regulator-fixed"; + + regulator-name = "5v0-supply"; + regulator-min-microvolt = <5000000>; + regulator-max-microvolt = <5000000>; + }; + cpu_intc: cpu-interrupt-controller { compatible = "snps,archs-intc"; interrupt-controller; @@ -264,6 +272,21 @@ clocks = <&input_clk>; cs-gpios = <&creg_gpio 0 GPIO_ACTIVE_LOW>, <&creg_gpio 1 GPIO_ACTIVE_LOW>; + + spi-flash@0 { + compatible = "sst26wf016b", "jedec,spi-nor"; + reg = <0>; + #address-cells = <1>; + #size-cells = <1>; + spi-max-frequency = <4000000>; + }; + + adc@1 { + compatible = "ti,adc108s102"; + reg = <1>; + vref-supply = <®_5v0>; + spi-max-frequency = <1000000>; + }; }; creg_gpio: gpio@14b0 { diff --git a/arch/arc/configs/hsdk_defconfig b/arch/arc/configs/hsdk_defconfig index 9b9a74444ce2..0974226fab55 100644 --- a/arch/arc/configs/hsdk_defconfig +++ b/arch/arc/configs/hsdk_defconfig @@ -32,6 +32,8 @@ CONFIG_INET=y CONFIG_DEVTMPFS=y # CONFIG_STANDALONE is not set # CONFIG_PREVENT_FIRMWARE_BUILD is not set +CONFIG_MTD=y +CONFIG_MTD_SPI_NOR=y CONFIG_SCSI=y CONFIG_BLK_DEV_SD=y CONFIG_NETDEVICES=y @@ -55,6 +57,8 @@ CONFIG_GPIO_SYSFS=y CONFIG_GPIO_DWAPB=y CONFIG_GPIO_SNPS_CREG=y # CONFIG_HWMON is not set +CONFIG_REGULATOR=y +CONFIG_REGULATOR_FIXED_VOLTAGE=y CONFIG_DRM=y # CONFIG_DRM_FBDEV_EMULATION is not set CONFIG_DRM_UDL=y @@ -72,6 +76,8 @@ CONFIG_MMC_SDHCI_PLTFM=y CONFIG_MMC_DW=y CONFIG_DMADEVICES=y CONFIG_DW_AXI_DMAC=y +CONFIG_IIO=y +CONFIG_TI_ADC108S102=y CONFIG_EXT3_FS=y CONFIG_VFAT_FS=y CONFIG_TMPFS=y diff --git a/arch/arc/kernel/perf_event.c b/arch/arc/kernel/perf_event.c index 861a8aea51f9..661fd842ea97 100644 --- a/arch/arc/kernel/perf_event.c +++ b/arch/arc/kernel/perf_event.c @@ -614,8 +614,8 @@ static int arc_pmu_device_probe(struct platform_device *pdev) /* loop thru all available h/w condition indexes */ for (i = 0; i < cc_bcr.c; i++) { write_aux_reg(ARC_REG_CC_INDEX, i); - cc_name.indiv.word0 = read_aux_reg(ARC_REG_CC_NAME0); - cc_name.indiv.word1 = read_aux_reg(ARC_REG_CC_NAME1); + cc_name.indiv.word0 = le32_to_cpu(read_aux_reg(ARC_REG_CC_NAME0)); + cc_name.indiv.word1 = le32_to_cpu(read_aux_reg(ARC_REG_CC_NAME1)); arc_pmu_map_hw_event(i, cc_name.str); arc_pmu_add_raw_event_attr(i, cc_name.str); diff --git a/arch/arm/boot/dts/am335x-icev2.dts b/arch/arm/boot/dts/am335x-icev2.dts index 18f70b35da4c..204bccfcc110 100644 --- a/arch/arm/boot/dts/am335x-icev2.dts +++ b/arch/arm/boot/dts/am335x-icev2.dts @@ -432,7 +432,7 @@ pinctrl-0 = 
<&mmc0_pins_default>; }; -&gpio0 { +&gpio0_target { /* Do not idle the GPIO used for holding the VTT regulator */ ti,no-reset-on-init; ti,no-idle-on-init; diff --git a/arch/arm/boot/dts/am33xx-l4.dtsi b/arch/arm/boot/dts/am33xx-l4.dtsi index 9915c891e05f..7a9eb2b0d45b 100644 --- a/arch/arm/boot/dts/am33xx-l4.dtsi +++ b/arch/arm/boot/dts/am33xx-l4.dtsi @@ -127,7 +127,7 @@ ranges = <0x0 0x5000 0x1000>; }; - target-module@7000 { /* 0x44e07000, ap 14 20.0 */ + gpio0_target: target-module@7000 { /* 0x44e07000, ap 14 20.0 */ compatible = "ti,sysc-omap2", "ti,sysc"; ti,hwmods = "gpio1"; reg = <0x7000 0x4>, @@ -2038,7 +2038,9 @@ reg = <0xe000 0x4>, <0xe054 0x4>; reg-names = "rev", "sysc"; - ti,sysc-midle ; + ti,sysc-midle = <SYSC_IDLE_FORCE>, + <SYSC_IDLE_NO>, + <SYSC_IDLE_SMART>; ti,sysc-sidle = <SYSC_IDLE_FORCE>, <SYSC_IDLE_NO>, <SYSC_IDLE_SMART>; diff --git a/arch/arm/boot/dts/am3874-iceboard.dts b/arch/arm/boot/dts/am3874-iceboard.dts index 883fb85135d4..1b4b2b0500e4 100644 --- a/arch/arm/boot/dts/am3874-iceboard.dts +++ b/arch/arm/boot/dts/am3874-iceboard.dts @@ -111,13 +111,13 @@ reg = <0x70>; #address-cells = <1>; #size-cells = <0>; + i2c-mux-idle-disconnect; i2c@0 { /* FMC A */ #address-cells = <1>; #size-cells = <0>; reg = <0>; - i2c-mux-idle-disconnect; }; i2c@1 { @@ -125,7 +125,6 @@ #address-cells = <1>; #size-cells = <0>; reg = <1>; - i2c-mux-idle-disconnect; }; i2c@2 { @@ -133,7 +132,6 @@ #address-cells = <1>; #size-cells = <0>; reg = <2>; - i2c-mux-idle-disconnect; }; i2c@3 { @@ -141,7 +139,6 @@ #address-cells = <1>; #size-cells = <0>; reg = <3>; - i2c-mux-idle-disconnect; }; i2c@4 { @@ -149,14 +146,12 @@ #address-cells = <1>; #size-cells = <0>; reg = <4>; - i2c-mux-idle-disconnect; }; i2c@5 { #address-cells = <1>; #size-cells = <0>; reg = <5>; - i2c-mux-idle-disconnect; ina230@40 { compatible = "ti,ina230"; reg = <0x40>; shunt-resistor = <5000>; }; ina230@41 { compatible = "ti,ina230"; reg = <0x41>; shunt-resistor = <5000>; }; @@ -182,14 +177,12 @@ #address-cells = <1>; #size-cells = <0>; reg = <6>; - i2c-mux-idle-disconnect; }; i2c@7 { #address-cells = <1>; #size-cells = <0>; reg = <7>; - i2c-mux-idle-disconnect; u41: pca9575@20 { compatible = "nxp,pca9575"; diff --git a/arch/arm/boot/dts/am4372.dtsi b/arch/arm/boot/dts/am4372.dtsi index 848e2a8884e2..14bbc438055f 100644 --- a/arch/arm/boot/dts/am4372.dtsi +++ b/arch/arm/boot/dts/am4372.dtsi @@ -337,6 +337,8 @@ ti,hwmods = "dss_dispc"; clocks = <&disp_clk>; clock-names = "fck"; + + max-memory-bandwidth = <230000000>; }; rfbi: rfbi@4832a800 { diff --git a/arch/arm/boot/dts/am571x-idk.dts b/arch/arm/boot/dts/am571x-idk.dts index 0aaacea1d887..820ce3b60bb6 100644 --- a/arch/arm/boot/dts/am571x-idk.dts +++ b/arch/arm/boot/dts/am571x-idk.dts @@ -186,3 +186,30 @@ pinctrl-1 = <&mmc2_pins_hs>; pinctrl-2 = <&mmc2_pins_ddr_rev20 &mmc2_iodelay_ddr_conf>; }; + +&mac_sw { + pinctrl-names = "default", "sleep"; + status = "okay"; +}; + +&cpsw_port1 { + phy-handle = <ðphy0_sw>; + phy-mode = "rgmii"; + ti,dual-emac-pvid = <1>; +}; + +&cpsw_port2 { + phy-handle = <ðphy1_sw>; + phy-mode = "rgmii"; + ti,dual-emac-pvid = <2>; +}; + +&davinci_mdio_sw { + ethphy0_sw: ethernet-phy@0 { + reg = <0>; + }; + + ethphy1_sw: ethernet-phy@1 { + reg = <1>; + }; +}; diff --git a/arch/arm/boot/dts/am572x-idk.dts b/arch/arm/boot/dts/am572x-idk.dts index ea1c119feaa5..c3d966904d64 100644 --- a/arch/arm/boot/dts/am572x-idk.dts +++ b/arch/arm/boot/dts/am572x-idk.dts @@ -27,3 +27,8 @@ pinctrl-1 = <&mmc2_pins_hs>; pinctrl-2 = <&mmc2_pins_ddr_rev20>; }; + +&mac { + status 
= "okay"; + dual_emac; +}; diff --git a/arch/arm/boot/dts/am574x-idk.dts b/arch/arm/boot/dts/am574x-idk.dts index 7935d70874ce..fa0088025b2c 100644 --- a/arch/arm/boot/dts/am574x-idk.dts +++ b/arch/arm/boot/dts/am574x-idk.dts @@ -35,3 +35,8 @@ pinctrl-1 = <&mmc2_pins_default>; pinctrl-2 = <&mmc2_pins_default>; }; + +&mac { + status = "okay"; + dual_emac; +}; diff --git a/arch/arm/boot/dts/am57xx-idk-common.dtsi b/arch/arm/boot/dts/am57xx-idk-common.dtsi index 423855a2a2d6..398721c7201c 100644 --- a/arch/arm/boot/dts/am57xx-idk-common.dtsi +++ b/arch/arm/boot/dts/am57xx-idk-common.dtsi @@ -363,11 +363,6 @@ ext-clk-src; }; -&mac { - status = "okay"; - dual_emac; -}; - &cpsw_emac0 { phy-handle = <ðphy0>; phy-mode = "rgmii"; diff --git a/arch/arm/boot/dts/bcm2835-rpi-zero-w.dts b/arch/arm/boot/dts/bcm2835-rpi-zero-w.dts index 09a088f98566..b75af21069f9 100644 --- a/arch/arm/boot/dts/bcm2835-rpi-zero-w.dts +++ b/arch/arm/boot/dts/bcm2835-rpi-zero-w.dts @@ -113,6 +113,7 @@ #address-cells = <1>; #size-cells = <0>; pinctrl-0 = <&emmc_gpio34 &gpclk2_gpio43>; + bus-width = <4>; mmc-pwrseq = <&wifi_pwrseq>; non-removable; status = "okay"; diff --git a/arch/arm/boot/dts/bcm2837-rpi-cm3.dtsi b/arch/arm/boot/dts/bcm2837-rpi-cm3.dtsi index 7c3cb7ece6cb..925cb37c22f0 100644 --- a/arch/arm/boot/dts/bcm2837-rpi-cm3.dtsi +++ b/arch/arm/boot/dts/bcm2837-rpi-cm3.dtsi @@ -9,6 +9,14 @@ reg = <0 0x40000000>; }; + leds { + /* + * Since there is no upstream GPIO driver yet, + * remove the incomplete node. + */ + /delete-node/ act; + }; + reg_3v3: fixed-regulator { compatible = "regulator-fixed"; regulator-name = "3V3"; diff --git a/arch/arm/boot/dts/dra7-l4.dtsi b/arch/arm/boot/dts/dra7-l4.dtsi index ea0e7c19eb4e..37e048771b0f 100644 --- a/arch/arm/boot/dts/dra7-l4.dtsi +++ b/arch/arm/boot/dts/dra7-l4.dtsi @@ -2732,7 +2732,7 @@ interrupt-names = "tx", "rx"; dmas = <&edma_xbar 129 1>, <&edma_xbar 128 1>; dma-names = "tx", "rx"; - clocks = <&ipu_clkctrl DRA7_IPU_MCASP1_CLKCTRL 22>, + clocks = <&ipu_clkctrl DRA7_IPU_MCASP1_CLKCTRL 0>, <&ipu_clkctrl DRA7_IPU_MCASP1_CLKCTRL 24>, <&ipu_clkctrl DRA7_IPU_MCASP1_CLKCTRL 28>; clock-names = "fck", "ahclkx", "ahclkr"; @@ -2768,8 +2768,8 @@ interrupt-names = "tx", "rx"; dmas = <&edma_xbar 131 1>, <&edma_xbar 130 1>; dma-names = "tx", "rx"; - clocks = <&l4per2_clkctrl DRA7_L4PER2_MCASP2_CLKCTRL 22>, - <&l4per2_clkctrl DRA7_L4PER2_MCASP2_CLKCTRL 24>, + clocks = <&l4per2_clkctrl DRA7_L4PER2_MCASP2_CLKCTRL 0>, + <&ipu_clkctrl DRA7_IPU_MCASP1_CLKCTRL 24>, <&l4per2_clkctrl DRA7_L4PER2_MCASP2_CLKCTRL 28>; clock-names = "fck", "ahclkx", "ahclkr"; status = "disabled"; @@ -2786,9 +2786,8 @@ <SYSC_IDLE_SMART>; /* Domains (P, C): l4per_pwrdm, l4per2_clkdm */ clocks = <&l4per2_clkctrl DRA7_L4PER2_MCASP3_CLKCTRL 0>, - <&l4per2_clkctrl DRA7_L4PER2_MCASP3_CLKCTRL 24>, - <&l4per2_clkctrl DRA7_L4PER2_MCASP3_CLKCTRL 28>; - clock-names = "fck", "ahclkx", "ahclkr"; + <&l4per2_clkctrl DRA7_L4PER2_MCASP3_CLKCTRL 24>; + clock-names = "fck", "ahclkx"; #address-cells = <1>; #size-cells = <1>; ranges = <0x0 0x68000 0x2000>, @@ -2804,7 +2803,7 @@ interrupt-names = "tx", "rx"; dmas = <&edma_xbar 133 1>, <&edma_xbar 132 1>; dma-names = "tx", "rx"; - clocks = <&l4per2_clkctrl DRA7_L4PER2_MCASP3_CLKCTRL 22>, + clocks = <&l4per2_clkctrl DRA7_L4PER2_MCASP3_CLKCTRL 0>, <&l4per2_clkctrl DRA7_L4PER2_MCASP3_CLKCTRL 24>; clock-names = "fck", "ahclkx"; status = "disabled"; @@ -2821,9 +2820,8 @@ <SYSC_IDLE_SMART>; /* Domains (P, C): l4per_pwrdm, l4per2_clkdm */ clocks = <&l4per2_clkctrl DRA7_L4PER2_MCASP4_CLKCTRL 0>, 
- <&l4per2_clkctrl DRA7_L4PER2_MCASP4_CLKCTRL 24>, - <&l4per2_clkctrl DRA7_L4PER2_MCASP4_CLKCTRL 28>; - clock-names = "fck", "ahclkx", "ahclkr"; + <&l4per2_clkctrl DRA7_L4PER2_MCASP4_CLKCTRL 24>; + clock-names = "fck", "ahclkx"; #address-cells = <1>; #size-cells = <1>; ranges = <0x0 0x6c000 0x2000>, @@ -2839,7 +2837,7 @@ interrupt-names = "tx", "rx"; dmas = <&edma_xbar 135 1>, <&edma_xbar 134 1>; dma-names = "tx", "rx"; - clocks = <&l4per2_clkctrl DRA7_L4PER2_MCASP4_CLKCTRL 22>, + clocks = <&l4per2_clkctrl DRA7_L4PER2_MCASP4_CLKCTRL 0>, <&l4per2_clkctrl DRA7_L4PER2_MCASP4_CLKCTRL 24>; clock-names = "fck", "ahclkx"; status = "disabled"; @@ -2856,9 +2854,8 @@ <SYSC_IDLE_SMART>; /* Domains (P, C): l4per_pwrdm, l4per2_clkdm */ clocks = <&l4per2_clkctrl DRA7_L4PER2_MCASP5_CLKCTRL 0>, - <&l4per2_clkctrl DRA7_L4PER2_MCASP5_CLKCTRL 24>, - <&l4per2_clkctrl DRA7_L4PER2_MCASP5_CLKCTRL 28>; - clock-names = "fck", "ahclkx", "ahclkr"; + <&l4per2_clkctrl DRA7_L4PER2_MCASP5_CLKCTRL 24>; + clock-names = "fck", "ahclkx"; #address-cells = <1>; #size-cells = <1>; ranges = <0x0 0x70000 0x2000>, @@ -2874,7 +2871,7 @@ interrupt-names = "tx", "rx"; dmas = <&edma_xbar 137 1>, <&edma_xbar 136 1>; dma-names = "tx", "rx"; - clocks = <&l4per2_clkctrl DRA7_L4PER2_MCASP5_CLKCTRL 22>, + clocks = <&l4per2_clkctrl DRA7_L4PER2_MCASP5_CLKCTRL 0>, <&l4per2_clkctrl DRA7_L4PER2_MCASP5_CLKCTRL 24>; clock-names = "fck", "ahclkx"; status = "disabled"; @@ -2891,9 +2888,8 @@ <SYSC_IDLE_SMART>; /* Domains (P, C): l4per_pwrdm, l4per2_clkdm */ clocks = <&l4per2_clkctrl DRA7_L4PER2_MCASP6_CLKCTRL 0>, - <&l4per2_clkctrl DRA7_L4PER2_MCASP6_CLKCTRL 24>, - <&l4per2_clkctrl DRA7_L4PER2_MCASP6_CLKCTRL 28>; - clock-names = "fck", "ahclkx", "ahclkr"; + <&l4per2_clkctrl DRA7_L4PER2_MCASP6_CLKCTRL 24>; + clock-names = "fck", "ahclkx"; #address-cells = <1>; #size-cells = <1>; ranges = <0x0 0x74000 0x2000>, @@ -2909,7 +2905,7 @@ interrupt-names = "tx", "rx"; dmas = <&edma_xbar 139 1>, <&edma_xbar 138 1>; dma-names = "tx", "rx"; - clocks = <&l4per2_clkctrl DRA7_L4PER2_MCASP6_CLKCTRL 22>, + clocks = <&l4per2_clkctrl DRA7_L4PER2_MCASP6_CLKCTRL 0>, <&l4per2_clkctrl DRA7_L4PER2_MCASP6_CLKCTRL 24>; clock-names = "fck", "ahclkx"; status = "disabled"; @@ -2926,9 +2922,8 @@ <SYSC_IDLE_SMART>; /* Domains (P, C): l4per_pwrdm, l4per2_clkdm */ clocks = <&l4per2_clkctrl DRA7_L4PER2_MCASP7_CLKCTRL 0>, - <&l4per2_clkctrl DRA7_L4PER2_MCASP7_CLKCTRL 24>, - <&l4per2_clkctrl DRA7_L4PER2_MCASP7_CLKCTRL 28>; - clock-names = "fck", "ahclkx", "ahclkr"; + <&l4per2_clkctrl DRA7_L4PER2_MCASP7_CLKCTRL 24>; + clock-names = "fck", "ahclkx"; #address-cells = <1>; #size-cells = <1>; ranges = <0x0 0x78000 0x2000>, @@ -2944,7 +2939,7 @@ interrupt-names = "tx", "rx"; dmas = <&edma_xbar 141 1>, <&edma_xbar 140 1>; dma-names = "tx", "rx"; - clocks = <&l4per2_clkctrl DRA7_L4PER2_MCASP7_CLKCTRL 22>, + clocks = <&l4per2_clkctrl DRA7_L4PER2_MCASP7_CLKCTRL 0>, <&l4per2_clkctrl DRA7_L4PER2_MCASP7_CLKCTRL 24>; clock-names = "fck", "ahclkx"; status = "disabled"; @@ -2961,9 +2956,8 @@ <SYSC_IDLE_SMART>; /* Domains (P, C): l4per_pwrdm, l4per2_clkdm */ clocks = <&l4per2_clkctrl DRA7_L4PER2_MCASP8_CLKCTRL 0>, - <&l4per2_clkctrl DRA7_L4PER2_MCASP8_CLKCTRL 24>, - <&l4per2_clkctrl DRA7_L4PER2_MCASP8_CLKCTRL 28>; - clock-names = "fck", "ahclkx", "ahclkr"; + <&l4per2_clkctrl DRA7_L4PER2_MCASP8_CLKCTRL 24>; + clock-names = "fck", "ahclkx"; #address-cells = <1>; #size-cells = <1>; ranges = <0x0 0x7c000 0x2000>, @@ -2979,7 +2973,7 @@ interrupt-names = "tx", "rx"; dmas = <&edma_xbar 143 1>, <&edma_xbar 
142 1>; dma-names = "tx", "rx"; - clocks = <&l4per2_clkctrl DRA7_L4PER2_MCASP8_CLKCTRL 22>, + clocks = <&l4per2_clkctrl DRA7_L4PER2_MCASP8_CLKCTRL 0>, <&l4per2_clkctrl DRA7_L4PER2_MCASP8_CLKCTRL 24>; clock-names = "fck", "ahclkx"; status = "disabled"; @@ -3085,6 +3079,58 @@ phys = <&phy_gmii_sel 2>; }; }; + + mac_sw: switch@0 { + compatible = "ti,dra7-cpsw-switch","ti,cpsw-switch"; + reg = <0x0 0x4000>; + ranges = <0 0 0x4000>; + clocks = <&gmac_main_clk>; + clock-names = "fck"; + #address-cells = <1>; + #size-cells = <1>; + syscon = <&scm_conf>; + status = "disabled"; + + interrupts = <GIC_SPI 334 IRQ_TYPE_LEVEL_HIGH>, + <GIC_SPI 335 IRQ_TYPE_LEVEL_HIGH>, + <GIC_SPI 336 IRQ_TYPE_LEVEL_HIGH>, + <GIC_SPI 337 IRQ_TYPE_LEVEL_HIGH>; + interrupt-names = "rx_thresh", "rx", "tx", "misc"; + + ethernet-ports { + #address-cells = <1>; + #size-cells = <0>; + + cpsw_port1: port@1 { + reg = <1>; + label = "port1"; + mac-address = [ 00 00 00 00 00 00 ]; + phys = <&phy_gmii_sel 1>; + }; + + cpsw_port2: port@2 { + reg = <2>; + label = "port2"; + mac-address = [ 00 00 00 00 00 00 ]; + phys = <&phy_gmii_sel 2>; + }; + }; + + davinci_mdio_sw: mdio@1000 { + compatible = "ti,cpsw-mdio","ti,davinci_mdio"; + clocks = <&gmac_main_clk>; + clock-names = "fck"; + #address-cells = <1>; + #size-cells = <0>; + bus_freq = <1000000>; + reg = <0x1000 0x100>; + }; + + cpts { + clocks = <&gmac_clkctrl DRA7_GMAC_GMAC_CLKCTRL 25>; + clock-names = "cpts"; + }; + }; }; }; }; diff --git a/arch/arm/boot/dts/imx6-logicpd-baseboard.dtsi b/arch/arm/boot/dts/imx6-logicpd-baseboard.dtsi index 2a6ce87071f9..9e027b9a5f91 100644 --- a/arch/arm/boot/dts/imx6-logicpd-baseboard.dtsi +++ b/arch/arm/boot/dts/imx6-logicpd-baseboard.dtsi @@ -328,6 +328,10 @@ pinctrl-0 = <&pinctrl_pwm3>; }; +&snvs_pwrkey { + status = "okay"; +}; + &ssi2 { status = "okay"; }; diff --git a/arch/arm/boot/dts/imx6-logicpd-som.dtsi b/arch/arm/boot/dts/imx6-logicpd-som.dtsi index 7ceae3573248..547fb141ec0c 100644 --- a/arch/arm/boot/dts/imx6-logicpd-som.dtsi +++ b/arch/arm/boot/dts/imx6-logicpd-som.dtsi @@ -207,6 +207,10 @@ vin-supply = <&sw1c_reg>; }; +&snvs_poweroff { + status = "okay"; +}; + &iomuxc { pinctrl-names = "default"; pinctrl-0 = <&pinctrl_hog>; diff --git a/arch/arm/boot/dts/imx6qdl-sabreauto.dtsi b/arch/arm/boot/dts/imx6qdl-sabreauto.dtsi index f3404dd10537..cf628465cd0a 100644 --- a/arch/arm/boot/dts/imx6qdl-sabreauto.dtsi +++ b/arch/arm/boot/dts/imx6qdl-sabreauto.dtsi @@ -230,6 +230,8 @@ accelerometer@1c { compatible = "fsl,mma8451"; reg = <0x1c>; + pinctrl-names = "default"; + pinctrl-0 = <&pinctrl_mma8451_int>; interrupt-parent = <&gpio6>; interrupts = <31 IRQ_TYPE_LEVEL_LOW>; }; @@ -628,6 +630,12 @@ >; }; + pinctrl_mma8451_int: mma8451intgrp { + fsl,pins = < + MX6QDL_PAD_EIM_BCLK__GPIO6_IO31 0xb0b1 + >; + }; + pinctrl_pwm3: pwm1grp { fsl,pins = < MX6QDL_PAD_SD4_DAT1__PWM3_OUT 0x1b0b1 diff --git a/arch/arm/boot/dts/imx7s.dtsi b/arch/arm/boot/dts/imx7s.dtsi index 710f850e785c..e2e604d6ba0b 100644 --- a/arch/arm/boot/dts/imx7s.dtsi +++ b/arch/arm/boot/dts/imx7s.dtsi @@ -448,7 +448,7 @@ compatible = "fsl,imx7d-gpt", "fsl,imx6sx-gpt"; reg = <0x302d0000 0x10000>; interrupts = <GIC_SPI 55 IRQ_TYPE_LEVEL_HIGH>; - clocks = <&clks IMX7D_CLK_DUMMY>, + clocks = <&clks IMX7D_GPT1_ROOT_CLK>, <&clks IMX7D_GPT1_ROOT_CLK>; clock-names = "ipg", "per"; }; @@ -457,7 +457,7 @@ compatible = "fsl,imx7d-gpt", "fsl,imx6sx-gpt"; reg = <0x302e0000 0x10000>; interrupts = <GIC_SPI 54 IRQ_TYPE_LEVEL_HIGH>; - clocks = <&clks IMX7D_CLK_DUMMY>, + clocks = <&clks 
IMX7D_GPT2_ROOT_CLK>, <&clks IMX7D_GPT2_ROOT_CLK>; clock-names = "ipg", "per"; status = "disabled"; @@ -467,7 +467,7 @@ compatible = "fsl,imx7d-gpt", "fsl,imx6sx-gpt"; reg = <0x302f0000 0x10000>; interrupts = <GIC_SPI 53 IRQ_TYPE_LEVEL_HIGH>; - clocks = <&clks IMX7D_CLK_DUMMY>, + clocks = <&clks IMX7D_GPT3_ROOT_CLK>, <&clks IMX7D_GPT3_ROOT_CLK>; clock-names = "ipg", "per"; status = "disabled"; @@ -477,7 +477,7 @@ compatible = "fsl,imx7d-gpt", "fsl,imx6sx-gpt"; reg = <0x30300000 0x10000>; interrupts = <GIC_SPI 52 IRQ_TYPE_LEVEL_HIGH>; - clocks = <&clks IMX7D_CLK_DUMMY>, + clocks = <&clks IMX7D_GPT4_ROOT_CLK>, <&clks IMX7D_GPT4_ROOT_CLK>; clock-names = "ipg", "per"; status = "disabled"; diff --git a/arch/arm/boot/dts/logicpd-torpedo-som.dtsi b/arch/arm/boot/dts/logicpd-torpedo-som.dtsi index 3fdd0a72f87f..506b118e511a 100644 --- a/arch/arm/boot/dts/logicpd-torpedo-som.dtsi +++ b/arch/arm/boot/dts/logicpd-torpedo-som.dtsi @@ -192,3 +192,7 @@ &twl_gpio { ti,use-leds; }; + +&twl_keypad { + status = "disabled"; +}; diff --git a/arch/arm/boot/dts/mt7629-rfb.dts b/arch/arm/boot/dts/mt7629-rfb.dts index 3621b7d2b22a..9980c10c6e29 100644 --- a/arch/arm/boot/dts/mt7629-rfb.dts +++ b/arch/arm/boot/dts/mt7629-rfb.dts @@ -66,9 +66,21 @@ pinctrl-1 = <&ephy_leds_pins>; status = "okay"; + gmac0: mac@0 { + compatible = "mediatek,eth-mac"; + reg = <0>; + phy-mode = "2500base-x"; + fixed-link { + speed = <2500>; + full-duplex; + pause; + }; + }; + gmac1: mac@1 { compatible = "mediatek,eth-mac"; reg = <1>; + phy-mode = "gmii"; phy-handle = <&phy0>; }; @@ -78,7 +90,6 @@ phy0: ethernet-phy@0 { reg = <0>; - phy-mode = "gmii"; }; }; }; diff --git a/arch/arm/boot/dts/mt7629.dtsi b/arch/arm/boot/dts/mt7629.dtsi index 9608bc2ccb3f..867b88103b9d 100644 --- a/arch/arm/boot/dts/mt7629.dtsi +++ b/arch/arm/boot/dts/mt7629.dtsi @@ -468,14 +468,12 @@ compatible = "mediatek,mt7629-sgmiisys", "syscon"; reg = <0x1b128000 0x3000>; #clock-cells = <1>; - mediatek,physpeed = "2500"; }; sgmiisys1: syscon@1b130000 { compatible = "mediatek,mt7629-sgmiisys", "syscon"; reg = <0x1b130000 0x3000>; #clock-cells = <1>; - mediatek,physpeed = "2500"; }; }; }; diff --git a/arch/arm/boot/dts/omap3-gta04.dtsi b/arch/arm/boot/dts/omap3-gta04.dtsi index d01fc8744fd7..b6ef1a7ac8a4 100644 --- a/arch/arm/boot/dts/omap3-gta04.dtsi +++ b/arch/arm/boot/dts/omap3-gta04.dtsi @@ -124,6 +124,7 @@ spi-max-frequency = <100000>; spi-cpol; spi-cpha; + spi-cs-high; backlight= <&backlight>; label = "lcd"; diff --git a/arch/arm/boot/dts/omap3-n900.dts b/arch/arm/boot/dts/omap3-n900.dts index 84a5ade1e865..63659880eeb3 100644 --- a/arch/arm/boot/dts/omap3-n900.dts +++ b/arch/arm/boot/dts/omap3-n900.dts @@ -155,6 +155,12 @@ pwms = <&pwm9 0 26316 0>; /* 38000 Hz */ }; + rom_rng: rng { + compatible = "nokia,n900-rom-rng"; + clocks = <&rng_ick>; + clock-names = "ick"; + }; + /* controlled (enabled/disabled) directly by bcm2048 and wl1251 */ vctcxo: vctcxo { compatible = "fixed-clock"; diff --git a/arch/arm/boot/dts/omap4-droid4-xt894.dts b/arch/arm/boot/dts/omap4-droid4-xt894.dts index 4454449de00c..a40fe8d49da6 100644 --- a/arch/arm/boot/dts/omap4-droid4-xt894.dts +++ b/arch/arm/boot/dts/omap4-droid4-xt894.dts @@ -369,7 +369,7 @@ compatible = "ti,wl1285", "ti,wl1283"; reg = <2>; /* gpio_100 with gpmc_wait2 pad as wakeirq */ - interrupts-extended = <&gpio4 4 IRQ_TYPE_EDGE_RISING>, + interrupts-extended = <&gpio4 4 IRQ_TYPE_LEVEL_HIGH>, <&omap4_pmx_core 0x4e>; interrupt-names = "irq", "wakeup"; ref-clock-frequency = <26000000>; diff --git 
a/arch/arm/boot/dts/omap4-panda-common.dtsi b/arch/arm/boot/dts/omap4-panda-common.dtsi index 14be2ecb62b1..55ea8b6189af 100644 --- a/arch/arm/boot/dts/omap4-panda-common.dtsi +++ b/arch/arm/boot/dts/omap4-panda-common.dtsi @@ -474,7 +474,7 @@ compatible = "ti,wl1271"; reg = <2>; /* gpio_53 with gpmc_ncs3 pad as wakeup */ - interrupts-extended = <&gpio2 21 IRQ_TYPE_EDGE_RISING>, + interrupts-extended = <&gpio2 21 IRQ_TYPE_LEVEL_HIGH>, <&omap4_pmx_core 0x3a>; interrupt-names = "irq", "wakeup"; ref-clock-frequency = <38400000>; diff --git a/arch/arm/boot/dts/omap4-sdp.dts b/arch/arm/boot/dts/omap4-sdp.dts index 3c274965ff40..91480ac1f328 100644 --- a/arch/arm/boot/dts/omap4-sdp.dts +++ b/arch/arm/boot/dts/omap4-sdp.dts @@ -512,7 +512,7 @@ compatible = "ti,wl1281"; reg = <2>; interrupt-parent = <&gpio1>; - interrupts = <21 IRQ_TYPE_EDGE_RISING>; /* gpio 53 */ + interrupts = <21 IRQ_TYPE_LEVEL_HIGH>; /* gpio 53 */ ref-clock-frequency = <26000000>; tcxo-clock-frequency = <26000000>; }; diff --git a/arch/arm/boot/dts/omap4-var-som-om44-wlan.dtsi b/arch/arm/boot/dts/omap4-var-som-om44-wlan.dtsi index 6dbbc9b3229c..d0032213101e 100644 --- a/arch/arm/boot/dts/omap4-var-som-om44-wlan.dtsi +++ b/arch/arm/boot/dts/omap4-var-som-om44-wlan.dtsi @@ -69,7 +69,7 @@ compatible = "ti,wl1271"; reg = <2>; interrupt-parent = <&gpio2>; - interrupts = <9 IRQ_TYPE_EDGE_RISING>; /* gpio 41 */ + interrupts = <9 IRQ_TYPE_LEVEL_HIGH>; /* gpio 41 */ ref-clock-frequency = <38400000>; }; }; diff --git a/arch/arm/boot/dts/omap5-board-common.dtsi b/arch/arm/boot/dts/omap5-board-common.dtsi index 7fff555ee394..68ac04641bdb 100644 --- a/arch/arm/boot/dts/omap5-board-common.dtsi +++ b/arch/arm/boot/dts/omap5-board-common.dtsi @@ -362,7 +362,7 @@ pinctrl-names = "default"; pinctrl-0 = <&wlcore_irq_pin>; interrupt-parent = <&gpio1>; - interrupts = <14 IRQ_TYPE_EDGE_RISING>; /* gpio 14 */ + interrupts = <14 IRQ_TYPE_LEVEL_HIGH>; /* gpio 14 */ ref-clock-frequency = <26000000>; }; }; diff --git a/arch/arm/boot/dts/omap54xx-clocks.dtsi b/arch/arm/boot/dts/omap54xx-clocks.dtsi index fac2e57dcca9..4791834dacb2 100644 --- a/arch/arm/boot/dts/omap54xx-clocks.dtsi +++ b/arch/arm/boot/dts/omap54xx-clocks.dtsi @@ -1146,7 +1146,7 @@ }; }; - gpu_cm: clock-controller@1500 { + gpu_cm: gpu_cm@1500 { compatible = "ti,omap4-cm"; reg = <0x1500 0x100>; #address-cells = <1>; diff --git a/arch/arm/boot/dts/ste-dbx5x0.dtsi b/arch/arm/boot/dts/ste-dbx5x0.dtsi index a53657b83288..bda454d12150 100644 --- a/arch/arm/boot/dts/ste-dbx5x0.dtsi +++ b/arch/arm/boot/dts/ste-dbx5x0.dtsi @@ -8,6 +8,7 @@ #include <dt-bindings/mfd/dbx500-prcmu.h> #include <dt-bindings/arm/ux500_pm_domains.h> #include <dt-bindings/gpio/gpio.h> +#include <dt-bindings/thermal/thermal.h> / { #address-cells = <1>; @@ -59,8 +60,12 @@ * cooling. */ cpu_thermal: cpu-thermal { - polling-delay-passive = <0>; - polling-delay = <1000>; + polling-delay-passive = <250>; + /* + * This sensor fires interrupts to update the thermal + * zone, so no polling is needed. 
+ */ + polling-delay = <0>; thermal-sensors = <&thermal>; @@ -79,7 +84,7 @@ cooling-maps { trip = <&cpu_alert>; - cooling-device = <&CPU0 0 2>; + cooling-device = <&CPU0 THERMAL_NO_LIMIT THERMAL_NO_LIMIT>; contribution = <100>; }; }; diff --git a/arch/arm/boot/dts/stm32mp157-pinctrl.dtsi b/arch/arm/boot/dts/stm32mp157-pinctrl.dtsi index e4a0d51ec3a8..0a3a7d66737b 100644 --- a/arch/arm/boot/dts/stm32mp157-pinctrl.dtsi +++ b/arch/arm/boot/dts/stm32mp157-pinctrl.dtsi @@ -609,13 +609,13 @@ <STM32_PINMUX('F', 6, AF9)>; /* QSPI_BK1_IO3 */ bias-disable; drive-push-pull; - slew-rate = <3>; + slew-rate = <1>; }; pins2 { pinmux = <STM32_PINMUX('B', 6, AF10)>; /* QSPI_BK1_NCS */ bias-pull-up; drive-push-pull; - slew-rate = <3>; + slew-rate = <1>; }; }; @@ -637,13 +637,13 @@ <STM32_PINMUX('G', 7, AF11)>; /* QSPI_BK2_IO3 */ bias-disable; drive-push-pull; - slew-rate = <3>; + slew-rate = <1>; }; pins2 { pinmux = <STM32_PINMUX('C', 0, AF10)>; /* QSPI_BK2_NCS */ bias-pull-up; drive-push-pull; - slew-rate = <3>; + slew-rate = <1>; }; }; diff --git a/arch/arm/boot/dts/stm32mp157c-ev1.dts b/arch/arm/boot/dts/stm32mp157c-ev1.dts index 89d29b50c3f4..91fc0a315c49 100644 --- a/arch/arm/boot/dts/stm32mp157c-ev1.dts +++ b/arch/arm/boot/dts/stm32mp157c-ev1.dts @@ -183,14 +183,12 @@ ov5640: camera@3c { compatible = "ovti,ov5640"; - pinctrl-names = "default"; - pinctrl-0 = <&ov5640_pins>; reg = <0x3c>; clocks = <&clk_ext_camera>; clock-names = "xclk"; DOVDD-supply = <&v2v8>; - powerdown-gpios = <&stmfx_pinctrl 18 GPIO_ACTIVE_HIGH>; - reset-gpios = <&stmfx_pinctrl 19 GPIO_ACTIVE_LOW>; + powerdown-gpios = <&stmfx_pinctrl 18 (GPIO_ACTIVE_HIGH | GPIO_PUSH_PULL)>; + reset-gpios = <&stmfx_pinctrl 19 (GPIO_ACTIVE_LOW | GPIO_PUSH_PULL)>; rotation = <180>; status = "okay"; @@ -223,15 +221,8 @@ joystick_pins: joystick { pins = "gpio0", "gpio1", "gpio2", "gpio3", "gpio4"; - drive-push-pull; bias-pull-down; }; - - ov5640_pins: camera { - pins = "agpio2", "agpio3"; /* stmfx pins 18 & 19 */ - drive-push-pull; - output-low; - }; }; }; }; diff --git a/arch/arm/boot/dts/stm32mp157c.dtsi b/arch/arm/boot/dts/stm32mp157c.dtsi index 9b11654a0a39..f98e0370c0bc 100644 --- a/arch/arm/boot/dts/stm32mp157c.dtsi +++ b/arch/arm/boot/dts/stm32mp157c.dtsi @@ -932,7 +932,7 @@ interrupt-names = "int0", "int1"; clocks = <&rcc CK_HSE>, <&rcc FDCAN_K>; clock-names = "hclk", "cclk"; - bosch,mram-cfg = <0x1400 0 0 32 0 0 2 2>; + bosch,mram-cfg = <0x0 0 0 32 0 0 2 2>; status = "disabled"; }; @@ -945,7 +945,7 @@ interrupt-names = "int0", "int1"; clocks = <&rcc CK_HSE>, <&rcc FDCAN_K>; clock-names = "hclk", "cclk"; - bosch,mram-cfg = <0x0 0 0 32 0 0 2 2>; + bosch,mram-cfg = <0x1400 0 0 32 0 0 2 2>; status = "disabled"; }; diff --git a/arch/arm/boot/dts/sun4i-a10.dtsi b/arch/arm/boot/dts/sun4i-a10.dtsi index ce823c44e98a..4c268b70b735 100644 --- a/arch/arm/boot/dts/sun4i-a10.dtsi +++ b/arch/arm/boot/dts/sun4i-a10.dtsi @@ -520,6 +520,7 @@ interrupts = <39>; clocks = <&ccu CLK_AHB_EHCI0>; phys = <&usbphy 1>; + phy-names = "usb"; status = "disabled"; }; @@ -529,6 +530,7 @@ interrupts = <64>; clocks = <&ccu CLK_USB_OHCI0>, <&ccu CLK_AHB_OHCI0>; phys = <&usbphy 1>; + phy-names = "usb"; status = "disabled"; }; @@ -608,6 +610,7 @@ interrupts = <40>; clocks = <&ccu CLK_AHB_EHCI1>; phys = <&usbphy 2>; + phy-names = "usb"; status = "disabled"; }; @@ -617,6 +620,7 @@ interrupts = <65>; clocks = <&ccu CLK_USB_OHCI1>, <&ccu CLK_AHB_OHCI1>; phys = <&usbphy 2>; + phy-names = "usb"; status = "disabled"; }; diff --git a/arch/arm/boot/dts/sun5i.dtsi 
b/arch/arm/boot/dts/sun5i.dtsi index cfb1efc8828c..6befa236ba99 100644 --- a/arch/arm/boot/dts/sun5i.dtsi +++ b/arch/arm/boot/dts/sun5i.dtsi @@ -391,6 +391,7 @@ interrupts = <39>; clocks = <&ccu CLK_AHB_EHCI>; phys = <&usbphy 1>; + phy-names = "usb"; status = "disabled"; }; @@ -400,6 +401,7 @@ interrupts = <40>; clocks = <&ccu CLK_USB_OHCI>, <&ccu CLK_AHB_OHCI>; phys = <&usbphy 1>; + phy-names = "usb"; status = "disabled"; }; diff --git a/arch/arm/boot/dts/sun6i-a31.dtsi b/arch/arm/boot/dts/sun6i-a31.dtsi index bbeb743633c6..ac7638078420 100644 --- a/arch/arm/boot/dts/sun6i-a31.dtsi +++ b/arch/arm/boot/dts/sun6i-a31.dtsi @@ -545,6 +545,7 @@ clocks = <&ccu CLK_AHB1_EHCI0>; resets = <&ccu RST_AHB1_EHCI0>; phys = <&usbphy 1>; + phy-names = "usb"; status = "disabled"; }; @@ -555,6 +556,7 @@ clocks = <&ccu CLK_AHB1_OHCI0>, <&ccu CLK_USB_OHCI0>; resets = <&ccu RST_AHB1_OHCI0>; phys = <&usbphy 1>; + phy-names = "usb"; status = "disabled"; }; @@ -565,6 +567,7 @@ clocks = <&ccu CLK_AHB1_EHCI1>; resets = <&ccu RST_AHB1_EHCI1>; phys = <&usbphy 2>; + phy-names = "usb"; status = "disabled"; }; @@ -575,6 +578,7 @@ clocks = <&ccu CLK_AHB1_OHCI1>, <&ccu CLK_USB_OHCI1>; resets = <&ccu RST_AHB1_OHCI1>; phys = <&usbphy 2>; + phy-names = "usb"; status = "disabled"; }; diff --git a/arch/arm/boot/dts/sun7i-a20.dtsi b/arch/arm/boot/dts/sun7i-a20.dtsi index 49380de754a9..8aebefd6accf 100644 --- a/arch/arm/boot/dts/sun7i-a20.dtsi +++ b/arch/arm/boot/dts/sun7i-a20.dtsi @@ -380,9 +380,8 @@ compatible = "allwinner,sun7i-a20-csi0"; reg = <0x01c09000 0x1000>; interrupts = <GIC_SPI 42 IRQ_TYPE_LEVEL_HIGH>; - clocks = <&ccu CLK_AHB_CSI0>, <&ccu CLK_CSI0>, - <&ccu CLK_CSI_SCLK>, <&ccu CLK_DRAM_CSI0>; - clock-names = "bus", "mod", "isp", "ram"; + clocks = <&ccu CLK_AHB_CSI0>, <&ccu CLK_CSI_SCLK>, <&ccu CLK_DRAM_CSI0>; + clock-names = "bus", "isp", "ram"; resets = <&ccu RST_CSI0>; status = "disabled"; }; @@ -623,6 +622,7 @@ interrupts = <GIC_SPI 39 IRQ_TYPE_LEVEL_HIGH>; clocks = <&ccu CLK_AHB_EHCI0>; phys = <&usbphy 1>; + phy-names = "usb"; status = "disabled"; }; @@ -632,6 +632,7 @@ interrupts = <GIC_SPI 64 IRQ_TYPE_LEVEL_HIGH>; clocks = <&ccu CLK_USB_OHCI0>, <&ccu CLK_AHB_OHCI0>; phys = <&usbphy 1>; + phy-names = "usb"; status = "disabled"; }; @@ -714,6 +715,7 @@ interrupts = <GIC_SPI 40 IRQ_TYPE_LEVEL_HIGH>; clocks = <&ccu CLK_AHB_EHCI1>; phys = <&usbphy 2>; + phy-names = "usb"; status = "disabled"; }; @@ -723,6 +725,7 @@ interrupts = <GIC_SPI 65 IRQ_TYPE_LEVEL_HIGH>; clocks = <&ccu CLK_USB_OHCI1>, <&ccu CLK_AHB_OHCI1>; phys = <&usbphy 2>; + phy-names = "usb"; status = "disabled"; }; diff --git a/arch/arm/boot/dts/sun8i-a23-a33.dtsi b/arch/arm/boot/dts/sun8i-a23-a33.dtsi index 52eed0ae3607..f292f96ab39b 100644 --- a/arch/arm/boot/dts/sun8i-a23-a33.dtsi +++ b/arch/arm/boot/dts/sun8i-a23-a33.dtsi @@ -307,6 +307,7 @@ clocks = <&ccu CLK_BUS_EHCI>; resets = <&ccu RST_BUS_EHCI>; phys = <&usbphy 1>; + phy-names = "usb"; status = "disabled"; }; @@ -317,6 +318,7 @@ clocks = <&ccu CLK_BUS_OHCI>, <&ccu CLK_USB_OHCI>; resets = <&ccu RST_BUS_OHCI>; phys = <&usbphy 1>; + phy-names = "usb"; status = "disabled"; }; diff --git a/arch/arm/boot/dts/sun8i-a83t-tbs-a711.dts b/arch/arm/boot/dts/sun8i-a83t-tbs-a711.dts index 568b90ece342..3bec3e0a81b2 100644 --- a/arch/arm/boot/dts/sun8i-a83t-tbs-a711.dts +++ b/arch/arm/boot/dts/sun8i-a83t-tbs-a711.dts @@ -192,6 +192,7 @@ vqmmc-supply = <®_dldo1>; non-removable; wakeup-source; + keep-power-in-suspend; status = "okay"; brcmf: wifi@1 { diff --git a/arch/arm/boot/dts/sun8i-a83t.dtsi 
b/arch/arm/boot/dts/sun8i-a83t.dtsi index 523be6611c50..74bb053cf23c 100644 --- a/arch/arm/boot/dts/sun8i-a83t.dtsi +++ b/arch/arm/boot/dts/sun8i-a83t.dtsi @@ -632,6 +632,7 @@ clocks = <&ccu CLK_BUS_EHCI0>; resets = <&ccu RST_BUS_EHCI0>; phys = <&usbphy 1>; + phy-names = "usb"; status = "disabled"; }; @@ -643,6 +644,7 @@ clocks = <&ccu CLK_BUS_OHCI0>, <&ccu CLK_USB_OHCI0>; resets = <&ccu RST_BUS_OHCI0>; phys = <&usbphy 1>; + phy-names = "usb"; status = "disabled"; }; @@ -654,6 +656,7 @@ clocks = <&ccu CLK_BUS_EHCI1>; resets = <&ccu RST_BUS_EHCI1>; phys = <&usbphy 2>; + phy-names = "usb"; status = "disabled"; }; diff --git a/arch/arm/boot/dts/sun8i-r40.dtsi b/arch/arm/boot/dts/sun8i-r40.dtsi index bde068111b85..c9c2688db66d 100644 --- a/arch/arm/boot/dts/sun8i-r40.dtsi +++ b/arch/arm/boot/dts/sun8i-r40.dtsi @@ -273,6 +273,7 @@ clocks = <&ccu CLK_BUS_EHCI1>; resets = <&ccu RST_BUS_EHCI1>; phys = <&usbphy 1>; + phy-names = "usb"; status = "disabled"; }; @@ -284,6 +285,7 @@ <&ccu CLK_USB_OHCI1>; resets = <&ccu RST_BUS_OHCI1>; phys = <&usbphy 1>; + phy-names = "usb"; status = "disabled"; }; @@ -294,6 +296,7 @@ clocks = <&ccu CLK_BUS_EHCI2>; resets = <&ccu RST_BUS_EHCI2>; phys = <&usbphy 2>; + phy-names = "usb"; status = "disabled"; }; @@ -305,6 +308,7 @@ <&ccu CLK_USB_OHCI2>; resets = <&ccu RST_BUS_OHCI2>; phys = <&usbphy 2>; + phy-names = "usb"; status = "disabled"; }; diff --git a/arch/arm/boot/dts/sun9i-a80.dtsi b/arch/arm/boot/dts/sun9i-a80.dtsi index c34d505c7efe..b9b6fb00be28 100644 --- a/arch/arm/boot/dts/sun9i-a80.dtsi +++ b/arch/arm/boot/dts/sun9i-a80.dtsi @@ -346,6 +346,7 @@ clocks = <&usb_clocks CLK_BUS_HCI0>; resets = <&usb_clocks RST_USB0_HCI>; phys = <&usbphy1>; + phy-names = "usb"; status = "disabled"; }; @@ -357,6 +358,7 @@ <&usb_clocks CLK_USB_OHCI0>; resets = <&usb_clocks RST_USB0_HCI>; phys = <&usbphy1>; + phy-names = "usb"; status = "disabled"; }; @@ -378,6 +380,7 @@ clocks = <&usb_clocks CLK_BUS_HCI1>; resets = <&usb_clocks RST_USB1_HCI>; phys = <&usbphy2>; + phy-names = "usb"; status = "disabled"; }; @@ -407,6 +410,7 @@ clocks = <&usb_clocks CLK_BUS_HCI2>; resets = <&usb_clocks RST_USB2_HCI>; phys = <&usbphy3>; + phy-names = "usb"; status = "disabled"; }; @@ -418,6 +422,7 @@ <&usb_clocks CLK_USB_OHCI2>; resets = <&usb_clocks RST_USB2_HCI>; phys = <&usbphy3>; + phy-names = "usb"; status = "disabled"; }; diff --git a/arch/arm/boot/dts/sunxi-h3-h5.dtsi b/arch/arm/boot/dts/sunxi-h3-h5.dtsi index eba190b3f9de..107eeafad20a 100644 --- a/arch/arm/boot/dts/sunxi-h3-h5.dtsi +++ b/arch/arm/boot/dts/sunxi-h3-h5.dtsi @@ -304,6 +304,7 @@ clocks = <&ccu CLK_BUS_EHCI1>, <&ccu CLK_BUS_OHCI1>; resets = <&ccu RST_BUS_EHCI1>, <&ccu RST_BUS_OHCI1>; phys = <&usbphy 1>; + phy-names = "usb"; status = "disabled"; }; @@ -315,6 +316,7 @@ <&ccu CLK_USB_OHCI1>; resets = <&ccu RST_BUS_EHCI1>, <&ccu RST_BUS_OHCI1>; phys = <&usbphy 1>; + phy-names = "usb"; status = "disabled"; }; @@ -325,6 +327,7 @@ clocks = <&ccu CLK_BUS_EHCI2>, <&ccu CLK_BUS_OHCI2>; resets = <&ccu RST_BUS_EHCI2>, <&ccu RST_BUS_OHCI2>; phys = <&usbphy 2>; + phy-names = "usb"; status = "disabled"; }; @@ -336,6 +339,7 @@ <&ccu CLK_USB_OHCI2>; resets = <&ccu RST_BUS_EHCI2>, <&ccu RST_BUS_OHCI2>; phys = <&usbphy 2>; + phy-names = "usb"; status = "disabled"; }; @@ -346,6 +350,7 @@ clocks = <&ccu CLK_BUS_EHCI3>, <&ccu CLK_BUS_OHCI3>; resets = <&ccu RST_BUS_EHCI3>, <&ccu RST_BUS_OHCI3>; phys = <&usbphy 3>; + phy-names = "usb"; status = "disabled"; }; @@ -357,6 +362,7 @@ <&ccu CLK_USB_OHCI3>; resets = <&ccu RST_BUS_EHCI3>, <&ccu 
RST_BUS_OHCI3>; phys = <&usbphy 3>; + phy-names = "usb"; status = "disabled"; }; diff --git a/arch/arm/boot/dts/vf610-zii-scu4-aib.dts b/arch/arm/boot/dts/vf610-zii-scu4-aib.dts index dc8a5f37a1ef..c8ebb23c4e02 100644 --- a/arch/arm/boot/dts/vf610-zii-scu4-aib.dts +++ b/arch/arm/boot/dts/vf610-zii-scu4-aib.dts @@ -602,6 +602,7 @@ #address-cells = <1>; #size-cells = <0>; reg = <0x70>; + i2c-mux-idle-disconnect; sff0_i2c: i2c@1 { #address-cells = <1>; @@ -640,6 +641,7 @@ reg = <0x71>; #address-cells = <1>; #size-cells = <0>; + i2c-mux-idle-disconnect; sff5_i2c: i2c@1 { #address-cells = <1>; diff --git a/arch/arm/configs/badge4_defconfig b/arch/arm/configs/badge4_defconfig index 5ae5b5228467..ef484c4cfd1a 100644 --- a/arch/arm/configs/badge4_defconfig +++ b/arch/arm/configs/badge4_defconfig @@ -91,7 +91,6 @@ CONFIG_USB_SERIAL_PL2303=m CONFIG_USB_SERIAL_CYBERJACK=m CONFIG_USB_SERIAL_XIRCOM=m CONFIG_USB_SERIAL_OMNINET=m -CONFIG_USB_RIO500=m CONFIG_EXT2_FS=m CONFIG_EXT3_FS=m CONFIG_MSDOS_FS=y diff --git a/arch/arm/configs/corgi_defconfig b/arch/arm/configs/corgi_defconfig index e4f6442588e7..4fec2ec379ad 100644 --- a/arch/arm/configs/corgi_defconfig +++ b/arch/arm/configs/corgi_defconfig @@ -195,7 +195,6 @@ CONFIG_USB_SERIAL_XIRCOM=m CONFIG_USB_SERIAL_OMNINET=m CONFIG_USB_EMI62=m CONFIG_USB_EMI26=m -CONFIG_USB_RIO500=m CONFIG_USB_LEGOTOWER=m CONFIG_USB_LCD=m CONFIG_USB_CYTHERM=m diff --git a/arch/arm/configs/davinci_all_defconfig b/arch/arm/configs/davinci_all_defconfig index b34970ce6b31..231f8973bbb2 100644 --- a/arch/arm/configs/davinci_all_defconfig +++ b/arch/arm/configs/davinci_all_defconfig @@ -167,6 +167,7 @@ CONFIG_FB=y CONFIG_FIRMWARE_EDID=y CONFIG_FB_DA8XX=y CONFIG_BACKLIGHT_PWM=m +CONFIG_BACKLIGHT_GPIO=m CONFIG_FRAMEBUFFER_CONSOLE=y CONFIG_LOGO=y CONFIG_SOUND=m @@ -228,7 +229,7 @@ CONFIG_RTC_DRV_OMAP=m CONFIG_DMADEVICES=y CONFIG_TI_EDMA=y CONFIG_COMMON_CLK_PWM=m -CONFIG_REMOTEPROC=m +CONFIG_REMOTEPROC=y CONFIG_DA8XX_REMOTEPROC=m CONFIG_MEMORY=y CONFIG_TI_AEMIF=m diff --git a/arch/arm/configs/imx_v6_v7_defconfig b/arch/arm/configs/imx_v6_v7_defconfig index 9bfffbe22d53..0f7381ee0c37 100644 --- a/arch/arm/configs/imx_v6_v7_defconfig +++ b/arch/arm/configs/imx_v6_v7_defconfig @@ -276,6 +276,7 @@ CONFIG_VIDEO_OV5640=m CONFIG_VIDEO_OV5645=m CONFIG_IMX_IPUV3_CORE=y CONFIG_DRM=y +CONFIG_DRM_MSM=y CONFIG_DRM_PANEL_LVDS=y CONFIG_DRM_PANEL_SIMPLE=y CONFIG_DRM_PANEL_SEIKO_43WVF1G=y diff --git a/arch/arm/configs/multi_v7_defconfig b/arch/arm/configs/multi_v7_defconfig index 13ba53286901..e4c8def9a0a5 100644 --- a/arch/arm/configs/multi_v7_defconfig +++ b/arch/arm/configs/multi_v7_defconfig @@ -415,7 +415,7 @@ CONFIG_SPI_SH_MSIOF=m CONFIG_SPI_SH_HSPI=y CONFIG_SPI_SIRF=y CONFIG_SPI_STM32=m -CONFIG_SPI_STM32_QSPI=m +CONFIG_SPI_STM32_QSPI=y CONFIG_SPI_SUN4I=y CONFIG_SPI_SUN6I=y CONFIG_SPI_TEGRA114=y @@ -933,7 +933,7 @@ CONFIG_BCM2835_MBOX=y CONFIG_ROCKCHIP_IOMMU=y CONFIG_TEGRA_IOMMU_GART=y CONFIG_TEGRA_IOMMU_SMMU=y -CONFIG_REMOTEPROC=m +CONFIG_REMOTEPROC=y CONFIG_ST_REMOTEPROC=m CONFIG_RPMSG_VIRTIO=m CONFIG_ASPEED_LPC_CTRL=m diff --git a/arch/arm/configs/omap2plus_defconfig b/arch/arm/configs/omap2plus_defconfig index 64eb896907bf..89cce8d4bc6b 100644 --- a/arch/arm/configs/omap2plus_defconfig +++ b/arch/arm/configs/omap2plus_defconfig @@ -356,14 +356,15 @@ CONFIG_DRM_OMAP_CONNECTOR_HDMI=m CONFIG_DRM_OMAP_CONNECTOR_ANALOG_TV=m CONFIG_DRM_OMAP_PANEL_DPI=m CONFIG_DRM_OMAP_PANEL_DSI_CM=m -CONFIG_DRM_OMAP_PANEL_SONY_ACX565AKM=m -CONFIG_DRM_OMAP_PANEL_LGPHILIPS_LB035Q02=m 
-CONFIG_DRM_OMAP_PANEL_SHARP_LS037V7DW01=m -CONFIG_DRM_OMAP_PANEL_TPO_TD028TTEC1=m -CONFIG_DRM_OMAP_PANEL_TPO_TD043MTEA1=m -CONFIG_DRM_OMAP_PANEL_NEC_NL8048HL11=m CONFIG_DRM_TILCDC=m CONFIG_DRM_PANEL_SIMPLE=m +CONFIG_DRM_TI_TFP410=m +CONFIG_DRM_PANEL_LG_LB035Q02=m +CONFIG_DRM_PANEL_NEC_NL8048HL11=m +CONFIG_DRM_PANEL_SHARP_LS037V7DW01=m +CONFIG_DRM_PANEL_SONY_ACX565AKM=m +CONFIG_DRM_PANEL_TPO_TD028TTEC1=m +CONFIG_DRM_PANEL_TPO_TD043MTEA1=m CONFIG_FB=y CONFIG_FIRMWARE_EDID=y CONFIG_FB_MODE_HELPERS=y @@ -423,6 +424,7 @@ CONFIG_USB_SERIAL_GENERIC=y CONFIG_USB_SERIAL_SIMPLE=m CONFIG_USB_SERIAL_FTDI_SIO=m CONFIG_USB_SERIAL_PL2303=m +CONFIG_USB_SERIAL_OPTION=m CONFIG_USB_TEST=m CONFIG_NOP_USB_XCEIV=m CONFIG_AM335X_PHY_USB=m @@ -460,6 +462,7 @@ CONFIG_MMC_SDHCI_OMAP=y CONFIG_NEW_LEDS=y CONFIG_LEDS_CLASS=m CONFIG_LEDS_CPCAP=m +CONFIG_LEDS_LM3532=m CONFIG_LEDS_GPIO=m CONFIG_LEDS_PCA963X=m CONFIG_LEDS_PWM=m @@ -481,7 +484,7 @@ CONFIG_RTC_DRV_OMAP=m CONFIG_RTC_DRV_CPCAP=m CONFIG_DMADEVICES=y CONFIG_OMAP_IOMMU=y -CONFIG_REMOTEPROC=m +CONFIG_REMOTEPROC=y CONFIG_OMAP_REMOTEPROC=m CONFIG_WKUP_M3_RPROC=m CONFIG_SOC_TI=y @@ -551,3 +554,4 @@ CONFIG_DEBUG_INFO_DWARF4=y CONFIG_MAGIC_SYSRQ=y CONFIG_SCHEDSTATS=y # CONFIG_DEBUG_BUGVERBOSE is not set +CONFIG_TI_CPSW_SWITCHDEV=y diff --git a/arch/arm/configs/pxa_defconfig b/arch/arm/configs/pxa_defconfig index 787c3f9be414..b817c57f05f1 100644 --- a/arch/arm/configs/pxa_defconfig +++ b/arch/arm/configs/pxa_defconfig @@ -581,7 +581,6 @@ CONFIG_USB_SERIAL_XIRCOM=m CONFIG_USB_SERIAL_OMNINET=m CONFIG_USB_EMI62=m CONFIG_USB_EMI26=m -CONFIG_USB_RIO500=m CONFIG_USB_LEGOTOWER=m CONFIG_USB_LCD=m CONFIG_USB_CYTHERM=m diff --git a/arch/arm/configs/s3c2410_defconfig b/arch/arm/configs/s3c2410_defconfig index 95b5a4ffddea..73ed73a8785a 100644 --- a/arch/arm/configs/s3c2410_defconfig +++ b/arch/arm/configs/s3c2410_defconfig @@ -327,7 +327,6 @@ CONFIG_USB_EMI62=m CONFIG_USB_EMI26=m CONFIG_USB_ADUTUX=m CONFIG_USB_SEVSEG=m -CONFIG_USB_RIO500=m CONFIG_USB_LEGOTOWER=m CONFIG_USB_LCD=m CONFIG_USB_CYPRESS_CY7C63=m diff --git a/arch/arm/configs/spitz_defconfig b/arch/arm/configs/spitz_defconfig index 4fb51d665abb..a1cdbfa064c5 100644 --- a/arch/arm/configs/spitz_defconfig +++ b/arch/arm/configs/spitz_defconfig @@ -189,7 +189,6 @@ CONFIG_USB_SERIAL_XIRCOM=m CONFIG_USB_SERIAL_OMNINET=m CONFIG_USB_EMI62=m CONFIG_USB_EMI26=m -CONFIG_USB_RIO500=m CONFIG_USB_LEGOTOWER=m CONFIG_USB_LCD=m CONFIG_USB_CYTHERM=m diff --git a/arch/arm/crypto/Kconfig b/arch/arm/crypto/Kconfig index b24df84a1d7a..2674de6ada1f 100644 --- a/arch/arm/crypto/Kconfig +++ b/arch/arm/crypto/Kconfig @@ -30,7 +30,7 @@ config CRYPTO_SHA1_ARM_NEON config CRYPTO_SHA1_ARM_CE tristate "SHA1 digest algorithm (ARM v8 Crypto Extensions)" - depends on KERNEL_MODE_NEON + depends on KERNEL_MODE_NEON && (CC_IS_CLANG || GCC_VERSION >= 40800) select CRYPTO_SHA1_ARM select CRYPTO_HASH help @@ -39,7 +39,7 @@ config CRYPTO_SHA1_ARM_CE config CRYPTO_SHA2_ARM_CE tristate "SHA-224/256 digest algorithm (ARM v8 Crypto Extensions)" - depends on KERNEL_MODE_NEON + depends on KERNEL_MODE_NEON && (CC_IS_CLANG || GCC_VERSION >= 40800) select CRYPTO_SHA256_ARM select CRYPTO_HASH help @@ -81,7 +81,7 @@ config CRYPTO_AES_ARM config CRYPTO_AES_ARM_BS tristate "Bit sliced AES using NEON instructions" depends on KERNEL_MODE_NEON - select CRYPTO_BLKCIPHER + select CRYPTO_SKCIPHER select CRYPTO_LIB_AES select CRYPTO_SIMD help @@ -96,8 +96,9 @@ config CRYPTO_AES_ARM_BS config CRYPTO_AES_ARM_CE tristate "Accelerated AES using ARMv8 Crypto Extensions" - depends 
on KERNEL_MODE_NEON - select CRYPTO_BLKCIPHER + depends on KERNEL_MODE_NEON && (CC_IS_CLANG || GCC_VERSION >= 40800) + select CRYPTO_SKCIPHER + select CRYPTO_LIB_AES select CRYPTO_SIMD help Use an implementation of AES in CBC, CTR and XTS modes that uses @@ -105,7 +106,7 @@ config CRYPTO_AES_ARM_CE config CRYPTO_GHASH_ARM_CE tristate "PMULL-accelerated GHASH using NEON/ARMv8 Crypto Extensions" - depends on KERNEL_MODE_NEON + depends on KERNEL_MODE_NEON && (CC_IS_CLANG || GCC_VERSION >= 40800) select CRYPTO_HASH select CRYPTO_CRYPTD select CRYPTO_GF128MUL @@ -117,23 +118,35 @@ config CRYPTO_GHASH_ARM_CE config CRYPTO_CRCT10DIF_ARM_CE tristate "CRCT10DIF digest algorithm using PMULL instructions" - depends on KERNEL_MODE_NEON && CRC_T10DIF + depends on KERNEL_MODE_NEON && (CC_IS_CLANG || GCC_VERSION >= 40800) + depends on CRC_T10DIF select CRYPTO_HASH config CRYPTO_CRC32_ARM_CE tristate "CRC32(C) digest algorithm using CRC and/or PMULL instructions" - depends on KERNEL_MODE_NEON && CRC32 + depends on KERNEL_MODE_NEON && (CC_IS_CLANG || GCC_VERSION >= 40800) + depends on CRC32 select CRYPTO_HASH config CRYPTO_CHACHA20_NEON - tristate "NEON accelerated ChaCha stream cipher algorithms" - depends on KERNEL_MODE_NEON - select CRYPTO_BLKCIPHER - select CRYPTO_CHACHA20 + tristate "NEON and scalar accelerated ChaCha stream cipher algorithms" + select CRYPTO_SKCIPHER + select CRYPTO_ARCH_HAVE_LIB_CHACHA + +config CRYPTO_POLY1305_ARM + tristate "Accelerated scalar and SIMD Poly1305 hash implementations" + select CRYPTO_HASH + select CRYPTO_ARCH_HAVE_LIB_POLY1305 config CRYPTO_NHPOLY1305_NEON tristate "NEON accelerated NHPoly1305 hash function (for Adiantum)" depends on KERNEL_MODE_NEON select CRYPTO_NHPOLY1305 +config CRYPTO_CURVE25519_NEON + tristate "NEON accelerated Curve25519 scalar multiplication library" + depends on KERNEL_MODE_NEON + select CRYPTO_LIB_CURVE25519_GENERIC + select CRYPTO_ARCH_HAVE_LIB_CURVE25519 + endif diff --git a/arch/arm/crypto/Makefile b/arch/arm/crypto/Makefile index 4180f3a13512..b745c17d356f 100644 --- a/arch/arm/crypto/Makefile +++ b/arch/arm/crypto/Makefile @@ -10,34 +10,16 @@ obj-$(CONFIG_CRYPTO_SHA1_ARM_NEON) += sha1-arm-neon.o obj-$(CONFIG_CRYPTO_SHA256_ARM) += sha256-arm.o obj-$(CONFIG_CRYPTO_SHA512_ARM) += sha512-arm.o obj-$(CONFIG_CRYPTO_CHACHA20_NEON) += chacha-neon.o +obj-$(CONFIG_CRYPTO_POLY1305_ARM) += poly1305-arm.o obj-$(CONFIG_CRYPTO_NHPOLY1305_NEON) += nhpoly1305-neon.o +obj-$(CONFIG_CRYPTO_CURVE25519_NEON) += curve25519-neon.o -ce-obj-$(CONFIG_CRYPTO_AES_ARM_CE) += aes-arm-ce.o -ce-obj-$(CONFIG_CRYPTO_SHA1_ARM_CE) += sha1-arm-ce.o -ce-obj-$(CONFIG_CRYPTO_SHA2_ARM_CE) += sha2-arm-ce.o -ce-obj-$(CONFIG_CRYPTO_GHASH_ARM_CE) += ghash-arm-ce.o -ce-obj-$(CONFIG_CRYPTO_CRCT10DIF_ARM_CE) += crct10dif-arm-ce.o -crc-obj-$(CONFIG_CRYPTO_CRC32_ARM_CE) += crc32-arm-ce.o - -ifneq ($(crc-obj-y)$(crc-obj-m),) -ifeq ($(call as-instr,.arch armv8-a\n.arch_extension crc,y,n),y) -ce-obj-y += $(crc-obj-y) -ce-obj-m += $(crc-obj-m) -else -$(warning These CRC Extensions modules need binutils 2.23 or higher) -$(warning $(crc-obj-y) $(crc-obj-m)) -endif -endif - -ifneq ($(ce-obj-y)$(ce-obj-m),) -ifeq ($(call as-instr,.fpu crypto-neon-fp-armv8,y,n),y) -obj-y += $(ce-obj-y) -obj-m += $(ce-obj-m) -else -$(warning These ARMv8 Crypto Extensions modules need binutils 2.23 or higher) -$(warning $(ce-obj-y) $(ce-obj-m)) -endif -endif +obj-$(CONFIG_CRYPTO_AES_ARM_CE) += aes-arm-ce.o +obj-$(CONFIG_CRYPTO_SHA1_ARM_CE) += sha1-arm-ce.o +obj-$(CONFIG_CRYPTO_SHA2_ARM_CE) += sha2-arm-ce.o 
+obj-$(CONFIG_CRYPTO_GHASH_ARM_CE) += ghash-arm-ce.o +obj-$(CONFIG_CRYPTO_CRCT10DIF_ARM_CE) += crct10dif-arm-ce.o +obj-$(CONFIG_CRYPTO_CRC32_ARM_CE) += crc32-arm-ce.o aes-arm-y := aes-cipher-core.o aes-cipher-glue.o aes-arm-bs-y := aes-neonbs-core.o aes-neonbs-glue.o @@ -53,13 +35,19 @@ aes-arm-ce-y := aes-ce-core.o aes-ce-glue.o ghash-arm-ce-y := ghash-ce-core.o ghash-ce-glue.o crct10dif-arm-ce-y := crct10dif-ce-core.o crct10dif-ce-glue.o crc32-arm-ce-y:= crc32-ce-core.o crc32-ce-glue.o -chacha-neon-y := chacha-neon-core.o chacha-neon-glue.o +chacha-neon-y := chacha-scalar-core.o chacha-glue.o +chacha-neon-$(CONFIG_KERNEL_MODE_NEON) += chacha-neon-core.o +poly1305-arm-y := poly1305-core.o poly1305-glue.o nhpoly1305-neon-y := nh-neon-core.o nhpoly1305-neon-glue.o +curve25519-neon-y := curve25519-core.o curve25519-glue.o ifdef REGENERATE_ARM_CRYPTO quiet_cmd_perl = PERL $@ cmd_perl = $(PERL) $(<) > $(@) +$(src)/poly1305-core.S_shipped: $(src)/poly1305-armv4.pl + $(call cmd,perl) + $(src)/sha256-core.S_shipped: $(src)/sha256-armv4.pl $(call cmd,perl) @@ -67,4 +55,9 @@ $(src)/sha512-core.S_shipped: $(src)/sha512-armv4.pl $(call cmd,perl) endif -clean-files += sha256-core.S sha512-core.S +clean-files += poly1305-core.S sha256-core.S sha512-core.S + +# massage the perlasm code a bit so we only get the NEON routine if we need it +poly1305-aflags-$(CONFIG_CPU_V7) := -U__LINUX_ARM_ARCH__ -D__LINUX_ARM_ARCH__=5 +poly1305-aflags-$(CONFIG_KERNEL_MODE_NEON) := -U__LINUX_ARM_ARCH__ -D__LINUX_ARM_ARCH__=7 +AFLAGS_poly1305-core.o += $(poly1305-aflags-y) diff --git a/arch/arm/crypto/aes-ce-core.S b/arch/arm/crypto/aes-ce-core.S index b978cdf133af..4d1707388d94 100644 --- a/arch/arm/crypto/aes-ce-core.S +++ b/arch/arm/crypto/aes-ce-core.S @@ -9,6 +9,7 @@ #include <asm/assembler.h> .text + .arch armv8-a .fpu crypto-neon-fp-armv8 .align 3 diff --git a/arch/arm/crypto/chacha-glue.c b/arch/arm/crypto/chacha-glue.c new file mode 100644 index 000000000000..3f0c057aa050 --- /dev/null +++ b/arch/arm/crypto/chacha-glue.c @@ -0,0 +1,343 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * ARM NEON accelerated ChaCha and XChaCha stream ciphers, + * including ChaCha20 (RFC7539) + * + * Copyright (C) 2016-2019 Linaro, Ltd. 
<ard.biesheuvel@linaro.org> + * Copyright (C) 2015 Martin Willi + */ + +#include <crypto/algapi.h> +#include <crypto/internal/chacha.h> +#include <crypto/internal/simd.h> +#include <crypto/internal/skcipher.h> +#include <linux/jump_label.h> +#include <linux/kernel.h> +#include <linux/module.h> + +#include <asm/cputype.h> +#include <asm/hwcap.h> +#include <asm/neon.h> +#include <asm/simd.h> + +asmlinkage void chacha_block_xor_neon(const u32 *state, u8 *dst, const u8 *src, + int nrounds); +asmlinkage void chacha_4block_xor_neon(const u32 *state, u8 *dst, const u8 *src, + int nrounds); +asmlinkage void hchacha_block_arm(const u32 *state, u32 *out, int nrounds); +asmlinkage void hchacha_block_neon(const u32 *state, u32 *out, int nrounds); + +asmlinkage void chacha_doarm(u8 *dst, const u8 *src, unsigned int bytes, + const u32 *state, int nrounds); + +static __ro_after_init DEFINE_STATIC_KEY_FALSE(use_neon); + +static inline bool neon_usable(void) +{ + return static_branch_likely(&use_neon) && crypto_simd_usable(); +} + +static void chacha_doneon(u32 *state, u8 *dst, const u8 *src, + unsigned int bytes, int nrounds) +{ + u8 buf[CHACHA_BLOCK_SIZE]; + + while (bytes >= CHACHA_BLOCK_SIZE * 4) { + chacha_4block_xor_neon(state, dst, src, nrounds); + bytes -= CHACHA_BLOCK_SIZE * 4; + src += CHACHA_BLOCK_SIZE * 4; + dst += CHACHA_BLOCK_SIZE * 4; + state[12] += 4; + } + while (bytes >= CHACHA_BLOCK_SIZE) { + chacha_block_xor_neon(state, dst, src, nrounds); + bytes -= CHACHA_BLOCK_SIZE; + src += CHACHA_BLOCK_SIZE; + dst += CHACHA_BLOCK_SIZE; + state[12]++; + } + if (bytes) { + memcpy(buf, src, bytes); + chacha_block_xor_neon(state, buf, buf, nrounds); + memcpy(dst, buf, bytes); + } +} + +void hchacha_block_arch(const u32 *state, u32 *stream, int nrounds) +{ + if (!IS_ENABLED(CONFIG_KERNEL_MODE_NEON) || !neon_usable()) { + hchacha_block_arm(state, stream, nrounds); + } else { + kernel_neon_begin(); + hchacha_block_neon(state, stream, nrounds); + kernel_neon_end(); + } +} +EXPORT_SYMBOL(hchacha_block_arch); + +void chacha_init_arch(u32 *state, const u32 *key, const u8 *iv) +{ + chacha_init_generic(state, key, iv); +} +EXPORT_SYMBOL(chacha_init_arch); + +void chacha_crypt_arch(u32 *state, u8 *dst, const u8 *src, unsigned int bytes, + int nrounds) +{ + if (!IS_ENABLED(CONFIG_KERNEL_MODE_NEON) || !neon_usable() || + bytes <= CHACHA_BLOCK_SIZE) { + chacha_doarm(dst, src, bytes, state, nrounds); + state[12] += DIV_ROUND_UP(bytes, CHACHA_BLOCK_SIZE); + return; + } + + kernel_neon_begin(); + chacha_doneon(state, dst, src, bytes, nrounds); + kernel_neon_end(); +} +EXPORT_SYMBOL(chacha_crypt_arch); + +static int chacha_stream_xor(struct skcipher_request *req, + const struct chacha_ctx *ctx, const u8 *iv, + bool neon) +{ + struct skcipher_walk walk; + u32 state[16]; + int err; + + err = skcipher_walk_virt(&walk, req, false); + + chacha_init_generic(state, ctx->key, iv); + + while (walk.nbytes > 0) { + unsigned int nbytes = walk.nbytes; + + if (nbytes < walk.total) + nbytes = round_down(nbytes, walk.stride); + + if (!neon) { + chacha_doarm(walk.dst.virt.addr, walk.src.virt.addr, + nbytes, state, ctx->nrounds); + state[12] += DIV_ROUND_UP(nbytes, CHACHA_BLOCK_SIZE); + } else { + kernel_neon_begin(); + chacha_doneon(state, walk.dst.virt.addr, + walk.src.virt.addr, nbytes, ctx->nrounds); + kernel_neon_end(); + } + err = skcipher_walk_done(&walk, walk.nbytes - nbytes); + } + + return err; +} + +static int do_chacha(struct skcipher_request *req, bool neon) +{ + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + 
struct chacha_ctx *ctx = crypto_skcipher_ctx(tfm); + + return chacha_stream_xor(req, ctx, req->iv, neon); +} + +static int chacha_arm(struct skcipher_request *req) +{ + return do_chacha(req, false); +} + +static int chacha_neon(struct skcipher_request *req) +{ + return do_chacha(req, neon_usable()); +} + +static int do_xchacha(struct skcipher_request *req, bool neon) +{ + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct chacha_ctx *ctx = crypto_skcipher_ctx(tfm); + struct chacha_ctx subctx; + u32 state[16]; + u8 real_iv[16]; + + chacha_init_generic(state, ctx->key, req->iv); + + if (!neon) { + hchacha_block_arm(state, subctx.key, ctx->nrounds); + } else { + kernel_neon_begin(); + hchacha_block_neon(state, subctx.key, ctx->nrounds); + kernel_neon_end(); + } + subctx.nrounds = ctx->nrounds; + + memcpy(&real_iv[0], req->iv + 24, 8); + memcpy(&real_iv[8], req->iv + 16, 8); + return chacha_stream_xor(req, &subctx, real_iv, neon); +} + +static int xchacha_arm(struct skcipher_request *req) +{ + return do_xchacha(req, false); +} + +static int xchacha_neon(struct skcipher_request *req) +{ + return do_xchacha(req, neon_usable()); +} + +static struct skcipher_alg arm_algs[] = { + { + .base.cra_name = "chacha20", + .base.cra_driver_name = "chacha20-arm", + .base.cra_priority = 200, + .base.cra_blocksize = 1, + .base.cra_ctxsize = sizeof(struct chacha_ctx), + .base.cra_module = THIS_MODULE, + + .min_keysize = CHACHA_KEY_SIZE, + .max_keysize = CHACHA_KEY_SIZE, + .ivsize = CHACHA_IV_SIZE, + .chunksize = CHACHA_BLOCK_SIZE, + .setkey = chacha20_setkey, + .encrypt = chacha_arm, + .decrypt = chacha_arm, + }, { + .base.cra_name = "xchacha20", + .base.cra_driver_name = "xchacha20-arm", + .base.cra_priority = 200, + .base.cra_blocksize = 1, + .base.cra_ctxsize = sizeof(struct chacha_ctx), + .base.cra_module = THIS_MODULE, + + .min_keysize = CHACHA_KEY_SIZE, + .max_keysize = CHACHA_KEY_SIZE, + .ivsize = XCHACHA_IV_SIZE, + .chunksize = CHACHA_BLOCK_SIZE, + .setkey = chacha20_setkey, + .encrypt = xchacha_arm, + .decrypt = xchacha_arm, + }, { + .base.cra_name = "xchacha12", + .base.cra_driver_name = "xchacha12-arm", + .base.cra_priority = 200, + .base.cra_blocksize = 1, + .base.cra_ctxsize = sizeof(struct chacha_ctx), + .base.cra_module = THIS_MODULE, + + .min_keysize = CHACHA_KEY_SIZE, + .max_keysize = CHACHA_KEY_SIZE, + .ivsize = XCHACHA_IV_SIZE, + .chunksize = CHACHA_BLOCK_SIZE, + .setkey = chacha12_setkey, + .encrypt = xchacha_arm, + .decrypt = xchacha_arm, + }, +}; + +static struct skcipher_alg neon_algs[] = { + { + .base.cra_name = "chacha20", + .base.cra_driver_name = "chacha20-neon", + .base.cra_priority = 300, + .base.cra_blocksize = 1, + .base.cra_ctxsize = sizeof(struct chacha_ctx), + .base.cra_module = THIS_MODULE, + + .min_keysize = CHACHA_KEY_SIZE, + .max_keysize = CHACHA_KEY_SIZE, + .ivsize = CHACHA_IV_SIZE, + .chunksize = CHACHA_BLOCK_SIZE, + .walksize = 4 * CHACHA_BLOCK_SIZE, + .setkey = chacha20_setkey, + .encrypt = chacha_neon, + .decrypt = chacha_neon, + }, { + .base.cra_name = "xchacha20", + .base.cra_driver_name = "xchacha20-neon", + .base.cra_priority = 300, + .base.cra_blocksize = 1, + .base.cra_ctxsize = sizeof(struct chacha_ctx), + .base.cra_module = THIS_MODULE, + + .min_keysize = CHACHA_KEY_SIZE, + .max_keysize = CHACHA_KEY_SIZE, + .ivsize = XCHACHA_IV_SIZE, + .chunksize = CHACHA_BLOCK_SIZE, + .walksize = 4 * CHACHA_BLOCK_SIZE, + .setkey = chacha20_setkey, + .encrypt = xchacha_neon, + .decrypt = xchacha_neon, + }, { + .base.cra_name = "xchacha12", + 
.base.cra_driver_name = "xchacha12-neon", + .base.cra_priority = 300, + .base.cra_blocksize = 1, + .base.cra_ctxsize = sizeof(struct chacha_ctx), + .base.cra_module = THIS_MODULE, + + .min_keysize = CHACHA_KEY_SIZE, + .max_keysize = CHACHA_KEY_SIZE, + .ivsize = XCHACHA_IV_SIZE, + .chunksize = CHACHA_BLOCK_SIZE, + .walksize = 4 * CHACHA_BLOCK_SIZE, + .setkey = chacha12_setkey, + .encrypt = xchacha_neon, + .decrypt = xchacha_neon, + } +}; + +static int __init chacha_simd_mod_init(void) +{ + int err; + + err = crypto_register_skciphers(arm_algs, ARRAY_SIZE(arm_algs)); + if (err) + return err; + + if (IS_ENABLED(CONFIG_KERNEL_MODE_NEON) && (elf_hwcap & HWCAP_NEON)) { + int i; + + switch (read_cpuid_part()) { + case ARM_CPU_PART_CORTEX_A7: + case ARM_CPU_PART_CORTEX_A5: + /* + * The Cortex-A7 and Cortex-A5 do not perform well with + * the NEON implementation but do incredibly with the + * scalar one and use less power. + */ + for (i = 0; i < ARRAY_SIZE(neon_algs); i++) + neon_algs[i].base.cra_priority = 0; + break; + default: + static_branch_enable(&use_neon); + } + + err = crypto_register_skciphers(neon_algs, ARRAY_SIZE(neon_algs)); + if (err) + crypto_unregister_skciphers(arm_algs, ARRAY_SIZE(arm_algs)); + } + return err; +} + +static void __exit chacha_simd_mod_fini(void) +{ + crypto_unregister_skciphers(arm_algs, ARRAY_SIZE(arm_algs)); + if (IS_ENABLED(CONFIG_KERNEL_MODE_NEON) && (elf_hwcap & HWCAP_NEON)) + crypto_unregister_skciphers(neon_algs, ARRAY_SIZE(neon_algs)); +} + +module_init(chacha_simd_mod_init); +module_exit(chacha_simd_mod_fini); + +MODULE_DESCRIPTION("ChaCha and XChaCha stream ciphers (scalar and NEON accelerated)"); +MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>"); +MODULE_LICENSE("GPL v2"); +MODULE_ALIAS_CRYPTO("chacha20"); +MODULE_ALIAS_CRYPTO("chacha20-arm"); +MODULE_ALIAS_CRYPTO("xchacha20"); +MODULE_ALIAS_CRYPTO("xchacha20-arm"); +MODULE_ALIAS_CRYPTO("xchacha12"); +MODULE_ALIAS_CRYPTO("xchacha12-arm"); +#ifdef CONFIG_KERNEL_MODE_NEON +MODULE_ALIAS_CRYPTO("chacha20-neon"); +MODULE_ALIAS_CRYPTO("xchacha20-neon"); +MODULE_ALIAS_CRYPTO("xchacha12-neon"); +#endif diff --git a/arch/arm/crypto/chacha-neon-glue.c b/arch/arm/crypto/chacha-neon-glue.c deleted file mode 100644 index a8e9b534c8da..000000000000 --- a/arch/arm/crypto/chacha-neon-glue.c +++ /dev/null @@ -1,202 +0,0 @@ -/* - * ARM NEON accelerated ChaCha and XChaCha stream ciphers, - * including ChaCha20 (RFC7539) - * - * Copyright (C) 2016 Linaro, Ltd. <ard.biesheuvel@linaro.org> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * - * Based on: - * ChaCha20 256-bit cipher algorithm, RFC7539, SIMD glue code - * - * Copyright (C) 2015 Martin Willi - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. 
- */ - -#include <crypto/algapi.h> -#include <crypto/chacha.h> -#include <crypto/internal/simd.h> -#include <crypto/internal/skcipher.h> -#include <linux/kernel.h> -#include <linux/module.h> - -#include <asm/hwcap.h> -#include <asm/neon.h> -#include <asm/simd.h> - -asmlinkage void chacha_block_xor_neon(const u32 *state, u8 *dst, const u8 *src, - int nrounds); -asmlinkage void chacha_4block_xor_neon(const u32 *state, u8 *dst, const u8 *src, - int nrounds); -asmlinkage void hchacha_block_neon(const u32 *state, u32 *out, int nrounds); - -static void chacha_doneon(u32 *state, u8 *dst, const u8 *src, - unsigned int bytes, int nrounds) -{ - u8 buf[CHACHA_BLOCK_SIZE]; - - while (bytes >= CHACHA_BLOCK_SIZE * 4) { - chacha_4block_xor_neon(state, dst, src, nrounds); - bytes -= CHACHA_BLOCK_SIZE * 4; - src += CHACHA_BLOCK_SIZE * 4; - dst += CHACHA_BLOCK_SIZE * 4; - state[12] += 4; - } - while (bytes >= CHACHA_BLOCK_SIZE) { - chacha_block_xor_neon(state, dst, src, nrounds); - bytes -= CHACHA_BLOCK_SIZE; - src += CHACHA_BLOCK_SIZE; - dst += CHACHA_BLOCK_SIZE; - state[12]++; - } - if (bytes) { - memcpy(buf, src, bytes); - chacha_block_xor_neon(state, buf, buf, nrounds); - memcpy(dst, buf, bytes); - } -} - -static int chacha_neon_stream_xor(struct skcipher_request *req, - const struct chacha_ctx *ctx, const u8 *iv) -{ - struct skcipher_walk walk; - u32 state[16]; - int err; - - err = skcipher_walk_virt(&walk, req, false); - - crypto_chacha_init(state, ctx, iv); - - while (walk.nbytes > 0) { - unsigned int nbytes = walk.nbytes; - - if (nbytes < walk.total) - nbytes = round_down(nbytes, walk.stride); - - kernel_neon_begin(); - chacha_doneon(state, walk.dst.virt.addr, walk.src.virt.addr, - nbytes, ctx->nrounds); - kernel_neon_end(); - err = skcipher_walk_done(&walk, walk.nbytes - nbytes); - } - - return err; -} - -static int chacha_neon(struct skcipher_request *req) -{ - struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); - struct chacha_ctx *ctx = crypto_skcipher_ctx(tfm); - - if (req->cryptlen <= CHACHA_BLOCK_SIZE || !crypto_simd_usable()) - return crypto_chacha_crypt(req); - - return chacha_neon_stream_xor(req, ctx, req->iv); -} - -static int xchacha_neon(struct skcipher_request *req) -{ - struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); - struct chacha_ctx *ctx = crypto_skcipher_ctx(tfm); - struct chacha_ctx subctx; - u32 state[16]; - u8 real_iv[16]; - - if (req->cryptlen <= CHACHA_BLOCK_SIZE || !crypto_simd_usable()) - return crypto_xchacha_crypt(req); - - crypto_chacha_init(state, ctx, req->iv); - - kernel_neon_begin(); - hchacha_block_neon(state, subctx.key, ctx->nrounds); - kernel_neon_end(); - subctx.nrounds = ctx->nrounds; - - memcpy(&real_iv[0], req->iv + 24, 8); - memcpy(&real_iv[8], req->iv + 16, 8); - return chacha_neon_stream_xor(req, &subctx, real_iv); -} - -static struct skcipher_alg algs[] = { - { - .base.cra_name = "chacha20", - .base.cra_driver_name = "chacha20-neon", - .base.cra_priority = 300, - .base.cra_blocksize = 1, - .base.cra_ctxsize = sizeof(struct chacha_ctx), - .base.cra_module = THIS_MODULE, - - .min_keysize = CHACHA_KEY_SIZE, - .max_keysize = CHACHA_KEY_SIZE, - .ivsize = CHACHA_IV_SIZE, - .chunksize = CHACHA_BLOCK_SIZE, - .walksize = 4 * CHACHA_BLOCK_SIZE, - .setkey = crypto_chacha20_setkey, - .encrypt = chacha_neon, - .decrypt = chacha_neon, - }, { - .base.cra_name = "xchacha20", - .base.cra_driver_name = "xchacha20-neon", - .base.cra_priority = 300, - .base.cra_blocksize = 1, - .base.cra_ctxsize = sizeof(struct chacha_ctx), - .base.cra_module = 
THIS_MODULE, - - .min_keysize = CHACHA_KEY_SIZE, - .max_keysize = CHACHA_KEY_SIZE, - .ivsize = XCHACHA_IV_SIZE, - .chunksize = CHACHA_BLOCK_SIZE, - .walksize = 4 * CHACHA_BLOCK_SIZE, - .setkey = crypto_chacha20_setkey, - .encrypt = xchacha_neon, - .decrypt = xchacha_neon, - }, { - .base.cra_name = "xchacha12", - .base.cra_driver_name = "xchacha12-neon", - .base.cra_priority = 300, - .base.cra_blocksize = 1, - .base.cra_ctxsize = sizeof(struct chacha_ctx), - .base.cra_module = THIS_MODULE, - - .min_keysize = CHACHA_KEY_SIZE, - .max_keysize = CHACHA_KEY_SIZE, - .ivsize = XCHACHA_IV_SIZE, - .chunksize = CHACHA_BLOCK_SIZE, - .walksize = 4 * CHACHA_BLOCK_SIZE, - .setkey = crypto_chacha12_setkey, - .encrypt = xchacha_neon, - .decrypt = xchacha_neon, - } -}; - -static int __init chacha_simd_mod_init(void) -{ - if (!(elf_hwcap & HWCAP_NEON)) - return -ENODEV; - - return crypto_register_skciphers(algs, ARRAY_SIZE(algs)); -} - -static void __exit chacha_simd_mod_fini(void) -{ - crypto_unregister_skciphers(algs, ARRAY_SIZE(algs)); -} - -module_init(chacha_simd_mod_init); -module_exit(chacha_simd_mod_fini); - -MODULE_DESCRIPTION("ChaCha and XChaCha stream ciphers (NEON accelerated)"); -MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>"); -MODULE_LICENSE("GPL v2"); -MODULE_ALIAS_CRYPTO("chacha20"); -MODULE_ALIAS_CRYPTO("chacha20-neon"); -MODULE_ALIAS_CRYPTO("xchacha20"); -MODULE_ALIAS_CRYPTO("xchacha20-neon"); -MODULE_ALIAS_CRYPTO("xchacha12"); -MODULE_ALIAS_CRYPTO("xchacha12-neon"); diff --git a/arch/arm/crypto/chacha-scalar-core.S b/arch/arm/crypto/chacha-scalar-core.S new file mode 100644 index 000000000000..2985b80a45b5 --- /dev/null +++ b/arch/arm/crypto/chacha-scalar-core.S @@ -0,0 +1,460 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2018 Google, Inc. + */ + +#include <linux/linkage.h> +#include <asm/assembler.h> + +/* + * Design notes: + * + * 16 registers would be needed to hold the state matrix, but only 14 are + * available because 'sp' and 'pc' cannot be used. So we spill the elements + * (x8, x9) to the stack and swap them out with (x10, x11). This adds one + * 'ldrd' and one 'strd' instruction per round. + * + * All rotates are performed using the implicit rotate operand accepted by the + * 'add' and 'eor' instructions. This is faster than using explicit rotate + * instructions. To make this work, we allow the values in the second and last + * rows of the ChaCha state matrix (rows 'b' and 'd') to temporarily have the + * wrong rotation amount. The rotation amount is then fixed up just in time + * when the values are used. 'brot' is the number of bits the values in row 'b' + * need to be rotated right to arrive at the correct values, and 'drot' + * similarly for row 'd'. (brot, drot) start out as (0, 0) but we make it such + * that they end up as (25, 24) after every round. 
+ */ + + // ChaCha state registers + X0 .req r0 + X1 .req r1 + X2 .req r2 + X3 .req r3 + X4 .req r4 + X5 .req r5 + X6 .req r6 + X7 .req r7 + X8_X10 .req r8 // shared by x8 and x10 + X9_X11 .req r9 // shared by x9 and x11 + X12 .req r10 + X13 .req r11 + X14 .req r12 + X15 .req r14 + +.macro __rev out, in, t0, t1, t2 +.if __LINUX_ARM_ARCH__ >= 6 + rev \out, \in +.else + lsl \t0, \in, #24 + and \t1, \in, #0xff00 + and \t2, \in, #0xff0000 + orr \out, \t0, \in, lsr #24 + orr \out, \out, \t1, lsl #8 + orr \out, \out, \t2, lsr #8 +.endif +.endm + +.macro _le32_bswap x, t0, t1, t2 +#ifdef __ARMEB__ + __rev \x, \x, \t0, \t1, \t2 +#endif +.endm + +.macro _le32_bswap_4x a, b, c, d, t0, t1, t2 + _le32_bswap \a, \t0, \t1, \t2 + _le32_bswap \b, \t0, \t1, \t2 + _le32_bswap \c, \t0, \t1, \t2 + _le32_bswap \d, \t0, \t1, \t2 +.endm + +.macro __ldrd a, b, src, offset +#if __LINUX_ARM_ARCH__ >= 6 + ldrd \a, \b, [\src, #\offset] +#else + ldr \a, [\src, #\offset] + ldr \b, [\src, #\offset + 4] +#endif +.endm + +.macro __strd a, b, dst, offset +#if __LINUX_ARM_ARCH__ >= 6 + strd \a, \b, [\dst, #\offset] +#else + str \a, [\dst, #\offset] + str \b, [\dst, #\offset + 4] +#endif +.endm + +.macro _halfround a1, b1, c1, d1, a2, b2, c2, d2 + + // a += b; d ^= a; d = rol(d, 16); + add \a1, \a1, \b1, ror #brot + add \a2, \a2, \b2, ror #brot + eor \d1, \a1, \d1, ror #drot + eor \d2, \a2, \d2, ror #drot + // drot == 32 - 16 == 16 + + // c += d; b ^= c; b = rol(b, 12); + add \c1, \c1, \d1, ror #16 + add \c2, \c2, \d2, ror #16 + eor \b1, \c1, \b1, ror #brot + eor \b2, \c2, \b2, ror #brot + // brot == 32 - 12 == 20 + + // a += b; d ^= a; d = rol(d, 8); + add \a1, \a1, \b1, ror #20 + add \a2, \a2, \b2, ror #20 + eor \d1, \a1, \d1, ror #16 + eor \d2, \a2, \d2, ror #16 + // drot == 32 - 8 == 24 + + // c += d; b ^= c; b = rol(b, 7); + add \c1, \c1, \d1, ror #24 + add \c2, \c2, \d2, ror #24 + eor \b1, \c1, \b1, ror #20 + eor \b2, \c2, \b2, ror #20 + // brot == 32 - 7 == 25 +.endm + +.macro _doubleround + + // column round + + // quarterrounds: (x0, x4, x8, x12) and (x1, x5, x9, x13) + _halfround X0, X4, X8_X10, X12, X1, X5, X9_X11, X13 + + // save (x8, x9); restore (x10, x11) + __strd X8_X10, X9_X11, sp, 0 + __ldrd X8_X10, X9_X11, sp, 8 + + // quarterrounds: (x2, x6, x10, x14) and (x3, x7, x11, x15) + _halfround X2, X6, X8_X10, X14, X3, X7, X9_X11, X15 + + .set brot, 25 + .set drot, 24 + + // diagonal round + + // quarterrounds: (x0, x5, x10, x15) and (x1, x6, x11, x12) + _halfround X0, X5, X8_X10, X15, X1, X6, X9_X11, X12 + + // save (x10, x11); restore (x8, x9) + __strd X8_X10, X9_X11, sp, 8 + __ldrd X8_X10, X9_X11, sp, 0 + + // quarterrounds: (x2, x7, x8, x13) and (x3, x4, x9, x14) + _halfround X2, X7, X8_X10, X13, X3, X4, X9_X11, X14 +.endm + +.macro _chacha_permute nrounds + .set brot, 0 + .set drot, 0 + .rept \nrounds / 2 + _doubleround + .endr +.endm + +.macro _chacha nrounds + +.Lnext_block\@: + // Stack: unused0-unused1 x10-x11 x0-x15 OUT IN LEN + // Registers contain x0-x9,x12-x15. + + // Do the core ChaCha permutation to update x0-x15. + _chacha_permute \nrounds + + add sp, #8 + // Stack: x10-x11 orig_x0-orig_x15 OUT IN LEN + // Registers contain x0-x9,x12-x15. + // x4-x7 are rotated by 'brot'; x12-x15 are rotated by 'drot'. + + // Free up some registers (r8-r12,r14) by pushing (x8-x9,x12-x15). + push {X8_X10, X9_X11, X12, X13, X14, X15} + + // Load (OUT, IN, LEN). + ldr r14, [sp, #96] + ldr r12, [sp, #100] + ldr r11, [sp, #104] + + orr r10, r14, r12 + + // Use slow path if fewer than 64 bytes remain. 
+ cmp r11, #64 + blt .Lxor_slowpath\@ + + // Use slow path if IN and/or OUT isn't 4-byte aligned. Needed even on + // ARMv6+, since ldmia and stmia (used below) still require alignment. + tst r10, #3 + bne .Lxor_slowpath\@ + + // Fast path: XOR 64 bytes of aligned data. + + // Stack: x8-x9 x12-x15 x10-x11 orig_x0-orig_x15 OUT IN LEN + // Registers: r0-r7 are x0-x7; r8-r11 are free; r12 is IN; r14 is OUT. + // x4-x7 are rotated by 'brot'; x12-x15 are rotated by 'drot'. + + // x0-x3 + __ldrd r8, r9, sp, 32 + __ldrd r10, r11, sp, 40 + add X0, X0, r8 + add X1, X1, r9 + add X2, X2, r10 + add X3, X3, r11 + _le32_bswap_4x X0, X1, X2, X3, r8, r9, r10 + ldmia r12!, {r8-r11} + eor X0, X0, r8 + eor X1, X1, r9 + eor X2, X2, r10 + eor X3, X3, r11 + stmia r14!, {X0-X3} + + // x4-x7 + __ldrd r8, r9, sp, 48 + __ldrd r10, r11, sp, 56 + add X4, r8, X4, ror #brot + add X5, r9, X5, ror #brot + ldmia r12!, {X0-X3} + add X6, r10, X6, ror #brot + add X7, r11, X7, ror #brot + _le32_bswap_4x X4, X5, X6, X7, r8, r9, r10 + eor X4, X4, X0 + eor X5, X5, X1 + eor X6, X6, X2 + eor X7, X7, X3 + stmia r14!, {X4-X7} + + // x8-x15 + pop {r0-r7} // (x8-x9,x12-x15,x10-x11) + __ldrd r8, r9, sp, 32 + __ldrd r10, r11, sp, 40 + add r0, r0, r8 // x8 + add r1, r1, r9 // x9 + add r6, r6, r10 // x10 + add r7, r7, r11 // x11 + _le32_bswap_4x r0, r1, r6, r7, r8, r9, r10 + ldmia r12!, {r8-r11} + eor r0, r0, r8 // x8 + eor r1, r1, r9 // x9 + eor r6, r6, r10 // x10 + eor r7, r7, r11 // x11 + stmia r14!, {r0,r1,r6,r7} + ldmia r12!, {r0,r1,r6,r7} + __ldrd r8, r9, sp, 48 + __ldrd r10, r11, sp, 56 + add r2, r8, r2, ror #drot // x12 + add r3, r9, r3, ror #drot // x13 + add r4, r10, r4, ror #drot // x14 + add r5, r11, r5, ror #drot // x15 + _le32_bswap_4x r2, r3, r4, r5, r9, r10, r11 + ldr r9, [sp, #72] // load LEN + eor r2, r2, r0 // x12 + eor r3, r3, r1 // x13 + eor r4, r4, r6 // x14 + eor r5, r5, r7 // x15 + subs r9, #64 // decrement and check LEN + stmia r14!, {r2-r5} + + beq .Ldone\@ + +.Lprepare_for_next_block\@: + + // Stack: x0-x15 OUT IN LEN + + // Increment block counter (x12) + add r8, #1 + + // Store updated (OUT, IN, LEN) + str r14, [sp, #64] + str r12, [sp, #68] + str r9, [sp, #72] + + mov r14, sp + + // Store updated block counter (x12) + str r8, [sp, #48] + + sub sp, #16 + + // Reload state and do next block + ldmia r14!, {r0-r11} // load x0-x11 + __strd r10, r11, sp, 8 // store x10-x11 before state + ldmia r14, {r10-r12,r14} // load x12-x15 + b .Lnext_block\@ + +.Lxor_slowpath\@: + // Slow path: < 64 bytes remaining, or unaligned input or output buffer. + // We handle it by storing the 64 bytes of keystream to the stack, then + // XOR-ing the needed portion with the data. + + // Allocate keystream buffer + sub sp, #64 + mov r14, sp + + // Stack: ks0-ks15 x8-x9 x12-x15 x10-x11 orig_x0-orig_x15 OUT IN LEN + // Registers: r0-r7 are x0-x7; r8-r11 are free; r12 is IN; r14 is &ks0. + // x4-x7 are rotated by 'brot'; x12-x15 are rotated by 'drot'. 
+ + // Save keystream for x0-x3 + __ldrd r8, r9, sp, 96 + __ldrd r10, r11, sp, 104 + add X0, X0, r8 + add X1, X1, r9 + add X2, X2, r10 + add X3, X3, r11 + _le32_bswap_4x X0, X1, X2, X3, r8, r9, r10 + stmia r14!, {X0-X3} + + // Save keystream for x4-x7 + __ldrd r8, r9, sp, 112 + __ldrd r10, r11, sp, 120 + add X4, r8, X4, ror #brot + add X5, r9, X5, ror #brot + add X6, r10, X6, ror #brot + add X7, r11, X7, ror #brot + _le32_bswap_4x X4, X5, X6, X7, r8, r9, r10 + add r8, sp, #64 + stmia r14!, {X4-X7} + + // Save keystream for x8-x15 + ldm r8, {r0-r7} // (x8-x9,x12-x15,x10-x11) + __ldrd r8, r9, sp, 128 + __ldrd r10, r11, sp, 136 + add r0, r0, r8 // x8 + add r1, r1, r9 // x9 + add r6, r6, r10 // x10 + add r7, r7, r11 // x11 + _le32_bswap_4x r0, r1, r6, r7, r8, r9, r10 + stmia r14!, {r0,r1,r6,r7} + __ldrd r8, r9, sp, 144 + __ldrd r10, r11, sp, 152 + add r2, r8, r2, ror #drot // x12 + add r3, r9, r3, ror #drot // x13 + add r4, r10, r4, ror #drot // x14 + add r5, r11, r5, ror #drot // x15 + _le32_bswap_4x r2, r3, r4, r5, r9, r10, r11 + stmia r14, {r2-r5} + + // Stack: ks0-ks15 unused0-unused7 x0-x15 OUT IN LEN + // Registers: r8 is block counter, r12 is IN. + + ldr r9, [sp, #168] // LEN + ldr r14, [sp, #160] // OUT + cmp r9, #64 + mov r0, sp + movle r1, r9 + movgt r1, #64 + // r1 is number of bytes to XOR, in range [1, 64] + +.if __LINUX_ARM_ARCH__ < 6 + orr r2, r12, r14 + tst r2, #3 // IN or OUT misaligned? + bne .Lxor_next_byte\@ +.endif + + // XOR a word at a time +.rept 16 + subs r1, #4 + blt .Lxor_words_done\@ + ldr r2, [r12], #4 + ldr r3, [r0], #4 + eor r2, r2, r3 + str r2, [r14], #4 +.endr + b .Lxor_slowpath_done\@ +.Lxor_words_done\@: + ands r1, r1, #3 + beq .Lxor_slowpath_done\@ + + // XOR a byte at a time +.Lxor_next_byte\@: + ldrb r2, [r12], #1 + ldrb r3, [r0], #1 + eor r2, r2, r3 + strb r2, [r14], #1 + subs r1, #1 + bne .Lxor_next_byte\@ + +.Lxor_slowpath_done\@: + subs r9, #64 + add sp, #96 + bgt .Lprepare_for_next_block\@ + +.Ldone\@: +.endm // _chacha + +/* + * void chacha_doarm(u8 *dst, const u8 *src, unsigned int bytes, + * const u32 *state, int nrounds); + */ +ENTRY(chacha_doarm) + cmp r2, #0 // len == 0? + reteq lr + + ldr ip, [sp] + cmp ip, #12 + + push {r0-r2,r4-r11,lr} + + // Push state x0-x15 onto stack. + // Also store an extra copy of x10-x11 just before the state. + + add X12, r3, #48 + ldm X12, {X12,X13,X14,X15} + push {X12,X13,X14,X15} + sub sp, sp, #64 + + __ldrd X8_X10, X9_X11, r3, 40 + __strd X8_X10, X9_X11, sp, 8 + __strd X8_X10, X9_X11, sp, 56 + ldm r3, {X0-X9_X11} + __strd X0, X1, sp, 16 + __strd X2, X3, sp, 24 + __strd X4, X5, sp, 32 + __strd X6, X7, sp, 40 + __strd X8_X10, X9_X11, sp, 48 + + beq 1f + _chacha 20 + +0: add sp, #76 + pop {r4-r11, pc} + +1: _chacha 12 + b 0b +ENDPROC(chacha_doarm) + +/* + * void hchacha_block_arm(const u32 state[16], u32 out[8], int nrounds); + */ +ENTRY(hchacha_block_arm) + push {r1,r4-r11,lr} + + cmp r2, #12 // ChaCha12 ? 
+ + mov r14, r0 + ldmia r14!, {r0-r11} // load x0-x11 + push {r10-r11} // store x10-x11 to stack + ldm r14, {r10-r12,r14} // load x12-x15 + sub sp, #8 + + beq 1f + _chacha_permute 20 + + // Skip over (unused0-unused1, x10-x11) +0: add sp, #16 + + // Fix up rotations of x12-x15 + ror X12, X12, #drot + ror X13, X13, #drot + pop {r4} // load 'out' + ror X14, X14, #drot + ror X15, X15, #drot + + // Store (x0-x3,x12-x15) to 'out' + stm r4, {X0,X1,X2,X3,X12,X13,X14,X15} + + pop {r4-r11,pc} + +1: _chacha_permute 12 + b 0b +ENDPROC(hchacha_block_arm) diff --git a/arch/arm/crypto/crct10dif-ce-core.S b/arch/arm/crypto/crct10dif-ce-core.S index 86be258a803f..46c02c518a30 100644 --- a/arch/arm/crypto/crct10dif-ce-core.S +++ b/arch/arm/crypto/crct10dif-ce-core.S @@ -72,7 +72,7 @@ #endif .text - .arch armv7-a + .arch armv8-a .fpu crypto-neon-fp-armv8 init_crc .req r0 diff --git a/arch/arm/crypto/curve25519-core.S b/arch/arm/crypto/curve25519-core.S new file mode 100644 index 000000000000..be18af52e7dc --- /dev/null +++ b/arch/arm/crypto/curve25519-core.S @@ -0,0 +1,2062 @@ +/* SPDX-License-Identifier: GPL-2.0 OR MIT */ +/* + * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved. + * + * Based on public domain code from Daniel J. Bernstein and Peter Schwabe. This + * began from SUPERCOP's curve25519/neon2/scalarmult.s, but has subsequently been + * manually reworked for use in kernel space. + */ + +#include <linux/linkage.h> + +.text +.fpu neon +.arch armv7-a +.align 4 + +ENTRY(curve25519_neon) + push {r4-r11, lr} + mov ip, sp + sub r3, sp, #704 + and r3, r3, #0xfffffff0 + mov sp, r3 + movw r4, #0 + movw r5, #254 + vmov.i32 q0, #1 + vshr.u64 q1, q0, #7 + vshr.u64 q0, q0, #8 + vmov.i32 d4, #19 + vmov.i32 d5, #38 + add r6, sp, #480 + vst1.8 {d2-d3}, [r6, : 128]! + vst1.8 {d0-d1}, [r6, : 128]! + vst1.8 {d4-d5}, [r6, : 128] + add r6, r3, #0 + vmov.i32 q2, #0 + vst1.8 {d4-d5}, [r6, : 128]! + vst1.8 {d4-d5}, [r6, : 128]! + vst1.8 d4, [r6, : 64] + add r6, r3, #0 + movw r7, #960 + sub r7, r7, #2 + neg r7, r7 + sub r7, r7, r7, LSL #7 + str r7, [r6] + add r6, sp, #672 + vld1.8 {d4-d5}, [r1]! + vld1.8 {d6-d7}, [r1] + vst1.8 {d4-d5}, [r6, : 128]! 
+ vst1.8 {d6-d7}, [r6, : 128] + sub r1, r6, #16 + ldrb r6, [r1] + and r6, r6, #248 + strb r6, [r1] + ldrb r6, [r1, #31] + and r6, r6, #127 + orr r6, r6, #64 + strb r6, [r1, #31] + vmov.i64 q2, #0xffffffff + vshr.u64 q3, q2, #7 + vshr.u64 q2, q2, #6 + vld1.8 {d8}, [r2] + vld1.8 {d10}, [r2] + add r2, r2, #6 + vld1.8 {d12}, [r2] + vld1.8 {d14}, [r2] + add r2, r2, #6 + vld1.8 {d16}, [r2] + add r2, r2, #4 + vld1.8 {d18}, [r2] + vld1.8 {d20}, [r2] + add r2, r2, #6 + vld1.8 {d22}, [r2] + add r2, r2, #2 + vld1.8 {d24}, [r2] + vld1.8 {d26}, [r2] + vshr.u64 q5, q5, #26 + vshr.u64 q6, q6, #3 + vshr.u64 q7, q7, #29 + vshr.u64 q8, q8, #6 + vshr.u64 q10, q10, #25 + vshr.u64 q11, q11, #3 + vshr.u64 q12, q12, #12 + vshr.u64 q13, q13, #38 + vand q4, q4, q2 + vand q6, q6, q2 + vand q8, q8, q2 + vand q10, q10, q2 + vand q2, q12, q2 + vand q5, q5, q3 + vand q7, q7, q3 + vand q9, q9, q3 + vand q11, q11, q3 + vand q3, q13, q3 + add r2, r3, #48 + vadd.i64 q12, q4, q1 + vadd.i64 q13, q10, q1 + vshr.s64 q12, q12, #26 + vshr.s64 q13, q13, #26 + vadd.i64 q5, q5, q12 + vshl.i64 q12, q12, #26 + vadd.i64 q14, q5, q0 + vadd.i64 q11, q11, q13 + vshl.i64 q13, q13, #26 + vadd.i64 q15, q11, q0 + vsub.i64 q4, q4, q12 + vshr.s64 q12, q14, #25 + vsub.i64 q10, q10, q13 + vshr.s64 q13, q15, #25 + vadd.i64 q6, q6, q12 + vshl.i64 q12, q12, #25 + vadd.i64 q14, q6, q1 + vadd.i64 q2, q2, q13 + vsub.i64 q5, q5, q12 + vshr.s64 q12, q14, #26 + vshl.i64 q13, q13, #25 + vadd.i64 q14, q2, q1 + vadd.i64 q7, q7, q12 + vshl.i64 q12, q12, #26 + vadd.i64 q15, q7, q0 + vsub.i64 q11, q11, q13 + vshr.s64 q13, q14, #26 + vsub.i64 q6, q6, q12 + vshr.s64 q12, q15, #25 + vadd.i64 q3, q3, q13 + vshl.i64 q13, q13, #26 + vadd.i64 q14, q3, q0 + vadd.i64 q8, q8, q12 + vshl.i64 q12, q12, #25 + vadd.i64 q15, q8, q1 + add r2, r2, #8 + vsub.i64 q2, q2, q13 + vshr.s64 q13, q14, #25 + vsub.i64 q7, q7, q12 + vshr.s64 q12, q15, #26 + vadd.i64 q14, q13, q13 + vadd.i64 q9, q9, q12 + vtrn.32 d12, d14 + vshl.i64 q12, q12, #26 + vtrn.32 d13, d15 + vadd.i64 q0, q9, q0 + vadd.i64 q4, q4, q14 + vst1.8 d12, [r2, : 64]! + vshl.i64 q6, q13, #4 + vsub.i64 q7, q8, q12 + vshr.s64 q0, q0, #25 + vadd.i64 q4, q4, q6 + vadd.i64 q6, q10, q0 + vshl.i64 q0, q0, #25 + vadd.i64 q8, q6, q1 + vadd.i64 q4, q4, q13 + vshl.i64 q10, q13, #25 + vadd.i64 q1, q4, q1 + vsub.i64 q0, q9, q0 + vshr.s64 q8, q8, #26 + vsub.i64 q3, q3, q10 + vtrn.32 d14, d0 + vshr.s64 q1, q1, #26 + vtrn.32 d15, d1 + vadd.i64 q0, q11, q8 + vst1.8 d14, [r2, : 64] + vshl.i64 q7, q8, #26 + vadd.i64 q5, q5, q1 + vtrn.32 d4, d6 + vshl.i64 q1, q1, #26 + vtrn.32 d5, d7 + vsub.i64 q3, q6, q7 + add r2, r2, #16 + vsub.i64 q1, q4, q1 + vst1.8 d4, [r2, : 64] + vtrn.32 d6, d0 + vtrn.32 d7, d1 + sub r2, r2, #8 + vtrn.32 d2, d10 + vtrn.32 d3, d11 + vst1.8 d6, [r2, : 64] + sub r2, r2, #24 + vst1.8 d2, [r2, : 64] + add r2, r3, #96 + vmov.i32 q0, #0 + vmov.i64 d2, #0xff + vmov.i64 d3, #0 + vshr.u32 q1, q1, #7 + vst1.8 {d2-d3}, [r2, : 128]! + vst1.8 {d0-d1}, [r2, : 128]! + vst1.8 d0, [r2, : 64] + add r2, r3, #144 + vmov.i32 q0, #0 + vst1.8 {d0-d1}, [r2, : 128]! + vst1.8 {d0-d1}, [r2, : 128]! + vst1.8 d0, [r2, : 64] + add r2, r3, #240 + vmov.i32 q0, #0 + vmov.i64 d2, #0xff + vmov.i64 d3, #0 + vshr.u32 q1, q1, #7 + vst1.8 {d2-d3}, [r2, : 128]! + vst1.8 {d0-d1}, [r2, : 128]! + vst1.8 d0, [r2, : 64] + add r2, r3, #48 + add r6, r3, #192 + vld1.8 {d0-d1}, [r2, : 128]! + vld1.8 {d2-d3}, [r2, : 128]! + vld1.8 {d4}, [r2, : 64] + vst1.8 {d0-d1}, [r6, : 128]! + vst1.8 {d2-d3}, [r6, : 128]! 
+ vst1.8 d4, [r6, : 64] +.Lmainloop: + mov r2, r5, LSR #3 + and r6, r5, #7 + ldrb r2, [r1, r2] + mov r2, r2, LSR r6 + and r2, r2, #1 + str r5, [sp, #456] + eor r4, r4, r2 + str r2, [sp, #460] + neg r2, r4 + add r4, r3, #96 + add r5, r3, #192 + add r6, r3, #144 + vld1.8 {d8-d9}, [r4, : 128]! + add r7, r3, #240 + vld1.8 {d10-d11}, [r5, : 128]! + veor q6, q4, q5 + vld1.8 {d14-d15}, [r6, : 128]! + vdup.i32 q8, r2 + vld1.8 {d18-d19}, [r7, : 128]! + veor q10, q7, q9 + vld1.8 {d22-d23}, [r4, : 128]! + vand q6, q6, q8 + vld1.8 {d24-d25}, [r5, : 128]! + vand q10, q10, q8 + vld1.8 {d26-d27}, [r6, : 128]! + veor q4, q4, q6 + vld1.8 {d28-d29}, [r7, : 128]! + veor q5, q5, q6 + vld1.8 {d0}, [r4, : 64] + veor q6, q7, q10 + vld1.8 {d2}, [r5, : 64] + veor q7, q9, q10 + vld1.8 {d4}, [r6, : 64] + veor q9, q11, q12 + vld1.8 {d6}, [r7, : 64] + veor q10, q0, q1 + sub r2, r4, #32 + vand q9, q9, q8 + sub r4, r5, #32 + vand q10, q10, q8 + sub r5, r6, #32 + veor q11, q11, q9 + sub r6, r7, #32 + veor q0, q0, q10 + veor q9, q12, q9 + veor q1, q1, q10 + veor q10, q13, q14 + veor q12, q2, q3 + vand q10, q10, q8 + vand q8, q12, q8 + veor q12, q13, q10 + veor q2, q2, q8 + veor q10, q14, q10 + veor q3, q3, q8 + vadd.i32 q8, q4, q6 + vsub.i32 q4, q4, q6 + vst1.8 {d16-d17}, [r2, : 128]! + vadd.i32 q6, q11, q12 + vst1.8 {d8-d9}, [r5, : 128]! + vsub.i32 q4, q11, q12 + vst1.8 {d12-d13}, [r2, : 128]! + vadd.i32 q6, q0, q2 + vst1.8 {d8-d9}, [r5, : 128]! + vsub.i32 q0, q0, q2 + vst1.8 d12, [r2, : 64] + vadd.i32 q2, q5, q7 + vst1.8 d0, [r5, : 64] + vsub.i32 q0, q5, q7 + vst1.8 {d4-d5}, [r4, : 128]! + vadd.i32 q2, q9, q10 + vst1.8 {d0-d1}, [r6, : 128]! + vsub.i32 q0, q9, q10 + vst1.8 {d4-d5}, [r4, : 128]! + vadd.i32 q2, q1, q3 + vst1.8 {d0-d1}, [r6, : 128]! + vsub.i32 q0, q1, q3 + vst1.8 d4, [r4, : 64] + vst1.8 d0, [r6, : 64] + add r2, sp, #512 + add r4, r3, #96 + add r5, r3, #144 + vld1.8 {d0-d1}, [r2, : 128] + vld1.8 {d2-d3}, [r4, : 128]! + vld1.8 {d4-d5}, [r5, : 128]! + vzip.i32 q1, q2 + vld1.8 {d6-d7}, [r4, : 128]! + vld1.8 {d8-d9}, [r5, : 128]! 
+ vshl.i32 q5, q1, #1 + vzip.i32 q3, q4 + vshl.i32 q6, q2, #1 + vld1.8 {d14}, [r4, : 64] + vshl.i32 q8, q3, #1 + vld1.8 {d15}, [r5, : 64] + vshl.i32 q9, q4, #1 + vmul.i32 d21, d7, d1 + vtrn.32 d14, d15 + vmul.i32 q11, q4, q0 + vmul.i32 q0, q7, q0 + vmull.s32 q12, d2, d2 + vmlal.s32 q12, d11, d1 + vmlal.s32 q12, d12, d0 + vmlal.s32 q12, d13, d23 + vmlal.s32 q12, d16, d22 + vmlal.s32 q12, d7, d21 + vmull.s32 q10, d2, d11 + vmlal.s32 q10, d4, d1 + vmlal.s32 q10, d13, d0 + vmlal.s32 q10, d6, d23 + vmlal.s32 q10, d17, d22 + vmull.s32 q13, d10, d4 + vmlal.s32 q13, d11, d3 + vmlal.s32 q13, d13, d1 + vmlal.s32 q13, d16, d0 + vmlal.s32 q13, d17, d23 + vmlal.s32 q13, d8, d22 + vmull.s32 q1, d10, d5 + vmlal.s32 q1, d11, d4 + vmlal.s32 q1, d6, d1 + vmlal.s32 q1, d17, d0 + vmlal.s32 q1, d8, d23 + vmull.s32 q14, d10, d6 + vmlal.s32 q14, d11, d13 + vmlal.s32 q14, d4, d4 + vmlal.s32 q14, d17, d1 + vmlal.s32 q14, d18, d0 + vmlal.s32 q14, d9, d23 + vmull.s32 q11, d10, d7 + vmlal.s32 q11, d11, d6 + vmlal.s32 q11, d12, d5 + vmlal.s32 q11, d8, d1 + vmlal.s32 q11, d19, d0 + vmull.s32 q15, d10, d8 + vmlal.s32 q15, d11, d17 + vmlal.s32 q15, d12, d6 + vmlal.s32 q15, d13, d5 + vmlal.s32 q15, d19, d1 + vmlal.s32 q15, d14, d0 + vmull.s32 q2, d10, d9 + vmlal.s32 q2, d11, d8 + vmlal.s32 q2, d12, d7 + vmlal.s32 q2, d13, d6 + vmlal.s32 q2, d14, d1 + vmull.s32 q0, d15, d1 + vmlal.s32 q0, d10, d14 + vmlal.s32 q0, d11, d19 + vmlal.s32 q0, d12, d8 + vmlal.s32 q0, d13, d17 + vmlal.s32 q0, d6, d6 + add r2, sp, #480 + vld1.8 {d18-d19}, [r2, : 128]! + vmull.s32 q3, d16, d7 + vmlal.s32 q3, d10, d15 + vmlal.s32 q3, d11, d14 + vmlal.s32 q3, d12, d9 + vmlal.s32 q3, d13, d8 + vld1.8 {d8-d9}, [r2, : 128] + vadd.i64 q5, q12, q9 + vadd.i64 q6, q15, q9 + vshr.s64 q5, q5, #26 + vshr.s64 q6, q6, #26 + vadd.i64 q7, q10, q5 + vshl.i64 q5, q5, #26 + vadd.i64 q8, q7, q4 + vadd.i64 q2, q2, q6 + vshl.i64 q6, q6, #26 + vadd.i64 q10, q2, q4 + vsub.i64 q5, q12, q5 + vshr.s64 q8, q8, #25 + vsub.i64 q6, q15, q6 + vshr.s64 q10, q10, #25 + vadd.i64 q12, q13, q8 + vshl.i64 q8, q8, #25 + vadd.i64 q13, q12, q9 + vadd.i64 q0, q0, q10 + vsub.i64 q7, q7, q8 + vshr.s64 q8, q13, #26 + vshl.i64 q10, q10, #25 + vadd.i64 q13, q0, q9 + vadd.i64 q1, q1, q8 + vshl.i64 q8, q8, #26 + vadd.i64 q15, q1, q4 + vsub.i64 q2, q2, q10 + vshr.s64 q10, q13, #26 + vsub.i64 q8, q12, q8 + vshr.s64 q12, q15, #25 + vadd.i64 q3, q3, q10 + vshl.i64 q10, q10, #26 + vadd.i64 q13, q3, q4 + vadd.i64 q14, q14, q12 + add r2, r3, #288 + vshl.i64 q12, q12, #25 + add r4, r3, #336 + vadd.i64 q15, q14, q9 + add r2, r2, #8 + vsub.i64 q0, q0, q10 + add r4, r4, #8 + vshr.s64 q10, q13, #25 + vsub.i64 q1, q1, q12 + vshr.s64 q12, q15, #26 + vadd.i64 q13, q10, q10 + vadd.i64 q11, q11, q12 + vtrn.32 d16, d2 + vshl.i64 q12, q12, #26 + vtrn.32 d17, d3 + vadd.i64 q1, q11, q4 + vadd.i64 q4, q5, q13 + vst1.8 d16, [r2, : 64]! + vshl.i64 q5, q10, #4 + vst1.8 d17, [r4, : 64]! 
+ vsub.i64 q8, q14, q12 + vshr.s64 q1, q1, #25 + vadd.i64 q4, q4, q5 + vadd.i64 q5, q6, q1 + vshl.i64 q1, q1, #25 + vadd.i64 q6, q5, q9 + vadd.i64 q4, q4, q10 + vshl.i64 q10, q10, #25 + vadd.i64 q9, q4, q9 + vsub.i64 q1, q11, q1 + vshr.s64 q6, q6, #26 + vsub.i64 q3, q3, q10 + vtrn.32 d16, d2 + vshr.s64 q9, q9, #26 + vtrn.32 d17, d3 + vadd.i64 q1, q2, q6 + vst1.8 d16, [r2, : 64] + vshl.i64 q2, q6, #26 + vst1.8 d17, [r4, : 64] + vadd.i64 q6, q7, q9 + vtrn.32 d0, d6 + vshl.i64 q7, q9, #26 + vtrn.32 d1, d7 + vsub.i64 q2, q5, q2 + add r2, r2, #16 + vsub.i64 q3, q4, q7 + vst1.8 d0, [r2, : 64] + add r4, r4, #16 + vst1.8 d1, [r4, : 64] + vtrn.32 d4, d2 + vtrn.32 d5, d3 + sub r2, r2, #8 + sub r4, r4, #8 + vtrn.32 d6, d12 + vtrn.32 d7, d13 + vst1.8 d4, [r2, : 64] + vst1.8 d5, [r4, : 64] + sub r2, r2, #24 + sub r4, r4, #24 + vst1.8 d6, [r2, : 64] + vst1.8 d7, [r4, : 64] + add r2, r3, #240 + add r4, r3, #96 + vld1.8 {d0-d1}, [r4, : 128]! + vld1.8 {d2-d3}, [r4, : 128]! + vld1.8 {d4}, [r4, : 64] + add r4, r3, #144 + vld1.8 {d6-d7}, [r4, : 128]! + vtrn.32 q0, q3 + vld1.8 {d8-d9}, [r4, : 128]! + vshl.i32 q5, q0, #4 + vtrn.32 q1, q4 + vshl.i32 q6, q3, #4 + vadd.i32 q5, q5, q0 + vadd.i32 q6, q6, q3 + vshl.i32 q7, q1, #4 + vld1.8 {d5}, [r4, : 64] + vshl.i32 q8, q4, #4 + vtrn.32 d4, d5 + vadd.i32 q7, q7, q1 + vadd.i32 q8, q8, q4 + vld1.8 {d18-d19}, [r2, : 128]! + vshl.i32 q10, q2, #4 + vld1.8 {d22-d23}, [r2, : 128]! + vadd.i32 q10, q10, q2 + vld1.8 {d24}, [r2, : 64] + vadd.i32 q5, q5, q0 + add r2, r3, #192 + vld1.8 {d26-d27}, [r2, : 128]! + vadd.i32 q6, q6, q3 + vld1.8 {d28-d29}, [r2, : 128]! + vadd.i32 q8, q8, q4 + vld1.8 {d25}, [r2, : 64] + vadd.i32 q10, q10, q2 + vtrn.32 q9, q13 + vadd.i32 q7, q7, q1 + vadd.i32 q5, q5, q0 + vtrn.32 q11, q14 + vadd.i32 q6, q6, q3 + add r2, sp, #528 + vadd.i32 q10, q10, q2 + vtrn.32 d24, d25 + vst1.8 {d12-d13}, [r2, : 128]! + vshl.i32 q6, q13, #1 + vst1.8 {d20-d21}, [r2, : 128]! + vshl.i32 q10, q14, #1 + vst1.8 {d12-d13}, [r2, : 128]! + vshl.i32 q15, q12, #1 + vadd.i32 q8, q8, q4 + vext.32 d10, d31, d30, #0 + vadd.i32 q7, q7, q1 + vst1.8 {d16-d17}, [r2, : 128]! + vmull.s32 q8, d18, d5 + vmlal.s32 q8, d26, d4 + vmlal.s32 q8, d19, d9 + vmlal.s32 q8, d27, d3 + vmlal.s32 q8, d22, d8 + vmlal.s32 q8, d28, d2 + vmlal.s32 q8, d23, d7 + vmlal.s32 q8, d29, d1 + vmlal.s32 q8, d24, d6 + vmlal.s32 q8, d25, d0 + vst1.8 {d14-d15}, [r2, : 128]! + vmull.s32 q2, d18, d4 + vmlal.s32 q2, d12, d9 + vmlal.s32 q2, d13, d8 + vmlal.s32 q2, d19, d3 + vmlal.s32 q2, d22, d2 + vmlal.s32 q2, d23, d1 + vmlal.s32 q2, d24, d0 + vst1.8 {d20-d21}, [r2, : 128]! + vmull.s32 q7, d18, d9 + vmlal.s32 q7, d26, d3 + vmlal.s32 q7, d19, d8 + vmlal.s32 q7, d27, d2 + vmlal.s32 q7, d22, d7 + vmlal.s32 q7, d28, d1 + vmlal.s32 q7, d23, d6 + vmlal.s32 q7, d29, d0 + vst1.8 {d10-d11}, [r2, : 128]! 
+ vmull.s32 q5, d18, d3 + vmlal.s32 q5, d19, d2 + vmlal.s32 q5, d22, d1 + vmlal.s32 q5, d23, d0 + vmlal.s32 q5, d12, d8 + vst1.8 {d16-d17}, [r2, : 128] + vmull.s32 q4, d18, d8 + vmlal.s32 q4, d26, d2 + vmlal.s32 q4, d19, d7 + vmlal.s32 q4, d27, d1 + vmlal.s32 q4, d22, d6 + vmlal.s32 q4, d28, d0 + vmull.s32 q8, d18, d7 + vmlal.s32 q8, d26, d1 + vmlal.s32 q8, d19, d6 + vmlal.s32 q8, d27, d0 + add r2, sp, #544 + vld1.8 {d20-d21}, [r2, : 128] + vmlal.s32 q7, d24, d21 + vmlal.s32 q7, d25, d20 + vmlal.s32 q4, d23, d21 + vmlal.s32 q4, d29, d20 + vmlal.s32 q8, d22, d21 + vmlal.s32 q8, d28, d20 + vmlal.s32 q5, d24, d20 + vst1.8 {d14-d15}, [r2, : 128] + vmull.s32 q7, d18, d6 + vmlal.s32 q7, d26, d0 + add r2, sp, #624 + vld1.8 {d30-d31}, [r2, : 128] + vmlal.s32 q2, d30, d21 + vmlal.s32 q7, d19, d21 + vmlal.s32 q7, d27, d20 + add r2, sp, #592 + vld1.8 {d26-d27}, [r2, : 128] + vmlal.s32 q4, d25, d27 + vmlal.s32 q8, d29, d27 + vmlal.s32 q8, d25, d26 + vmlal.s32 q7, d28, d27 + vmlal.s32 q7, d29, d26 + add r2, sp, #576 + vld1.8 {d28-d29}, [r2, : 128] + vmlal.s32 q4, d24, d29 + vmlal.s32 q8, d23, d29 + vmlal.s32 q8, d24, d28 + vmlal.s32 q7, d22, d29 + vmlal.s32 q7, d23, d28 + vst1.8 {d8-d9}, [r2, : 128] + add r2, sp, #528 + vld1.8 {d8-d9}, [r2, : 128] + vmlal.s32 q7, d24, d9 + vmlal.s32 q7, d25, d31 + vmull.s32 q1, d18, d2 + vmlal.s32 q1, d19, d1 + vmlal.s32 q1, d22, d0 + vmlal.s32 q1, d24, d27 + vmlal.s32 q1, d23, d20 + vmlal.s32 q1, d12, d7 + vmlal.s32 q1, d13, d6 + vmull.s32 q6, d18, d1 + vmlal.s32 q6, d19, d0 + vmlal.s32 q6, d23, d27 + vmlal.s32 q6, d22, d20 + vmlal.s32 q6, d24, d26 + vmull.s32 q0, d18, d0 + vmlal.s32 q0, d22, d27 + vmlal.s32 q0, d23, d26 + vmlal.s32 q0, d24, d31 + vmlal.s32 q0, d19, d20 + add r2, sp, #608 + vld1.8 {d18-d19}, [r2, : 128] + vmlal.s32 q2, d18, d7 + vmlal.s32 q5, d18, d6 + vmlal.s32 q1, d18, d21 + vmlal.s32 q0, d18, d28 + vmlal.s32 q6, d18, d29 + vmlal.s32 q2, d19, d6 + vmlal.s32 q5, d19, d21 + vmlal.s32 q1, d19, d29 + vmlal.s32 q0, d19, d9 + vmlal.s32 q6, d19, d28 + add r2, sp, #560 + vld1.8 {d18-d19}, [r2, : 128] + add r2, sp, #480 + vld1.8 {d22-d23}, [r2, : 128] + vmlal.s32 q5, d19, d7 + vmlal.s32 q0, d18, d21 + vmlal.s32 q0, d19, d29 + vmlal.s32 q6, d18, d6 + add r2, sp, #496 + vld1.8 {d6-d7}, [r2, : 128] + vmlal.s32 q6, d19, d21 + add r2, sp, #544 + vld1.8 {d18-d19}, [r2, : 128] + vmlal.s32 q0, d30, d8 + add r2, sp, #640 + vld1.8 {d20-d21}, [r2, : 128] + vmlal.s32 q5, d30, d29 + add r2, sp, #576 + vld1.8 {d24-d25}, [r2, : 128] + vmlal.s32 q1, d30, d28 + vadd.i64 q13, q0, q11 + vadd.i64 q14, q5, q11 + vmlal.s32 q6, d30, d9 + vshr.s64 q4, q13, #26 + vshr.s64 q13, q14, #26 + vadd.i64 q7, q7, q4 + vshl.i64 q4, q4, #26 + vadd.i64 q14, q7, q3 + vadd.i64 q9, q9, q13 + vshl.i64 q13, q13, #26 + vadd.i64 q15, q9, q3 + vsub.i64 q0, q0, q4 + vshr.s64 q4, q14, #25 + vsub.i64 q5, q5, q13 + vshr.s64 q13, q15, #25 + vadd.i64 q6, q6, q4 + vshl.i64 q4, q4, #25 + vadd.i64 q14, q6, q11 + vadd.i64 q2, q2, q13 + vsub.i64 q4, q7, q4 + vshr.s64 q7, q14, #26 + vshl.i64 q13, q13, #25 + vadd.i64 q14, q2, q11 + vadd.i64 q8, q8, q7 + vshl.i64 q7, q7, #26 + vadd.i64 q15, q8, q3 + vsub.i64 q9, q9, q13 + vshr.s64 q13, q14, #26 + vsub.i64 q6, q6, q7 + vshr.s64 q7, q15, #25 + vadd.i64 q10, q10, q13 + vshl.i64 q13, q13, #26 + vadd.i64 q14, q10, q3 + vadd.i64 q1, q1, q7 + add r2, r3, #144 + vshl.i64 q7, q7, #25 + add r4, r3, #96 + vadd.i64 q15, q1, q11 + add r2, r2, #8 + vsub.i64 q2, q2, q13 + add r4, r4, #8 + vshr.s64 q13, q14, #25 + vsub.i64 q7, q8, q7 + vshr.s64 q8, q15, #26 + vadd.i64 q14, q13, q13 
+ vadd.i64 q12, q12, q8 + vtrn.32 d12, d14 + vshl.i64 q8, q8, #26 + vtrn.32 d13, d15 + vadd.i64 q3, q12, q3 + vadd.i64 q0, q0, q14 + vst1.8 d12, [r2, : 64]! + vshl.i64 q7, q13, #4 + vst1.8 d13, [r4, : 64]! + vsub.i64 q1, q1, q8 + vshr.s64 q3, q3, #25 + vadd.i64 q0, q0, q7 + vadd.i64 q5, q5, q3 + vshl.i64 q3, q3, #25 + vadd.i64 q6, q5, q11 + vadd.i64 q0, q0, q13 + vshl.i64 q7, q13, #25 + vadd.i64 q8, q0, q11 + vsub.i64 q3, q12, q3 + vshr.s64 q6, q6, #26 + vsub.i64 q7, q10, q7 + vtrn.32 d2, d6 + vshr.s64 q8, q8, #26 + vtrn.32 d3, d7 + vadd.i64 q3, q9, q6 + vst1.8 d2, [r2, : 64] + vshl.i64 q6, q6, #26 + vst1.8 d3, [r4, : 64] + vadd.i64 q1, q4, q8 + vtrn.32 d4, d14 + vshl.i64 q4, q8, #26 + vtrn.32 d5, d15 + vsub.i64 q5, q5, q6 + add r2, r2, #16 + vsub.i64 q0, q0, q4 + vst1.8 d4, [r2, : 64] + add r4, r4, #16 + vst1.8 d5, [r4, : 64] + vtrn.32 d10, d6 + vtrn.32 d11, d7 + sub r2, r2, #8 + sub r4, r4, #8 + vtrn.32 d0, d2 + vtrn.32 d1, d3 + vst1.8 d10, [r2, : 64] + vst1.8 d11, [r4, : 64] + sub r2, r2, #24 + sub r4, r4, #24 + vst1.8 d0, [r2, : 64] + vst1.8 d1, [r4, : 64] + add r2, r3, #288 + add r4, r3, #336 + vld1.8 {d0-d1}, [r2, : 128]! + vld1.8 {d2-d3}, [r4, : 128]! + vsub.i32 q0, q0, q1 + vld1.8 {d2-d3}, [r2, : 128]! + vld1.8 {d4-d5}, [r4, : 128]! + vsub.i32 q1, q1, q2 + add r5, r3, #240 + vld1.8 {d4}, [r2, : 64] + vld1.8 {d6}, [r4, : 64] + vsub.i32 q2, q2, q3 + vst1.8 {d0-d1}, [r5, : 128]! + vst1.8 {d2-d3}, [r5, : 128]! + vst1.8 d4, [r5, : 64] + add r2, r3, #144 + add r4, r3, #96 + add r5, r3, #144 + add r6, r3, #192 + vld1.8 {d0-d1}, [r2, : 128]! + vld1.8 {d2-d3}, [r4, : 128]! + vsub.i32 q2, q0, q1 + vadd.i32 q0, q0, q1 + vld1.8 {d2-d3}, [r2, : 128]! + vld1.8 {d6-d7}, [r4, : 128]! + vsub.i32 q4, q1, q3 + vadd.i32 q1, q1, q3 + vld1.8 {d6}, [r2, : 64] + vld1.8 {d10}, [r4, : 64] + vsub.i32 q6, q3, q5 + vadd.i32 q3, q3, q5 + vst1.8 {d4-d5}, [r5, : 128]! + vst1.8 {d0-d1}, [r6, : 128]! + vst1.8 {d8-d9}, [r5, : 128]! + vst1.8 {d2-d3}, [r6, : 128]! + vst1.8 d12, [r5, : 64] + vst1.8 d6, [r6, : 64] + add r2, r3, #0 + add r4, r3, #240 + vld1.8 {d0-d1}, [r4, : 128]! + vld1.8 {d2-d3}, [r4, : 128]! + vld1.8 {d4}, [r4, : 64] + add r4, r3, #336 + vld1.8 {d6-d7}, [r4, : 128]! + vtrn.32 q0, q3 + vld1.8 {d8-d9}, [r4, : 128]! + vshl.i32 q5, q0, #4 + vtrn.32 q1, q4 + vshl.i32 q6, q3, #4 + vadd.i32 q5, q5, q0 + vadd.i32 q6, q6, q3 + vshl.i32 q7, q1, #4 + vld1.8 {d5}, [r4, : 64] + vshl.i32 q8, q4, #4 + vtrn.32 d4, d5 + vadd.i32 q7, q7, q1 + vadd.i32 q8, q8, q4 + vld1.8 {d18-d19}, [r2, : 128]! + vshl.i32 q10, q2, #4 + vld1.8 {d22-d23}, [r2, : 128]! + vadd.i32 q10, q10, q2 + vld1.8 {d24}, [r2, : 64] + vadd.i32 q5, q5, q0 + add r2, r3, #288 + vld1.8 {d26-d27}, [r2, : 128]! + vadd.i32 q6, q6, q3 + vld1.8 {d28-d29}, [r2, : 128]! + vadd.i32 q8, q8, q4 + vld1.8 {d25}, [r2, : 64] + vadd.i32 q10, q10, q2 + vtrn.32 q9, q13 + vadd.i32 q7, q7, q1 + vadd.i32 q5, q5, q0 + vtrn.32 q11, q14 + vadd.i32 q6, q6, q3 + add r2, sp, #528 + vadd.i32 q10, q10, q2 + vtrn.32 d24, d25 + vst1.8 {d12-d13}, [r2, : 128]! + vshl.i32 q6, q13, #1 + vst1.8 {d20-d21}, [r2, : 128]! + vshl.i32 q10, q14, #1 + vst1.8 {d12-d13}, [r2, : 128]! + vshl.i32 q15, q12, #1 + vadd.i32 q8, q8, q4 + vext.32 d10, d31, d30, #0 + vadd.i32 q7, q7, q1 + vst1.8 {d16-d17}, [r2, : 128]! + vmull.s32 q8, d18, d5 + vmlal.s32 q8, d26, d4 + vmlal.s32 q8, d19, d9 + vmlal.s32 q8, d27, d3 + vmlal.s32 q8, d22, d8 + vmlal.s32 q8, d28, d2 + vmlal.s32 q8, d23, d7 + vmlal.s32 q8, d29, d1 + vmlal.s32 q8, d24, d6 + vmlal.s32 q8, d25, d0 + vst1.8 {d14-d15}, [r2, : 128]! 
+ vmull.s32 q2, d18, d4 + vmlal.s32 q2, d12, d9 + vmlal.s32 q2, d13, d8 + vmlal.s32 q2, d19, d3 + vmlal.s32 q2, d22, d2 + vmlal.s32 q2, d23, d1 + vmlal.s32 q2, d24, d0 + vst1.8 {d20-d21}, [r2, : 128]! + vmull.s32 q7, d18, d9 + vmlal.s32 q7, d26, d3 + vmlal.s32 q7, d19, d8 + vmlal.s32 q7, d27, d2 + vmlal.s32 q7, d22, d7 + vmlal.s32 q7, d28, d1 + vmlal.s32 q7, d23, d6 + vmlal.s32 q7, d29, d0 + vst1.8 {d10-d11}, [r2, : 128]! + vmull.s32 q5, d18, d3 + vmlal.s32 q5, d19, d2 + vmlal.s32 q5, d22, d1 + vmlal.s32 q5, d23, d0 + vmlal.s32 q5, d12, d8 + vst1.8 {d16-d17}, [r2, : 128]! + vmull.s32 q4, d18, d8 + vmlal.s32 q4, d26, d2 + vmlal.s32 q4, d19, d7 + vmlal.s32 q4, d27, d1 + vmlal.s32 q4, d22, d6 + vmlal.s32 q4, d28, d0 + vmull.s32 q8, d18, d7 + vmlal.s32 q8, d26, d1 + vmlal.s32 q8, d19, d6 + vmlal.s32 q8, d27, d0 + add r2, sp, #544 + vld1.8 {d20-d21}, [r2, : 128] + vmlal.s32 q7, d24, d21 + vmlal.s32 q7, d25, d20 + vmlal.s32 q4, d23, d21 + vmlal.s32 q4, d29, d20 + vmlal.s32 q8, d22, d21 + vmlal.s32 q8, d28, d20 + vmlal.s32 q5, d24, d20 + vst1.8 {d14-d15}, [r2, : 128] + vmull.s32 q7, d18, d6 + vmlal.s32 q7, d26, d0 + add r2, sp, #624 + vld1.8 {d30-d31}, [r2, : 128] + vmlal.s32 q2, d30, d21 + vmlal.s32 q7, d19, d21 + vmlal.s32 q7, d27, d20 + add r2, sp, #592 + vld1.8 {d26-d27}, [r2, : 128] + vmlal.s32 q4, d25, d27 + vmlal.s32 q8, d29, d27 + vmlal.s32 q8, d25, d26 + vmlal.s32 q7, d28, d27 + vmlal.s32 q7, d29, d26 + add r2, sp, #576 + vld1.8 {d28-d29}, [r2, : 128] + vmlal.s32 q4, d24, d29 + vmlal.s32 q8, d23, d29 + vmlal.s32 q8, d24, d28 + vmlal.s32 q7, d22, d29 + vmlal.s32 q7, d23, d28 + vst1.8 {d8-d9}, [r2, : 128] + add r2, sp, #528 + vld1.8 {d8-d9}, [r2, : 128] + vmlal.s32 q7, d24, d9 + vmlal.s32 q7, d25, d31 + vmull.s32 q1, d18, d2 + vmlal.s32 q1, d19, d1 + vmlal.s32 q1, d22, d0 + vmlal.s32 q1, d24, d27 + vmlal.s32 q1, d23, d20 + vmlal.s32 q1, d12, d7 + vmlal.s32 q1, d13, d6 + vmull.s32 q6, d18, d1 + vmlal.s32 q6, d19, d0 + vmlal.s32 q6, d23, d27 + vmlal.s32 q6, d22, d20 + vmlal.s32 q6, d24, d26 + vmull.s32 q0, d18, d0 + vmlal.s32 q0, d22, d27 + vmlal.s32 q0, d23, d26 + vmlal.s32 q0, d24, d31 + vmlal.s32 q0, d19, d20 + add r2, sp, #608 + vld1.8 {d18-d19}, [r2, : 128] + vmlal.s32 q2, d18, d7 + vmlal.s32 q5, d18, d6 + vmlal.s32 q1, d18, d21 + vmlal.s32 q0, d18, d28 + vmlal.s32 q6, d18, d29 + vmlal.s32 q2, d19, d6 + vmlal.s32 q5, d19, d21 + vmlal.s32 q1, d19, d29 + vmlal.s32 q0, d19, d9 + vmlal.s32 q6, d19, d28 + add r2, sp, #560 + vld1.8 {d18-d19}, [r2, : 128] + add r2, sp, #480 + vld1.8 {d22-d23}, [r2, : 128] + vmlal.s32 q5, d19, d7 + vmlal.s32 q0, d18, d21 + vmlal.s32 q0, d19, d29 + vmlal.s32 q6, d18, d6 + add r2, sp, #496 + vld1.8 {d6-d7}, [r2, : 128] + vmlal.s32 q6, d19, d21 + add r2, sp, #544 + vld1.8 {d18-d19}, [r2, : 128] + vmlal.s32 q0, d30, d8 + add r2, sp, #640 + vld1.8 {d20-d21}, [r2, : 128] + vmlal.s32 q5, d30, d29 + add r2, sp, #576 + vld1.8 {d24-d25}, [r2, : 128] + vmlal.s32 q1, d30, d28 + vadd.i64 q13, q0, q11 + vadd.i64 q14, q5, q11 + vmlal.s32 q6, d30, d9 + vshr.s64 q4, q13, #26 + vshr.s64 q13, q14, #26 + vadd.i64 q7, q7, q4 + vshl.i64 q4, q4, #26 + vadd.i64 q14, q7, q3 + vadd.i64 q9, q9, q13 + vshl.i64 q13, q13, #26 + vadd.i64 q15, q9, q3 + vsub.i64 q0, q0, q4 + vshr.s64 q4, q14, #25 + vsub.i64 q5, q5, q13 + vshr.s64 q13, q15, #25 + vadd.i64 q6, q6, q4 + vshl.i64 q4, q4, #25 + vadd.i64 q14, q6, q11 + vadd.i64 q2, q2, q13 + vsub.i64 q4, q7, q4 + vshr.s64 q7, q14, #26 + vshl.i64 q13, q13, #25 + vadd.i64 q14, q2, q11 + vadd.i64 q8, q8, q7 + vshl.i64 q7, q7, #26 + vadd.i64 q15, q8, q3 
+ vsub.i64 q9, q9, q13 + vshr.s64 q13, q14, #26 + vsub.i64 q6, q6, q7 + vshr.s64 q7, q15, #25 + vadd.i64 q10, q10, q13 + vshl.i64 q13, q13, #26 + vadd.i64 q14, q10, q3 + vadd.i64 q1, q1, q7 + add r2, r3, #288 + vshl.i64 q7, q7, #25 + add r4, r3, #96 + vadd.i64 q15, q1, q11 + add r2, r2, #8 + vsub.i64 q2, q2, q13 + add r4, r4, #8 + vshr.s64 q13, q14, #25 + vsub.i64 q7, q8, q7 + vshr.s64 q8, q15, #26 + vadd.i64 q14, q13, q13 + vadd.i64 q12, q12, q8 + vtrn.32 d12, d14 + vshl.i64 q8, q8, #26 + vtrn.32 d13, d15 + vadd.i64 q3, q12, q3 + vadd.i64 q0, q0, q14 + vst1.8 d12, [r2, : 64]! + vshl.i64 q7, q13, #4 + vst1.8 d13, [r4, : 64]! + vsub.i64 q1, q1, q8 + vshr.s64 q3, q3, #25 + vadd.i64 q0, q0, q7 + vadd.i64 q5, q5, q3 + vshl.i64 q3, q3, #25 + vadd.i64 q6, q5, q11 + vadd.i64 q0, q0, q13 + vshl.i64 q7, q13, #25 + vadd.i64 q8, q0, q11 + vsub.i64 q3, q12, q3 + vshr.s64 q6, q6, #26 + vsub.i64 q7, q10, q7 + vtrn.32 d2, d6 + vshr.s64 q8, q8, #26 + vtrn.32 d3, d7 + vadd.i64 q3, q9, q6 + vst1.8 d2, [r2, : 64] + vshl.i64 q6, q6, #26 + vst1.8 d3, [r4, : 64] + vadd.i64 q1, q4, q8 + vtrn.32 d4, d14 + vshl.i64 q4, q8, #26 + vtrn.32 d5, d15 + vsub.i64 q5, q5, q6 + add r2, r2, #16 + vsub.i64 q0, q0, q4 + vst1.8 d4, [r2, : 64] + add r4, r4, #16 + vst1.8 d5, [r4, : 64] + vtrn.32 d10, d6 + vtrn.32 d11, d7 + sub r2, r2, #8 + sub r4, r4, #8 + vtrn.32 d0, d2 + vtrn.32 d1, d3 + vst1.8 d10, [r2, : 64] + vst1.8 d11, [r4, : 64] + sub r2, r2, #24 + sub r4, r4, #24 + vst1.8 d0, [r2, : 64] + vst1.8 d1, [r4, : 64] + add r2, sp, #512 + add r4, r3, #144 + add r5, r3, #192 + vld1.8 {d0-d1}, [r2, : 128] + vld1.8 {d2-d3}, [r4, : 128]! + vld1.8 {d4-d5}, [r5, : 128]! + vzip.i32 q1, q2 + vld1.8 {d6-d7}, [r4, : 128]! + vld1.8 {d8-d9}, [r5, : 128]! + vshl.i32 q5, q1, #1 + vzip.i32 q3, q4 + vshl.i32 q6, q2, #1 + vld1.8 {d14}, [r4, : 64] + vshl.i32 q8, q3, #1 + vld1.8 {d15}, [r5, : 64] + vshl.i32 q9, q4, #1 + vmul.i32 d21, d7, d1 + vtrn.32 d14, d15 + vmul.i32 q11, q4, q0 + vmul.i32 q0, q7, q0 + vmull.s32 q12, d2, d2 + vmlal.s32 q12, d11, d1 + vmlal.s32 q12, d12, d0 + vmlal.s32 q12, d13, d23 + vmlal.s32 q12, d16, d22 + vmlal.s32 q12, d7, d21 + vmull.s32 q10, d2, d11 + vmlal.s32 q10, d4, d1 + vmlal.s32 q10, d13, d0 + vmlal.s32 q10, d6, d23 + vmlal.s32 q10, d17, d22 + vmull.s32 q13, d10, d4 + vmlal.s32 q13, d11, d3 + vmlal.s32 q13, d13, d1 + vmlal.s32 q13, d16, d0 + vmlal.s32 q13, d17, d23 + vmlal.s32 q13, d8, d22 + vmull.s32 q1, d10, d5 + vmlal.s32 q1, d11, d4 + vmlal.s32 q1, d6, d1 + vmlal.s32 q1, d17, d0 + vmlal.s32 q1, d8, d23 + vmull.s32 q14, d10, d6 + vmlal.s32 q14, d11, d13 + vmlal.s32 q14, d4, d4 + vmlal.s32 q14, d17, d1 + vmlal.s32 q14, d18, d0 + vmlal.s32 q14, d9, d23 + vmull.s32 q11, d10, d7 + vmlal.s32 q11, d11, d6 + vmlal.s32 q11, d12, d5 + vmlal.s32 q11, d8, d1 + vmlal.s32 q11, d19, d0 + vmull.s32 q15, d10, d8 + vmlal.s32 q15, d11, d17 + vmlal.s32 q15, d12, d6 + vmlal.s32 q15, d13, d5 + vmlal.s32 q15, d19, d1 + vmlal.s32 q15, d14, d0 + vmull.s32 q2, d10, d9 + vmlal.s32 q2, d11, d8 + vmlal.s32 q2, d12, d7 + vmlal.s32 q2, d13, d6 + vmlal.s32 q2, d14, d1 + vmull.s32 q0, d15, d1 + vmlal.s32 q0, d10, d14 + vmlal.s32 q0, d11, d19 + vmlal.s32 q0, d12, d8 + vmlal.s32 q0, d13, d17 + vmlal.s32 q0, d6, d6 + add r2, sp, #480 + vld1.8 {d18-d19}, [r2, : 128]! 
+ vmull.s32 q3, d16, d7 + vmlal.s32 q3, d10, d15 + vmlal.s32 q3, d11, d14 + vmlal.s32 q3, d12, d9 + vmlal.s32 q3, d13, d8 + vld1.8 {d8-d9}, [r2, : 128] + vadd.i64 q5, q12, q9 + vadd.i64 q6, q15, q9 + vshr.s64 q5, q5, #26 + vshr.s64 q6, q6, #26 + vadd.i64 q7, q10, q5 + vshl.i64 q5, q5, #26 + vadd.i64 q8, q7, q4 + vadd.i64 q2, q2, q6 + vshl.i64 q6, q6, #26 + vadd.i64 q10, q2, q4 + vsub.i64 q5, q12, q5 + vshr.s64 q8, q8, #25 + vsub.i64 q6, q15, q6 + vshr.s64 q10, q10, #25 + vadd.i64 q12, q13, q8 + vshl.i64 q8, q8, #25 + vadd.i64 q13, q12, q9 + vadd.i64 q0, q0, q10 + vsub.i64 q7, q7, q8 + vshr.s64 q8, q13, #26 + vshl.i64 q10, q10, #25 + vadd.i64 q13, q0, q9 + vadd.i64 q1, q1, q8 + vshl.i64 q8, q8, #26 + vadd.i64 q15, q1, q4 + vsub.i64 q2, q2, q10 + vshr.s64 q10, q13, #26 + vsub.i64 q8, q12, q8 + vshr.s64 q12, q15, #25 + vadd.i64 q3, q3, q10 + vshl.i64 q10, q10, #26 + vadd.i64 q13, q3, q4 + vadd.i64 q14, q14, q12 + add r2, r3, #144 + vshl.i64 q12, q12, #25 + add r4, r3, #192 + vadd.i64 q15, q14, q9 + add r2, r2, #8 + vsub.i64 q0, q0, q10 + add r4, r4, #8 + vshr.s64 q10, q13, #25 + vsub.i64 q1, q1, q12 + vshr.s64 q12, q15, #26 + vadd.i64 q13, q10, q10 + vadd.i64 q11, q11, q12 + vtrn.32 d16, d2 + vshl.i64 q12, q12, #26 + vtrn.32 d17, d3 + vadd.i64 q1, q11, q4 + vadd.i64 q4, q5, q13 + vst1.8 d16, [r2, : 64]! + vshl.i64 q5, q10, #4 + vst1.8 d17, [r4, : 64]! + vsub.i64 q8, q14, q12 + vshr.s64 q1, q1, #25 + vadd.i64 q4, q4, q5 + vadd.i64 q5, q6, q1 + vshl.i64 q1, q1, #25 + vadd.i64 q6, q5, q9 + vadd.i64 q4, q4, q10 + vshl.i64 q10, q10, #25 + vadd.i64 q9, q4, q9 + vsub.i64 q1, q11, q1 + vshr.s64 q6, q6, #26 + vsub.i64 q3, q3, q10 + vtrn.32 d16, d2 + vshr.s64 q9, q9, #26 + vtrn.32 d17, d3 + vadd.i64 q1, q2, q6 + vst1.8 d16, [r2, : 64] + vshl.i64 q2, q6, #26 + vst1.8 d17, [r4, : 64] + vadd.i64 q6, q7, q9 + vtrn.32 d0, d6 + vshl.i64 q7, q9, #26 + vtrn.32 d1, d7 + vsub.i64 q2, q5, q2 + add r2, r2, #16 + vsub.i64 q3, q4, q7 + vst1.8 d0, [r2, : 64] + add r4, r4, #16 + vst1.8 d1, [r4, : 64] + vtrn.32 d4, d2 + vtrn.32 d5, d3 + sub r2, r2, #8 + sub r4, r4, #8 + vtrn.32 d6, d12 + vtrn.32 d7, d13 + vst1.8 d4, [r2, : 64] + vst1.8 d5, [r4, : 64] + sub r2, r2, #24 + sub r4, r4, #24 + vst1.8 d6, [r2, : 64] + vst1.8 d7, [r4, : 64] + add r2, r3, #336 + add r4, r3, #288 + vld1.8 {d0-d1}, [r2, : 128]! + vld1.8 {d2-d3}, [r4, : 128]! + vadd.i32 q0, q0, q1 + vld1.8 {d2-d3}, [r2, : 128]! + vld1.8 {d4-d5}, [r4, : 128]! + vadd.i32 q1, q1, q2 + add r5, r3, #288 + vld1.8 {d4}, [r2, : 64] + vld1.8 {d6}, [r4, : 64] + vadd.i32 q2, q2, q3 + vst1.8 {d0-d1}, [r5, : 128]! + vst1.8 {d2-d3}, [r5, : 128]! + vst1.8 d4, [r5, : 64] + add r2, r3, #48 + add r4, r3, #144 + vld1.8 {d0-d1}, [r4, : 128]! + vld1.8 {d2-d3}, [r4, : 128]! + vld1.8 {d4}, [r4, : 64] + add r4, r3, #288 + vld1.8 {d6-d7}, [r4, : 128]! + vtrn.32 q0, q3 + vld1.8 {d8-d9}, [r4, : 128]! + vshl.i32 q5, q0, #4 + vtrn.32 q1, q4 + vshl.i32 q6, q3, #4 + vadd.i32 q5, q5, q0 + vadd.i32 q6, q6, q3 + vshl.i32 q7, q1, #4 + vld1.8 {d5}, [r4, : 64] + vshl.i32 q8, q4, #4 + vtrn.32 d4, d5 + vadd.i32 q7, q7, q1 + vadd.i32 q8, q8, q4 + vld1.8 {d18-d19}, [r2, : 128]! + vshl.i32 q10, q2, #4 + vld1.8 {d22-d23}, [r2, : 128]! + vadd.i32 q10, q10, q2 + vld1.8 {d24}, [r2, : 64] + vadd.i32 q5, q5, q0 + add r2, r3, #240 + vld1.8 {d26-d27}, [r2, : 128]! + vadd.i32 q6, q6, q3 + vld1.8 {d28-d29}, [r2, : 128]! 
+ vadd.i32 q8, q8, q4 + vld1.8 {d25}, [r2, : 64] + vadd.i32 q10, q10, q2 + vtrn.32 q9, q13 + vadd.i32 q7, q7, q1 + vadd.i32 q5, q5, q0 + vtrn.32 q11, q14 + vadd.i32 q6, q6, q3 + add r2, sp, #528 + vadd.i32 q10, q10, q2 + vtrn.32 d24, d25 + vst1.8 {d12-d13}, [r2, : 128]! + vshl.i32 q6, q13, #1 + vst1.8 {d20-d21}, [r2, : 128]! + vshl.i32 q10, q14, #1 + vst1.8 {d12-d13}, [r2, : 128]! + vshl.i32 q15, q12, #1 + vadd.i32 q8, q8, q4 + vext.32 d10, d31, d30, #0 + vadd.i32 q7, q7, q1 + vst1.8 {d16-d17}, [r2, : 128]! + vmull.s32 q8, d18, d5 + vmlal.s32 q8, d26, d4 + vmlal.s32 q8, d19, d9 + vmlal.s32 q8, d27, d3 + vmlal.s32 q8, d22, d8 + vmlal.s32 q8, d28, d2 + vmlal.s32 q8, d23, d7 + vmlal.s32 q8, d29, d1 + vmlal.s32 q8, d24, d6 + vmlal.s32 q8, d25, d0 + vst1.8 {d14-d15}, [r2, : 128]! + vmull.s32 q2, d18, d4 + vmlal.s32 q2, d12, d9 + vmlal.s32 q2, d13, d8 + vmlal.s32 q2, d19, d3 + vmlal.s32 q2, d22, d2 + vmlal.s32 q2, d23, d1 + vmlal.s32 q2, d24, d0 + vst1.8 {d20-d21}, [r2, : 128]! + vmull.s32 q7, d18, d9 + vmlal.s32 q7, d26, d3 + vmlal.s32 q7, d19, d8 + vmlal.s32 q7, d27, d2 + vmlal.s32 q7, d22, d7 + vmlal.s32 q7, d28, d1 + vmlal.s32 q7, d23, d6 + vmlal.s32 q7, d29, d0 + vst1.8 {d10-d11}, [r2, : 128]! + vmull.s32 q5, d18, d3 + vmlal.s32 q5, d19, d2 + vmlal.s32 q5, d22, d1 + vmlal.s32 q5, d23, d0 + vmlal.s32 q5, d12, d8 + vst1.8 {d16-d17}, [r2, : 128]! + vmull.s32 q4, d18, d8 + vmlal.s32 q4, d26, d2 + vmlal.s32 q4, d19, d7 + vmlal.s32 q4, d27, d1 + vmlal.s32 q4, d22, d6 + vmlal.s32 q4, d28, d0 + vmull.s32 q8, d18, d7 + vmlal.s32 q8, d26, d1 + vmlal.s32 q8, d19, d6 + vmlal.s32 q8, d27, d0 + add r2, sp, #544 + vld1.8 {d20-d21}, [r2, : 128] + vmlal.s32 q7, d24, d21 + vmlal.s32 q7, d25, d20 + vmlal.s32 q4, d23, d21 + vmlal.s32 q4, d29, d20 + vmlal.s32 q8, d22, d21 + vmlal.s32 q8, d28, d20 + vmlal.s32 q5, d24, d20 + vst1.8 {d14-d15}, [r2, : 128] + vmull.s32 q7, d18, d6 + vmlal.s32 q7, d26, d0 + add r2, sp, #624 + vld1.8 {d30-d31}, [r2, : 128] + vmlal.s32 q2, d30, d21 + vmlal.s32 q7, d19, d21 + vmlal.s32 q7, d27, d20 + add r2, sp, #592 + vld1.8 {d26-d27}, [r2, : 128] + vmlal.s32 q4, d25, d27 + vmlal.s32 q8, d29, d27 + vmlal.s32 q8, d25, d26 + vmlal.s32 q7, d28, d27 + vmlal.s32 q7, d29, d26 + add r2, sp, #576 + vld1.8 {d28-d29}, [r2, : 128] + vmlal.s32 q4, d24, d29 + vmlal.s32 q8, d23, d29 + vmlal.s32 q8, d24, d28 + vmlal.s32 q7, d22, d29 + vmlal.s32 q7, d23, d28 + vst1.8 {d8-d9}, [r2, : 128] + add r2, sp, #528 + vld1.8 {d8-d9}, [r2, : 128] + vmlal.s32 q7, d24, d9 + vmlal.s32 q7, d25, d31 + vmull.s32 q1, d18, d2 + vmlal.s32 q1, d19, d1 + vmlal.s32 q1, d22, d0 + vmlal.s32 q1, d24, d27 + vmlal.s32 q1, d23, d20 + vmlal.s32 q1, d12, d7 + vmlal.s32 q1, d13, d6 + vmull.s32 q6, d18, d1 + vmlal.s32 q6, d19, d0 + vmlal.s32 q6, d23, d27 + vmlal.s32 q6, d22, d20 + vmlal.s32 q6, d24, d26 + vmull.s32 q0, d18, d0 + vmlal.s32 q0, d22, d27 + vmlal.s32 q0, d23, d26 + vmlal.s32 q0, d24, d31 + vmlal.s32 q0, d19, d20 + add r2, sp, #608 + vld1.8 {d18-d19}, [r2, : 128] + vmlal.s32 q2, d18, d7 + vmlal.s32 q5, d18, d6 + vmlal.s32 q1, d18, d21 + vmlal.s32 q0, d18, d28 + vmlal.s32 q6, d18, d29 + vmlal.s32 q2, d19, d6 + vmlal.s32 q5, d19, d21 + vmlal.s32 q1, d19, d29 + vmlal.s32 q0, d19, d9 + vmlal.s32 q6, d19, d28 + add r2, sp, #560 + vld1.8 {d18-d19}, [r2, : 128] + add r2, sp, #480 + vld1.8 {d22-d23}, [r2, : 128] + vmlal.s32 q5, d19, d7 + vmlal.s32 q0, d18, d21 + vmlal.s32 q0, d19, d29 + vmlal.s32 q6, d18, d6 + add r2, sp, #496 + vld1.8 {d6-d7}, [r2, : 128] + vmlal.s32 q6, d19, d21 + add r2, sp, #544 + vld1.8 {d18-d19}, [r2, : 
128] + vmlal.s32 q0, d30, d8 + add r2, sp, #640 + vld1.8 {d20-d21}, [r2, : 128] + vmlal.s32 q5, d30, d29 + add r2, sp, #576 + vld1.8 {d24-d25}, [r2, : 128] + vmlal.s32 q1, d30, d28 + vadd.i64 q13, q0, q11 + vadd.i64 q14, q5, q11 + vmlal.s32 q6, d30, d9 + vshr.s64 q4, q13, #26 + vshr.s64 q13, q14, #26 + vadd.i64 q7, q7, q4 + vshl.i64 q4, q4, #26 + vadd.i64 q14, q7, q3 + vadd.i64 q9, q9, q13 + vshl.i64 q13, q13, #26 + vadd.i64 q15, q9, q3 + vsub.i64 q0, q0, q4 + vshr.s64 q4, q14, #25 + vsub.i64 q5, q5, q13 + vshr.s64 q13, q15, #25 + vadd.i64 q6, q6, q4 + vshl.i64 q4, q4, #25 + vadd.i64 q14, q6, q11 + vadd.i64 q2, q2, q13 + vsub.i64 q4, q7, q4 + vshr.s64 q7, q14, #26 + vshl.i64 q13, q13, #25 + vadd.i64 q14, q2, q11 + vadd.i64 q8, q8, q7 + vshl.i64 q7, q7, #26 + vadd.i64 q15, q8, q3 + vsub.i64 q9, q9, q13 + vshr.s64 q13, q14, #26 + vsub.i64 q6, q6, q7 + vshr.s64 q7, q15, #25 + vadd.i64 q10, q10, q13 + vshl.i64 q13, q13, #26 + vadd.i64 q14, q10, q3 + vadd.i64 q1, q1, q7 + add r2, r3, #240 + vshl.i64 q7, q7, #25 + add r4, r3, #144 + vadd.i64 q15, q1, q11 + add r2, r2, #8 + vsub.i64 q2, q2, q13 + add r4, r4, #8 + vshr.s64 q13, q14, #25 + vsub.i64 q7, q8, q7 + vshr.s64 q8, q15, #26 + vadd.i64 q14, q13, q13 + vadd.i64 q12, q12, q8 + vtrn.32 d12, d14 + vshl.i64 q8, q8, #26 + vtrn.32 d13, d15 + vadd.i64 q3, q12, q3 + vadd.i64 q0, q0, q14 + vst1.8 d12, [r2, : 64]! + vshl.i64 q7, q13, #4 + vst1.8 d13, [r4, : 64]! + vsub.i64 q1, q1, q8 + vshr.s64 q3, q3, #25 + vadd.i64 q0, q0, q7 + vadd.i64 q5, q5, q3 + vshl.i64 q3, q3, #25 + vadd.i64 q6, q5, q11 + vadd.i64 q0, q0, q13 + vshl.i64 q7, q13, #25 + vadd.i64 q8, q0, q11 + vsub.i64 q3, q12, q3 + vshr.s64 q6, q6, #26 + vsub.i64 q7, q10, q7 + vtrn.32 d2, d6 + vshr.s64 q8, q8, #26 + vtrn.32 d3, d7 + vadd.i64 q3, q9, q6 + vst1.8 d2, [r2, : 64] + vshl.i64 q6, q6, #26 + vst1.8 d3, [r4, : 64] + vadd.i64 q1, q4, q8 + vtrn.32 d4, d14 + vshl.i64 q4, q8, #26 + vtrn.32 d5, d15 + vsub.i64 q5, q5, q6 + add r2, r2, #16 + vsub.i64 q0, q0, q4 + vst1.8 d4, [r2, : 64] + add r4, r4, #16 + vst1.8 d5, [r4, : 64] + vtrn.32 d10, d6 + vtrn.32 d11, d7 + sub r2, r2, #8 + sub r4, r4, #8 + vtrn.32 d0, d2 + vtrn.32 d1, d3 + vst1.8 d10, [r2, : 64] + vst1.8 d11, [r4, : 64] + sub r2, r2, #24 + sub r4, r4, #24 + vst1.8 d0, [r2, : 64] + vst1.8 d1, [r4, : 64] + ldr r2, [sp, #456] + ldr r4, [sp, #460] + subs r5, r2, #1 + bge .Lmainloop + add r1, r3, #144 + add r2, r3, #336 + vld1.8 {d0-d1}, [r1, : 128]! + vld1.8 {d2-d3}, [r1, : 128]! + vld1.8 {d4}, [r1, : 64] + vst1.8 {d0-d1}, [r2, : 128]! + vst1.8 {d2-d3}, [r2, : 128]! + vst1.8 d4, [r2, : 64] + movw r1, #0 +.Linvertloop: + add r2, r3, #144 + movw r4, #0 + movw r5, #2 + cmp r1, #1 + moveq r5, #1 + addeq r2, r3, #336 + addeq r4, r3, #48 + cmp r1, #2 + moveq r5, #1 + addeq r2, r3, #48 + cmp r1, #3 + moveq r5, #5 + addeq r4, r3, #336 + cmp r1, #4 + moveq r5, #10 + cmp r1, #5 + moveq r5, #20 + cmp r1, #6 + moveq r5, #10 + addeq r2, r3, #336 + addeq r4, r3, #336 + cmp r1, #7 + moveq r5, #50 + cmp r1, #8 + moveq r5, #100 + cmp r1, #9 + moveq r5, #50 + addeq r2, r3, #336 + cmp r1, #10 + moveq r5, #5 + addeq r2, r3, #48 + cmp r1, #11 + moveq r5, #0 + addeq r2, r3, #96 + add r6, r3, #144 + add r7, r3, #288 + vld1.8 {d0-d1}, [r6, : 128]! + vld1.8 {d2-d3}, [r6, : 128]! + vld1.8 {d4}, [r6, : 64] + vst1.8 {d0-d1}, [r7, : 128]! + vst1.8 {d2-d3}, [r7, : 128]! 
+ vst1.8 d4, [r7, : 64] + cmp r5, #0 + beq .Lskipsquaringloop +.Lsquaringloop: + add r6, r3, #288 + add r7, r3, #288 + add r8, r3, #288 + vmov.i32 q0, #19 + vmov.i32 q1, #0 + vmov.i32 q2, #1 + vzip.i32 q1, q2 + vld1.8 {d4-d5}, [r7, : 128]! + vld1.8 {d6-d7}, [r7, : 128]! + vld1.8 {d9}, [r7, : 64] + vld1.8 {d10-d11}, [r6, : 128]! + add r7, sp, #384 + vld1.8 {d12-d13}, [r6, : 128]! + vmul.i32 q7, q2, q0 + vld1.8 {d8}, [r6, : 64] + vext.32 d17, d11, d10, #1 + vmul.i32 q9, q3, q0 + vext.32 d16, d10, d8, #1 + vshl.u32 q10, q5, q1 + vext.32 d22, d14, d4, #1 + vext.32 d24, d18, d6, #1 + vshl.u32 q13, q6, q1 + vshl.u32 d28, d8, d2 + vrev64.i32 d22, d22 + vmul.i32 d1, d9, d1 + vrev64.i32 d24, d24 + vext.32 d29, d8, d13, #1 + vext.32 d0, d1, d9, #1 + vrev64.i32 d0, d0 + vext.32 d2, d9, d1, #1 + vext.32 d23, d15, d5, #1 + vmull.s32 q4, d20, d4 + vrev64.i32 d23, d23 + vmlal.s32 q4, d21, d1 + vrev64.i32 d2, d2 + vmlal.s32 q4, d26, d19 + vext.32 d3, d5, d15, #1 + vmlal.s32 q4, d27, d18 + vrev64.i32 d3, d3 + vmlal.s32 q4, d28, d15 + vext.32 d14, d12, d11, #1 + vmull.s32 q5, d16, d23 + vext.32 d15, d13, d12, #1 + vmlal.s32 q5, d17, d4 + vst1.8 d8, [r7, : 64]! + vmlal.s32 q5, d14, d1 + vext.32 d12, d9, d8, #0 + vmlal.s32 q5, d15, d19 + vmov.i64 d13, #0 + vmlal.s32 q5, d29, d18 + vext.32 d25, d19, d7, #1 + vmlal.s32 q6, d20, d5 + vrev64.i32 d25, d25 + vmlal.s32 q6, d21, d4 + vst1.8 d11, [r7, : 64]! + vmlal.s32 q6, d26, d1 + vext.32 d9, d10, d10, #0 + vmlal.s32 q6, d27, d19 + vmov.i64 d8, #0 + vmlal.s32 q6, d28, d18 + vmlal.s32 q4, d16, d24 + vmlal.s32 q4, d17, d5 + vmlal.s32 q4, d14, d4 + vst1.8 d12, [r7, : 64]! + vmlal.s32 q4, d15, d1 + vext.32 d10, d13, d12, #0 + vmlal.s32 q4, d29, d19 + vmov.i64 d11, #0 + vmlal.s32 q5, d20, d6 + vmlal.s32 q5, d21, d5 + vmlal.s32 q5, d26, d4 + vext.32 d13, d8, d8, #0 + vmlal.s32 q5, d27, d1 + vmov.i64 d12, #0 + vmlal.s32 q5, d28, d19 + vst1.8 d9, [r7, : 64]! + vmlal.s32 q6, d16, d25 + vmlal.s32 q6, d17, d6 + vst1.8 d10, [r7, : 64] + vmlal.s32 q6, d14, d5 + vext.32 d8, d11, d10, #0 + vmlal.s32 q6, d15, d4 + vmov.i64 d9, #0 + vmlal.s32 q6, d29, d1 + vmlal.s32 q4, d20, d7 + vmlal.s32 q4, d21, d6 + vmlal.s32 q4, d26, d5 + vext.32 d11, d12, d12, #0 + vmlal.s32 q4, d27, d4 + vmov.i64 d10, #0 + vmlal.s32 q4, d28, d1 + vmlal.s32 q5, d16, d0 + sub r6, r7, #32 + vmlal.s32 q5, d17, d7 + vmlal.s32 q5, d14, d6 + vext.32 d30, d9, d8, #0 + vmlal.s32 q5, d15, d5 + vld1.8 {d31}, [r6, : 64]! + vmlal.s32 q5, d29, d4 + vmlal.s32 q15, d20, d0 + vext.32 d0, d6, d18, #1 + vmlal.s32 q15, d21, d25 + vrev64.i32 d0, d0 + vmlal.s32 q15, d26, d24 + vext.32 d1, d7, d19, #1 + vext.32 d7, d10, d10, #0 + vmlal.s32 q15, d27, d23 + vrev64.i32 d1, d1 + vld1.8 {d6}, [r6, : 64] + vmlal.s32 q15, d28, d22 + vmlal.s32 q3, d16, d4 + add r6, r6, #24 + vmlal.s32 q3, d17, d2 + vext.32 d4, d31, d30, #0 + vmov d17, d11 + vmlal.s32 q3, d14, d1 + vext.32 d11, d13, d13, #0 + vext.32 d13, d30, d30, #0 + vmlal.s32 q3, d15, d0 + vext.32 d1, d8, d8, #0 + vmlal.s32 q3, d29, d3 + vld1.8 {d5}, [r6, : 64] + sub r6, r6, #16 + vext.32 d10, d6, d6, #0 + vmov.i32 q1, #0xffffffff + vshl.i64 q4, q1, #25 + add r7, sp, #480 + vld1.8 {d14-d15}, [r7, : 128] + vadd.i64 q9, q2, q7 + vshl.i64 q1, q1, #26 + vshr.s64 q10, q9, #26 + vld1.8 {d0}, [r6, : 64]! + vadd.i64 q5, q5, q10 + vand q9, q9, q1 + vld1.8 {d16}, [r6, : 64]! 
+ add r6, sp, #496 + vld1.8 {d20-d21}, [r6, : 128] + vadd.i64 q11, q5, q10 + vsub.i64 q2, q2, q9 + vshr.s64 q9, q11, #25 + vext.32 d12, d5, d4, #0 + vand q11, q11, q4 + vadd.i64 q0, q0, q9 + vmov d19, d7 + vadd.i64 q3, q0, q7 + vsub.i64 q5, q5, q11 + vshr.s64 q11, q3, #26 + vext.32 d18, d11, d10, #0 + vand q3, q3, q1 + vadd.i64 q8, q8, q11 + vadd.i64 q11, q8, q10 + vsub.i64 q0, q0, q3 + vshr.s64 q3, q11, #25 + vand q11, q11, q4 + vadd.i64 q3, q6, q3 + vadd.i64 q6, q3, q7 + vsub.i64 q8, q8, q11 + vshr.s64 q11, q6, #26 + vand q6, q6, q1 + vadd.i64 q9, q9, q11 + vadd.i64 d25, d19, d21 + vsub.i64 q3, q3, q6 + vshr.s64 d23, d25, #25 + vand q4, q12, q4 + vadd.i64 d21, d23, d23 + vshl.i64 d25, d23, #4 + vadd.i64 d21, d21, d23 + vadd.i64 d25, d25, d21 + vadd.i64 d4, d4, d25 + vzip.i32 q0, q8 + vadd.i64 d12, d4, d14 + add r6, r8, #8 + vst1.8 d0, [r6, : 64] + vsub.i64 d19, d19, d9 + add r6, r6, #16 + vst1.8 d16, [r6, : 64] + vshr.s64 d22, d12, #26 + vand q0, q6, q1 + vadd.i64 d10, d10, d22 + vzip.i32 q3, q9 + vsub.i64 d4, d4, d0 + sub r6, r6, #8 + vst1.8 d6, [r6, : 64] + add r6, r6, #16 + vst1.8 d18, [r6, : 64] + vzip.i32 q2, q5 + sub r6, r6, #32 + vst1.8 d4, [r6, : 64] + subs r5, r5, #1 + bhi .Lsquaringloop +.Lskipsquaringloop: + mov r2, r2 + add r5, r3, #288 + add r6, r3, #144 + vmov.i32 q0, #19 + vmov.i32 q1, #0 + vmov.i32 q2, #1 + vzip.i32 q1, q2 + vld1.8 {d4-d5}, [r5, : 128]! + vld1.8 {d6-d7}, [r5, : 128]! + vld1.8 {d9}, [r5, : 64] + vld1.8 {d10-d11}, [r2, : 128]! + add r5, sp, #384 + vld1.8 {d12-d13}, [r2, : 128]! + vmul.i32 q7, q2, q0 + vld1.8 {d8}, [r2, : 64] + vext.32 d17, d11, d10, #1 + vmul.i32 q9, q3, q0 + vext.32 d16, d10, d8, #1 + vshl.u32 q10, q5, q1 + vext.32 d22, d14, d4, #1 + vext.32 d24, d18, d6, #1 + vshl.u32 q13, q6, q1 + vshl.u32 d28, d8, d2 + vrev64.i32 d22, d22 + vmul.i32 d1, d9, d1 + vrev64.i32 d24, d24 + vext.32 d29, d8, d13, #1 + vext.32 d0, d1, d9, #1 + vrev64.i32 d0, d0 + vext.32 d2, d9, d1, #1 + vext.32 d23, d15, d5, #1 + vmull.s32 q4, d20, d4 + vrev64.i32 d23, d23 + vmlal.s32 q4, d21, d1 + vrev64.i32 d2, d2 + vmlal.s32 q4, d26, d19 + vext.32 d3, d5, d15, #1 + vmlal.s32 q4, d27, d18 + vrev64.i32 d3, d3 + vmlal.s32 q4, d28, d15 + vext.32 d14, d12, d11, #1 + vmull.s32 q5, d16, d23 + vext.32 d15, d13, d12, #1 + vmlal.s32 q5, d17, d4 + vst1.8 d8, [r5, : 64]! + vmlal.s32 q5, d14, d1 + vext.32 d12, d9, d8, #0 + vmlal.s32 q5, d15, d19 + vmov.i64 d13, #0 + vmlal.s32 q5, d29, d18 + vext.32 d25, d19, d7, #1 + vmlal.s32 q6, d20, d5 + vrev64.i32 d25, d25 + vmlal.s32 q6, d21, d4 + vst1.8 d11, [r5, : 64]! + vmlal.s32 q6, d26, d1 + vext.32 d9, d10, d10, #0 + vmlal.s32 q6, d27, d19 + vmov.i64 d8, #0 + vmlal.s32 q6, d28, d18 + vmlal.s32 q4, d16, d24 + vmlal.s32 q4, d17, d5 + vmlal.s32 q4, d14, d4 + vst1.8 d12, [r5, : 64]! + vmlal.s32 q4, d15, d1 + vext.32 d10, d13, d12, #0 + vmlal.s32 q4, d29, d19 + vmov.i64 d11, #0 + vmlal.s32 q5, d20, d6 + vmlal.s32 q5, d21, d5 + vmlal.s32 q5, d26, d4 + vext.32 d13, d8, d8, #0 + vmlal.s32 q5, d27, d1 + vmov.i64 d12, #0 + vmlal.s32 q5, d28, d19 + vst1.8 d9, [r5, : 64]! 
+ vmlal.s32 q6, d16, d25 + vmlal.s32 q6, d17, d6 + vst1.8 d10, [r5, : 64] + vmlal.s32 q6, d14, d5 + vext.32 d8, d11, d10, #0 + vmlal.s32 q6, d15, d4 + vmov.i64 d9, #0 + vmlal.s32 q6, d29, d1 + vmlal.s32 q4, d20, d7 + vmlal.s32 q4, d21, d6 + vmlal.s32 q4, d26, d5 + vext.32 d11, d12, d12, #0 + vmlal.s32 q4, d27, d4 + vmov.i64 d10, #0 + vmlal.s32 q4, d28, d1 + vmlal.s32 q5, d16, d0 + sub r2, r5, #32 + vmlal.s32 q5, d17, d7 + vmlal.s32 q5, d14, d6 + vext.32 d30, d9, d8, #0 + vmlal.s32 q5, d15, d5 + vld1.8 {d31}, [r2, : 64]! + vmlal.s32 q5, d29, d4 + vmlal.s32 q15, d20, d0 + vext.32 d0, d6, d18, #1 + vmlal.s32 q15, d21, d25 + vrev64.i32 d0, d0 + vmlal.s32 q15, d26, d24 + vext.32 d1, d7, d19, #1 + vext.32 d7, d10, d10, #0 + vmlal.s32 q15, d27, d23 + vrev64.i32 d1, d1 + vld1.8 {d6}, [r2, : 64] + vmlal.s32 q15, d28, d22 + vmlal.s32 q3, d16, d4 + add r2, r2, #24 + vmlal.s32 q3, d17, d2 + vext.32 d4, d31, d30, #0 + vmov d17, d11 + vmlal.s32 q3, d14, d1 + vext.32 d11, d13, d13, #0 + vext.32 d13, d30, d30, #0 + vmlal.s32 q3, d15, d0 + vext.32 d1, d8, d8, #0 + vmlal.s32 q3, d29, d3 + vld1.8 {d5}, [r2, : 64] + sub r2, r2, #16 + vext.32 d10, d6, d6, #0 + vmov.i32 q1, #0xffffffff + vshl.i64 q4, q1, #25 + add r5, sp, #480 + vld1.8 {d14-d15}, [r5, : 128] + vadd.i64 q9, q2, q7 + vshl.i64 q1, q1, #26 + vshr.s64 q10, q9, #26 + vld1.8 {d0}, [r2, : 64]! + vadd.i64 q5, q5, q10 + vand q9, q9, q1 + vld1.8 {d16}, [r2, : 64]! + add r2, sp, #496 + vld1.8 {d20-d21}, [r2, : 128] + vadd.i64 q11, q5, q10 + vsub.i64 q2, q2, q9 + vshr.s64 q9, q11, #25 + vext.32 d12, d5, d4, #0 + vand q11, q11, q4 + vadd.i64 q0, q0, q9 + vmov d19, d7 + vadd.i64 q3, q0, q7 + vsub.i64 q5, q5, q11 + vshr.s64 q11, q3, #26 + vext.32 d18, d11, d10, #0 + vand q3, q3, q1 + vadd.i64 q8, q8, q11 + vadd.i64 q11, q8, q10 + vsub.i64 q0, q0, q3 + vshr.s64 q3, q11, #25 + vand q11, q11, q4 + vadd.i64 q3, q6, q3 + vadd.i64 q6, q3, q7 + vsub.i64 q8, q8, q11 + vshr.s64 q11, q6, #26 + vand q6, q6, q1 + vadd.i64 q9, q9, q11 + vadd.i64 d25, d19, d21 + vsub.i64 q3, q3, q6 + vshr.s64 d23, d25, #25 + vand q4, q12, q4 + vadd.i64 d21, d23, d23 + vshl.i64 d25, d23, #4 + vadd.i64 d21, d21, d23 + vadd.i64 d25, d25, d21 + vadd.i64 d4, d4, d25 + vzip.i32 q0, q8 + vadd.i64 d12, d4, d14 + add r2, r6, #8 + vst1.8 d0, [r2, : 64] + vsub.i64 d19, d19, d9 + add r2, r2, #16 + vst1.8 d16, [r2, : 64] + vshr.s64 d22, d12, #26 + vand q0, q6, q1 + vadd.i64 d10, d10, d22 + vzip.i32 q3, q9 + vsub.i64 d4, d4, d0 + sub r2, r2, #8 + vst1.8 d6, [r2, : 64] + add r2, r2, #16 + vst1.8 d18, [r2, : 64] + vzip.i32 q2, q5 + sub r2, r2, #32 + vst1.8 d4, [r2, : 64] + cmp r4, #0 + beq .Lskippostcopy + add r2, r3, #144 + mov r4, r4 + vld1.8 {d0-d1}, [r2, : 128]! + vld1.8 {d2-d3}, [r2, : 128]! + vld1.8 {d4}, [r2, : 64] + vst1.8 {d0-d1}, [r4, : 128]! + vst1.8 {d2-d3}, [r4, : 128]! + vst1.8 d4, [r4, : 64] +.Lskippostcopy: + cmp r1, #1 + bne .Lskipfinalcopy + add r2, r3, #288 + add r4, r3, #144 + vld1.8 {d0-d1}, [r2, : 128]! + vld1.8 {d2-d3}, [r2, : 128]! + vld1.8 {d4}, [r2, : 64] + vst1.8 {d0-d1}, [r4, : 128]! + vst1.8 {d2-d3}, [r4, : 128]! 
+ vst1.8 d4, [r4, : 64] +.Lskipfinalcopy: + add r1, r1, #1 + cmp r1, #12 + blo .Linvertloop + add r1, r3, #144 + ldr r2, [r1], #4 + ldr r3, [r1], #4 + ldr r4, [r1], #4 + ldr r5, [r1], #4 + ldr r6, [r1], #4 + ldr r7, [r1], #4 + ldr r8, [r1], #4 + ldr r9, [r1], #4 + ldr r10, [r1], #4 + ldr r1, [r1] + add r11, r1, r1, LSL #4 + add r11, r11, r1, LSL #1 + add r11, r11, #16777216 + mov r11, r11, ASR #25 + add r11, r11, r2 + mov r11, r11, ASR #26 + add r11, r11, r3 + mov r11, r11, ASR #25 + add r11, r11, r4 + mov r11, r11, ASR #26 + add r11, r11, r5 + mov r11, r11, ASR #25 + add r11, r11, r6 + mov r11, r11, ASR #26 + add r11, r11, r7 + mov r11, r11, ASR #25 + add r11, r11, r8 + mov r11, r11, ASR #26 + add r11, r11, r9 + mov r11, r11, ASR #25 + add r11, r11, r10 + mov r11, r11, ASR #26 + add r11, r11, r1 + mov r11, r11, ASR #25 + add r2, r2, r11 + add r2, r2, r11, LSL #1 + add r2, r2, r11, LSL #4 + mov r11, r2, ASR #26 + add r3, r3, r11 + sub r2, r2, r11, LSL #26 + mov r11, r3, ASR #25 + add r4, r4, r11 + sub r3, r3, r11, LSL #25 + mov r11, r4, ASR #26 + add r5, r5, r11 + sub r4, r4, r11, LSL #26 + mov r11, r5, ASR #25 + add r6, r6, r11 + sub r5, r5, r11, LSL #25 + mov r11, r6, ASR #26 + add r7, r7, r11 + sub r6, r6, r11, LSL #26 + mov r11, r7, ASR #25 + add r8, r8, r11 + sub r7, r7, r11, LSL #25 + mov r11, r8, ASR #26 + add r9, r9, r11 + sub r8, r8, r11, LSL #26 + mov r11, r9, ASR #25 + add r10, r10, r11 + sub r9, r9, r11, LSL #25 + mov r11, r10, ASR #26 + add r1, r1, r11 + sub r10, r10, r11, LSL #26 + mov r11, r1, ASR #25 + sub r1, r1, r11, LSL #25 + add r2, r2, r3, LSL #26 + mov r3, r3, LSR #6 + add r3, r3, r4, LSL #19 + mov r4, r4, LSR #13 + add r4, r4, r5, LSL #13 + mov r5, r5, LSR #19 + add r5, r5, r6, LSL #6 + add r6, r7, r8, LSL #25 + mov r7, r8, LSR #7 + add r7, r7, r9, LSL #19 + mov r8, r9, LSR #13 + add r8, r8, r10, LSL #12 + mov r9, r10, LSR #20 + add r1, r9, r1, LSL #6 + str r2, [r0] + str r3, [r0, #4] + str r4, [r0, #8] + str r5, [r0, #12] + str r6, [r0, #16] + str r7, [r0, #20] + str r8, [r0, #24] + str r1, [r0, #28] + movw r0, #0 + mov sp, ip + pop {r4-r11, pc} +ENDPROC(curve25519_neon) diff --git a/arch/arm/crypto/curve25519-glue.c b/arch/arm/crypto/curve25519-glue.c new file mode 100644 index 000000000000..2e9e12d2f642 --- /dev/null +++ b/arch/arm/crypto/curve25519-glue.c @@ -0,0 +1,127 @@ +// SPDX-License-Identifier: GPL-2.0 OR MIT +/* + * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved. + * + * Based on public domain code from Daniel J. Bernstein and Peter Schwabe. This + * began from SUPERCOP's curve25519/neon2/scalarmult.s, but has subsequently been + * manually reworked for use in kernel space. 
+ */ + +#include <asm/hwcap.h> +#include <asm/neon.h> +#include <asm/simd.h> +#include <crypto/internal/kpp.h> +#include <crypto/internal/simd.h> +#include <linux/types.h> +#include <linux/module.h> +#include <linux/init.h> +#include <linux/jump_label.h> +#include <crypto/curve25519.h> + +asmlinkage void curve25519_neon(u8 mypublic[CURVE25519_KEY_SIZE], + const u8 secret[CURVE25519_KEY_SIZE], + const u8 basepoint[CURVE25519_KEY_SIZE]); + +static __ro_after_init DEFINE_STATIC_KEY_FALSE(have_neon); + +void curve25519_arch(u8 out[CURVE25519_KEY_SIZE], + const u8 scalar[CURVE25519_KEY_SIZE], + const u8 point[CURVE25519_KEY_SIZE]) +{ + if (static_branch_likely(&have_neon) && crypto_simd_usable()) { + kernel_neon_begin(); + curve25519_neon(out, scalar, point); + kernel_neon_end(); + } else { + curve25519_generic(out, scalar, point); + } +} +EXPORT_SYMBOL(curve25519_arch); + +static int curve25519_set_secret(struct crypto_kpp *tfm, const void *buf, + unsigned int len) +{ + u8 *secret = kpp_tfm_ctx(tfm); + + if (!len) + curve25519_generate_secret(secret); + else if (len == CURVE25519_KEY_SIZE && + crypto_memneq(buf, curve25519_null_point, CURVE25519_KEY_SIZE)) + memcpy(secret, buf, CURVE25519_KEY_SIZE); + else + return -EINVAL; + return 0; +} + +static int curve25519_compute_value(struct kpp_request *req) +{ + struct crypto_kpp *tfm = crypto_kpp_reqtfm(req); + const u8 *secret = kpp_tfm_ctx(tfm); + u8 public_key[CURVE25519_KEY_SIZE]; + u8 buf[CURVE25519_KEY_SIZE]; + int copied, nbytes; + u8 const *bp; + + if (req->src) { + copied = sg_copy_to_buffer(req->src, + sg_nents_for_len(req->src, + CURVE25519_KEY_SIZE), + public_key, CURVE25519_KEY_SIZE); + if (copied != CURVE25519_KEY_SIZE) + return -EINVAL; + bp = public_key; + } else { + bp = curve25519_base_point; + } + + curve25519_arch(buf, secret, bp); + + /* might want less than we've got */ + nbytes = min_t(size_t, CURVE25519_KEY_SIZE, req->dst_len); + copied = sg_copy_from_buffer(req->dst, sg_nents_for_len(req->dst, + nbytes), + buf, nbytes); + if (copied != nbytes) + return -EINVAL; + return 0; +} + +static unsigned int curve25519_max_size(struct crypto_kpp *tfm) +{ + return CURVE25519_KEY_SIZE; +} + +static struct kpp_alg curve25519_alg = { + .base.cra_name = "curve25519", + .base.cra_driver_name = "curve25519-neon", + .base.cra_priority = 200, + .base.cra_module = THIS_MODULE, + .base.cra_ctxsize = CURVE25519_KEY_SIZE, + + .set_secret = curve25519_set_secret, + .generate_public_key = curve25519_compute_value, + .compute_shared_secret = curve25519_compute_value, + .max_size = curve25519_max_size, +}; + +static int __init mod_init(void) +{ + if (elf_hwcap & HWCAP_NEON) { + static_branch_enable(&have_neon); + return crypto_register_kpp(&curve25519_alg); + } + return 0; +} + +static void __exit mod_exit(void) +{ + if (elf_hwcap & HWCAP_NEON) + crypto_unregister_kpp(&curve25519_alg); +} + +module_init(mod_init); +module_exit(mod_exit); + +MODULE_ALIAS_CRYPTO("curve25519"); +MODULE_ALIAS_CRYPTO("curve25519-neon"); +MODULE_LICENSE("GPL v2"); diff --git a/arch/arm/crypto/ghash-ce-core.S b/arch/arm/crypto/ghash-ce-core.S index c47fe81abcb0..534c9647726d 100644 --- a/arch/arm/crypto/ghash-ce-core.S +++ b/arch/arm/crypto/ghash-ce-core.S @@ -88,6 +88,7 @@ T3_H .req d17 .text + .arch armv8-a .fpu crypto-neon-fp-armv8 .macro __pmull_p64, rd, rn, rm, b1, b2, b3, b4 diff --git a/arch/arm/crypto/poly1305-armv4.pl b/arch/arm/crypto/poly1305-armv4.pl new file mode 100644 index 000000000000..6d79498d3115 --- /dev/null +++ b/arch/arm/crypto/poly1305-armv4.pl @@ 
-0,0 +1,1236 @@ +#!/usr/bin/env perl +# SPDX-License-Identifier: GPL-1.0+ OR BSD-3-Clause +# +# ==================================================================== +# Written by Andy Polyakov, @dot-asm, initially for the OpenSSL +# project. +# ==================================================================== +# +# IALU(*)/gcc-4.4 NEON +# +# ARM11xx(ARMv6) 7.78/+100% - +# Cortex-A5 6.35/+130% 3.00 +# Cortex-A8 6.25/+115% 2.36 +# Cortex-A9 5.10/+95% 2.55 +# Cortex-A15 3.85/+85% 1.25(**) +# Snapdragon S4 5.70/+100% 1.48(**) +# +# (*) this is for -march=armv6, i.e. with bunch of ldrb loading data; +# (**) these are trade-off results, they can be improved by ~8% but at +# the cost of 15/12% regression on Cortex-A5/A7, it's even possible +# to improve Cortex-A9 result, but then A5/A7 loose more than 20%; + +$flavour = shift; +if ($flavour=~/\w[\w\-]*\.\w+$/) { $output=$flavour; undef $flavour; } +else { while (($output=shift) && ($output!~/\w[\w\-]*\.\w+$/)) {} } + +if ($flavour && $flavour ne "void") { + $0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1; + ( $xlate="${dir}arm-xlate.pl" and -f $xlate ) or + ( $xlate="${dir}../../perlasm/arm-xlate.pl" and -f $xlate) or + die "can't locate arm-xlate.pl"; + + open STDOUT,"| \"$^X\" $xlate $flavour $output"; +} else { + open STDOUT,">$output"; +} + +($ctx,$inp,$len,$padbit)=map("r$_",(0..3)); + +$code.=<<___; +#ifndef __KERNEL__ +# include "arm_arch.h" +#else +# define __ARM_ARCH__ __LINUX_ARM_ARCH__ +# define __ARM_MAX_ARCH__ __LINUX_ARM_ARCH__ +# define poly1305_init poly1305_init_arm +# define poly1305_blocks poly1305_blocks_arm +# define poly1305_emit poly1305_emit_arm +.globl poly1305_blocks_neon +#endif + +#if defined(__thumb2__) +.syntax unified +.thumb +#else +.code 32 +#endif + +.text + +.globl poly1305_emit +.globl poly1305_blocks +.globl poly1305_init +.type poly1305_init,%function +.align 5 +poly1305_init: +.Lpoly1305_init: + stmdb sp!,{r4-r11} + + eor r3,r3,r3 + cmp $inp,#0 + str r3,[$ctx,#0] @ zero hash value + str r3,[$ctx,#4] + str r3,[$ctx,#8] + str r3,[$ctx,#12] + str r3,[$ctx,#16] + str r3,[$ctx,#36] @ clear is_base2_26 + add $ctx,$ctx,#20 + +#ifdef __thumb2__ + it eq +#endif + moveq r0,#0 + beq .Lno_key + +#if __ARM_MAX_ARCH__>=7 + mov r3,#-1 + str r3,[$ctx,#28] @ impossible key power value +# ifndef __KERNEL__ + adr r11,.Lpoly1305_init + ldr r12,.LOPENSSL_armcap +# endif +#endif + ldrb r4,[$inp,#0] + mov r10,#0x0fffffff + ldrb r5,[$inp,#1] + and r3,r10,#-4 @ 0x0ffffffc + ldrb r6,[$inp,#2] + ldrb r7,[$inp,#3] + orr r4,r4,r5,lsl#8 + ldrb r5,[$inp,#4] + orr r4,r4,r6,lsl#16 + ldrb r6,[$inp,#5] + orr r4,r4,r7,lsl#24 + ldrb r7,[$inp,#6] + and r4,r4,r10 + +#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__) +# if !defined(_WIN32) + ldr r12,[r11,r12] @ OPENSSL_armcap_P +# endif +# if defined(__APPLE__) || defined(_WIN32) + ldr r12,[r12] +# endif +#endif + ldrb r8,[$inp,#7] + orr r5,r5,r6,lsl#8 + ldrb r6,[$inp,#8] + orr r5,r5,r7,lsl#16 + ldrb r7,[$inp,#9] + orr r5,r5,r8,lsl#24 + ldrb r8,[$inp,#10] + and r5,r5,r3 + +#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__) + tst r12,#ARMV7_NEON @ check for NEON +# ifdef __thumb2__ + adr r9,.Lpoly1305_blocks_neon + adr r11,.Lpoly1305_blocks + it ne + movne r11,r9 + adr r12,.Lpoly1305_emit + orr r11,r11,#1 @ thumb-ify addresses + orr r12,r12,#1 +# else + add r12,r11,#(.Lpoly1305_emit-.Lpoly1305_init) + ite eq + addeq r11,r11,#(.Lpoly1305_blocks-.Lpoly1305_init) + addne r11,r11,#(.Lpoly1305_blocks_neon-.Lpoly1305_init) +# endif +#endif + ldrb r9,[$inp,#11] + orr r6,r6,r7,lsl#8 + ldrb r7,[$inp,#12] + orr 
r6,r6,r8,lsl#16 + ldrb r8,[$inp,#13] + orr r6,r6,r9,lsl#24 + ldrb r9,[$inp,#14] + and r6,r6,r3 + + ldrb r10,[$inp,#15] + orr r7,r7,r8,lsl#8 + str r4,[$ctx,#0] + orr r7,r7,r9,lsl#16 + str r5,[$ctx,#4] + orr r7,r7,r10,lsl#24 + str r6,[$ctx,#8] + and r7,r7,r3 + str r7,[$ctx,#12] +#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__) + stmia r2,{r11,r12} @ fill functions table + mov r0,#1 +#else + mov r0,#0 +#endif +.Lno_key: + ldmia sp!,{r4-r11} +#if __ARM_ARCH__>=5 + ret @ bx lr +#else + tst lr,#1 + moveq pc,lr @ be binary compatible with V4, yet + bx lr @ interoperable with Thumb ISA:-) +#endif +.size poly1305_init,.-poly1305_init +___ +{ +my ($h0,$h1,$h2,$h3,$h4,$r0,$r1,$r2,$r3)=map("r$_",(4..12)); +my ($s1,$s2,$s3)=($r1,$r2,$r3); + +$code.=<<___; +.type poly1305_blocks,%function +.align 5 +poly1305_blocks: +.Lpoly1305_blocks: + stmdb sp!,{r3-r11,lr} + + ands $len,$len,#-16 + beq .Lno_data + + add $len,$len,$inp @ end pointer + sub sp,sp,#32 + +#if __ARM_ARCH__<7 + ldmia $ctx,{$h0-$r3} @ load context + add $ctx,$ctx,#20 + str $len,[sp,#16] @ offload stuff + str $ctx,[sp,#12] +#else + ldr lr,[$ctx,#36] @ is_base2_26 + ldmia $ctx!,{$h0-$h4} @ load hash value + str $len,[sp,#16] @ offload stuff + str $ctx,[sp,#12] + + adds $r0,$h0,$h1,lsl#26 @ base 2^26 -> base 2^32 + mov $r1,$h1,lsr#6 + adcs $r1,$r1,$h2,lsl#20 + mov $r2,$h2,lsr#12 + adcs $r2,$r2,$h3,lsl#14 + mov $r3,$h3,lsr#18 + adcs $r3,$r3,$h4,lsl#8 + mov $len,#0 + teq lr,#0 + str $len,[$ctx,#16] @ clear is_base2_26 + adc $len,$len,$h4,lsr#24 + + itttt ne + movne $h0,$r0 @ choose between radixes + movne $h1,$r1 + movne $h2,$r2 + movne $h3,$r3 + ldmia $ctx,{$r0-$r3} @ load key + it ne + movne $h4,$len +#endif + + mov lr,$inp + cmp $padbit,#0 + str $r1,[sp,#20] + str $r2,[sp,#24] + str $r3,[sp,#28] + b .Loop + +.align 4 +.Loop: +#if __ARM_ARCH__<7 + ldrb r0,[lr],#16 @ load input +# ifdef __thumb2__ + it hi +# endif + addhi $h4,$h4,#1 @ 1<<128 + ldrb r1,[lr,#-15] + ldrb r2,[lr,#-14] + ldrb r3,[lr,#-13] + orr r1,r0,r1,lsl#8 + ldrb r0,[lr,#-12] + orr r2,r1,r2,lsl#16 + ldrb r1,[lr,#-11] + orr r3,r2,r3,lsl#24 + ldrb r2,[lr,#-10] + adds $h0,$h0,r3 @ accumulate input + + ldrb r3,[lr,#-9] + orr r1,r0,r1,lsl#8 + ldrb r0,[lr,#-8] + orr r2,r1,r2,lsl#16 + ldrb r1,[lr,#-7] + orr r3,r2,r3,lsl#24 + ldrb r2,[lr,#-6] + adcs $h1,$h1,r3 + + ldrb r3,[lr,#-5] + orr r1,r0,r1,lsl#8 + ldrb r0,[lr,#-4] + orr r2,r1,r2,lsl#16 + ldrb r1,[lr,#-3] + orr r3,r2,r3,lsl#24 + ldrb r2,[lr,#-2] + adcs $h2,$h2,r3 + + ldrb r3,[lr,#-1] + orr r1,r0,r1,lsl#8 + str lr,[sp,#8] @ offload input pointer + orr r2,r1,r2,lsl#16 + add $s1,$r1,$r1,lsr#2 + orr r3,r2,r3,lsl#24 +#else + ldr r0,[lr],#16 @ load input + it hi + addhi $h4,$h4,#1 @ padbit + ldr r1,[lr,#-12] + ldr r2,[lr,#-8] + ldr r3,[lr,#-4] +# ifdef __ARMEB__ + rev r0,r0 + rev r1,r1 + rev r2,r2 + rev r3,r3 +# endif + adds $h0,$h0,r0 @ accumulate input + str lr,[sp,#8] @ offload input pointer + adcs $h1,$h1,r1 + add $s1,$r1,$r1,lsr#2 + adcs $h2,$h2,r2 +#endif + add $s2,$r2,$r2,lsr#2 + adcs $h3,$h3,r3 + add $s3,$r3,$r3,lsr#2 + + umull r2,r3,$h1,$r0 + adc $h4,$h4,#0 + umull r0,r1,$h0,$r0 + umlal r2,r3,$h4,$s1 + umlal r0,r1,$h3,$s1 + ldr $r1,[sp,#20] @ reload $r1 + umlal r2,r3,$h2,$s3 + umlal r0,r1,$h1,$s3 + umlal r2,r3,$h3,$s2 + umlal r0,r1,$h2,$s2 + umlal r2,r3,$h0,$r1 + str r0,[sp,#0] @ future $h0 + mul r0,$s2,$h4 + ldr $r2,[sp,#24] @ reload $r2 + adds r2,r2,r1 @ d1+=d0>>32 + eor r1,r1,r1 + adc lr,r3,#0 @ future $h2 + str r2,[sp,#4] @ future $h1 + + mul r2,$s3,$h4 + eor r3,r3,r3 + umlal r0,r1,$h3,$s3 + ldr $r3,[sp,#28] @ reload $r3 + 
umlal r2,r3,$h3,$r0 + umlal r0,r1,$h2,$r0 + umlal r2,r3,$h2,$r1 + umlal r0,r1,$h1,$r1 + umlal r2,r3,$h1,$r2 + umlal r0,r1,$h0,$r2 + umlal r2,r3,$h0,$r3 + ldr $h0,[sp,#0] + mul $h4,$r0,$h4 + ldr $h1,[sp,#4] + + adds $h2,lr,r0 @ d2+=d1>>32 + ldr lr,[sp,#8] @ reload input pointer + adc r1,r1,#0 + adds $h3,r2,r1 @ d3+=d2>>32 + ldr r0,[sp,#16] @ reload end pointer + adc r3,r3,#0 + add $h4,$h4,r3 @ h4+=d3>>32 + + and r1,$h4,#-4 + and $h4,$h4,#3 + add r1,r1,r1,lsr#2 @ *=5 + adds $h0,$h0,r1 + adcs $h1,$h1,#0 + adcs $h2,$h2,#0 + adcs $h3,$h3,#0 + adc $h4,$h4,#0 + + cmp r0,lr @ done yet? + bhi .Loop + + ldr $ctx,[sp,#12] + add sp,sp,#32 + stmdb $ctx,{$h0-$h4} @ store the result + +.Lno_data: +#if __ARM_ARCH__>=5 + ldmia sp!,{r3-r11,pc} +#else + ldmia sp!,{r3-r11,lr} + tst lr,#1 + moveq pc,lr @ be binary compatible with V4, yet + bx lr @ interoperable with Thumb ISA:-) +#endif +.size poly1305_blocks,.-poly1305_blocks +___ +} +{ +my ($ctx,$mac,$nonce)=map("r$_",(0..2)); +my ($h0,$h1,$h2,$h3,$h4,$g0,$g1,$g2,$g3)=map("r$_",(3..11)); +my $g4=$ctx; + +$code.=<<___; +.type poly1305_emit,%function +.align 5 +poly1305_emit: +.Lpoly1305_emit: + stmdb sp!,{r4-r11} + + ldmia $ctx,{$h0-$h4} + +#if __ARM_ARCH__>=7 + ldr ip,[$ctx,#36] @ is_base2_26 + + adds $g0,$h0,$h1,lsl#26 @ base 2^26 -> base 2^32 + mov $g1,$h1,lsr#6 + adcs $g1,$g1,$h2,lsl#20 + mov $g2,$h2,lsr#12 + adcs $g2,$g2,$h3,lsl#14 + mov $g3,$h3,lsr#18 + adcs $g3,$g3,$h4,lsl#8 + mov $g4,#0 + adc $g4,$g4,$h4,lsr#24 + + tst ip,ip + itttt ne + movne $h0,$g0 + movne $h1,$g1 + movne $h2,$g2 + movne $h3,$g3 + it ne + movne $h4,$g4 +#endif + + adds $g0,$h0,#5 @ compare to modulus + adcs $g1,$h1,#0 + adcs $g2,$h2,#0 + adcs $g3,$h3,#0 + adc $g4,$h4,#0 + tst $g4,#4 @ did it carry/borrow? + +#ifdef __thumb2__ + it ne +#endif + movne $h0,$g0 + ldr $g0,[$nonce,#0] +#ifdef __thumb2__ + it ne +#endif + movne $h1,$g1 + ldr $g1,[$nonce,#4] +#ifdef __thumb2__ + it ne +#endif + movne $h2,$g2 + ldr $g2,[$nonce,#8] +#ifdef __thumb2__ + it ne +#endif + movne $h3,$g3 + ldr $g3,[$nonce,#12] + + adds $h0,$h0,$g0 + adcs $h1,$h1,$g1 + adcs $h2,$h2,$g2 + adc $h3,$h3,$g3 + +#if __ARM_ARCH__>=7 +# ifdef __ARMEB__ + rev $h0,$h0 + rev $h1,$h1 + rev $h2,$h2 + rev $h3,$h3 +# endif + str $h0,[$mac,#0] + str $h1,[$mac,#4] + str $h2,[$mac,#8] + str $h3,[$mac,#12] +#else + strb $h0,[$mac,#0] + mov $h0,$h0,lsr#8 + strb $h1,[$mac,#4] + mov $h1,$h1,lsr#8 + strb $h2,[$mac,#8] + mov $h2,$h2,lsr#8 + strb $h3,[$mac,#12] + mov $h3,$h3,lsr#8 + + strb $h0,[$mac,#1] + mov $h0,$h0,lsr#8 + strb $h1,[$mac,#5] + mov $h1,$h1,lsr#8 + strb $h2,[$mac,#9] + mov $h2,$h2,lsr#8 + strb $h3,[$mac,#13] + mov $h3,$h3,lsr#8 + + strb $h0,[$mac,#2] + mov $h0,$h0,lsr#8 + strb $h1,[$mac,#6] + mov $h1,$h1,lsr#8 + strb $h2,[$mac,#10] + mov $h2,$h2,lsr#8 + strb $h3,[$mac,#14] + mov $h3,$h3,lsr#8 + + strb $h0,[$mac,#3] + strb $h1,[$mac,#7] + strb $h2,[$mac,#11] + strb $h3,[$mac,#15] +#endif + ldmia sp!,{r4-r11} +#if __ARM_ARCH__>=5 + ret @ bx lr +#else + tst lr,#1 + moveq pc,lr @ be binary compatible with V4, yet + bx lr @ interoperable with Thumb ISA:-) +#endif +.size poly1305_emit,.-poly1305_emit +___ +{ +my ($R0,$R1,$S1,$R2,$S2,$R3,$S3,$R4,$S4) = map("d$_",(0..9)); +my ($D0,$D1,$D2,$D3,$D4, $H0,$H1,$H2,$H3,$H4) = map("q$_",(5..14)); +my ($T0,$T1,$MASK) = map("q$_",(15,4,0)); + +my ($in2,$zeros,$tbl0,$tbl1) = map("r$_",(4..7)); + +$code.=<<___; +#if __ARM_MAX_ARCH__>=7 +.fpu neon + +.type poly1305_init_neon,%function +.align 5 +poly1305_init_neon: +.Lpoly1305_init_neon: + ldr r3,[$ctx,#48] @ first table element + cmp r3,#-1 
@ is value impossible? + bne .Lno_init_neon + + ldr r4,[$ctx,#20] @ load key base 2^32 + ldr r5,[$ctx,#24] + ldr r6,[$ctx,#28] + ldr r7,[$ctx,#32] + + and r2,r4,#0x03ffffff @ base 2^32 -> base 2^26 + mov r3,r4,lsr#26 + mov r4,r5,lsr#20 + orr r3,r3,r5,lsl#6 + mov r5,r6,lsr#14 + orr r4,r4,r6,lsl#12 + mov r6,r7,lsr#8 + orr r5,r5,r7,lsl#18 + and r3,r3,#0x03ffffff + and r4,r4,#0x03ffffff + and r5,r5,#0x03ffffff + + vdup.32 $R0,r2 @ r^1 in both lanes + add r2,r3,r3,lsl#2 @ *5 + vdup.32 $R1,r3 + add r3,r4,r4,lsl#2 + vdup.32 $S1,r2 + vdup.32 $R2,r4 + add r4,r5,r5,lsl#2 + vdup.32 $S2,r3 + vdup.32 $R3,r5 + add r5,r6,r6,lsl#2 + vdup.32 $S3,r4 + vdup.32 $R4,r6 + vdup.32 $S4,r5 + + mov $zeros,#2 @ counter + +.Lsquare_neon: + @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ + @ d0 = h0*r0 + h4*5*r1 + h3*5*r2 + h2*5*r3 + h1*5*r4 + @ d1 = h1*r0 + h0*r1 + h4*5*r2 + h3*5*r3 + h2*5*r4 + @ d2 = h2*r0 + h1*r1 + h0*r2 + h4*5*r3 + h3*5*r4 + @ d3 = h3*r0 + h2*r1 + h1*r2 + h0*r3 + h4*5*r4 + @ d4 = h4*r0 + h3*r1 + h2*r2 + h1*r3 + h0*r4 + + vmull.u32 $D0,$R0,${R0}[1] + vmull.u32 $D1,$R1,${R0}[1] + vmull.u32 $D2,$R2,${R0}[1] + vmull.u32 $D3,$R3,${R0}[1] + vmull.u32 $D4,$R4,${R0}[1] + + vmlal.u32 $D0,$R4,${S1}[1] + vmlal.u32 $D1,$R0,${R1}[1] + vmlal.u32 $D2,$R1,${R1}[1] + vmlal.u32 $D3,$R2,${R1}[1] + vmlal.u32 $D4,$R3,${R1}[1] + + vmlal.u32 $D0,$R3,${S2}[1] + vmlal.u32 $D1,$R4,${S2}[1] + vmlal.u32 $D3,$R1,${R2}[1] + vmlal.u32 $D2,$R0,${R2}[1] + vmlal.u32 $D4,$R2,${R2}[1] + + vmlal.u32 $D0,$R2,${S3}[1] + vmlal.u32 $D3,$R0,${R3}[1] + vmlal.u32 $D1,$R3,${S3}[1] + vmlal.u32 $D2,$R4,${S3}[1] + vmlal.u32 $D4,$R1,${R3}[1] + + vmlal.u32 $D3,$R4,${S4}[1] + vmlal.u32 $D0,$R1,${S4}[1] + vmlal.u32 $D1,$R2,${S4}[1] + vmlal.u32 $D2,$R3,${S4}[1] + vmlal.u32 $D4,$R0,${R4}[1] + + @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ + @ lazy reduction as discussed in "NEON crypto" by D.J. Bernstein + @ and P. Schwabe + @ + @ H0>>+H1>>+H2>>+H3>>+H4 + @ H3>>+H4>>*5+H0>>+H1 + @ + @ Trivia. + @ + @ Result of multiplication of n-bit number by m-bit number is + @ n+m bits wide. However! Even though 2^n is a n+1-bit number, + @ m-bit number multiplied by 2^n is still n+m bits wide. + @ + @ Sum of two n-bit numbers is n+1 bits wide, sum of three - n+2, + @ and so is sum of four. Sum of 2^m n-m-bit numbers and n-bit + @ one is n+1 bits wide. + @ + @ >>+ denotes Hnext += Hn>>26, Hn &= 0x3ffffff. This means that + @ H0, H2, H3 are guaranteed to be 26 bits wide, while H1 and H4 + @ can be 27. However! In cases when their width exceeds 26 bits + @ they are limited by 2^26+2^6. This in turn means that *sum* + @ of the products with these values can still be viewed as sum + @ of 52-bit numbers as long as the amount of addends is not a + @ power of 2. For example, + @ + @ H4 = H4*R0 + H3*R1 + H2*R2 + H1*R3 + H0 * R4, + @ + @ which can't be larger than 5 * (2^26 + 2^6) * (2^26 + 2^6), or + @ 5 * (2^52 + 2*2^32 + 2^12), which in turn is smaller than + @ 8 * (2^52) or 2^55. However, the value is then multiplied by + @ by 5, so we should be looking at 5 * 5 * (2^52 + 2^33 + 2^12), + @ which is less than 32 * (2^52) or 2^57. And when processing + @ data we are looking at triple as many addends... + @ + @ In key setup procedure pre-reduced H0 is limited by 5*4+1 and + @ 5*H4 - by 5*5 52-bit addends, or 57 bits. But when hashing the + @ input H0 is limited by (5*4+1)*3 addends, or 58 bits, while + @ 5*H4 by 5*5*3, or 59[!] bits. How is this relevant? vmlal.u32 + @ instruction accepts 2x32-bit input and writes 2x64-bit result. 
+ @ This means that result of reduction have to be compressed upon + @ loop wrap-around. This can be done in the process of reduction + @ to minimize amount of instructions [as well as amount of + @ 128-bit instructions, which benefits low-end processors], but + @ one has to watch for H2 (which is narrower than H0) and 5*H4 + @ not being wider than 58 bits, so that result of right shift + @ by 26 bits fits in 32 bits. This is also useful on x86, + @ because it allows to use paddd in place for paddq, which + @ benefits Atom, where paddq is ridiculously slow. + + vshr.u64 $T0,$D3,#26 + vmovn.i64 $D3#lo,$D3 + vshr.u64 $T1,$D0,#26 + vmovn.i64 $D0#lo,$D0 + vadd.i64 $D4,$D4,$T0 @ h3 -> h4 + vbic.i32 $D3#lo,#0xfc000000 @ &=0x03ffffff + vadd.i64 $D1,$D1,$T1 @ h0 -> h1 + vbic.i32 $D0#lo,#0xfc000000 + + vshrn.u64 $T0#lo,$D4,#26 + vmovn.i64 $D4#lo,$D4 + vshr.u64 $T1,$D1,#26 + vmovn.i64 $D1#lo,$D1 + vadd.i64 $D2,$D2,$T1 @ h1 -> h2 + vbic.i32 $D4#lo,#0xfc000000 + vbic.i32 $D1#lo,#0xfc000000 + + vadd.i32 $D0#lo,$D0#lo,$T0#lo + vshl.u32 $T0#lo,$T0#lo,#2 + vshrn.u64 $T1#lo,$D2,#26 + vmovn.i64 $D2#lo,$D2 + vadd.i32 $D0#lo,$D0#lo,$T0#lo @ h4 -> h0 + vadd.i32 $D3#lo,$D3#lo,$T1#lo @ h2 -> h3 + vbic.i32 $D2#lo,#0xfc000000 + + vshr.u32 $T0#lo,$D0#lo,#26 + vbic.i32 $D0#lo,#0xfc000000 + vshr.u32 $T1#lo,$D3#lo,#26 + vbic.i32 $D3#lo,#0xfc000000 + vadd.i32 $D1#lo,$D1#lo,$T0#lo @ h0 -> h1 + vadd.i32 $D4#lo,$D4#lo,$T1#lo @ h3 -> h4 + + subs $zeros,$zeros,#1 + beq .Lsquare_break_neon + + add $tbl0,$ctx,#(48+0*9*4) + add $tbl1,$ctx,#(48+1*9*4) + + vtrn.32 $R0,$D0#lo @ r^2:r^1 + vtrn.32 $R2,$D2#lo + vtrn.32 $R3,$D3#lo + vtrn.32 $R1,$D1#lo + vtrn.32 $R4,$D4#lo + + vshl.u32 $S2,$R2,#2 @ *5 + vshl.u32 $S3,$R3,#2 + vshl.u32 $S1,$R1,#2 + vshl.u32 $S4,$R4,#2 + vadd.i32 $S2,$S2,$R2 + vadd.i32 $S1,$S1,$R1 + vadd.i32 $S3,$S3,$R3 + vadd.i32 $S4,$S4,$R4 + + vst4.32 {${R0}[0],${R1}[0],${S1}[0],${R2}[0]},[$tbl0]! + vst4.32 {${R0}[1],${R1}[1],${S1}[1],${R2}[1]},[$tbl1]! + vst4.32 {${S2}[0],${R3}[0],${S3}[0],${R4}[0]},[$tbl0]! + vst4.32 {${S2}[1],${R3}[1],${S3}[1],${R4}[1]},[$tbl1]! + vst1.32 {${S4}[0]},[$tbl0,:32] + vst1.32 {${S4}[1]},[$tbl1,:32] + + b .Lsquare_neon + +.align 4 +.Lsquare_break_neon: + add $tbl0,$ctx,#(48+2*4*9) + add $tbl1,$ctx,#(48+3*4*9) + + vmov $R0,$D0#lo @ r^4:r^3 + vshl.u32 $S1,$D1#lo,#2 @ *5 + vmov $R1,$D1#lo + vshl.u32 $S2,$D2#lo,#2 + vmov $R2,$D2#lo + vshl.u32 $S3,$D3#lo,#2 + vmov $R3,$D3#lo + vshl.u32 $S4,$D4#lo,#2 + vmov $R4,$D4#lo + vadd.i32 $S1,$S1,$D1#lo + vadd.i32 $S2,$S2,$D2#lo + vadd.i32 $S3,$S3,$D3#lo + vadd.i32 $S4,$S4,$D4#lo + + vst4.32 {${R0}[0],${R1}[0],${S1}[0],${R2}[0]},[$tbl0]! + vst4.32 {${R0}[1],${R1}[1],${S1}[1],${R2}[1]},[$tbl1]! + vst4.32 {${S2}[0],${R3}[0],${S3}[0],${R4}[0]},[$tbl0]! + vst4.32 {${S2}[1],${R3}[1],${S3}[1],${R4}[1]},[$tbl1]! + vst1.32 {${S4}[0]},[$tbl0] + vst1.32 {${S4}[1]},[$tbl1] + +.Lno_init_neon: + ret @ bx lr +.size poly1305_init_neon,.-poly1305_init_neon + +.type poly1305_blocks_neon,%function +.align 5 +poly1305_blocks_neon: +.Lpoly1305_blocks_neon: + ldr ip,[$ctx,#36] @ is_base2_26 + + cmp $len,#64 + blo .Lpoly1305_blocks + + stmdb sp!,{r4-r7} + vstmdb sp!,{d8-d15} @ ABI specification says so + + tst ip,ip @ is_base2_26? 
+ bne .Lbase2_26_neon + + stmdb sp!,{r1-r3,lr} + bl .Lpoly1305_init_neon + + ldr r4,[$ctx,#0] @ load hash value base 2^32 + ldr r5,[$ctx,#4] + ldr r6,[$ctx,#8] + ldr r7,[$ctx,#12] + ldr ip,[$ctx,#16] + + and r2,r4,#0x03ffffff @ base 2^32 -> base 2^26 + mov r3,r4,lsr#26 + veor $D0#lo,$D0#lo,$D0#lo + mov r4,r5,lsr#20 + orr r3,r3,r5,lsl#6 + veor $D1#lo,$D1#lo,$D1#lo + mov r5,r6,lsr#14 + orr r4,r4,r6,lsl#12 + veor $D2#lo,$D2#lo,$D2#lo + mov r6,r7,lsr#8 + orr r5,r5,r7,lsl#18 + veor $D3#lo,$D3#lo,$D3#lo + and r3,r3,#0x03ffffff + orr r6,r6,ip,lsl#24 + veor $D4#lo,$D4#lo,$D4#lo + and r4,r4,#0x03ffffff + mov r1,#1 + and r5,r5,#0x03ffffff + str r1,[$ctx,#36] @ set is_base2_26 + + vmov.32 $D0#lo[0],r2 + vmov.32 $D1#lo[0],r3 + vmov.32 $D2#lo[0],r4 + vmov.32 $D3#lo[0],r5 + vmov.32 $D4#lo[0],r6 + adr $zeros,.Lzeros + + ldmia sp!,{r1-r3,lr} + b .Lhash_loaded + +.align 4 +.Lbase2_26_neon: + @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ + @ load hash value + + veor $D0#lo,$D0#lo,$D0#lo + veor $D1#lo,$D1#lo,$D1#lo + veor $D2#lo,$D2#lo,$D2#lo + veor $D3#lo,$D3#lo,$D3#lo + veor $D4#lo,$D4#lo,$D4#lo + vld4.32 {$D0#lo[0],$D1#lo[0],$D2#lo[0],$D3#lo[0]},[$ctx]! + adr $zeros,.Lzeros + vld1.32 {$D4#lo[0]},[$ctx] + sub $ctx,$ctx,#16 @ rewind + +.Lhash_loaded: + add $in2,$inp,#32 + mov $padbit,$padbit,lsl#24 + tst $len,#31 + beq .Leven + + vld4.32 {$H0#lo[0],$H1#lo[0],$H2#lo[0],$H3#lo[0]},[$inp]! + vmov.32 $H4#lo[0],$padbit + sub $len,$len,#16 + add $in2,$inp,#32 + +# ifdef __ARMEB__ + vrev32.8 $H0,$H0 + vrev32.8 $H3,$H3 + vrev32.8 $H1,$H1 + vrev32.8 $H2,$H2 +# endif + vsri.u32 $H4#lo,$H3#lo,#8 @ base 2^32 -> base 2^26 + vshl.u32 $H3#lo,$H3#lo,#18 + + vsri.u32 $H3#lo,$H2#lo,#14 + vshl.u32 $H2#lo,$H2#lo,#12 + vadd.i32 $H4#hi,$H4#lo,$D4#lo @ add hash value and move to #hi + + vbic.i32 $H3#lo,#0xfc000000 + vsri.u32 $H2#lo,$H1#lo,#20 + vshl.u32 $H1#lo,$H1#lo,#6 + + vbic.i32 $H2#lo,#0xfc000000 + vsri.u32 $H1#lo,$H0#lo,#26 + vadd.i32 $H3#hi,$H3#lo,$D3#lo + + vbic.i32 $H0#lo,#0xfc000000 + vbic.i32 $H1#lo,#0xfc000000 + vadd.i32 $H2#hi,$H2#lo,$D2#lo + + vadd.i32 $H0#hi,$H0#lo,$D0#lo + vadd.i32 $H1#hi,$H1#lo,$D1#lo + + mov $tbl1,$zeros + add $tbl0,$ctx,#48 + + cmp $len,$len + b .Long_tail + +.align 4 +.Leven: + subs $len,$len,#64 + it lo + movlo $in2,$zeros + + vmov.i32 $H4,#1<<24 @ padbit, yes, always + vld4.32 {$H0#lo,$H1#lo,$H2#lo,$H3#lo},[$inp] @ inp[0:1] + add $inp,$inp,#64 + vld4.32 {$H0#hi,$H1#hi,$H2#hi,$H3#hi},[$in2] @ inp[2:3] (or 0) + add $in2,$in2,#64 + itt hi + addhi $tbl1,$ctx,#(48+1*9*4) + addhi $tbl0,$ctx,#(48+3*9*4) + +# ifdef __ARMEB__ + vrev32.8 $H0,$H0 + vrev32.8 $H3,$H3 + vrev32.8 $H1,$H1 + vrev32.8 $H2,$H2 +# endif + vsri.u32 $H4,$H3,#8 @ base 2^32 -> base 2^26 + vshl.u32 $H3,$H3,#18 + + vsri.u32 $H3,$H2,#14 + vshl.u32 $H2,$H2,#12 + + vbic.i32 $H3,#0xfc000000 + vsri.u32 $H2,$H1,#20 + vshl.u32 $H1,$H1,#6 + + vbic.i32 $H2,#0xfc000000 + vsri.u32 $H1,$H0,#26 + + vbic.i32 $H0,#0xfc000000 + vbic.i32 $H1,#0xfc000000 + + bls .Lskip_loop + + vld4.32 {${R0}[1],${R1}[1],${S1}[1],${R2}[1]},[$tbl1]! @ load r^2 + vld4.32 {${R0}[0],${R1}[0],${S1}[0],${R2}[0]},[$tbl0]! @ load r^4 + vld4.32 {${S2}[1],${R3}[1],${S3}[1],${R4}[1]},[$tbl1]! + vld4.32 {${S2}[0],${R3}[0],${S3}[0],${R4}[0]},[$tbl0]! 
+ b .Loop_neon + +.align 5 +.Loop_neon: + @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ + @ ((inp[0]*r^4+inp[2]*r^2+inp[4])*r^4+inp[6]*r^2 + @ ((inp[1]*r^4+inp[3]*r^2+inp[5])*r^3+inp[7]*r + @ \___________________/ + @ ((inp[0]*r^4+inp[2]*r^2+inp[4])*r^4+inp[6]*r^2+inp[8])*r^2 + @ ((inp[1]*r^4+inp[3]*r^2+inp[5])*r^4+inp[7]*r^2+inp[9])*r + @ \___________________/ \____________________/ + @ + @ Note that we start with inp[2:3]*r^2. This is because it + @ doesn't depend on reduction in previous iteration. + @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ + @ d4 = h4*r0 + h3*r1 + h2*r2 + h1*r3 + h0*r4 + @ d3 = h3*r0 + h2*r1 + h1*r2 + h0*r3 + h4*5*r4 + @ d2 = h2*r0 + h1*r1 + h0*r2 + h4*5*r3 + h3*5*r4 + @ d1 = h1*r0 + h0*r1 + h4*5*r2 + h3*5*r3 + h2*5*r4 + @ d0 = h0*r0 + h4*5*r1 + h3*5*r2 + h2*5*r3 + h1*5*r4 + + @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ + @ inp[2:3]*r^2 + + vadd.i32 $H2#lo,$H2#lo,$D2#lo @ accumulate inp[0:1] + vmull.u32 $D2,$H2#hi,${R0}[1] + vadd.i32 $H0#lo,$H0#lo,$D0#lo + vmull.u32 $D0,$H0#hi,${R0}[1] + vadd.i32 $H3#lo,$H3#lo,$D3#lo + vmull.u32 $D3,$H3#hi,${R0}[1] + vmlal.u32 $D2,$H1#hi,${R1}[1] + vadd.i32 $H1#lo,$H1#lo,$D1#lo + vmull.u32 $D1,$H1#hi,${R0}[1] + + vadd.i32 $H4#lo,$H4#lo,$D4#lo + vmull.u32 $D4,$H4#hi,${R0}[1] + subs $len,$len,#64 + vmlal.u32 $D0,$H4#hi,${S1}[1] + it lo + movlo $in2,$zeros + vmlal.u32 $D3,$H2#hi,${R1}[1] + vld1.32 ${S4}[1],[$tbl1,:32] + vmlal.u32 $D1,$H0#hi,${R1}[1] + vmlal.u32 $D4,$H3#hi,${R1}[1] + + vmlal.u32 $D0,$H3#hi,${S2}[1] + vmlal.u32 $D3,$H1#hi,${R2}[1] + vmlal.u32 $D4,$H2#hi,${R2}[1] + vmlal.u32 $D1,$H4#hi,${S2}[1] + vmlal.u32 $D2,$H0#hi,${R2}[1] + + vmlal.u32 $D3,$H0#hi,${R3}[1] + vmlal.u32 $D0,$H2#hi,${S3}[1] + vmlal.u32 $D4,$H1#hi,${R3}[1] + vmlal.u32 $D1,$H3#hi,${S3}[1] + vmlal.u32 $D2,$H4#hi,${S3}[1] + + vmlal.u32 $D3,$H4#hi,${S4}[1] + vmlal.u32 $D0,$H1#hi,${S4}[1] + vmlal.u32 $D4,$H0#hi,${R4}[1] + vmlal.u32 $D1,$H2#hi,${S4}[1] + vmlal.u32 $D2,$H3#hi,${S4}[1] + + vld4.32 {$H0#hi,$H1#hi,$H2#hi,$H3#hi},[$in2] @ inp[2:3] (or 0) + add $in2,$in2,#64 + + @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ + @ (hash+inp[0:1])*r^4 and accumulate + + vmlal.u32 $D3,$H3#lo,${R0}[0] + vmlal.u32 $D0,$H0#lo,${R0}[0] + vmlal.u32 $D4,$H4#lo,${R0}[0] + vmlal.u32 $D1,$H1#lo,${R0}[0] + vmlal.u32 $D2,$H2#lo,${R0}[0] + vld1.32 ${S4}[0],[$tbl0,:32] + + vmlal.u32 $D3,$H2#lo,${R1}[0] + vmlal.u32 $D0,$H4#lo,${S1}[0] + vmlal.u32 $D4,$H3#lo,${R1}[0] + vmlal.u32 $D1,$H0#lo,${R1}[0] + vmlal.u32 $D2,$H1#lo,${R1}[0] + + vmlal.u32 $D3,$H1#lo,${R2}[0] + vmlal.u32 $D0,$H3#lo,${S2}[0] + vmlal.u32 $D4,$H2#lo,${R2}[0] + vmlal.u32 $D1,$H4#lo,${S2}[0] + vmlal.u32 $D2,$H0#lo,${R2}[0] + + vmlal.u32 $D3,$H0#lo,${R3}[0] + vmlal.u32 $D0,$H2#lo,${S3}[0] + vmlal.u32 $D4,$H1#lo,${R3}[0] + vmlal.u32 $D1,$H3#lo,${S3}[0] + vmlal.u32 $D3,$H4#lo,${S4}[0] + + vmlal.u32 $D2,$H4#lo,${S3}[0] + vmlal.u32 $D0,$H1#lo,${S4}[0] + vmlal.u32 $D4,$H0#lo,${R4}[0] + vmov.i32 $H4,#1<<24 @ padbit, yes, always + vmlal.u32 $D1,$H2#lo,${S4}[0] + vmlal.u32 $D2,$H3#lo,${S4}[0] + + vld4.32 {$H0#lo,$H1#lo,$H2#lo,$H3#lo},[$inp] @ inp[0:1] + add $inp,$inp,#64 +# ifdef __ARMEB__ + vrev32.8 $H0,$H0 + vrev32.8 $H1,$H1 + vrev32.8 $H2,$H2 + vrev32.8 $H3,$H3 +# endif + + @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ + @ lazy reduction interleaved with base 2^32 -> base 2^26 of + @ inp[0:3] previously loaded to $H0-$H3 and smashed to $H0-$H4. 
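The comment above captures the core of the NEON design: the 130-bit accumulator is kept as five 26-bit limbs so every partial product fits a 64-bit lane, carries are only propagated lazily, and the carry leaving the top limb re-enters the bottom limb multiplied by 5, because 2^130 ≡ 5 (mod 2^130 - 5). A plain-C sketch of just that radix handling (the multiply by r, i.e. the vmull/vmlal chains, is left out, and the helper name and the h[]/m[] arrays are illustrative rather than kernel API):

    #include <stdint.h>

    /* Fold one 16-byte little-endian block into five 26-bit limbs and run
     * one lazy carry pass; a scalar sketch of what the vsri/vshl and
     * vshr/vbic sequences below do per lane. */
    static void poly1305_add_block_radix26(uint32_t h[5], const uint32_t m[4],
                                           uint32_t padbit)
    {
        uint32_t c;

        /* base 2^32 -> base 2^26 */
        h[0] += m[0] & 0x3ffffff;
        h[1] += ((m[0] >> 26) | (m[1] << 6)) & 0x3ffffff;
        h[2] += ((m[1] >> 20) | (m[2] << 12)) & 0x3ffffff;
        h[3] += ((m[2] >> 14) | (m[3] << 18)) & 0x3ffffff;
        h[4] += (m[3] >> 8) | (padbit << 24);

        /* lazy carries; the top carry wraps into h[0] with a factor of 5 */
        c = h[0] >> 26; h[0] &= 0x3ffffff; h[1] += c;
        c = h[1] >> 26; h[1] &= 0x3ffffff; h[2] += c;
        c = h[2] >> 26; h[2] &= 0x3ffffff; h[3] += c;
        c = h[3] >> 26; h[3] &= 0x3ffffff; h[4] += c;
        c = h[4] >> 26; h[4] &= 0x3ffffff; h[0] += c * 5;
    }

The closing h[0] += c * 5 is what the vector code spells as an add, a vshl.u32 #2 and a second add (c + 4*c), and h[0] is allowed to leave this step slightly wider than 26 bits; that slack is exactly what the bound analysis in the comment block earlier in this file accounts for.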
+ + vshr.u64 $T0,$D3,#26 + vmovn.i64 $D3#lo,$D3 + vshr.u64 $T1,$D0,#26 + vmovn.i64 $D0#lo,$D0 + vadd.i64 $D4,$D4,$T0 @ h3 -> h4 + vbic.i32 $D3#lo,#0xfc000000 + vsri.u32 $H4,$H3,#8 @ base 2^32 -> base 2^26 + vadd.i64 $D1,$D1,$T1 @ h0 -> h1 + vshl.u32 $H3,$H3,#18 + vbic.i32 $D0#lo,#0xfc000000 + + vshrn.u64 $T0#lo,$D4,#26 + vmovn.i64 $D4#lo,$D4 + vshr.u64 $T1,$D1,#26 + vmovn.i64 $D1#lo,$D1 + vadd.i64 $D2,$D2,$T1 @ h1 -> h2 + vsri.u32 $H3,$H2,#14 + vbic.i32 $D4#lo,#0xfc000000 + vshl.u32 $H2,$H2,#12 + vbic.i32 $D1#lo,#0xfc000000 + + vadd.i32 $D0#lo,$D0#lo,$T0#lo + vshl.u32 $T0#lo,$T0#lo,#2 + vbic.i32 $H3,#0xfc000000 + vshrn.u64 $T1#lo,$D2,#26 + vmovn.i64 $D2#lo,$D2 + vaddl.u32 $D0,$D0#lo,$T0#lo @ h4 -> h0 [widen for a sec] + vsri.u32 $H2,$H1,#20 + vadd.i32 $D3#lo,$D3#lo,$T1#lo @ h2 -> h3 + vshl.u32 $H1,$H1,#6 + vbic.i32 $D2#lo,#0xfc000000 + vbic.i32 $H2,#0xfc000000 + + vshrn.u64 $T0#lo,$D0,#26 @ re-narrow + vmovn.i64 $D0#lo,$D0 + vsri.u32 $H1,$H0,#26 + vbic.i32 $H0,#0xfc000000 + vshr.u32 $T1#lo,$D3#lo,#26 + vbic.i32 $D3#lo,#0xfc000000 + vbic.i32 $D0#lo,#0xfc000000 + vadd.i32 $D1#lo,$D1#lo,$T0#lo @ h0 -> h1 + vadd.i32 $D4#lo,$D4#lo,$T1#lo @ h3 -> h4 + vbic.i32 $H1,#0xfc000000 + + bhi .Loop_neon + +.Lskip_loop: + @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ + @ multiply (inp[0:1]+hash) or inp[2:3] by r^2:r^1 + + add $tbl1,$ctx,#(48+0*9*4) + add $tbl0,$ctx,#(48+1*9*4) + adds $len,$len,#32 + it ne + movne $len,#0 + bne .Long_tail + + vadd.i32 $H2#hi,$H2#lo,$D2#lo @ add hash value and move to #hi + vadd.i32 $H0#hi,$H0#lo,$D0#lo + vadd.i32 $H3#hi,$H3#lo,$D3#lo + vadd.i32 $H1#hi,$H1#lo,$D1#lo + vadd.i32 $H4#hi,$H4#lo,$D4#lo + +.Long_tail: + vld4.32 {${R0}[1],${R1}[1],${S1}[1],${R2}[1]},[$tbl1]! @ load r^1 + vld4.32 {${R0}[0],${R1}[0],${S1}[0],${R2}[0]},[$tbl0]! @ load r^2 + + vadd.i32 $H2#lo,$H2#lo,$D2#lo @ can be redundant + vmull.u32 $D2,$H2#hi,$R0 + vadd.i32 $H0#lo,$H0#lo,$D0#lo + vmull.u32 $D0,$H0#hi,$R0 + vadd.i32 $H3#lo,$H3#lo,$D3#lo + vmull.u32 $D3,$H3#hi,$R0 + vadd.i32 $H1#lo,$H1#lo,$D1#lo + vmull.u32 $D1,$H1#hi,$R0 + vadd.i32 $H4#lo,$H4#lo,$D4#lo + vmull.u32 $D4,$H4#hi,$R0 + + vmlal.u32 $D0,$H4#hi,$S1 + vld4.32 {${S2}[1],${R3}[1],${S3}[1],${R4}[1]},[$tbl1]! + vmlal.u32 $D3,$H2#hi,$R1 + vld4.32 {${S2}[0],${R3}[0],${S3}[0],${R4}[0]},[$tbl0]! + vmlal.u32 $D1,$H0#hi,$R1 + vmlal.u32 $D4,$H3#hi,$R1 + vmlal.u32 $D2,$H1#hi,$R1 + + vmlal.u32 $D3,$H1#hi,$R2 + vld1.32 ${S4}[1],[$tbl1,:32] + vmlal.u32 $D0,$H3#hi,$S2 + vld1.32 ${S4}[0],[$tbl0,:32] + vmlal.u32 $D4,$H2#hi,$R2 + vmlal.u32 $D1,$H4#hi,$S2 + vmlal.u32 $D2,$H0#hi,$R2 + + vmlal.u32 $D3,$H0#hi,$R3 + it ne + addne $tbl1,$ctx,#(48+2*9*4) + vmlal.u32 $D0,$H2#hi,$S3 + it ne + addne $tbl0,$ctx,#(48+3*9*4) + vmlal.u32 $D4,$H1#hi,$R3 + vmlal.u32 $D1,$H3#hi,$S3 + vmlal.u32 $D2,$H4#hi,$S3 + + vmlal.u32 $D3,$H4#hi,$S4 + vorn $MASK,$MASK,$MASK @ all-ones, can be redundant + vmlal.u32 $D0,$H1#hi,$S4 + vshr.u64 $MASK,$MASK,#38 + vmlal.u32 $D4,$H0#hi,$R4 + vmlal.u32 $D1,$H2#hi,$S4 + vmlal.u32 $D2,$H3#hi,$S4 + + beq .Lshort_tail + + @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ + @ (hash+inp[0:1])*r^4:r^3 and accumulate + + vld4.32 {${R0}[1],${R1}[1],${S1}[1],${R2}[1]},[$tbl1]! @ load r^3 + vld4.32 {${R0}[0],${R1}[0],${S1}[0],${R2}[0]},[$tbl0]! @ load r^4 + + vmlal.u32 $D2,$H2#lo,$R0 + vmlal.u32 $D0,$H0#lo,$R0 + vmlal.u32 $D3,$H3#lo,$R0 + vmlal.u32 $D1,$H1#lo,$R0 + vmlal.u32 $D4,$H4#lo,$R0 + + vmlal.u32 $D0,$H4#lo,$S1 + vld4.32 {${S2}[1],${R3}[1],${S3}[1],${R4}[1]},[$tbl1]! 
+ vmlal.u32 $D3,$H2#lo,$R1 + vld4.32 {${S2}[0],${R3}[0],${S3}[0],${R4}[0]},[$tbl0]! + vmlal.u32 $D1,$H0#lo,$R1 + vmlal.u32 $D4,$H3#lo,$R1 + vmlal.u32 $D2,$H1#lo,$R1 + + vmlal.u32 $D3,$H1#lo,$R2 + vld1.32 ${S4}[1],[$tbl1,:32] + vmlal.u32 $D0,$H3#lo,$S2 + vld1.32 ${S4}[0],[$tbl0,:32] + vmlal.u32 $D4,$H2#lo,$R2 + vmlal.u32 $D1,$H4#lo,$S2 + vmlal.u32 $D2,$H0#lo,$R2 + + vmlal.u32 $D3,$H0#lo,$R3 + vmlal.u32 $D0,$H2#lo,$S3 + vmlal.u32 $D4,$H1#lo,$R3 + vmlal.u32 $D1,$H3#lo,$S3 + vmlal.u32 $D2,$H4#lo,$S3 + + vmlal.u32 $D3,$H4#lo,$S4 + vorn $MASK,$MASK,$MASK @ all-ones + vmlal.u32 $D0,$H1#lo,$S4 + vshr.u64 $MASK,$MASK,#38 + vmlal.u32 $D4,$H0#lo,$R4 + vmlal.u32 $D1,$H2#lo,$S4 + vmlal.u32 $D2,$H3#lo,$S4 + +.Lshort_tail: + @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ + @ horizontal addition + + vadd.i64 $D3#lo,$D3#lo,$D3#hi + vadd.i64 $D0#lo,$D0#lo,$D0#hi + vadd.i64 $D4#lo,$D4#lo,$D4#hi + vadd.i64 $D1#lo,$D1#lo,$D1#hi + vadd.i64 $D2#lo,$D2#lo,$D2#hi + + @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ + @ lazy reduction, but without narrowing + + vshr.u64 $T0,$D3,#26 + vand.i64 $D3,$D3,$MASK + vshr.u64 $T1,$D0,#26 + vand.i64 $D0,$D0,$MASK + vadd.i64 $D4,$D4,$T0 @ h3 -> h4 + vadd.i64 $D1,$D1,$T1 @ h0 -> h1 + + vshr.u64 $T0,$D4,#26 + vand.i64 $D4,$D4,$MASK + vshr.u64 $T1,$D1,#26 + vand.i64 $D1,$D1,$MASK + vadd.i64 $D2,$D2,$T1 @ h1 -> h2 + + vadd.i64 $D0,$D0,$T0 + vshl.u64 $T0,$T0,#2 + vshr.u64 $T1,$D2,#26 + vand.i64 $D2,$D2,$MASK + vadd.i64 $D0,$D0,$T0 @ h4 -> h0 + vadd.i64 $D3,$D3,$T1 @ h2 -> h3 + + vshr.u64 $T0,$D0,#26 + vand.i64 $D0,$D0,$MASK + vshr.u64 $T1,$D3,#26 + vand.i64 $D3,$D3,$MASK + vadd.i64 $D1,$D1,$T0 @ h0 -> h1 + vadd.i64 $D4,$D4,$T1 @ h3 -> h4 + + cmp $len,#0 + bne .Leven + + @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ + @ store hash value + + vst4.32 {$D0#lo[0],$D1#lo[0],$D2#lo[0],$D3#lo[0]},[$ctx]! 
+ vst1.32 {$D4#lo[0]},[$ctx] + + vldmia sp!,{d8-d15} @ epilogue + ldmia sp!,{r4-r7} + ret @ bx lr +.size poly1305_blocks_neon,.-poly1305_blocks_neon + +.align 5 +.Lzeros: +.long 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 +#ifndef __KERNEL__ +.LOPENSSL_armcap: +# ifdef _WIN32 +.word OPENSSL_armcap_P +# else +.word OPENSSL_armcap_P-.Lpoly1305_init +# endif +.comm OPENSSL_armcap_P,4,4 +.hidden OPENSSL_armcap_P +#endif +#endif +___ +} } +$code.=<<___; +.asciz "Poly1305 for ARMv4/NEON, CRYPTOGAMS by \@dot-asm" +.align 2 +___ + +foreach (split("\n",$code)) { + s/\`([^\`]*)\`/eval $1/geo; + + s/\bq([0-9]+)#(lo|hi)/sprintf "d%d",2*$1+($2 eq "hi")/geo or + s/\bret\b/bx lr/go or + s/\bbx\s+lr\b/.word\t0xe12fff1e/go; # make it possible to compile with -march=armv4 + + print $_,"\n"; +} +close STDOUT; # enforce flush diff --git a/arch/arm/crypto/poly1305-core.S_shipped b/arch/arm/crypto/poly1305-core.S_shipped new file mode 100644 index 000000000000..37b71d990293 --- /dev/null +++ b/arch/arm/crypto/poly1305-core.S_shipped @@ -0,0 +1,1158 @@ +#ifndef __KERNEL__ +# include "arm_arch.h" +#else +# define __ARM_ARCH__ __LINUX_ARM_ARCH__ +# define __ARM_MAX_ARCH__ __LINUX_ARM_ARCH__ +# define poly1305_init poly1305_init_arm +# define poly1305_blocks poly1305_blocks_arm +# define poly1305_emit poly1305_emit_arm +.globl poly1305_blocks_neon +#endif + +#if defined(__thumb2__) +.syntax unified +.thumb +#else +.code 32 +#endif + +.text + +.globl poly1305_emit +.globl poly1305_blocks +.globl poly1305_init +.type poly1305_init,%function +.align 5 +poly1305_init: +.Lpoly1305_init: + stmdb sp!,{r4-r11} + + eor r3,r3,r3 + cmp r1,#0 + str r3,[r0,#0] @ zero hash value + str r3,[r0,#4] + str r3,[r0,#8] + str r3,[r0,#12] + str r3,[r0,#16] + str r3,[r0,#36] @ clear is_base2_26 + add r0,r0,#20 + +#ifdef __thumb2__ + it eq +#endif + moveq r0,#0 + beq .Lno_key + +#if __ARM_MAX_ARCH__>=7 + mov r3,#-1 + str r3,[r0,#28] @ impossible key power value +# ifndef __KERNEL__ + adr r11,.Lpoly1305_init + ldr r12,.LOPENSSL_armcap +# endif +#endif + ldrb r4,[r1,#0] + mov r10,#0x0fffffff + ldrb r5,[r1,#1] + and r3,r10,#-4 @ 0x0ffffffc + ldrb r6,[r1,#2] + ldrb r7,[r1,#3] + orr r4,r4,r5,lsl#8 + ldrb r5,[r1,#4] + orr r4,r4,r6,lsl#16 + ldrb r6,[r1,#5] + orr r4,r4,r7,lsl#24 + ldrb r7,[r1,#6] + and r4,r4,r10 + +#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__) +# if !defined(_WIN32) + ldr r12,[r11,r12] @ OPENSSL_armcap_P +# endif +# if defined(__APPLE__) || defined(_WIN32) + ldr r12,[r12] +# endif +#endif + ldrb r8,[r1,#7] + orr r5,r5,r6,lsl#8 + ldrb r6,[r1,#8] + orr r5,r5,r7,lsl#16 + ldrb r7,[r1,#9] + orr r5,r5,r8,lsl#24 + ldrb r8,[r1,#10] + and r5,r5,r3 + +#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__) + tst r12,#ARMV7_NEON @ check for NEON +# ifdef __thumb2__ + adr r9,.Lpoly1305_blocks_neon + adr r11,.Lpoly1305_blocks + it ne + movne r11,r9 + adr r12,.Lpoly1305_emit + orr r11,r11,#1 @ thumb-ify addresses + orr r12,r12,#1 +# else + add r12,r11,#(.Lpoly1305_emit-.Lpoly1305_init) + ite eq + addeq r11,r11,#(.Lpoly1305_blocks-.Lpoly1305_init) + addne r11,r11,#(.Lpoly1305_blocks_neon-.Lpoly1305_init) +# endif +#endif + ldrb r9,[r1,#11] + orr r6,r6,r7,lsl#8 + ldrb r7,[r1,#12] + orr r6,r6,r8,lsl#16 + ldrb r8,[r1,#13] + orr r6,r6,r9,lsl#24 + ldrb r9,[r1,#14] + and r6,r6,r3 + + ldrb r10,[r1,#15] + orr r7,r7,r8,lsl#8 + str r4,[r0,#0] + orr r7,r7,r9,lsl#16 + str r5,[r0,#4] + orr r7,r7,r10,lsl#24 + str r6,[r0,#8] + and r7,r7,r3 + str r7,[r0,#12] +#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__) + stmia r2,{r11,r12} @ fill functions table + mov r0,#1 +#else + mov 
r0,#0 +#endif +.Lno_key: + ldmia sp!,{r4-r11} +#if __ARM_ARCH__>=5 + bx lr @ bx lr +#else + tst lr,#1 + moveq pc,lr @ be binary compatible with V4, yet + .word 0xe12fff1e @ interoperable with Thumb ISA:-) +#endif +.size poly1305_init,.-poly1305_init +.type poly1305_blocks,%function +.align 5 +poly1305_blocks: +.Lpoly1305_blocks: + stmdb sp!,{r3-r11,lr} + + ands r2,r2,#-16 + beq .Lno_data + + add r2,r2,r1 @ end pointer + sub sp,sp,#32 + +#if __ARM_ARCH__<7 + ldmia r0,{r4-r12} @ load context + add r0,r0,#20 + str r2,[sp,#16] @ offload stuff + str r0,[sp,#12] +#else + ldr lr,[r0,#36] @ is_base2_26 + ldmia r0!,{r4-r8} @ load hash value + str r2,[sp,#16] @ offload stuff + str r0,[sp,#12] + + adds r9,r4,r5,lsl#26 @ base 2^26 -> base 2^32 + mov r10,r5,lsr#6 + adcs r10,r10,r6,lsl#20 + mov r11,r6,lsr#12 + adcs r11,r11,r7,lsl#14 + mov r12,r7,lsr#18 + adcs r12,r12,r8,lsl#8 + mov r2,#0 + teq lr,#0 + str r2,[r0,#16] @ clear is_base2_26 + adc r2,r2,r8,lsr#24 + + itttt ne + movne r4,r9 @ choose between radixes + movne r5,r10 + movne r6,r11 + movne r7,r12 + ldmia r0,{r9-r12} @ load key + it ne + movne r8,r2 +#endif + + mov lr,r1 + cmp r3,#0 + str r10,[sp,#20] + str r11,[sp,#24] + str r12,[sp,#28] + b .Loop + +.align 4 +.Loop: +#if __ARM_ARCH__<7 + ldrb r0,[lr],#16 @ load input +# ifdef __thumb2__ + it hi +# endif + addhi r8,r8,#1 @ 1<<128 + ldrb r1,[lr,#-15] + ldrb r2,[lr,#-14] + ldrb r3,[lr,#-13] + orr r1,r0,r1,lsl#8 + ldrb r0,[lr,#-12] + orr r2,r1,r2,lsl#16 + ldrb r1,[lr,#-11] + orr r3,r2,r3,lsl#24 + ldrb r2,[lr,#-10] + adds r4,r4,r3 @ accumulate input + + ldrb r3,[lr,#-9] + orr r1,r0,r1,lsl#8 + ldrb r0,[lr,#-8] + orr r2,r1,r2,lsl#16 + ldrb r1,[lr,#-7] + orr r3,r2,r3,lsl#24 + ldrb r2,[lr,#-6] + adcs r5,r5,r3 + + ldrb r3,[lr,#-5] + orr r1,r0,r1,lsl#8 + ldrb r0,[lr,#-4] + orr r2,r1,r2,lsl#16 + ldrb r1,[lr,#-3] + orr r3,r2,r3,lsl#24 + ldrb r2,[lr,#-2] + adcs r6,r6,r3 + + ldrb r3,[lr,#-1] + orr r1,r0,r1,lsl#8 + str lr,[sp,#8] @ offload input pointer + orr r2,r1,r2,lsl#16 + add r10,r10,r10,lsr#2 + orr r3,r2,r3,lsl#24 +#else + ldr r0,[lr],#16 @ load input + it hi + addhi r8,r8,#1 @ padbit + ldr r1,[lr,#-12] + ldr r2,[lr,#-8] + ldr r3,[lr,#-4] +# ifdef __ARMEB__ + rev r0,r0 + rev r1,r1 + rev r2,r2 + rev r3,r3 +# endif + adds r4,r4,r0 @ accumulate input + str lr,[sp,#8] @ offload input pointer + adcs r5,r5,r1 + add r10,r10,r10,lsr#2 + adcs r6,r6,r2 +#endif + add r11,r11,r11,lsr#2 + adcs r7,r7,r3 + add r12,r12,r12,lsr#2 + + umull r2,r3,r5,r9 + adc r8,r8,#0 + umull r0,r1,r4,r9 + umlal r2,r3,r8,r10 + umlal r0,r1,r7,r10 + ldr r10,[sp,#20] @ reload r10 + umlal r2,r3,r6,r12 + umlal r0,r1,r5,r12 + umlal r2,r3,r7,r11 + umlal r0,r1,r6,r11 + umlal r2,r3,r4,r10 + str r0,[sp,#0] @ future r4 + mul r0,r11,r8 + ldr r11,[sp,#24] @ reload r11 + adds r2,r2,r1 @ d1+=d0>>32 + eor r1,r1,r1 + adc lr,r3,#0 @ future r6 + str r2,[sp,#4] @ future r5 + + mul r2,r12,r8 + eor r3,r3,r3 + umlal r0,r1,r7,r12 + ldr r12,[sp,#28] @ reload r12 + umlal r2,r3,r7,r9 + umlal r0,r1,r6,r9 + umlal r2,r3,r6,r10 + umlal r0,r1,r5,r10 + umlal r2,r3,r5,r11 + umlal r0,r1,r4,r11 + umlal r2,r3,r4,r12 + ldr r4,[sp,#0] + mul r8,r9,r8 + ldr r5,[sp,#4] + + adds r6,lr,r0 @ d2+=d1>>32 + ldr lr,[sp,#8] @ reload input pointer + adc r1,r1,#0 + adds r7,r2,r1 @ d3+=d2>>32 + ldr r0,[sp,#16] @ reload end pointer + adc r3,r3,#0 + add r8,r8,r3 @ h4+=d3>>32 + + and r1,r8,#-4 + and r8,r8,#3 + add r1,r1,r1,lsr#2 @ *=5 + adds r4,r4,r1 + adcs r5,r5,#0 + adcs r6,r6,#0 + adcs r7,r7,#0 + adc r8,r8,#0 + + cmp r0,lr @ done yet? 
+ bhi .Loop + + ldr r0,[sp,#12] + add sp,sp,#32 + stmdb r0,{r4-r8} @ store the result + +.Lno_data: +#if __ARM_ARCH__>=5 + ldmia sp!,{r3-r11,pc} +#else + ldmia sp!,{r3-r11,lr} + tst lr,#1 + moveq pc,lr @ be binary compatible with V4, yet + .word 0xe12fff1e @ interoperable with Thumb ISA:-) +#endif +.size poly1305_blocks,.-poly1305_blocks +.type poly1305_emit,%function +.align 5 +poly1305_emit: +.Lpoly1305_emit: + stmdb sp!,{r4-r11} + + ldmia r0,{r3-r7} + +#if __ARM_ARCH__>=7 + ldr ip,[r0,#36] @ is_base2_26 + + adds r8,r3,r4,lsl#26 @ base 2^26 -> base 2^32 + mov r9,r4,lsr#6 + adcs r9,r9,r5,lsl#20 + mov r10,r5,lsr#12 + adcs r10,r10,r6,lsl#14 + mov r11,r6,lsr#18 + adcs r11,r11,r7,lsl#8 + mov r0,#0 + adc r0,r0,r7,lsr#24 + + tst ip,ip + itttt ne + movne r3,r8 + movne r4,r9 + movne r5,r10 + movne r6,r11 + it ne + movne r7,r0 +#endif + + adds r8,r3,#5 @ compare to modulus + adcs r9,r4,#0 + adcs r10,r5,#0 + adcs r11,r6,#0 + adc r0,r7,#0 + tst r0,#4 @ did it carry/borrow? + +#ifdef __thumb2__ + it ne +#endif + movne r3,r8 + ldr r8,[r2,#0] +#ifdef __thumb2__ + it ne +#endif + movne r4,r9 + ldr r9,[r2,#4] +#ifdef __thumb2__ + it ne +#endif + movne r5,r10 + ldr r10,[r2,#8] +#ifdef __thumb2__ + it ne +#endif + movne r6,r11 + ldr r11,[r2,#12] + + adds r3,r3,r8 + adcs r4,r4,r9 + adcs r5,r5,r10 + adc r6,r6,r11 + +#if __ARM_ARCH__>=7 +# ifdef __ARMEB__ + rev r3,r3 + rev r4,r4 + rev r5,r5 + rev r6,r6 +# endif + str r3,[r1,#0] + str r4,[r1,#4] + str r5,[r1,#8] + str r6,[r1,#12] +#else + strb r3,[r1,#0] + mov r3,r3,lsr#8 + strb r4,[r1,#4] + mov r4,r4,lsr#8 + strb r5,[r1,#8] + mov r5,r5,lsr#8 + strb r6,[r1,#12] + mov r6,r6,lsr#8 + + strb r3,[r1,#1] + mov r3,r3,lsr#8 + strb r4,[r1,#5] + mov r4,r4,lsr#8 + strb r5,[r1,#9] + mov r5,r5,lsr#8 + strb r6,[r1,#13] + mov r6,r6,lsr#8 + + strb r3,[r1,#2] + mov r3,r3,lsr#8 + strb r4,[r1,#6] + mov r4,r4,lsr#8 + strb r5,[r1,#10] + mov r5,r5,lsr#8 + strb r6,[r1,#14] + mov r6,r6,lsr#8 + + strb r3,[r1,#3] + strb r4,[r1,#7] + strb r5,[r1,#11] + strb r6,[r1,#15] +#endif + ldmia sp!,{r4-r11} +#if __ARM_ARCH__>=5 + bx lr @ bx lr +#else + tst lr,#1 + moveq pc,lr @ be binary compatible with V4, yet + .word 0xe12fff1e @ interoperable with Thumb ISA:-) +#endif +.size poly1305_emit,.-poly1305_emit +#if __ARM_MAX_ARCH__>=7 +.fpu neon + +.type poly1305_init_neon,%function +.align 5 +poly1305_init_neon: +.Lpoly1305_init_neon: + ldr r3,[r0,#48] @ first table element + cmp r3,#-1 @ is value impossible? 
+ bne .Lno_init_neon + + ldr r4,[r0,#20] @ load key base 2^32 + ldr r5,[r0,#24] + ldr r6,[r0,#28] + ldr r7,[r0,#32] + + and r2,r4,#0x03ffffff @ base 2^32 -> base 2^26 + mov r3,r4,lsr#26 + mov r4,r5,lsr#20 + orr r3,r3,r5,lsl#6 + mov r5,r6,lsr#14 + orr r4,r4,r6,lsl#12 + mov r6,r7,lsr#8 + orr r5,r5,r7,lsl#18 + and r3,r3,#0x03ffffff + and r4,r4,#0x03ffffff + and r5,r5,#0x03ffffff + + vdup.32 d0,r2 @ r^1 in both lanes + add r2,r3,r3,lsl#2 @ *5 + vdup.32 d1,r3 + add r3,r4,r4,lsl#2 + vdup.32 d2,r2 + vdup.32 d3,r4 + add r4,r5,r5,lsl#2 + vdup.32 d4,r3 + vdup.32 d5,r5 + add r5,r6,r6,lsl#2 + vdup.32 d6,r4 + vdup.32 d7,r6 + vdup.32 d8,r5 + + mov r5,#2 @ counter + +.Lsquare_neon: + @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ + @ d0 = h0*r0 + h4*5*r1 + h3*5*r2 + h2*5*r3 + h1*5*r4 + @ d1 = h1*r0 + h0*r1 + h4*5*r2 + h3*5*r3 + h2*5*r4 + @ d2 = h2*r0 + h1*r1 + h0*r2 + h4*5*r3 + h3*5*r4 + @ d3 = h3*r0 + h2*r1 + h1*r2 + h0*r3 + h4*5*r4 + @ d4 = h4*r0 + h3*r1 + h2*r2 + h1*r3 + h0*r4 + + vmull.u32 q5,d0,d0[1] + vmull.u32 q6,d1,d0[1] + vmull.u32 q7,d3,d0[1] + vmull.u32 q8,d5,d0[1] + vmull.u32 q9,d7,d0[1] + + vmlal.u32 q5,d7,d2[1] + vmlal.u32 q6,d0,d1[1] + vmlal.u32 q7,d1,d1[1] + vmlal.u32 q8,d3,d1[1] + vmlal.u32 q9,d5,d1[1] + + vmlal.u32 q5,d5,d4[1] + vmlal.u32 q6,d7,d4[1] + vmlal.u32 q8,d1,d3[1] + vmlal.u32 q7,d0,d3[1] + vmlal.u32 q9,d3,d3[1] + + vmlal.u32 q5,d3,d6[1] + vmlal.u32 q8,d0,d5[1] + vmlal.u32 q6,d5,d6[1] + vmlal.u32 q7,d7,d6[1] + vmlal.u32 q9,d1,d5[1] + + vmlal.u32 q8,d7,d8[1] + vmlal.u32 q5,d1,d8[1] + vmlal.u32 q6,d3,d8[1] + vmlal.u32 q7,d5,d8[1] + vmlal.u32 q9,d0,d7[1] + + @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ + @ lazy reduction as discussed in "NEON crypto" by D.J. Bernstein + @ and P. Schwabe + @ + @ H0>>+H1>>+H2>>+H3>>+H4 + @ H3>>+H4>>*5+H0>>+H1 + @ + @ Trivia. + @ + @ Result of multiplication of n-bit number by m-bit number is + @ n+m bits wide. However! Even though 2^n is a n+1-bit number, + @ m-bit number multiplied by 2^n is still n+m bits wide. + @ + @ Sum of two n-bit numbers is n+1 bits wide, sum of three - n+2, + @ and so is sum of four. Sum of 2^m n-m-bit numbers and n-bit + @ one is n+1 bits wide. + @ + @ >>+ denotes Hnext += Hn>>26, Hn &= 0x3ffffff. This means that + @ H0, H2, H3 are guaranteed to be 26 bits wide, while H1 and H4 + @ can be 27. However! In cases when their width exceeds 26 bits + @ they are limited by 2^26+2^6. This in turn means that *sum* + @ of the products with these values can still be viewed as sum + @ of 52-bit numbers as long as the amount of addends is not a + @ power of 2. For example, + @ + @ H4 = H4*R0 + H3*R1 + H2*R2 + H1*R3 + H0 * R4, + @ + @ which can't be larger than 5 * (2^26 + 2^6) * (2^26 + 2^6), or + @ 5 * (2^52 + 2*2^32 + 2^12), which in turn is smaller than + @ 8 * (2^52) or 2^55. However, the value is then multiplied by + @ by 5, so we should be looking at 5 * 5 * (2^52 + 2^33 + 2^12), + @ which is less than 32 * (2^52) or 2^57. And when processing + @ data we are looking at triple as many addends... + @ + @ In key setup procedure pre-reduced H0 is limited by 5*4+1 and + @ 5*H4 - by 5*5 52-bit addends, or 57 bits. But when hashing the + @ input H0 is limited by (5*4+1)*3 addends, or 58 bits, while + @ 5*H4 by 5*5*3, or 59[!] bits. How is this relevant? vmlal.u32 + @ instruction accepts 2x32-bit input and writes 2x64-bit result. + @ This means that result of reduction have to be compressed upon + @ loop wrap-around. 
This can be done in the process of reduction + @ to minimize amount of instructions [as well as amount of + @ 128-bit instructions, which benefits low-end processors], but + @ one has to watch for H2 (which is narrower than H0) and 5*H4 + @ not being wider than 58 bits, so that result of right shift + @ by 26 bits fits in 32 bits. This is also useful on x86, + @ because it allows to use paddd in place for paddq, which + @ benefits Atom, where paddq is ridiculously slow. + + vshr.u64 q15,q8,#26 + vmovn.i64 d16,q8 + vshr.u64 q4,q5,#26 + vmovn.i64 d10,q5 + vadd.i64 q9,q9,q15 @ h3 -> h4 + vbic.i32 d16,#0xfc000000 @ &=0x03ffffff + vadd.i64 q6,q6,q4 @ h0 -> h1 + vbic.i32 d10,#0xfc000000 + + vshrn.u64 d30,q9,#26 + vmovn.i64 d18,q9 + vshr.u64 q4,q6,#26 + vmovn.i64 d12,q6 + vadd.i64 q7,q7,q4 @ h1 -> h2 + vbic.i32 d18,#0xfc000000 + vbic.i32 d12,#0xfc000000 + + vadd.i32 d10,d10,d30 + vshl.u32 d30,d30,#2 + vshrn.u64 d8,q7,#26 + vmovn.i64 d14,q7 + vadd.i32 d10,d10,d30 @ h4 -> h0 + vadd.i32 d16,d16,d8 @ h2 -> h3 + vbic.i32 d14,#0xfc000000 + + vshr.u32 d30,d10,#26 + vbic.i32 d10,#0xfc000000 + vshr.u32 d8,d16,#26 + vbic.i32 d16,#0xfc000000 + vadd.i32 d12,d12,d30 @ h0 -> h1 + vadd.i32 d18,d18,d8 @ h3 -> h4 + + subs r5,r5,#1 + beq .Lsquare_break_neon + + add r6,r0,#(48+0*9*4) + add r7,r0,#(48+1*9*4) + + vtrn.32 d0,d10 @ r^2:r^1 + vtrn.32 d3,d14 + vtrn.32 d5,d16 + vtrn.32 d1,d12 + vtrn.32 d7,d18 + + vshl.u32 d4,d3,#2 @ *5 + vshl.u32 d6,d5,#2 + vshl.u32 d2,d1,#2 + vshl.u32 d8,d7,#2 + vadd.i32 d4,d4,d3 + vadd.i32 d2,d2,d1 + vadd.i32 d6,d6,d5 + vadd.i32 d8,d8,d7 + + vst4.32 {d0[0],d1[0],d2[0],d3[0]},[r6]! + vst4.32 {d0[1],d1[1],d2[1],d3[1]},[r7]! + vst4.32 {d4[0],d5[0],d6[0],d7[0]},[r6]! + vst4.32 {d4[1],d5[1],d6[1],d7[1]},[r7]! + vst1.32 {d8[0]},[r6,:32] + vst1.32 {d8[1]},[r7,:32] + + b .Lsquare_neon + +.align 4 +.Lsquare_break_neon: + add r6,r0,#(48+2*4*9) + add r7,r0,#(48+3*4*9) + + vmov d0,d10 @ r^4:r^3 + vshl.u32 d2,d12,#2 @ *5 + vmov d1,d12 + vshl.u32 d4,d14,#2 + vmov d3,d14 + vshl.u32 d6,d16,#2 + vmov d5,d16 + vshl.u32 d8,d18,#2 + vmov d7,d18 + vadd.i32 d2,d2,d12 + vadd.i32 d4,d4,d14 + vadd.i32 d6,d6,d16 + vadd.i32 d8,d8,d18 + + vst4.32 {d0[0],d1[0],d2[0],d3[0]},[r6]! + vst4.32 {d0[1],d1[1],d2[1],d3[1]},[r7]! + vst4.32 {d4[0],d5[0],d6[0],d7[0]},[r6]! + vst4.32 {d4[1],d5[1],d6[1],d7[1]},[r7]! + vst1.32 {d8[0]},[r6] + vst1.32 {d8[1]},[r7] + +.Lno_init_neon: + bx lr @ bx lr +.size poly1305_init_neon,.-poly1305_init_neon + +.type poly1305_blocks_neon,%function +.align 5 +poly1305_blocks_neon: +.Lpoly1305_blocks_neon: + ldr ip,[r0,#36] @ is_base2_26 + + cmp r2,#64 + blo .Lpoly1305_blocks + + stmdb sp!,{r4-r7} + vstmdb sp!,{d8-d15} @ ABI specification says so + + tst ip,ip @ is_base2_26? 
+ bne .Lbase2_26_neon + + stmdb sp!,{r1-r3,lr} + bl .Lpoly1305_init_neon + + ldr r4,[r0,#0] @ load hash value base 2^32 + ldr r5,[r0,#4] + ldr r6,[r0,#8] + ldr r7,[r0,#12] + ldr ip,[r0,#16] + + and r2,r4,#0x03ffffff @ base 2^32 -> base 2^26 + mov r3,r4,lsr#26 + veor d10,d10,d10 + mov r4,r5,lsr#20 + orr r3,r3,r5,lsl#6 + veor d12,d12,d12 + mov r5,r6,lsr#14 + orr r4,r4,r6,lsl#12 + veor d14,d14,d14 + mov r6,r7,lsr#8 + orr r5,r5,r7,lsl#18 + veor d16,d16,d16 + and r3,r3,#0x03ffffff + orr r6,r6,ip,lsl#24 + veor d18,d18,d18 + and r4,r4,#0x03ffffff + mov r1,#1 + and r5,r5,#0x03ffffff + str r1,[r0,#36] @ set is_base2_26 + + vmov.32 d10[0],r2 + vmov.32 d12[0],r3 + vmov.32 d14[0],r4 + vmov.32 d16[0],r5 + vmov.32 d18[0],r6 + adr r5,.Lzeros + + ldmia sp!,{r1-r3,lr} + b .Lhash_loaded + +.align 4 +.Lbase2_26_neon: + @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ + @ load hash value + + veor d10,d10,d10 + veor d12,d12,d12 + veor d14,d14,d14 + veor d16,d16,d16 + veor d18,d18,d18 + vld4.32 {d10[0],d12[0],d14[0],d16[0]},[r0]! + adr r5,.Lzeros + vld1.32 {d18[0]},[r0] + sub r0,r0,#16 @ rewind + +.Lhash_loaded: + add r4,r1,#32 + mov r3,r3,lsl#24 + tst r2,#31 + beq .Leven + + vld4.32 {d20[0],d22[0],d24[0],d26[0]},[r1]! + vmov.32 d28[0],r3 + sub r2,r2,#16 + add r4,r1,#32 + +# ifdef __ARMEB__ + vrev32.8 q10,q10 + vrev32.8 q13,q13 + vrev32.8 q11,q11 + vrev32.8 q12,q12 +# endif + vsri.u32 d28,d26,#8 @ base 2^32 -> base 2^26 + vshl.u32 d26,d26,#18 + + vsri.u32 d26,d24,#14 + vshl.u32 d24,d24,#12 + vadd.i32 d29,d28,d18 @ add hash value and move to #hi + + vbic.i32 d26,#0xfc000000 + vsri.u32 d24,d22,#20 + vshl.u32 d22,d22,#6 + + vbic.i32 d24,#0xfc000000 + vsri.u32 d22,d20,#26 + vadd.i32 d27,d26,d16 + + vbic.i32 d20,#0xfc000000 + vbic.i32 d22,#0xfc000000 + vadd.i32 d25,d24,d14 + + vadd.i32 d21,d20,d10 + vadd.i32 d23,d22,d12 + + mov r7,r5 + add r6,r0,#48 + + cmp r2,r2 + b .Long_tail + +.align 4 +.Leven: + subs r2,r2,#64 + it lo + movlo r4,r5 + + vmov.i32 q14,#1<<24 @ padbit, yes, always + vld4.32 {d20,d22,d24,d26},[r1] @ inp[0:1] + add r1,r1,#64 + vld4.32 {d21,d23,d25,d27},[r4] @ inp[2:3] (or 0) + add r4,r4,#64 + itt hi + addhi r7,r0,#(48+1*9*4) + addhi r6,r0,#(48+3*9*4) + +# ifdef __ARMEB__ + vrev32.8 q10,q10 + vrev32.8 q13,q13 + vrev32.8 q11,q11 + vrev32.8 q12,q12 +# endif + vsri.u32 q14,q13,#8 @ base 2^32 -> base 2^26 + vshl.u32 q13,q13,#18 + + vsri.u32 q13,q12,#14 + vshl.u32 q12,q12,#12 + + vbic.i32 q13,#0xfc000000 + vsri.u32 q12,q11,#20 + vshl.u32 q11,q11,#6 + + vbic.i32 q12,#0xfc000000 + vsri.u32 q11,q10,#26 + + vbic.i32 q10,#0xfc000000 + vbic.i32 q11,#0xfc000000 + + bls .Lskip_loop + + vld4.32 {d0[1],d1[1],d2[1],d3[1]},[r7]! @ load r^2 + vld4.32 {d0[0],d1[0],d2[0],d3[0]},[r6]! @ load r^4 + vld4.32 {d4[1],d5[1],d6[1],d7[1]},[r7]! + vld4.32 {d4[0],d5[0],d6[0],d7[0]},[r6]! + b .Loop_neon + +.align 5 +.Loop_neon: + @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ + @ ((inp[0]*r^4+inp[2]*r^2+inp[4])*r^4+inp[6]*r^2 + @ ((inp[1]*r^4+inp[3]*r^2+inp[5])*r^3+inp[7]*r + @ ___________________/ + @ ((inp[0]*r^4+inp[2]*r^2+inp[4])*r^4+inp[6]*r^2+inp[8])*r^2 + @ ((inp[1]*r^4+inp[3]*r^2+inp[5])*r^4+inp[7]*r^2+inp[9])*r + @ ___________________/ ____________________/ + @ + @ Note that we start with inp[2:3]*r^2. This is because it + @ doesn't depend on reduction in previous iteration. 
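The two-lane schedule described in the comment above is easy to check with ordinary modular arithmetic: even-numbered blocks are folded with r^2, odd-numbered blocks likewise, and the final step multiplies one lane by r^2 and the other by r, so the lane sum equals the serial Horner result h = m[0]*r^n + m[1]*r^(n-1) + ... + m[n-1]*r. A toy-modulus check of that identity (the prime, the block values and the mulmod() helper below are stand-ins for illustration, not Poly1305's 2^130 - 5 arithmetic):

    #include <assert.h>
    #include <stdint.h>

    #define P 1000003ULL                    /* toy prime standing in for 2^130 - 5 */

    static uint64_t mulmod(uint64_t a, uint64_t b) { return a * b % P; }

    int main(void)
    {
        uint64_t m[6] = { 11, 22, 33, 44, 55, 66 };   /* six "blocks" */
        uint64_t r = 12345, r2 = mulmod(r, r);
        uint64_t h = 0, acc0 = 0, acc1 = 0;
        int i, n = 6;

        for (i = 0; i < n; i++)                       /* serial: h = (h + m)*r */
            h = mulmod((h + m[i]) % P, r);

        for (i = 0; i < n - 2; i += 2) {              /* two lanes, stride 2 */
            acc0 = mulmod((acc0 + m[i]) % P, r2);
            acc1 = mulmod((acc1 + m[i + 1]) % P, r2);
        }
        acc0 = mulmod((acc0 + m[n - 2]) % P, r2);     /* last step: r^2 for lane 0 */
        acc1 = mulmod((acc1 + m[n - 1]) % P, r);      /* ... and r^1 for lane 1 */

        assert((acc0 + acc1) % P == h);               /* lanes sum to the serial h */
        return 0;
    }

The NEON loop applies the same identity with four blocks in flight, drawing r^2 and r^4 from the precomputed power table and folding the lanes back together in the .Long_tail/.Lshort_tail code.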
+ @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ + @ d4 = h4*r0 + h3*r1 + h2*r2 + h1*r3 + h0*r4 + @ d3 = h3*r0 + h2*r1 + h1*r2 + h0*r3 + h4*5*r4 + @ d2 = h2*r0 + h1*r1 + h0*r2 + h4*5*r3 + h3*5*r4 + @ d1 = h1*r0 + h0*r1 + h4*5*r2 + h3*5*r3 + h2*5*r4 + @ d0 = h0*r0 + h4*5*r1 + h3*5*r2 + h2*5*r3 + h1*5*r4 + + @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ + @ inp[2:3]*r^2 + + vadd.i32 d24,d24,d14 @ accumulate inp[0:1] + vmull.u32 q7,d25,d0[1] + vadd.i32 d20,d20,d10 + vmull.u32 q5,d21,d0[1] + vadd.i32 d26,d26,d16 + vmull.u32 q8,d27,d0[1] + vmlal.u32 q7,d23,d1[1] + vadd.i32 d22,d22,d12 + vmull.u32 q6,d23,d0[1] + + vadd.i32 d28,d28,d18 + vmull.u32 q9,d29,d0[1] + subs r2,r2,#64 + vmlal.u32 q5,d29,d2[1] + it lo + movlo r4,r5 + vmlal.u32 q8,d25,d1[1] + vld1.32 d8[1],[r7,:32] + vmlal.u32 q6,d21,d1[1] + vmlal.u32 q9,d27,d1[1] + + vmlal.u32 q5,d27,d4[1] + vmlal.u32 q8,d23,d3[1] + vmlal.u32 q9,d25,d3[1] + vmlal.u32 q6,d29,d4[1] + vmlal.u32 q7,d21,d3[1] + + vmlal.u32 q8,d21,d5[1] + vmlal.u32 q5,d25,d6[1] + vmlal.u32 q9,d23,d5[1] + vmlal.u32 q6,d27,d6[1] + vmlal.u32 q7,d29,d6[1] + + vmlal.u32 q8,d29,d8[1] + vmlal.u32 q5,d23,d8[1] + vmlal.u32 q9,d21,d7[1] + vmlal.u32 q6,d25,d8[1] + vmlal.u32 q7,d27,d8[1] + + vld4.32 {d21,d23,d25,d27},[r4] @ inp[2:3] (or 0) + add r4,r4,#64 + + @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ + @ (hash+inp[0:1])*r^4 and accumulate + + vmlal.u32 q8,d26,d0[0] + vmlal.u32 q5,d20,d0[0] + vmlal.u32 q9,d28,d0[0] + vmlal.u32 q6,d22,d0[0] + vmlal.u32 q7,d24,d0[0] + vld1.32 d8[0],[r6,:32] + + vmlal.u32 q8,d24,d1[0] + vmlal.u32 q5,d28,d2[0] + vmlal.u32 q9,d26,d1[0] + vmlal.u32 q6,d20,d1[0] + vmlal.u32 q7,d22,d1[0] + + vmlal.u32 q8,d22,d3[0] + vmlal.u32 q5,d26,d4[0] + vmlal.u32 q9,d24,d3[0] + vmlal.u32 q6,d28,d4[0] + vmlal.u32 q7,d20,d3[0] + + vmlal.u32 q8,d20,d5[0] + vmlal.u32 q5,d24,d6[0] + vmlal.u32 q9,d22,d5[0] + vmlal.u32 q6,d26,d6[0] + vmlal.u32 q8,d28,d8[0] + + vmlal.u32 q7,d28,d6[0] + vmlal.u32 q5,d22,d8[0] + vmlal.u32 q9,d20,d7[0] + vmov.i32 q14,#1<<24 @ padbit, yes, always + vmlal.u32 q6,d24,d8[0] + vmlal.u32 q7,d26,d8[0] + + vld4.32 {d20,d22,d24,d26},[r1] @ inp[0:1] + add r1,r1,#64 +# ifdef __ARMEB__ + vrev32.8 q10,q10 + vrev32.8 q11,q11 + vrev32.8 q12,q12 + vrev32.8 q13,q13 +# endif + + @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ + @ lazy reduction interleaved with base 2^32 -> base 2^26 of + @ inp[0:3] previously loaded to q10-q13 and smashed to q10-q14. 
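The carry chain that follows has a convenient scalar reading. A rough C
equivalent (the 64-bit temporaries h0..h4 holding ~58-bit partial products
of 26-bit limbs are illustrative names, not the kernel's own):

	u64 c;

	c = h3 >> 26; h3 &= 0x3ffffff; h4 += c;		/* h3 -> h4 */
	c = h0 >> 26; h0 &= 0x3ffffff; h1 += c;		/* h0 -> h1 */
	c = h1 >> 26; h1 &= 0x3ffffff; h2 += c;		/* h1 -> h2 */
	c = h4 >> 26; h4 &= 0x3ffffff; h0 += c * 5;	/* h4 -> h0: 2^130 == 5 mod p */
	c = h2 >> 26; h2 &= 0x3ffffff; h3 += c;		/* h2 -> h3 */
	c = h0 >> 26; h0 &= 0x3ffffff; h1 += c;		/* h0 -> h1 */
	c = h3 >> 26; h3 &= 0x3ffffff; h4 += c;		/* h3 -> h4 */

The reduction is "lazy" in that h1 and h4 may still be a bit above 26 bits
afterwards; that slack is absorbed by the next multiply.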
+ + vshr.u64 q15,q8,#26 + vmovn.i64 d16,q8 + vshr.u64 q4,q5,#26 + vmovn.i64 d10,q5 + vadd.i64 q9,q9,q15 @ h3 -> h4 + vbic.i32 d16,#0xfc000000 + vsri.u32 q14,q13,#8 @ base 2^32 -> base 2^26 + vadd.i64 q6,q6,q4 @ h0 -> h1 + vshl.u32 q13,q13,#18 + vbic.i32 d10,#0xfc000000 + + vshrn.u64 d30,q9,#26 + vmovn.i64 d18,q9 + vshr.u64 q4,q6,#26 + vmovn.i64 d12,q6 + vadd.i64 q7,q7,q4 @ h1 -> h2 + vsri.u32 q13,q12,#14 + vbic.i32 d18,#0xfc000000 + vshl.u32 q12,q12,#12 + vbic.i32 d12,#0xfc000000 + + vadd.i32 d10,d10,d30 + vshl.u32 d30,d30,#2 + vbic.i32 q13,#0xfc000000 + vshrn.u64 d8,q7,#26 + vmovn.i64 d14,q7 + vaddl.u32 q5,d10,d30 @ h4 -> h0 [widen for a sec] + vsri.u32 q12,q11,#20 + vadd.i32 d16,d16,d8 @ h2 -> h3 + vshl.u32 q11,q11,#6 + vbic.i32 d14,#0xfc000000 + vbic.i32 q12,#0xfc000000 + + vshrn.u64 d30,q5,#26 @ re-narrow + vmovn.i64 d10,q5 + vsri.u32 q11,q10,#26 + vbic.i32 q10,#0xfc000000 + vshr.u32 d8,d16,#26 + vbic.i32 d16,#0xfc000000 + vbic.i32 d10,#0xfc000000 + vadd.i32 d12,d12,d30 @ h0 -> h1 + vadd.i32 d18,d18,d8 @ h3 -> h4 + vbic.i32 q11,#0xfc000000 + + bhi .Loop_neon + +.Lskip_loop: + @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ + @ multiply (inp[0:1]+hash) or inp[2:3] by r^2:r^1 + + add r7,r0,#(48+0*9*4) + add r6,r0,#(48+1*9*4) + adds r2,r2,#32 + it ne + movne r2,#0 + bne .Long_tail + + vadd.i32 d25,d24,d14 @ add hash value and move to #hi + vadd.i32 d21,d20,d10 + vadd.i32 d27,d26,d16 + vadd.i32 d23,d22,d12 + vadd.i32 d29,d28,d18 + +.Long_tail: + vld4.32 {d0[1],d1[1],d2[1],d3[1]},[r7]! @ load r^1 + vld4.32 {d0[0],d1[0],d2[0],d3[0]},[r6]! @ load r^2 + + vadd.i32 d24,d24,d14 @ can be redundant + vmull.u32 q7,d25,d0 + vadd.i32 d20,d20,d10 + vmull.u32 q5,d21,d0 + vadd.i32 d26,d26,d16 + vmull.u32 q8,d27,d0 + vadd.i32 d22,d22,d12 + vmull.u32 q6,d23,d0 + vadd.i32 d28,d28,d18 + vmull.u32 q9,d29,d0 + + vmlal.u32 q5,d29,d2 + vld4.32 {d4[1],d5[1],d6[1],d7[1]},[r7]! + vmlal.u32 q8,d25,d1 + vld4.32 {d4[0],d5[0],d6[0],d7[0]},[r6]! + vmlal.u32 q6,d21,d1 + vmlal.u32 q9,d27,d1 + vmlal.u32 q7,d23,d1 + + vmlal.u32 q8,d23,d3 + vld1.32 d8[1],[r7,:32] + vmlal.u32 q5,d27,d4 + vld1.32 d8[0],[r6,:32] + vmlal.u32 q9,d25,d3 + vmlal.u32 q6,d29,d4 + vmlal.u32 q7,d21,d3 + + vmlal.u32 q8,d21,d5 + it ne + addne r7,r0,#(48+2*9*4) + vmlal.u32 q5,d25,d6 + it ne + addne r6,r0,#(48+3*9*4) + vmlal.u32 q9,d23,d5 + vmlal.u32 q6,d27,d6 + vmlal.u32 q7,d29,d6 + + vmlal.u32 q8,d29,d8 + vorn q0,q0,q0 @ all-ones, can be redundant + vmlal.u32 q5,d23,d8 + vshr.u64 q0,q0,#38 + vmlal.u32 q9,d21,d7 + vmlal.u32 q6,d25,d8 + vmlal.u32 q7,d27,d8 + + beq .Lshort_tail + + @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ + @ (hash+inp[0:1])*r^4:r^3 and accumulate + + vld4.32 {d0[1],d1[1],d2[1],d3[1]},[r7]! @ load r^3 + vld4.32 {d0[0],d1[0],d2[0],d3[0]},[r6]! @ load r^4 + + vmlal.u32 q7,d24,d0 + vmlal.u32 q5,d20,d0 + vmlal.u32 q8,d26,d0 + vmlal.u32 q6,d22,d0 + vmlal.u32 q9,d28,d0 + + vmlal.u32 q5,d28,d2 + vld4.32 {d4[1],d5[1],d6[1],d7[1]},[r7]! + vmlal.u32 q8,d24,d1 + vld4.32 {d4[0],d5[0],d6[0],d7[0]},[r6]! 
+ vmlal.u32 q6,d20,d1 + vmlal.u32 q9,d26,d1 + vmlal.u32 q7,d22,d1 + + vmlal.u32 q8,d22,d3 + vld1.32 d8[1],[r7,:32] + vmlal.u32 q5,d26,d4 + vld1.32 d8[0],[r6,:32] + vmlal.u32 q9,d24,d3 + vmlal.u32 q6,d28,d4 + vmlal.u32 q7,d20,d3 + + vmlal.u32 q8,d20,d5 + vmlal.u32 q5,d24,d6 + vmlal.u32 q9,d22,d5 + vmlal.u32 q6,d26,d6 + vmlal.u32 q7,d28,d6 + + vmlal.u32 q8,d28,d8 + vorn q0,q0,q0 @ all-ones + vmlal.u32 q5,d22,d8 + vshr.u64 q0,q0,#38 + vmlal.u32 q9,d20,d7 + vmlal.u32 q6,d24,d8 + vmlal.u32 q7,d26,d8 + +.Lshort_tail: + @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ + @ horizontal addition + + vadd.i64 d16,d16,d17 + vadd.i64 d10,d10,d11 + vadd.i64 d18,d18,d19 + vadd.i64 d12,d12,d13 + vadd.i64 d14,d14,d15 + + @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ + @ lazy reduction, but without narrowing + + vshr.u64 q15,q8,#26 + vand.i64 q8,q8,q0 + vshr.u64 q4,q5,#26 + vand.i64 q5,q5,q0 + vadd.i64 q9,q9,q15 @ h3 -> h4 + vadd.i64 q6,q6,q4 @ h0 -> h1 + + vshr.u64 q15,q9,#26 + vand.i64 q9,q9,q0 + vshr.u64 q4,q6,#26 + vand.i64 q6,q6,q0 + vadd.i64 q7,q7,q4 @ h1 -> h2 + + vadd.i64 q5,q5,q15 + vshl.u64 q15,q15,#2 + vshr.u64 q4,q7,#26 + vand.i64 q7,q7,q0 + vadd.i64 q5,q5,q15 @ h4 -> h0 + vadd.i64 q8,q8,q4 @ h2 -> h3 + + vshr.u64 q15,q5,#26 + vand.i64 q5,q5,q0 + vshr.u64 q4,q8,#26 + vand.i64 q8,q8,q0 + vadd.i64 q6,q6,q15 @ h0 -> h1 + vadd.i64 q9,q9,q4 @ h3 -> h4 + + cmp r2,#0 + bne .Leven + + @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ + @ store hash value + + vst4.32 {d10[0],d12[0],d14[0],d16[0]},[r0]! + vst1.32 {d18[0]},[r0] + + vldmia sp!,{d8-d15} @ epilogue + ldmia sp!,{r4-r7} + bx lr @ bx lr +.size poly1305_blocks_neon,.-poly1305_blocks_neon + +.align 5 +.Lzeros: +.long 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 +#ifndef __KERNEL__ +.LOPENSSL_armcap: +# ifdef _WIN32 +.word OPENSSL_armcap_P +# else +.word OPENSSL_armcap_P-.Lpoly1305_init +# endif +.comm OPENSSL_armcap_P,4,4 +.hidden OPENSSL_armcap_P +#endif +#endif +.asciz "Poly1305 for ARMv4/NEON, CRYPTOGAMS by @dot-asm" +.align 2 diff --git a/arch/arm/crypto/poly1305-glue.c b/arch/arm/crypto/poly1305-glue.c new file mode 100644 index 000000000000..74a725ac89c9 --- /dev/null +++ b/arch/arm/crypto/poly1305-glue.c @@ -0,0 +1,276 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * OpenSSL/Cryptogams accelerated Poly1305 transform for ARM + * + * Copyright (C) 2019 Linaro Ltd. 
<ard.biesheuvel@linaro.org> + */ + +#include <asm/hwcap.h> +#include <asm/neon.h> +#include <asm/simd.h> +#include <asm/unaligned.h> +#include <crypto/algapi.h> +#include <crypto/internal/hash.h> +#include <crypto/internal/poly1305.h> +#include <crypto/internal/simd.h> +#include <linux/cpufeature.h> +#include <linux/crypto.h> +#include <linux/jump_label.h> +#include <linux/module.h> + +void poly1305_init_arm(void *state, const u8 *key); +void poly1305_blocks_arm(void *state, const u8 *src, u32 len, u32 hibit); +void poly1305_emit_arm(void *state, __le32 *digest, const u32 *nonce); + +void __weak poly1305_blocks_neon(void *state, const u8 *src, u32 len, u32 hibit) +{ +} + +static __ro_after_init DEFINE_STATIC_KEY_FALSE(have_neon); + +void poly1305_init_arch(struct poly1305_desc_ctx *dctx, const u8 *key) +{ + poly1305_init_arm(&dctx->h, key); + dctx->s[0] = get_unaligned_le32(key + 16); + dctx->s[1] = get_unaligned_le32(key + 20); + dctx->s[2] = get_unaligned_le32(key + 24); + dctx->s[3] = get_unaligned_le32(key + 28); + dctx->buflen = 0; +} +EXPORT_SYMBOL(poly1305_init_arch); + +static int arm_poly1305_init(struct shash_desc *desc) +{ + struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc); + + dctx->buflen = 0; + dctx->rset = 0; + dctx->sset = false; + + return 0; +} + +static void arm_poly1305_blocks(struct poly1305_desc_ctx *dctx, const u8 *src, + u32 len, u32 hibit, bool do_neon) +{ + if (unlikely(!dctx->sset)) { + if (!dctx->rset) { + poly1305_init_arm(&dctx->h, src); + src += POLY1305_BLOCK_SIZE; + len -= POLY1305_BLOCK_SIZE; + dctx->rset = 1; + } + if (len >= POLY1305_BLOCK_SIZE) { + dctx->s[0] = get_unaligned_le32(src + 0); + dctx->s[1] = get_unaligned_le32(src + 4); + dctx->s[2] = get_unaligned_le32(src + 8); + dctx->s[3] = get_unaligned_le32(src + 12); + src += POLY1305_BLOCK_SIZE; + len -= POLY1305_BLOCK_SIZE; + dctx->sset = true; + } + if (len < POLY1305_BLOCK_SIZE) + return; + } + + len &= ~(POLY1305_BLOCK_SIZE - 1); + + if (static_branch_likely(&have_neon) && likely(do_neon)) + poly1305_blocks_neon(&dctx->h, src, len, hibit); + else + poly1305_blocks_arm(&dctx->h, src, len, hibit); +} + +static void arm_poly1305_do_update(struct poly1305_desc_ctx *dctx, + const u8 *src, u32 len, bool do_neon) +{ + if (unlikely(dctx->buflen)) { + u32 bytes = min(len, POLY1305_BLOCK_SIZE - dctx->buflen); + + memcpy(dctx->buf + dctx->buflen, src, bytes); + src += bytes; + len -= bytes; + dctx->buflen += bytes; + + if (dctx->buflen == POLY1305_BLOCK_SIZE) { + arm_poly1305_blocks(dctx, dctx->buf, + POLY1305_BLOCK_SIZE, 1, false); + dctx->buflen = 0; + } + } + + if (likely(len >= POLY1305_BLOCK_SIZE)) { + arm_poly1305_blocks(dctx, src, len, 1, do_neon); + src += round_down(len, POLY1305_BLOCK_SIZE); + len %= POLY1305_BLOCK_SIZE; + } + + if (unlikely(len)) { + dctx->buflen = len; + memcpy(dctx->buf, src, len); + } +} + +static int arm_poly1305_update(struct shash_desc *desc, + const u8 *src, unsigned int srclen) +{ + struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc); + + arm_poly1305_do_update(dctx, src, srclen, false); + return 0; +} + +static int __maybe_unused arm_poly1305_update_neon(struct shash_desc *desc, + const u8 *src, + unsigned int srclen) +{ + struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc); + bool do_neon = crypto_simd_usable() && srclen > 128; + + if (static_branch_likely(&have_neon) && do_neon) + kernel_neon_begin(); + arm_poly1305_do_update(dctx, src, srclen, do_neon); + if (static_branch_likely(&have_neon) && do_neon) + kernel_neon_end(); + return 0; +} + +void 
poly1305_update_arch(struct poly1305_desc_ctx *dctx, const u8 *src, + unsigned int nbytes) +{ + bool do_neon = IS_ENABLED(CONFIG_KERNEL_MODE_NEON) && + crypto_simd_usable(); + + if (unlikely(dctx->buflen)) { + u32 bytes = min(nbytes, POLY1305_BLOCK_SIZE - dctx->buflen); + + memcpy(dctx->buf + dctx->buflen, src, bytes); + src += bytes; + nbytes -= bytes; + dctx->buflen += bytes; + + if (dctx->buflen == POLY1305_BLOCK_SIZE) { + poly1305_blocks_arm(&dctx->h, dctx->buf, + POLY1305_BLOCK_SIZE, 1); + dctx->buflen = 0; + } + } + + if (likely(nbytes >= POLY1305_BLOCK_SIZE)) { + unsigned int len = round_down(nbytes, POLY1305_BLOCK_SIZE); + + if (static_branch_likely(&have_neon) && do_neon) { + kernel_neon_begin(); + poly1305_blocks_neon(&dctx->h, src, len, 1); + kernel_neon_end(); + } else { + poly1305_blocks_arm(&dctx->h, src, len, 1); + } + src += len; + nbytes %= POLY1305_BLOCK_SIZE; + } + + if (unlikely(nbytes)) { + dctx->buflen = nbytes; + memcpy(dctx->buf, src, nbytes); + } +} +EXPORT_SYMBOL(poly1305_update_arch); + +void poly1305_final_arch(struct poly1305_desc_ctx *dctx, u8 *dst) +{ + __le32 digest[4]; + u64 f = 0; + + if (unlikely(dctx->buflen)) { + dctx->buf[dctx->buflen++] = 1; + memset(dctx->buf + dctx->buflen, 0, + POLY1305_BLOCK_SIZE - dctx->buflen); + poly1305_blocks_arm(&dctx->h, dctx->buf, POLY1305_BLOCK_SIZE, 0); + } + + poly1305_emit_arm(&dctx->h, digest, dctx->s); + + /* mac = (h + s) % (2^128) */ + f = (f >> 32) + le32_to_cpu(digest[0]); + put_unaligned_le32(f, dst); + f = (f >> 32) + le32_to_cpu(digest[1]); + put_unaligned_le32(f, dst + 4); + f = (f >> 32) + le32_to_cpu(digest[2]); + put_unaligned_le32(f, dst + 8); + f = (f >> 32) + le32_to_cpu(digest[3]); + put_unaligned_le32(f, dst + 12); + + *dctx = (struct poly1305_desc_ctx){}; +} +EXPORT_SYMBOL(poly1305_final_arch); + +static int arm_poly1305_final(struct shash_desc *desc, u8 *dst) +{ + struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc); + + if (unlikely(!dctx->sset)) + return -ENOKEY; + + poly1305_final_arch(dctx, dst); + return 0; +} + +static struct shash_alg arm_poly1305_algs[] = {{ + .init = arm_poly1305_init, + .update = arm_poly1305_update, + .final = arm_poly1305_final, + .digestsize = POLY1305_DIGEST_SIZE, + .descsize = sizeof(struct poly1305_desc_ctx), + + .base.cra_name = "poly1305", + .base.cra_driver_name = "poly1305-arm", + .base.cra_priority = 150, + .base.cra_blocksize = POLY1305_BLOCK_SIZE, + .base.cra_module = THIS_MODULE, +#ifdef CONFIG_KERNEL_MODE_NEON +}, { + .init = arm_poly1305_init, + .update = arm_poly1305_update_neon, + .final = arm_poly1305_final, + .digestsize = POLY1305_DIGEST_SIZE, + .descsize = sizeof(struct poly1305_desc_ctx), + + .base.cra_name = "poly1305", + .base.cra_driver_name = "poly1305-neon", + .base.cra_priority = 200, + .base.cra_blocksize = POLY1305_BLOCK_SIZE, + .base.cra_module = THIS_MODULE, +#endif +}}; + +static int __init arm_poly1305_mod_init(void) +{ + if (IS_ENABLED(CONFIG_KERNEL_MODE_NEON) && + (elf_hwcap & HWCAP_NEON)) + static_branch_enable(&have_neon); + else + /* register only the first entry */ + return crypto_register_shash(&arm_poly1305_algs[0]); + + return crypto_register_shashes(arm_poly1305_algs, + ARRAY_SIZE(arm_poly1305_algs)); +} + +static void __exit arm_poly1305_mod_exit(void) +{ + if (!static_branch_likely(&have_neon)) { + crypto_unregister_shash(&arm_poly1305_algs[0]); + return; + } + crypto_unregister_shashes(arm_poly1305_algs, + ARRAY_SIZE(arm_poly1305_algs)); +} + +module_init(arm_poly1305_mod_init); +module_exit(arm_poly1305_mod_exit); + 
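The *_arch functions exported above slot in behind the generic Poly1305
library interface; a caller would normally go through the <crypto/poly1305.h>
wrappers rather than these hooks directly. A minimal sketch, assuming those
wrappers and hypothetical caller names:

#include <crypto/poly1305.h>

static void poly1305_mac_example(const u8 key[POLY1305_KEY_SIZE],
				 const u8 *msg, unsigned int len,
				 u8 mac[POLY1305_DIGEST_SIZE])
{
	struct poly1305_desc_ctx ctx;

	poly1305_init(&ctx, key);	 /* dispatches to poly1305_init_arch() */
	poly1305_update(&ctx, msg, len); /* dispatches to poly1305_update_arch() */
	poly1305_final(&ctx, mac);	 /* dispatches to poly1305_final_arch() */
}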
+MODULE_LICENSE("GPL v2"); +MODULE_ALIAS_CRYPTO("poly1305"); +MODULE_ALIAS_CRYPTO("poly1305-arm"); +MODULE_ALIAS_CRYPTO("poly1305-neon"); diff --git a/arch/arm/crypto/sha1-ce-core.S b/arch/arm/crypto/sha1-ce-core.S index 49a74a441aec..8a702e051738 100644 --- a/arch/arm/crypto/sha1-ce-core.S +++ b/arch/arm/crypto/sha1-ce-core.S @@ -10,6 +10,7 @@ #include <asm/assembler.h> .text + .arch armv8-a .fpu crypto-neon-fp-armv8 k0 .req q0 diff --git a/arch/arm/crypto/sha2-ce-core.S b/arch/arm/crypto/sha2-ce-core.S index 4ad517577e23..b6369d2440a1 100644 --- a/arch/arm/crypto/sha2-ce-core.S +++ b/arch/arm/crypto/sha2-ce-core.S @@ -10,6 +10,7 @@ #include <asm/assembler.h> .text + .arch armv8-a .fpu crypto-neon-fp-armv8 k0 .req q7 diff --git a/arch/arm/include/asm/domain.h b/arch/arm/include/asm/domain.h index 567dbede4785..f1d0a7807cd0 100644 --- a/arch/arm/include/asm/domain.h +++ b/arch/arm/include/asm/domain.h @@ -82,7 +82,7 @@ #ifndef __ASSEMBLY__ #ifdef CONFIG_CPU_CP15_MMU -static inline unsigned int get_domain(void) +static __always_inline unsigned int get_domain(void) { unsigned int domain; @@ -94,7 +94,7 @@ static inline unsigned int get_domain(void) return domain; } -static inline void set_domain(unsigned val) +static __always_inline void set_domain(unsigned int val) { asm volatile( "mcr p15, 0, %0, c3, c0 @ set domain" @@ -102,12 +102,12 @@ static inline void set_domain(unsigned val) isb(); } #else -static inline unsigned int get_domain(void) +static __always_inline unsigned int get_domain(void) { return 0; } -static inline void set_domain(unsigned val) +static __always_inline void set_domain(unsigned int val) { } #endif diff --git a/arch/arm/include/asm/kvm_arm.h b/arch/arm/include/asm/kvm_arm.h index 0125aa059d5b..9c04bd810d07 100644 --- a/arch/arm/include/asm/kvm_arm.h +++ b/arch/arm/include/asm/kvm_arm.h @@ -162,6 +162,7 @@ #define HSR_ISV (_AC(1, UL) << HSR_ISV_SHIFT) #define HSR_SRT_SHIFT (16) #define HSR_SRT_MASK (0xf << HSR_SRT_SHIFT) +#define HSR_CM (1 << 8) #define HSR_FSC (0x3f) #define HSR_FSC_TYPE (0x3c) #define HSR_SSE (1 << 21) diff --git a/arch/arm/include/asm/kvm_emulate.h b/arch/arm/include/asm/kvm_emulate.h index 40002416efec..9b118516d2db 100644 --- a/arch/arm/include/asm/kvm_emulate.h +++ b/arch/arm/include/asm/kvm_emulate.h @@ -95,12 +95,12 @@ static inline unsigned long *vcpu_hcr(const struct kvm_vcpu *vcpu) return (unsigned long *)&vcpu->arch.hcr; } -static inline void vcpu_clear_wfe_traps(struct kvm_vcpu *vcpu) +static inline void vcpu_clear_wfx_traps(struct kvm_vcpu *vcpu) { vcpu->arch.hcr &= ~HCR_TWE; } -static inline void vcpu_set_wfe_traps(struct kvm_vcpu *vcpu) +static inline void vcpu_set_wfx_traps(struct kvm_vcpu *vcpu) { vcpu->arch.hcr |= HCR_TWE; } @@ -167,6 +167,11 @@ static inline bool kvm_vcpu_dabt_isvalid(struct kvm_vcpu *vcpu) return kvm_vcpu_get_hsr(vcpu) & HSR_ISV; } +static inline unsigned long kvm_vcpu_dabt_iss_nisv_sanitized(const struct kvm_vcpu *vcpu) +{ + return kvm_vcpu_get_hsr(vcpu) & (HSR_CM | HSR_WNR | HSR_FSC); +} + static inline bool kvm_vcpu_dabt_iswrite(struct kvm_vcpu *vcpu) { return kvm_vcpu_get_hsr(vcpu) & HSR_WNR; diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h index 8a37c8e89777..556cd818eccf 100644 --- a/arch/arm/include/asm/kvm_host.h +++ b/arch/arm/include/asm/kvm_host.h @@ -7,6 +7,7 @@ #ifndef __ARM_KVM_HOST_H__ #define __ARM_KVM_HOST_H__ +#include <linux/arm-smccc.h> #include <linux/errno.h> #include <linux/types.h> #include <linux/kvm_types.h> @@ -38,6 +39,7 @@ KVM_ARCH_REQ_FLAGS(0, 
KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP) #define KVM_REQ_IRQ_PENDING KVM_ARCH_REQ(1) #define KVM_REQ_VCPU_RESET KVM_ARCH_REQ(2) +#define KVM_REQ_RECORD_STEAL KVM_ARCH_REQ(3) DECLARE_STATIC_KEY_FALSE(userspace_irqchip_in_use); @@ -76,6 +78,14 @@ struct kvm_arch { /* Mandated version of PSCI */ u32 psci_version; + + /* + * If we encounter a data abort without valid instruction syndrome + * information, report this to user space. User space can (and + * should) opt in to this feature if KVM_CAP_ARM_NISV_TO_USER is + * supported. + */ + bool return_nisv_io_abort_to_user; }; #define KVM_NR_MEM_OBJS 40 @@ -323,6 +333,29 @@ static inline int kvm_arch_vm_ioctl_check_extension(struct kvm *kvm, long ext) int kvm_perf_init(void); int kvm_perf_teardown(void); +static inline long kvm_hypercall_pv_features(struct kvm_vcpu *vcpu) +{ + return SMCCC_RET_NOT_SUPPORTED; +} + +static inline gpa_t kvm_init_stolen_time(struct kvm_vcpu *vcpu) +{ + return GPA_INVALID; +} + +static inline void kvm_update_stolen_time(struct kvm_vcpu *vcpu) +{ +} + +static inline void kvm_arm_pvtime_vcpu_init(struct kvm_vcpu_arch *vcpu_arch) +{ +} + +static inline bool kvm_arm_is_pvtime_enabled(struct kvm_vcpu_arch *vcpu_arch) +{ + return false; +} + void kvm_mmu_wp_memory_region(struct kvm *kvm, int slot); struct kvm_vcpu *kvm_mpidr_to_vcpu(struct kvm *kvm, unsigned long mpidr); diff --git a/arch/arm/include/asm/uaccess.h b/arch/arm/include/asm/uaccess.h index 303248e5b990..98c6b91be4a8 100644 --- a/arch/arm/include/asm/uaccess.h +++ b/arch/arm/include/asm/uaccess.h @@ -22,7 +22,7 @@ * perform such accesses (eg, via list poison values) which could then * be exploited for priviledge escalation. */ -static inline unsigned int uaccess_save_and_enable(void) +static __always_inline unsigned int uaccess_save_and_enable(void) { #ifdef CONFIG_CPU_SW_DOMAIN_PAN unsigned int old_domain = get_domain(); @@ -37,7 +37,7 @@ static inline unsigned int uaccess_save_and_enable(void) #endif } -static inline void uaccess_restore(unsigned int flags) +static __always_inline void uaccess_restore(unsigned int flags) { #ifdef CONFIG_CPU_SW_DOMAIN_PAN /* Restore the user access mask */ diff --git a/arch/arm/include/asm/xen/xen-ops.h b/arch/arm/include/asm/xen/xen-ops.h deleted file mode 100644 index ec154e719b11..000000000000 --- a/arch/arm/include/asm/xen/xen-ops.h +++ /dev/null @@ -1,6 +0,0 @@ -#ifndef _ASM_XEN_OPS_H -#define _ASM_XEN_OPS_H - -void xen_efi_runtime_setup(void); - -#endif /* _ASM_XEN_OPS_H */ diff --git a/arch/arm/include/uapi/asm/kvm.h b/arch/arm/include/uapi/asm/kvm.h index 2769360f195c..03cd7c19a683 100644 --- a/arch/arm/include/uapi/asm/kvm.h +++ b/arch/arm/include/uapi/asm/kvm.h @@ -131,8 +131,9 @@ struct kvm_vcpu_events { struct { __u8 serror_pending; __u8 serror_has_esr; + __u8 ext_dabt_pending; /* Align it to 8 bytes */ - __u8 pad[6]; + __u8 pad[5]; __u64 serror_esr; } exception; __u32 reserved[12]; diff --git a/arch/arm/kernel/head-common.S b/arch/arm/kernel/head-common.S index a7810be07da1..4a3982812a40 100644 --- a/arch/arm/kernel/head-common.S +++ b/arch/arm/kernel/head-common.S @@ -68,7 +68,7 @@ ENDPROC(__vet_atags) * The following fragment of code is executed with the MMU on in MMU mode, * and uses absolute addresses; this is not position independent. 
* - * r0 = cp#15 control register + * r0 = cp#15 control register (exc_ret for M-class) * r1 = machine ID * r2 = atags/dtb pointer * r9 = processor ID @@ -137,7 +137,8 @@ __mmap_switched_data: #ifdef CONFIG_CPU_CP15 .long cr_alignment @ r3 #else - .long 0 @ r3 +M_CLASS(.long exc_ret) @ r3 +AR_CLASS(.long 0) @ r3 #endif .size __mmap_switched_data, . - __mmap_switched_data diff --git a/arch/arm/kernel/head-nommu.S b/arch/arm/kernel/head-nommu.S index afa350f44dea..0fc814bbc34b 100644 --- a/arch/arm/kernel/head-nommu.S +++ b/arch/arm/kernel/head-nommu.S @@ -201,6 +201,8 @@ M_CLASS(streq r3, [r12, #PMSAv8_MAIR1]) bic r0, r0, #V7M_SCB_CCR_IC #endif str r0, [r12, V7M_SCB_CCR] + /* Pass exc_ret to __mmap_switched */ + mov r0, r10 #endif /* CONFIG_CPU_CP15 elif CONFIG_CPU_V7M */ ret lr ENDPROC(__after_proc_init) diff --git a/arch/arm/kvm/Makefile b/arch/arm/kvm/Makefile index b76b75bd9e00..e442d82821df 100644 --- a/arch/arm/kvm/Makefile +++ b/arch/arm/kvm/Makefile @@ -24,7 +24,7 @@ obj-y += kvm-arm.o init.o interrupts.o obj-y += handle_exit.o guest.o emulate.o reset.o obj-y += coproc.o coproc_a15.o coproc_a7.o vgic-v3-coproc.o obj-y += $(KVM)/arm/arm.o $(KVM)/arm/mmu.o $(KVM)/arm/mmio.o -obj-y += $(KVM)/arm/psci.o $(KVM)/arm/perf.o +obj-y += $(KVM)/arm/psci.o $(KVM)/arm/perf.o $(KVM)/arm/hypercalls.o obj-y += $(KVM)/arm/aarch32.o obj-y += $(KVM)/arm/vgic/vgic.o diff --git a/arch/arm/kvm/guest.c b/arch/arm/kvm/guest.c index 684cf64b4033..0e6f23504c26 100644 --- a/arch/arm/kvm/guest.c +++ b/arch/arm/kvm/guest.c @@ -21,6 +21,10 @@ #define VCPU_STAT(x) { #x, offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU } struct kvm_stats_debugfs_item debugfs_entries[] = { + VCPU_STAT(halt_successful_poll), + VCPU_STAT(halt_attempted_poll), + VCPU_STAT(halt_poll_invalid), + VCPU_STAT(halt_wakeup), VCPU_STAT(hvc_exit_stat), VCPU_STAT(wfe_exit_stat), VCPU_STAT(wfi_exit_stat), @@ -255,6 +259,12 @@ int __kvm_arm_vcpu_get_events(struct kvm_vcpu *vcpu, { events->exception.serror_pending = !!(*vcpu_hcr(vcpu) & HCR_VA); + /* + * We never return a pending ext_dabt here because we deliver it to + * the virtual CPU directly when setting the event and it's no longer + * 'pending' at this point. 
+ */ + return 0; } @@ -263,12 +273,16 @@ int __kvm_arm_vcpu_set_events(struct kvm_vcpu *vcpu, { bool serror_pending = events->exception.serror_pending; bool has_esr = events->exception.serror_has_esr; + bool ext_dabt_pending = events->exception.ext_dabt_pending; if (serror_pending && has_esr) return -EINVAL; else if (serror_pending) kvm_inject_vabt(vcpu); + if (ext_dabt_pending) + kvm_inject_dabt(vcpu, kvm_vcpu_get_hfar(vcpu)); + return 0; } diff --git a/arch/arm/kvm/handle_exit.c b/arch/arm/kvm/handle_exit.c index 2a6a1394d26e..e58a89d2f13f 100644 --- a/arch/arm/kvm/handle_exit.c +++ b/arch/arm/kvm/handle_exit.c @@ -9,7 +9,7 @@ #include <asm/kvm_emulate.h> #include <asm/kvm_coproc.h> #include <asm/kvm_mmu.h> -#include <kvm/arm_psci.h> +#include <kvm/arm_hypercalls.h> #include <trace/events/kvm.h> #include "trace.h" diff --git a/arch/arm/mach-davinci/dm365.c b/arch/arm/mach-davinci/dm365.c index 8062412be70f..9fc5c73cc0be 100644 --- a/arch/arm/mach-davinci/dm365.c +++ b/arch/arm/mach-davinci/dm365.c @@ -462,8 +462,8 @@ static s8 dm365_queue_priority_mapping[][2] = { }; static const struct dma_slave_map dm365_edma_map[] = { - { "davinci-mcbsp.0", "tx", EDMA_FILTER_PARAM(0, 2) }, - { "davinci-mcbsp.0", "rx", EDMA_FILTER_PARAM(0, 3) }, + { "davinci-mcbsp", "tx", EDMA_FILTER_PARAM(0, 2) }, + { "davinci-mcbsp", "rx", EDMA_FILTER_PARAM(0, 3) }, { "davinci_voicecodec", "tx", EDMA_FILTER_PARAM(0, 2) }, { "davinci_voicecodec", "rx", EDMA_FILTER_PARAM(0, 3) }, { "spi_davinci.2", "tx", EDMA_FILTER_PARAM(0, 10) }, diff --git a/arch/arm/mach-omap2/omap_hwmod_33xx_43xx_ipblock_data.c b/arch/arm/mach-omap2/omap_hwmod_33xx_43xx_ipblock_data.c index dd939e1325c6..29fd13684a68 100644 --- a/arch/arm/mach-omap2/omap_hwmod_33xx_43xx_ipblock_data.c +++ b/arch/arm/mach-omap2/omap_hwmod_33xx_43xx_ipblock_data.c @@ -763,7 +763,8 @@ static struct omap_hwmod_class_sysconfig am33xx_timer_sysc = { .rev_offs = 0x0000, .sysc_offs = 0x0010, .syss_offs = 0x0014, - .sysc_flags = (SYSC_HAS_SIDLEMODE | SYSC_HAS_SOFTRESET), + .sysc_flags = SYSC_HAS_SIDLEMODE | SYSC_HAS_SOFTRESET | + SYSC_HAS_RESET_STATUS, .idlemodes = (SIDLE_FORCE | SIDLE_NO | SIDLE_SMART | SIDLE_SMART_WKUP), .sysc_fields = &omap_hwmod_sysc_type2, diff --git a/arch/arm/mach-omap2/omap_hwmod_33xx_data.c b/arch/arm/mach-omap2/omap_hwmod_33xx_data.c index 2bcb6345b873..54524775f278 100644 --- a/arch/arm/mach-omap2/omap_hwmod_33xx_data.c +++ b/arch/arm/mach-omap2/omap_hwmod_33xx_data.c @@ -231,8 +231,9 @@ static struct omap_hwmod am33xx_control_hwmod = { static struct omap_hwmod_class_sysconfig lcdc_sysc = { .rev_offs = 0x0, .sysc_offs = 0x54, - .sysc_flags = (SYSC_HAS_SIDLEMODE | SYSC_HAS_MIDLEMODE), - .idlemodes = (SIDLE_FORCE | SIDLE_NO | SIDLE_SMART), + .sysc_flags = SYSC_HAS_SIDLEMODE | SYSC_HAS_MIDLEMODE, + .idlemodes = SIDLE_FORCE | SIDLE_NO | SIDLE_SMART | + MSTANDBY_FORCE | MSTANDBY_NO | MSTANDBY_SMART, .sysc_fields = &omap_hwmod_sysc_type2, }; diff --git a/arch/arm/mach-omap2/pdata-quirks.c b/arch/arm/mach-omap2/pdata-quirks.c index d942a3357090..de8c54034a5a 100644 --- a/arch/arm/mach-omap2/pdata-quirks.c +++ b/arch/arm/mach-omap2/pdata-quirks.c @@ -89,6 +89,13 @@ static struct iommu_platform_data omap3_iommu_pdata = { .reset_name = "mmu", .assert_reset = omap_device_assert_hardreset, .deassert_reset = omap_device_deassert_hardreset, + .device_enable = omap_device_enable, + .device_idle = omap_device_idle, +}; + +static struct iommu_platform_data omap3_iommu_isp_pdata = { + .device_enable = omap_device_enable, + .device_idle = omap_device_idle, }; 
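For context, the new device_enable/device_idle hooks are reached through the
platform data attached to the device. A minimal sketch of that call pattern
(local names are illustrative, not taken from the OMAP IOMMU driver):

	struct iommu_platform_data *pdata = dev_get_platdata(&pdev->dev);

	if (pdata && pdata->device_enable)
		pdata->device_enable(pdev);	/* omap_device_enable() here */

	/* ... device is clocked and powered at this point ... */

	if (pdata && pdata->device_idle)
		pdata->device_idle(pdev);	/* omap_device_idle() here */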
static int omap3_sbc_t3730_twl_callback(struct device *dev, @@ -262,21 +269,13 @@ static void __init am3517_evm_legacy_init(void) am35xx_emac_reset(); } -static struct platform_device omap3_rom_rng_device = { - .name = "omap3-rom-rng", - .id = -1, - .dev = { - .platform_data = rx51_secure_rng_call, - }, -}; - static void __init nokia_n900_legacy_init(void) { hsmmc2_internal_input_clk(); mmc_pdata[0].name = "external"; mmc_pdata[1].name = "internal"; - if (omap_type() == OMAP2_DEVICE_TYPE_SEC) { + if (omap_type() != OMAP2_DEVICE_TYPE_GP) { if (IS_ENABLED(CONFIG_ARM_ERRATA_430973)) { pr_info("RX-51: Enabling ARM errata 430973 workaround\n"); /* set IBE to 1 */ @@ -285,9 +284,6 @@ static void __init nokia_n900_legacy_init(void) pr_warn("RX-51: Not enabling ARM errata 430973 workaround\n"); pr_warn("Thumb binaries may crash randomly without this workaround\n"); } - - pr_info("RX-51: Registering OMAP3 HWRNG device\n"); - platform_device_register(&omap3_rom_rng_device); } } @@ -424,6 +420,8 @@ static struct iommu_platform_data omap4_iommu_pdata = { .reset_name = "mmu_cache", .assert_reset = omap_device_assert_hardreset, .deassert_reset = omap_device_deassert_hardreset, + .device_enable = omap_device_enable, + .device_idle = omap_device_idle, }; #endif @@ -617,6 +615,8 @@ static struct of_dev_auxdata omap_auxdata_lookup[] = { #ifdef CONFIG_ARCH_OMAP3 OF_DEV_AUXDATA("ti,omap2-iommu", 0x5d000000, "5d000000.mmu", &omap3_iommu_pdata), + OF_DEV_AUXDATA("ti,omap2-iommu", 0x480bd400, "480bd400.mmu", + &omap3_iommu_isp_pdata), OF_DEV_AUXDATA("ti,omap3-smartreflex-core", 0x480cb000, "480cb000.smartreflex", &omap_sr_pdata[OMAP_SR_CORE]), OF_DEV_AUXDATA("ti,omap3-smartreflex-mpu-iva", 0x480c9000, @@ -627,6 +627,7 @@ static struct of_dev_auxdata omap_auxdata_lookup[] = { OF_DEV_AUXDATA("ti,davinci_mdio", 0x5c030000, "davinci_mdio.0", NULL), OF_DEV_AUXDATA("ti,am3517-emac", 0x5c000000, "davinci_emac.0", &am35xx_emac_pdata), + OF_DEV_AUXDATA("nokia,n900-rom-rng", 0, NULL, rx51_secure_rng_call), /* McBSP modules with sidetone core */ #if IS_ENABLED(CONFIG_SND_SOC_OMAP_MCBSP) OF_DEV_AUXDATA("ti,omap3-mcbsp", 0x49022000, "49022000.mcbsp", &mcbsp_pdata), diff --git a/arch/arm/mach-omap2/pm.c b/arch/arm/mach-omap2/pm.c index 1fde1bf53fb6..7ac9af56762d 100644 --- a/arch/arm/mach-omap2/pm.c +++ b/arch/arm/mach-omap2/pm.c @@ -74,83 +74,6 @@ int omap_pm_clkdms_setup(struct clockdomain *clkdm, void *unused) return 0; } -/* - * This API is to be called during init to set the various voltage - * domains to the voltage as per the opp table. Typically we boot up - * at the nominal voltage. So this function finds out the rate of - * the clock associated with the voltage domain, finds out the correct - * opp entry and sets the voltage domain to the voltage specified - * in the opp entry - */ -static int __init omap2_set_init_voltage(char *vdd_name, char *clk_name, - const char *oh_name) -{ - struct voltagedomain *voltdm; - struct clk *clk; - struct dev_pm_opp *opp; - unsigned long freq, bootup_volt; - struct device *dev; - - if (!vdd_name || !clk_name || !oh_name) { - pr_err("%s: invalid parameters\n", __func__); - goto exit; - } - - if (!strncmp(oh_name, "mpu", 3)) - /* - * All current OMAPs share voltage rail and clock - * source, so CPU0 is used to represent the MPU-SS. 
- */ - dev = get_cpu_device(0); - else - dev = omap_device_get_by_hwmod_name(oh_name); - - if (IS_ERR(dev)) { - pr_err("%s: Unable to get dev pointer for hwmod %s\n", - __func__, oh_name); - goto exit; - } - - voltdm = voltdm_lookup(vdd_name); - if (!voltdm) { - pr_err("%s: unable to get vdd pointer for vdd_%s\n", - __func__, vdd_name); - goto exit; - } - - clk = clk_get(NULL, clk_name); - if (IS_ERR(clk)) { - pr_err("%s: unable to get clk %s\n", __func__, clk_name); - goto exit; - } - - freq = clk_get_rate(clk); - clk_put(clk); - - opp = dev_pm_opp_find_freq_ceil(dev, &freq); - if (IS_ERR(opp)) { - pr_err("%s: unable to find boot up OPP for vdd_%s\n", - __func__, vdd_name); - goto exit; - } - - bootup_volt = dev_pm_opp_get_voltage(opp); - dev_pm_opp_put(opp); - - if (!bootup_volt) { - pr_err("%s: unable to find voltage corresponding to the bootup OPP for vdd_%s\n", - __func__, vdd_name); - goto exit; - } - - voltdm_scale(voltdm, bootup_volt); - return 0; - -exit: - pr_err("%s: unable to set vdd_%s\n", __func__, vdd_name); - return -EINVAL; -} - #ifdef CONFIG_SUSPEND static int omap_pm_enter(suspend_state_t suspend_state) { @@ -208,25 +131,6 @@ void omap_common_suspend_init(void *pm_suspend) } #endif /* CONFIG_SUSPEND */ -static void __init omap3_init_voltages(void) -{ - if (!soc_is_omap34xx()) - return; - - omap2_set_init_voltage("mpu_iva", "dpll1_ck", "mpu"); - omap2_set_init_voltage("core", "l3_ick", "l3_main"); -} - -static void __init omap4_init_voltages(void) -{ - if (!soc_is_omap44xx()) - return; - - omap2_set_init_voltage("mpu", "dpll_mpu_ck", "mpu"); - omap2_set_init_voltage("core", "l3_div_ck", "l3_main_1"); - omap2_set_init_voltage("iva", "dpll_iva_m5x2_ck", "iva"); -} - int __maybe_unused omap_pm_nop_init(void) { return 0; @@ -246,10 +150,6 @@ int __init omap2_common_pm_late_init(void) omap4_twl_init(); omap_voltage_late_init(); - /* Initialize the voltages */ - omap3_init_voltages(); - omap4_init_voltages(); - /* Smartreflex device init */ omap_devinit_smartreflex(); diff --git a/arch/arm/mach-pxa/icontrol.c b/arch/arm/mach-pxa/icontrol.c index 865b10344ea2..0474a4b1394d 100644 --- a/arch/arm/mach-pxa/icontrol.c +++ b/arch/arm/mach-pxa/icontrol.c @@ -12,6 +12,7 @@ #include <linux/irq.h> #include <linux/platform_device.h> +#include <linux/property.h> #include <linux/gpio.h> #include <asm/mach-types.h> @@ -22,7 +23,6 @@ #include <linux/spi/spi.h> #include <linux/spi/pxa2xx_spi.h> -#include <linux/can/platform/mcp251x.h> #include <linux/regulator/machine.h> #include "generic.h" @@ -69,8 +69,9 @@ static struct pxa2xx_spi_chip mcp251x_chip_info4 = { .gpio_cs = ICONTROL_MCP251x_nCS4 }; -static struct mcp251x_platform_data mcp251x_info = { - .oscillator_frequency = 16E6, +static const struct property_entry mcp251x_properties[] = { + PROPERTY_ENTRY_U32("clock-frequency", 16000000), + {} }; static struct spi_board_info mcp251x_board_info[] = { @@ -79,7 +80,7 @@ static struct spi_board_info mcp251x_board_info[] = { .max_speed_hz = 6500000, .bus_num = 3, .chip_select = 0, - .platform_data = &mcp251x_info, + .properties = mcp251x_properties, .controller_data = &mcp251x_chip_info1, .irq = PXA_GPIO_TO_IRQ(ICONTROL_MCP251x_nIRQ1) }, diff --git a/arch/arm/mach-pxa/zeus.c b/arch/arm/mach-pxa/zeus.c index da113c8eefbf..b27fc7ac9cea 100644 --- a/arch/arm/mach-pxa/zeus.c +++ b/arch/arm/mach-pxa/zeus.c @@ -13,6 +13,7 @@ #include <linux/leds.h> #include <linux/irq.h> #include <linux/pm.h> +#include <linux/property.h> #include <linux/gpio.h> #include <linux/gpio/machine.h> #include 
<linux/serial_8250.h> @@ -27,7 +28,6 @@ #include <linux/platform_data/i2c-pxa.h> #include <linux/platform_data/pca953x.h> #include <linux/apm-emulation.h> -#include <linux/can/platform/mcp251x.h> #include <linux/regulator/fixed.h> #include <linux/regulator/machine.h> @@ -428,14 +428,15 @@ static struct gpiod_lookup_table can_regulator_gpiod_table = { }, }; -static struct mcp251x_platform_data zeus_mcp2515_pdata = { - .oscillator_frequency = 16*1000*1000, +static const struct property_entry mcp251x_properties[] = { + PROPERTY_ENTRY_U32("clock-frequency", 16000000), + {} }; static struct spi_board_info zeus_spi_board_info[] = { [0] = { .modalias = "mcp2515", - .platform_data = &zeus_mcp2515_pdata, + .properties = mcp251x_properties, .irq = PXA_GPIO_TO_IRQ(ZEUS_CAN_GPIO), .max_speed_hz = 1*1000*1000, .bus_num = 3, diff --git a/arch/arm/mach-sunxi/mc_smp.c b/arch/arm/mach-sunxi/mc_smp.c index 239084cf8192..26cbce135338 100644 --- a/arch/arm/mach-sunxi/mc_smp.c +++ b/arch/arm/mach-sunxi/mc_smp.c @@ -481,14 +481,18 @@ static void sunxi_mc_smp_cpu_die(unsigned int l_cpu) static int sunxi_cpu_powerdown(unsigned int cpu, unsigned int cluster) { u32 reg; + int gating_bit = cpu; pr_debug("%s: cluster %u cpu %u\n", __func__, cluster, cpu); if (cpu >= SUNXI_CPUS_PER_CLUSTER || cluster >= SUNXI_NR_CLUSTERS) return -EINVAL; + if (is_a83t && cpu == 0) + gating_bit = 4; + /* gate processor power */ reg = readl(prcm_base + PRCM_PWROFF_GATING_REG(cluster)); - reg |= PRCM_PWROFF_GATING_REG_CORE(cpu); + reg |= PRCM_PWROFF_GATING_REG_CORE(gating_bit); writel(reg, prcm_base + PRCM_PWROFF_GATING_REG(cluster)); udelay(20); diff --git a/arch/arm/mm/alignment.c b/arch/arm/mm/alignment.c index 04b36436cbc0..788c5cf46de5 100644 --- a/arch/arm/mm/alignment.c +++ b/arch/arm/mm/alignment.c @@ -324,7 +324,7 @@ union offset_union { __put32_unaligned_check("strbt", val, addr) static void -do_alignment_finish_ldst(unsigned long addr, unsigned long instr, struct pt_regs *regs, union offset_union offset) +do_alignment_finish_ldst(unsigned long addr, u32 instr, struct pt_regs *regs, union offset_union offset) { if (!LDST_U_BIT(instr)) offset.un = -offset.un; @@ -337,7 +337,7 @@ do_alignment_finish_ldst(unsigned long addr, unsigned long instr, struct pt_regs } static int -do_alignment_ldrhstrh(unsigned long addr, unsigned long instr, struct pt_regs *regs) +do_alignment_ldrhstrh(unsigned long addr, u32 instr, struct pt_regs *regs) { unsigned int rd = RD_BITS(instr); @@ -386,8 +386,7 @@ do_alignment_ldrhstrh(unsigned long addr, unsigned long instr, struct pt_regs *r } static int -do_alignment_ldrdstrd(unsigned long addr, unsigned long instr, - struct pt_regs *regs) +do_alignment_ldrdstrd(unsigned long addr, u32 instr, struct pt_regs *regs) { unsigned int rd = RD_BITS(instr); unsigned int rd2; @@ -449,7 +448,7 @@ do_alignment_ldrdstrd(unsigned long addr, unsigned long instr, } static int -do_alignment_ldrstr(unsigned long addr, unsigned long instr, struct pt_regs *regs) +do_alignment_ldrstr(unsigned long addr, u32 instr, struct pt_regs *regs) { unsigned int rd = RD_BITS(instr); @@ -498,7 +497,7 @@ do_alignment_ldrstr(unsigned long addr, unsigned long instr, struct pt_regs *reg * PU = 10 A B */ static int -do_alignment_ldmstm(unsigned long addr, unsigned long instr, struct pt_regs *regs) +do_alignment_ldmstm(unsigned long addr, u32 instr, struct pt_regs *regs) { unsigned int rd, rn, correction, nr_regs, regbits; unsigned long eaddr, newaddr; @@ -539,7 +538,7 @@ do_alignment_ldmstm(unsigned long addr, unsigned long instr, struct 
pt_regs *reg * processor for us. */ if (addr != eaddr) { - pr_err("LDMSTM: PC = %08lx, instr = %08lx, " + pr_err("LDMSTM: PC = %08lx, instr = %08x, " "addr = %08lx, eaddr = %08lx\n", instruction_pointer(regs), instr, addr, eaddr); show_regs(regs); @@ -716,10 +715,10 @@ thumb2arm(u16 tinstr) * 2. Register name Rt from ARMv7 is same as Rd from ARMv6 (Rd is Rt) */ static void * -do_alignment_t32_to_handler(unsigned long *pinstr, struct pt_regs *regs, +do_alignment_t32_to_handler(u32 *pinstr, struct pt_regs *regs, union offset_union *poffset) { - unsigned long instr = *pinstr; + u32 instr = *pinstr; u16 tinst1 = (instr >> 16) & 0xffff; u16 tinst2 = instr & 0xffff; @@ -767,17 +766,48 @@ do_alignment_t32_to_handler(unsigned long *pinstr, struct pt_regs *regs, return NULL; } +static int alignment_get_arm(struct pt_regs *regs, u32 *ip, u32 *inst) +{ + u32 instr = 0; + int fault; + + if (user_mode(regs)) + fault = get_user(instr, ip); + else + fault = probe_kernel_address(ip, instr); + + *inst = __mem_to_opcode_arm(instr); + + return fault; +} + +static int alignment_get_thumb(struct pt_regs *regs, u16 *ip, u16 *inst) +{ + u16 instr = 0; + int fault; + + if (user_mode(regs)) + fault = get_user(instr, ip); + else + fault = probe_kernel_address(ip, instr); + + *inst = __mem_to_opcode_thumb16(instr); + + return fault; +} + static int do_alignment(unsigned long addr, unsigned int fsr, struct pt_regs *regs) { union offset_union uninitialized_var(offset); - unsigned long instr = 0, instrptr; - int (*handler)(unsigned long addr, unsigned long instr, struct pt_regs *regs); + unsigned long instrptr; + int (*handler)(unsigned long addr, u32 instr, struct pt_regs *regs); unsigned int type; - unsigned int fault; + u32 instr = 0; u16 tinstr = 0; int isize = 4; int thumb2_32b = 0; + int fault; if (interrupts_enabled(regs)) local_irq_enable(); @@ -786,15 +816,14 @@ do_alignment(unsigned long addr, unsigned int fsr, struct pt_regs *regs) if (thumb_mode(regs)) { u16 *ptr = (u16 *)(instrptr & ~1); - fault = probe_kernel_address(ptr, tinstr); - tinstr = __mem_to_opcode_thumb16(tinstr); + + fault = alignment_get_thumb(regs, ptr, &tinstr); if (!fault) { if (cpu_architecture() >= CPU_ARCH_ARMv7 && IS_T32(tinstr)) { /* Thumb-2 32-bit */ - u16 tinst2 = 0; - fault = probe_kernel_address(ptr + 1, tinst2); - tinst2 = __mem_to_opcode_thumb16(tinst2); + u16 tinst2; + fault = alignment_get_thumb(regs, ptr + 1, &tinst2); instr = __opcode_thumb32_compose(tinstr, tinst2); thumb2_32b = 1; } else { @@ -803,8 +832,7 @@ do_alignment(unsigned long addr, unsigned int fsr, struct pt_regs *regs) } } } else { - fault = probe_kernel_address((void *)instrptr, instr); - instr = __mem_to_opcode_arm(instr); + fault = alignment_get_arm(regs, (void *)instrptr, &instr); } if (fault) { @@ -926,7 +954,7 @@ do_alignment(unsigned long addr, unsigned int fsr, struct pt_regs *regs) * Oops, we didn't handle the instruction. */ pr_err("Alignment trap: not handling instruction " - "%0*lx at [<%08lx>]\n", + "%0*x at [<%08lx>]\n", isize << 1, isize == 2 ? 
tinstr : instr, instrptr); ai_skipped += 1; @@ -936,7 +964,7 @@ do_alignment(unsigned long addr, unsigned int fsr, struct pt_regs *regs) ai_user += 1; if (ai_usermode & UM_WARN) - printk("Alignment trap: %s (%d) PC=0x%08lx Instr=0x%0*lx " + printk("Alignment trap: %s (%d) PC=0x%08lx Instr=0x%0*x " "Address=0x%08lx FSR 0x%03x\n", current->comm, task_pid_nr(current), instrptr, isize << 1, diff --git a/arch/arm/mm/proc-v7-bugs.c b/arch/arm/mm/proc-v7-bugs.c index 9a07916af8dd..7c90b4c615a5 100644 --- a/arch/arm/mm/proc-v7-bugs.c +++ b/arch/arm/mm/proc-v7-bugs.c @@ -1,7 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 #include <linux/arm-smccc.h> #include <linux/kernel.h> -#include <linux/psci.h> #include <linux/smp.h> #include <asm/cp15.h> @@ -75,26 +74,20 @@ static void cpu_v7_spectre_init(void) case ARM_CPU_PART_CORTEX_A72: { struct arm_smccc_res res; - if (psci_ops.smccc_version == SMCCC_VERSION_1_0) - break; + arm_smccc_1_1_invoke(ARM_SMCCC_ARCH_FEATURES_FUNC_ID, + ARM_SMCCC_ARCH_WORKAROUND_1, &res); + if ((int)res.a0 != 0) + return; - switch (psci_ops.conduit) { - case PSCI_CONDUIT_HVC: - arm_smccc_1_1_hvc(ARM_SMCCC_ARCH_FEATURES_FUNC_ID, - ARM_SMCCC_ARCH_WORKAROUND_1, &res); - if ((int)res.a0 != 0) - break; + switch (arm_smccc_1_1_get_conduit()) { + case SMCCC_CONDUIT_HVC: per_cpu(harden_branch_predictor_fn, cpu) = call_hvc_arch_workaround_1; cpu_do_switch_mm = cpu_v7_hvc_switch_mm; spectre_v2_method = "hypervisor"; break; - case PSCI_CONDUIT_SMC: - arm_smccc_1_1_smc(ARM_SMCCC_ARCH_FEATURES_FUNC_ID, - ARM_SMCCC_ARCH_WORKAROUND_1, &res); - if ((int)res.a0 != 0) - break; + case SMCCC_CONDUIT_SMC: per_cpu(harden_branch_predictor_fn, cpu) = call_smc_arch_workaround_1; cpu_do_switch_mm = cpu_v7_smc_switch_mm; diff --git a/arch/arm/mm/proc-v7m.S b/arch/arm/mm/proc-v7m.S index 1448f144e7fb..1a49d503eafc 100644 --- a/arch/arm/mm/proc-v7m.S +++ b/arch/arm/mm/proc-v7m.S @@ -132,13 +132,11 @@ __v7m_setup_cont: dsb mov r6, lr @ save LR ldr sp, =init_thread_union + THREAD_START_SP - stmia sp, {r0-r3, r12} cpsie i svc #0 1: cpsid i - ldr r0, =exc_ret - orr lr, lr, #EXC_RET_THREADMODE_PROCESSSTACK - str lr, [r0] + /* Calculate exc_ret */ + orr r10, lr, #EXC_RET_THREADMODE_PROCESSSTACK ldmia sp, {r0-r3, r12} str r5, [r12, #11 * 4] @ restore the original SVC vector entry mov lr, r6 @ restore LR diff --git a/arch/arm/plat-pxa/ssp.c b/arch/arm/plat-pxa/ssp.c index 9a6e4923bd69..563440315acd 100644 --- a/arch/arm/plat-pxa/ssp.c +++ b/arch/arm/plat-pxa/ssp.c @@ -89,7 +89,7 @@ void pxa_ssp_free(struct ssp_device *ssp) ssp->use_count--; ssp->label = NULL; } else - dev_err(&ssp->pdev->dev, "device already free\n"); + dev_err(ssp->dev, "device already free\n"); mutex_unlock(&ssp_lock); } EXPORT_SYMBOL(pxa_ssp_free); @@ -118,7 +118,7 @@ static int pxa_ssp_probe(struct platform_device *pdev) if (ssp == NULL) return -ENOMEM; - ssp->pdev = pdev; + ssp->dev = dev; ssp->clk = devm_clk_get(dev, NULL); if (IS_ERR(ssp->clk)) diff --git a/arch/arm/xen/Makefile b/arch/arm/xen/Makefile index 7ed28982c4c3..c32d04713ba0 100644 --- a/arch/arm/xen/Makefile +++ b/arch/arm/xen/Makefile @@ -1,3 +1,2 @@ # SPDX-License-Identifier: GPL-2.0-only obj-y := enlighten.o hypercall.o grant-table.o p2m.o mm.o -obj-$(CONFIG_XEN_EFI) += efi.o diff --git a/arch/arm/xen/efi.c b/arch/arm/xen/efi.c deleted file mode 100644 index d687a73044bf..000000000000 --- a/arch/arm/xen/efi.c +++ /dev/null @@ -1,28 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - * Copyright (c) 2015, Linaro Limited, Shannon Zhao - */ - -#include <linux/efi.h> -#include 
<xen/xen-ops.h> -#include <asm/xen/xen-ops.h> - -/* Set XEN EFI runtime services function pointers. Other fields of struct efi, - * e.g. efi.systab, will be set like normal EFI. - */ -void __init xen_efi_runtime_setup(void) -{ - efi.get_time = xen_efi_get_time; - efi.set_time = xen_efi_set_time; - efi.get_wakeup_time = xen_efi_get_wakeup_time; - efi.set_wakeup_time = xen_efi_set_wakeup_time; - efi.get_variable = xen_efi_get_variable; - efi.get_next_variable = xen_efi_get_next_variable; - efi.set_variable = xen_efi_set_variable; - efi.query_variable_info = xen_efi_query_variable_info; - efi.update_capsule = xen_efi_update_capsule; - efi.query_capsule_caps = xen_efi_query_capsule_caps; - efi.get_next_high_mono_count = xen_efi_get_next_high_mono_count; - efi.reset_system = xen_efi_reset_system; -} -EXPORT_SYMBOL_GPL(xen_efi_runtime_setup); diff --git a/arch/arm/xen/enlighten.c b/arch/arm/xen/enlighten.c index 1e57692552d9..dd6804a64f1a 100644 --- a/arch/arm/xen/enlighten.c +++ b/arch/arm/xen/enlighten.c @@ -15,7 +15,6 @@ #include <xen/xen-ops.h> #include <asm/xen/hypervisor.h> #include <asm/xen/hypercall.h> -#include <asm/xen/xen-ops.h> #include <asm/system_misc.h> #include <asm/efi.h> #include <linux/interrupt.h> @@ -437,7 +436,7 @@ EXPORT_SYMBOL_GPL(HYPERVISOR_memory_op); EXPORT_SYMBOL_GPL(HYPERVISOR_physdev_op); EXPORT_SYMBOL_GPL(HYPERVISOR_vcpu_op); EXPORT_SYMBOL_GPL(HYPERVISOR_tmem_op); -EXPORT_SYMBOL_GPL(HYPERVISOR_platform_op); +EXPORT_SYMBOL_GPL(HYPERVISOR_platform_op_raw); EXPORT_SYMBOL_GPL(HYPERVISOR_multicall); EXPORT_SYMBOL_GPL(HYPERVISOR_vm_assist); EXPORT_SYMBOL_GPL(HYPERVISOR_dm_op); diff --git a/arch/arm/xen/mm.c b/arch/arm/xen/mm.c index 2b2c208408bb..3c7645d7b9b4 100644 --- a/arch/arm/xen/mm.c +++ b/arch/arm/xen/mm.c @@ -15,6 +15,7 @@ #include <xen/interface/grant_table.h> #include <xen/interface/memory.h> #include <xen/page.h> +#include <xen/xen-ops.h> #include <xen/swiotlb-xen.h> #include <asm/cacheflush.h> @@ -28,7 +29,10 @@ unsigned long xen_get_swiotlb_free_pages(unsigned int order) for_each_memblock(memory, reg) { if (reg->base < (phys_addr_t)0xffffffff) { - flags |= __GFP_DMA; + if (IS_ENABLED(CONFIG_ZONE_DMA32)) + flags |= __GFP_DMA32; + else + flags |= __GFP_DMA; break; } } @@ -130,7 +134,7 @@ void xen_destroy_contiguous_region(phys_addr_t pstart, unsigned int order) return; } -int __init xen_mm_init(void) +static int __init xen_mm_init(void) { struct gnttab_cache_flush cflush; if (!xen_initial_domain()) diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 41a9b4257b72..fcc6635666b4 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -67,7 +67,7 @@ config ARM64 select ARCH_USE_QUEUED_SPINLOCKS select ARCH_SUPPORTS_MEMORY_FAILURE select ARCH_SUPPORTS_ATOMIC_RMW - select ARCH_SUPPORTS_INT128 if GCC_VERSION >= 50000 || CC_IS_CLANG + select ARCH_SUPPORTS_INT128 if CC_HAS_INT128 && (GCC_VERSION >= 50000 || CC_IS_CLANG) select ARCH_SUPPORTS_NUMA_BALANCING select ARCH_WANT_COMPAT_IPC_PARSE_VERSION if COMPAT select ARCH_WANT_DEFAULT_TOPDOWN_MMAP_LAYOUT @@ -110,7 +110,6 @@ config ARM64 select GENERIC_STRNLEN_USER select GENERIC_TIME_VSYSCALL select GENERIC_GETTIMEOFDAY - select GENERIC_COMPAT_VDSO if (!CPU_BIG_ENDIAN && COMPAT) select HANDLE_DOMAIN_IRQ select HARDIRQS_SW_RESEND select HAVE_PCI @@ -144,6 +143,8 @@ config ARM64 select HAVE_DEBUG_KMEMLEAK select HAVE_DMA_CONTIGUOUS select HAVE_DYNAMIC_FTRACE + select HAVE_DYNAMIC_FTRACE_WITH_REGS \ + if $(cc-option,-fpatchable-function-entry=2) select HAVE_EFFICIENT_UNALIGNED_ACCESS select HAVE_FAST_GUP select 
HAVE_FTRACE_MCOUNT_RECORD @@ -267,6 +268,10 @@ config GENERIC_CSUM config GENERIC_CALIBRATE_DELAY def_bool y +config ZONE_DMA + bool "Support DMA zone" if EXPERT + default y + config ZONE_DMA32 bool "Support DMA32 zone" if EXPERT default y @@ -539,6 +544,16 @@ config ARM64_ERRATUM_1286807 invalidated has been observed by other observers. The workaround repeats the TLBI+DSB operation. +config ARM64_ERRATUM_1319367 + bool "Cortex-A57/A72: Speculative AT instruction using out-of-context translation regime could cause subsequent request to generate an incorrect translation" + default y + help + This option adds work arounds for ARM Cortex-A57 erratum 1319537 + and A72 erratum 1319367 + + Cortex-A57 and A72 cores could end-up with corrupted TLBs by + speculating an AT instruction during a guest context switch. + If unsure, say Y. config ARM64_ERRATUM_1463225 @@ -559,6 +574,22 @@ config ARM64_ERRATUM_1463225 If unsure, say Y. +config ARM64_ERRATUM_1542419 + bool "Neoverse-N1: workaround mis-ordering of instruction fetches" + default y + help + This option adds a workaround for ARM Neoverse-N1 erratum + 1542419. + + Affected Neoverse-N1 cores could execute a stale instruction when + modified by another CPU. The workaround depends on a firmware + counterpart. + + Workaround the issue by hiding the DIC feature from EL0. This + forces user-space to perform cache maintenance. + + If unsure, say Y. + config CAVIUM_ERRATUM_22375 bool "Cavium erratum 22375, 24313" default y @@ -617,6 +648,23 @@ config CAVIUM_ERRATUM_30115 If unsure, say Y. +config CAVIUM_TX2_ERRATUM_219 + bool "Cavium ThunderX2 erratum 219: PRFM between TTBR change and ISB fails" + default y + help + On Cavium ThunderX2, a load, store or prefetch instruction between a + TTBR update and the corresponding context synchronizing operation can + cause a spurious Data Abort to be delivered to any hardware thread in + the CPU core. + + Work around the issue by avoiding the problematic code sequence and + trapping KVM guest TTBRx_EL1 writes to EL2 when SMT is enabled. The + trap handler performs the corresponding register access, skips the + instruction and ensures context synchronization by virtue of the + exception return. + + If unsure, say Y. + config QCOM_FALKOR_ERRATUM_1003 bool "Falkor E1003: Incorrect translation due to ASID change" default y @@ -829,10 +877,26 @@ config ARM64_PA_BITS default 48 if ARM64_PA_BITS_48 default 52 if ARM64_PA_BITS_52 +choice + prompt "Endianness" + default CPU_LITTLE_ENDIAN + help + Select the endianness of data accesses performed by the CPU. Userspace + applications will need to be compiled and linked for the endianness + that is selected here. + config CPU_BIG_ENDIAN bool "Build big-endian kernel" help - Say Y if you plan on running a kernel in big-endian mode. + Say Y if you plan on running a kernel with a big-endian userspace. + +config CPU_LITTLE_ENDIAN + bool "Build little-endian kernel" + help + Say Y if you plan on running a kernel with a little-endian userspace. + This is usually the case for distributions targeting arm64. + +endchoice config SCHED_MC bool "Multi-core scheduler support" @@ -1159,7 +1223,7 @@ menuconfig COMPAT if COMPAT config KUSER_HELPERS - bool "Enable kuser helpers page for 32 bit applications" + bool "Enable kuser helpers page for 32-bit applications" default y help Warning: disabling this option may break 32-bit user programs. 
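The help text above refers to the vectors a 32-bit process calls at fixed
addresses. For example, the cmpxchg helper at 0xffff0fc0 is used roughly as
follows (illustrative userspace code, per Documentation/arm/kernel_user_helpers):

	typedef int (*kuser_cmpxchg_t)(int oldval, int newval, volatile int *ptr);
	#define __kuser_cmpxchg ((kuser_cmpxchg_t)0xffff0fc0)

	static int atomic_increment(volatile int *ptr)
	{
		int old, new;

		do {
			old = *ptr;
			new = old + 1;
		} while (__kuser_cmpxchg(old, new, ptr));	/* 0 on success */

		return new;
	}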
@@ -1185,6 +1249,18 @@ config KUSER_HELPERS Say N here only if you are absolutely certain that you do not need these helpers; otherwise, the safe option is to say Y. +config COMPAT_VDSO + bool "Enable vDSO for 32-bit applications" + depends on !CPU_BIG_ENDIAN && "$(CROSS_COMPILE_COMPAT)" != "" + select GENERIC_COMPAT_VDSO + default y + help + Place in the process address space of 32-bit applications an + ELF shared object providing fast implementations of gettimeofday + and clock_gettime. + + You must have a 32-bit build of glibc 2.22 or later for programs + to seamlessly take advantage of this. menuconfig ARMV8_DEPRECATED bool "Emulate deprecated/obsolete ARMv8 instructions" @@ -1569,6 +1645,7 @@ config CMDLINE config CMDLINE_FORCE bool "Always use the default kernel command string" + depends on CMDLINE != "" help Always use the default kernel command string, even if the boot loader passes other arguments to the kernel. diff --git a/arch/arm64/Makefile b/arch/arm64/Makefile index 84a3d502c5a5..1fbe24d4fdb6 100644 --- a/arch/arm64/Makefile +++ b/arch/arm64/Makefile @@ -53,22 +53,6 @@ $(warning Detected assembler with broken .inst; disassembly will be unreliable) endif endif -ifeq ($(CONFIG_GENERIC_COMPAT_VDSO), y) - CROSS_COMPILE_COMPAT ?= $(CONFIG_CROSS_COMPILE_COMPAT_VDSO:"%"=%) - - ifeq ($(CONFIG_CC_IS_CLANG), y) - $(warning CROSS_COMPILE_COMPAT is clang, the compat vDSO will not be built) - else ifeq ($(strip $(CROSS_COMPILE_COMPAT)),) - $(warning CROSS_COMPILE_COMPAT not defined or empty, the compat vDSO will not be built) - else ifeq ($(shell which $(CROSS_COMPILE_COMPAT)gcc 2> /dev/null),) - $(error $(CROSS_COMPILE_COMPAT)gcc not found, check CROSS_COMPILE_COMPAT) - else - export CROSS_COMPILE_COMPAT - export CONFIG_COMPAT_VDSO := y - compat_vdso := -DCONFIG_COMPAT_VDSO=1 - endif -endif - KBUILD_CFLAGS += -mgeneral-regs-only $(lseinstr) $(brokengasinst) \ $(compat_vdso) $(cc_has_k_constraint) KBUILD_CFLAGS += -fno-asynchronous-unwind-tables @@ -111,6 +95,11 @@ ifeq ($(CONFIG_ARM64_MODULE_PLTS),y) KBUILD_LDS_MODULE += $(srctree)/arch/arm64/kernel/module.lds endif +ifeq ($(CONFIG_DYNAMIC_FTRACE_WITH_REGS),y) + KBUILD_CPPFLAGS += -DCC_USING_PATCHABLE_FUNCTION_ENTRY + CC_FLAGS_FTRACE := -fpatchable-function-entry=2 +endif + # Default value head-y := arch/arm64/kernel/head.o diff --git a/arch/arm64/boot/dts/allwinner/sun50i-a64-pine64-plus.dts b/arch/arm64/boot/dts/allwinner/sun50i-a64-pine64-plus.dts index 24f1aac366d6..d5b6e8159a33 100644 --- a/arch/arm64/boot/dts/allwinner/sun50i-a64-pine64-plus.dts +++ b/arch/arm64/boot/dts/allwinner/sun50i-a64-pine64-plus.dts @@ -63,3 +63,12 @@ reg = <1>; }; }; + +®_dc1sw { + /* + * Ethernet PHY needs 30ms to properly power up and some more + * to initialize. 100ms should be plenty of time to finish + * whole process. 
+ */ + regulator-enable-ramp-delay = <100000>; +}; diff --git a/arch/arm64/boot/dts/allwinner/sun50i-a64-pinebook.dts b/arch/arm64/boot/dts/allwinner/sun50i-a64-pinebook.dts index 2b6345db7dc0..78c82a665c84 100644 --- a/arch/arm64/boot/dts/allwinner/sun50i-a64-pinebook.dts +++ b/arch/arm64/boot/dts/allwinner/sun50i-a64-pinebook.dts @@ -104,6 +104,7 @@ &ehci0 { phys = <&usbphy 0>; + phy-names = "usb"; status = "okay"; }; @@ -150,6 +151,7 @@ &ohci0 { phys = <&usbphy 0>; + phy-names = "usb"; status = "okay"; }; diff --git a/arch/arm64/boot/dts/allwinner/sun50i-a64-sopine-baseboard.dts b/arch/arm64/boot/dts/allwinner/sun50i-a64-sopine-baseboard.dts index e6fb9683f213..25099202c52c 100644 --- a/arch/arm64/boot/dts/allwinner/sun50i-a64-sopine-baseboard.dts +++ b/arch/arm64/boot/dts/allwinner/sun50i-a64-sopine-baseboard.dts @@ -159,6 +159,12 @@ }; ®_dc1sw { + /* + * Ethernet PHY needs 30ms to properly power up and some more + * to initialize. 100ms should be plenty of time to finish + * whole process. + */ + regulator-enable-ramp-delay = <100000>; regulator-name = "vcc-phy"; }; diff --git a/arch/arm64/boot/dts/allwinner/sun50i-a64.dtsi b/arch/arm64/boot/dts/allwinner/sun50i-a64.dtsi index 69128a6dfc46..70f4cce6be43 100644 --- a/arch/arm64/boot/dts/allwinner/sun50i-a64.dtsi +++ b/arch/arm64/boot/dts/allwinner/sun50i-a64.dtsi @@ -142,15 +142,6 @@ clock-output-names = "ext-osc32k"; }; - pmu { - compatible = "arm,cortex-a53-pmu"; - interrupts = <GIC_SPI 152 IRQ_TYPE_LEVEL_HIGH>, - <GIC_SPI 153 IRQ_TYPE_LEVEL_HIGH>, - <GIC_SPI 154 IRQ_TYPE_LEVEL_HIGH>, - <GIC_SPI 155 IRQ_TYPE_LEVEL_HIGH>; - interrupt-affinity = <&cpu0>, <&cpu1>, <&cpu2>, <&cpu3>; - }; - psci { compatible = "arm,psci-0.2"; method = "smc"; @@ -553,6 +544,7 @@ resets = <&ccu RST_BUS_OHCI1>, <&ccu RST_BUS_EHCI1>; phys = <&usbphy 1>; + phy-names = "usb"; status = "disabled"; }; @@ -564,6 +556,7 @@ <&ccu CLK_USB_OHCI1>; resets = <&ccu RST_BUS_OHCI1>; phys = <&usbphy 1>; + phy-names = "usb"; status = "disabled"; }; diff --git a/arch/arm64/boot/dts/allwinner/sun50i-h6.dtsi b/arch/arm64/boot/dts/allwinner/sun50i-h6.dtsi index 4020a1aafa3e..0d5ea19336a1 100644 --- a/arch/arm64/boot/dts/allwinner/sun50i-h6.dtsi +++ b/arch/arm64/boot/dts/allwinner/sun50i-h6.dtsi @@ -547,6 +547,7 @@ resets = <&ccu RST_BUS_OHCI3>, <&ccu RST_BUS_EHCI3>; phys = <&usb2phy 3>; + phy-names = "usb"; status = "disabled"; }; @@ -558,6 +559,7 @@ <&ccu CLK_USB_OHCI3>; resets = <&ccu RST_BUS_OHCI3>; phys = <&usb2phy 3>; + phy-names = "usb"; status = "disabled"; }; diff --git a/arch/arm64/boot/dts/broadcom/stingray/stingray-pinctrl.dtsi b/arch/arm64/boot/dts/broadcom/stingray/stingray-pinctrl.dtsi index 8a3a770e8f2c..56789ccf9454 100644 --- a/arch/arm64/boot/dts/broadcom/stingray/stingray-pinctrl.dtsi +++ b/arch/arm64/boot/dts/broadcom/stingray/stingray-pinctrl.dtsi @@ -42,13 +42,14 @@ pinmux: pinmux@14029c { compatible = "pinctrl-single"; - reg = <0x0014029c 0x250>; + reg = <0x0014029c 0x26c>; #address-cells = <1>; #size-cells = <1>; pinctrl-single,register-width = <32>; pinctrl-single,function-mask = <0xf>; pinctrl-single,gpio-range = < - &range 0 154 MODE_GPIO + &range 0 91 MODE_GPIO + &range 95 60 MODE_GPIO >; range: gpio-range { #pinctrl-single,gpio-range-cells = <3>; diff --git a/arch/arm64/boot/dts/broadcom/stingray/stingray.dtsi b/arch/arm64/boot/dts/broadcom/stingray/stingray.dtsi index 71e2e34400d4..0098dfdef96c 100644 --- a/arch/arm64/boot/dts/broadcom/stingray/stingray.dtsi +++ b/arch/arm64/boot/dts/broadcom/stingray/stingray.dtsi @@ -464,8 +464,7 @@ <&pinmux 108 
16 27>, <&pinmux 135 77 6>, <&pinmux 141 67 4>, - <&pinmux 145 149 6>, - <&pinmux 151 91 4>; + <&pinmux 145 149 6>; }; i2c1: i2c@e0000 { diff --git a/arch/arm64/boot/dts/freescale/fsl-ls1028a-qds.dts b/arch/arm64/boot/dts/freescale/fsl-ls1028a-qds.dts index d98346da01df..078a5010228c 100644 --- a/arch/arm64/boot/dts/freescale/fsl-ls1028a-qds.dts +++ b/arch/arm64/boot/dts/freescale/fsl-ls1028a-qds.dts @@ -127,7 +127,7 @@ status = "okay"; i2c-mux@77 { - compatible = "nxp,pca9847"; + compatible = "nxp,pca9547"; reg = <0x77>; #address-cells = <1>; #size-cells = <0>; diff --git a/arch/arm64/boot/dts/freescale/fsl-lx2160a.dtsi b/arch/arm64/boot/dts/freescale/fsl-lx2160a.dtsi index 408e0ecdce6a..b032f3890c8c 100644 --- a/arch/arm64/boot/dts/freescale/fsl-lx2160a.dtsi +++ b/arch/arm64/boot/dts/freescale/fsl-lx2160a.dtsi @@ -33,7 +33,7 @@ i-cache-line-size = <64>; i-cache-sets = <192>; next-level-cache = <&cluster0_l2>; - cpu-idle-states = <&cpu_pw20>; + cpu-idle-states = <&cpu_pw15>; }; cpu@1 { @@ -49,7 +49,7 @@ i-cache-line-size = <64>; i-cache-sets = <192>; next-level-cache = <&cluster0_l2>; - cpu-idle-states = <&cpu_pw20>; + cpu-idle-states = <&cpu_pw15>; }; cpu@100 { @@ -65,7 +65,7 @@ i-cache-line-size = <64>; i-cache-sets = <192>; next-level-cache = <&cluster1_l2>; - cpu-idle-states = <&cpu_pw20>; + cpu-idle-states = <&cpu_pw15>; }; cpu@101 { @@ -81,7 +81,7 @@ i-cache-line-size = <64>; i-cache-sets = <192>; next-level-cache = <&cluster1_l2>; - cpu-idle-states = <&cpu_pw20>; + cpu-idle-states = <&cpu_pw15>; }; cpu@200 { @@ -97,7 +97,7 @@ i-cache-line-size = <64>; i-cache-sets = <192>; next-level-cache = <&cluster2_l2>; - cpu-idle-states = <&cpu_pw20>; + cpu-idle-states = <&cpu_pw15>; }; cpu@201 { @@ -113,7 +113,7 @@ i-cache-line-size = <64>; i-cache-sets = <192>; next-level-cache = <&cluster2_l2>; - cpu-idle-states = <&cpu_pw20>; + cpu-idle-states = <&cpu_pw15>; }; cpu@300 { @@ -129,7 +129,7 @@ i-cache-line-size = <64>; i-cache-sets = <192>; next-level-cache = <&cluster3_l2>; - cpu-idle-states = <&cpu_pw20>; + cpu-idle-states = <&cpu_pw15>; }; cpu@301 { @@ -145,7 +145,7 @@ i-cache-line-size = <64>; i-cache-sets = <192>; next-level-cache = <&cluster3_l2>; - cpu-idle-states = <&cpu_pw20>; + cpu-idle-states = <&cpu_pw15>; }; cpu@400 { @@ -161,7 +161,7 @@ i-cache-line-size = <64>; i-cache-sets = <192>; next-level-cache = <&cluster4_l2>; - cpu-idle-states = <&cpu_pw20>; + cpu-idle-states = <&cpu_pw15>; }; cpu@401 { @@ -177,7 +177,7 @@ i-cache-line-size = <64>; i-cache-sets = <192>; next-level-cache = <&cluster4_l2>; - cpu-idle-states = <&cpu_pw20>; + cpu-idle-states = <&cpu_pw15>; }; cpu@500 { @@ -193,7 +193,7 @@ i-cache-line-size = <64>; i-cache-sets = <192>; next-level-cache = <&cluster5_l2>; - cpu-idle-states = <&cpu_pw20>; + cpu-idle-states = <&cpu_pw15>; }; cpu@501 { @@ -209,7 +209,7 @@ i-cache-line-size = <64>; i-cache-sets = <192>; next-level-cache = <&cluster5_l2>; - cpu-idle-states = <&cpu_pw20>; + cpu-idle-states = <&cpu_pw15>; }; cpu@600 { @@ -225,7 +225,7 @@ i-cache-line-size = <64>; i-cache-sets = <192>; next-level-cache = <&cluster6_l2>; - cpu-idle-states = <&cpu_pw20>; + cpu-idle-states = <&cpu_pw15>; }; cpu@601 { @@ -241,7 +241,7 @@ i-cache-line-size = <64>; i-cache-sets = <192>; next-level-cache = <&cluster6_l2>; - cpu-idle-states = <&cpu_pw20>; + cpu-idle-states = <&cpu_pw15>; }; cpu@700 { @@ -257,7 +257,7 @@ i-cache-line-size = <64>; i-cache-sets = <192>; next-level-cache = <&cluster7_l2>; - cpu-idle-states = <&cpu_pw20>; + cpu-idle-states = <&cpu_pw15>; }; cpu@701 { @@ -273,7 
+273,7 @@ i-cache-line-size = <64>; i-cache-sets = <192>; next-level-cache = <&cluster7_l2>; - cpu-idle-states = <&cpu_pw20>; + cpu-idle-states = <&cpu_pw15>; }; cluster0_l2: l2-cache0 { @@ -340,9 +340,9 @@ cache-level = <2>; }; - cpu_pw20: cpu-pw20 { + cpu_pw15: cpu-pw15 { compatible = "arm,idle-state"; - idle-state-name = "PW20"; + idle-state-name = "PW15"; arm,psci-suspend-param = <0x0>; entry-latency-us = <2000>; exit-latency-us = <2000>; diff --git a/arch/arm64/boot/dts/freescale/imx8mm.dtsi b/arch/arm64/boot/dts/freescale/imx8mm.dtsi index 5f9d0da196e1..23c8fad7932b 100644 --- a/arch/arm64/boot/dts/freescale/imx8mm.dtsi +++ b/arch/arm64/boot/dts/freescale/imx8mm.dtsi @@ -394,7 +394,7 @@ }; sdma2: dma-controller@302c0000 { - compatible = "fsl,imx8mm-sdma", "fsl,imx7d-sdma"; + compatible = "fsl,imx8mm-sdma", "fsl,imx8mq-sdma"; reg = <0x302c0000 0x10000>; interrupts = <GIC_SPI 103 IRQ_TYPE_LEVEL_HIGH>; clocks = <&clk IMX8MM_CLK_SDMA2_ROOT>, @@ -405,7 +405,7 @@ }; sdma3: dma-controller@302b0000 { - compatible = "fsl,imx8mm-sdma", "fsl,imx7d-sdma"; + compatible = "fsl,imx8mm-sdma", "fsl,imx8mq-sdma"; reg = <0x302b0000 0x10000>; interrupts = <GIC_SPI 34 IRQ_TYPE_LEVEL_HIGH>; clocks = <&clk IMX8MM_CLK_SDMA3_ROOT>, @@ -694,7 +694,7 @@ compatible = "fsl,imx8mm-usdhc", "fsl,imx7d-usdhc"; reg = <0x30b40000 0x10000>; interrupts = <GIC_SPI 22 IRQ_TYPE_LEVEL_HIGH>; - clocks = <&clk IMX8MM_CLK_DUMMY>, + clocks = <&clk IMX8MM_CLK_IPG_ROOT>, <&clk IMX8MM_CLK_NAND_USDHC_BUS>, <&clk IMX8MM_CLK_USDHC1_ROOT>; clock-names = "ipg", "ahb", "per"; @@ -710,7 +710,7 @@ compatible = "fsl,imx8mm-usdhc", "fsl,imx7d-usdhc"; reg = <0x30b50000 0x10000>; interrupts = <GIC_SPI 23 IRQ_TYPE_LEVEL_HIGH>; - clocks = <&clk IMX8MM_CLK_DUMMY>, + clocks = <&clk IMX8MM_CLK_IPG_ROOT>, <&clk IMX8MM_CLK_NAND_USDHC_BUS>, <&clk IMX8MM_CLK_USDHC2_ROOT>; clock-names = "ipg", "ahb", "per"; @@ -724,7 +724,7 @@ compatible = "fsl,imx8mm-usdhc", "fsl,imx7d-usdhc"; reg = <0x30b60000 0x10000>; interrupts = <GIC_SPI 24 IRQ_TYPE_LEVEL_HIGH>; - clocks = <&clk IMX8MM_CLK_DUMMY>, + clocks = <&clk IMX8MM_CLK_IPG_ROOT>, <&clk IMX8MM_CLK_NAND_USDHC_BUS>, <&clk IMX8MM_CLK_USDHC3_ROOT>; clock-names = "ipg", "ahb", "per"; @@ -737,7 +737,7 @@ }; sdma1: dma-controller@30bd0000 { - compatible = "fsl,imx8mm-sdma", "fsl,imx7d-sdma"; + compatible = "fsl,imx8mm-sdma", "fsl,imx8mq-sdma"; reg = <0x30bd0000 0x10000>; interrupts = <GIC_SPI 2 IRQ_TYPE_LEVEL_HIGH>; clocks = <&clk IMX8MM_CLK_SDMA1_ROOT>, diff --git a/arch/arm64/boot/dts/freescale/imx8mn.dtsi b/arch/arm64/boot/dts/freescale/imx8mn.dtsi index 785f4c420fa4..43c4db312146 100644 --- a/arch/arm64/boot/dts/freescale/imx8mn.dtsi +++ b/arch/arm64/boot/dts/freescale/imx8mn.dtsi @@ -288,7 +288,7 @@ }; sdma3: dma-controller@302b0000 { - compatible = "fsl,imx8mn-sdma", "fsl,imx7d-sdma"; + compatible = "fsl,imx8mn-sdma", "fsl,imx8mq-sdma"; reg = <0x302b0000 0x10000>; interrupts = <GIC_SPI 34 IRQ_TYPE_LEVEL_HIGH>; clocks = <&clk IMX8MN_CLK_SDMA3_ROOT>, @@ -299,7 +299,7 @@ }; sdma2: dma-controller@302c0000 { - compatible = "fsl,imx8mn-sdma", "fsl,imx7d-sdma"; + compatible = "fsl,imx8mn-sdma", "fsl,imx8mq-sdma"; reg = <0x302c0000 0x10000>; interrupts = <GIC_SPI 103 IRQ_TYPE_LEVEL_HIGH>; clocks = <&clk IMX8MN_CLK_SDMA2_ROOT>, @@ -569,7 +569,7 @@ compatible = "fsl,imx8mn-usdhc", "fsl,imx7d-usdhc"; reg = <0x30b40000 0x10000>; interrupts = <GIC_SPI 22 IRQ_TYPE_LEVEL_HIGH>; - clocks = <&clk IMX8MN_CLK_DUMMY>, + clocks = <&clk IMX8MN_CLK_IPG_ROOT>, <&clk IMX8MN_CLK_NAND_USDHC_BUS>, <&clk IMX8MN_CLK_USDHC1_ROOT>; 
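Note: the usdhc hunks in this patch replace the placeholder IMX8M*_CLK_DUMMY with the real IPG root clock for the first clocks entry. The driver claims these clocks by the names listed in clock-names ("ipg", "ahb", "per"), so a dummy in that slot means the controller's register-interface clock is never really managed. The fragment below is a generic consumer-side sketch of how such named clocks are typically claimed; it is not code from this patch, and my_sdhc_* are made-up names.

/* Minimal probe() fragment claiming the three named clocks described
 * by the usdhc device-tree nodes above, using the clk bulk helpers.
 */
#include <linux/clk.h>
#include <linux/kernel.h>
#include <linux/platform_device.h>

static struct clk_bulk_data my_sdhc_clks[] = {
	{ .id = "ipg" },	/* register/bus clock, the entry fixed above */
	{ .id = "ahb" },
	{ .id = "per" },
};

static int my_sdhc_probe(struct platform_device *pdev)
{
	int ret;

	ret = devm_clk_bulk_get(&pdev->dev, ARRAY_SIZE(my_sdhc_clks),
				my_sdhc_clks);
	if (ret)
		return ret;

	return clk_bulk_prepare_enable(ARRAY_SIZE(my_sdhc_clks),
				       my_sdhc_clks);
}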
clock-names = "ipg", "ahb", "per"; @@ -585,7 +585,7 @@ compatible = "fsl,imx8mn-usdhc", "fsl,imx7d-usdhc"; reg = <0x30b50000 0x10000>; interrupts = <GIC_SPI 23 IRQ_TYPE_LEVEL_HIGH>; - clocks = <&clk IMX8MN_CLK_DUMMY>, + clocks = <&clk IMX8MN_CLK_IPG_ROOT>, <&clk IMX8MN_CLK_NAND_USDHC_BUS>, <&clk IMX8MN_CLK_USDHC2_ROOT>; clock-names = "ipg", "ahb", "per"; @@ -599,7 +599,7 @@ compatible = "fsl,imx8mn-usdhc", "fsl,imx7d-usdhc"; reg = <0x30b60000 0x10000>; interrupts = <GIC_SPI 24 IRQ_TYPE_LEVEL_HIGH>; - clocks = <&clk IMX8MN_CLK_DUMMY>, + clocks = <&clk IMX8MN_CLK_IPG_ROOT>, <&clk IMX8MN_CLK_NAND_USDHC_BUS>, <&clk IMX8MN_CLK_USDHC3_ROOT>; clock-names = "ipg", "ahb", "per"; @@ -612,7 +612,7 @@ }; sdma1: dma-controller@30bd0000 { - compatible = "fsl,imx8mn-sdma", "fsl,imx7d-sdma"; + compatible = "fsl,imx8mn-sdma", "fsl,imx8mq-sdma"; reg = <0x30bd0000 0x10000>; interrupts = <GIC_SPI 2 IRQ_TYPE_LEVEL_HIGH>; clocks = <&clk IMX8MN_CLK_SDMA1_ROOT>, diff --git a/arch/arm64/boot/dts/freescale/imx8mq-zii-ultra.dtsi b/arch/arm64/boot/dts/freescale/imx8mq-zii-ultra.dtsi index af99473ada04..32ce14936b01 100644 --- a/arch/arm64/boot/dts/freescale/imx8mq-zii-ultra.dtsi +++ b/arch/arm64/boot/dts/freescale/imx8mq-zii-ultra.dtsi @@ -88,9 +88,9 @@ regulator-name = "0V9_ARM"; regulator-min-microvolt = <900000>; regulator-max-microvolt = <1000000>; - gpios = <&gpio3 19 GPIO_ACTIVE_HIGH>; - states = <1000000 0x0 - 900000 0x1>; + gpios = <&gpio3 16 GPIO_ACTIVE_HIGH>; + states = <1000000 0x1 + 900000 0x0>; regulator-always-on; }; }; diff --git a/arch/arm64/boot/dts/freescale/imx8mq.dtsi b/arch/arm64/boot/dts/freescale/imx8mq.dtsi index 04115ca6bfb5..55a3d1c4bdf0 100644 --- a/arch/arm64/boot/dts/freescale/imx8mq.dtsi +++ b/arch/arm64/boot/dts/freescale/imx8mq.dtsi @@ -850,7 +850,7 @@ "fsl,imx7d-usdhc"; reg = <0x30b40000 0x10000>; interrupts = <GIC_SPI 22 IRQ_TYPE_LEVEL_HIGH>; - clocks = <&clk IMX8MQ_CLK_DUMMY>, + clocks = <&clk IMX8MQ_CLK_IPG_ROOT>, <&clk IMX8MQ_CLK_NAND_USDHC_BUS>, <&clk IMX8MQ_CLK_USDHC1_ROOT>; clock-names = "ipg", "ahb", "per"; @@ -867,7 +867,7 @@ "fsl,imx7d-usdhc"; reg = <0x30b50000 0x10000>; interrupts = <GIC_SPI 23 IRQ_TYPE_LEVEL_HIGH>; - clocks = <&clk IMX8MQ_CLK_DUMMY>, + clocks = <&clk IMX8MQ_CLK_IPG_ROOT>, <&clk IMX8MQ_CLK_NAND_USDHC_BUS>, <&clk IMX8MQ_CLK_USDHC2_ROOT>; clock-names = "ipg", "ahb", "per"; diff --git a/arch/arm64/boot/dts/marvell/armada-3720-turris-mox.dts b/arch/arm64/boot/dts/marvell/armada-3720-turris-mox.dts index d105986c6be1..5f350cc71a2f 100644 --- a/arch/arm64/boot/dts/marvell/armada-3720-turris-mox.dts +++ b/arch/arm64/boot/dts/marvell/armada-3720-turris-mox.dts @@ -60,11 +60,6 @@ gpio = <&gpiosb 0 GPIO_ACTIVE_HIGH>; }; - usb3_phy: usb3-phy { - compatible = "usb-nop-xceiv"; - vcc-supply = <&exp_usb3_vbus>; - }; - vsdc_reg: vsdc-reg { compatible = "regulator-gpio"; regulator-name = "vsdc"; @@ -255,10 +250,16 @@ status = "okay"; }; +&comphy2 { + connector { + compatible = "usb-a-connector"; + phy-supply = <&exp_usb3_vbus>; + }; +}; + &usb3 { status = "okay"; phys = <&comphy2 0>; - usb-phy = <&usb3_phy>; }; &mdio { diff --git a/arch/arm64/boot/dts/rockchip/rk3399-gru-kevin.dts b/arch/arm64/boot/dts/rockchip/rk3399-gru-kevin.dts index e152b0ca0290..b8066868a3fe 100644 --- a/arch/arm64/boot/dts/rockchip/rk3399-gru-kevin.dts +++ b/arch/arm64/boot/dts/rockchip/rk3399-gru-kevin.dts @@ -44,7 +44,7 @@ power-supply = <&pp3300_disp>; panel-timing { - clock-frequency = <266604720>; + clock-frequency = <266666667>; hactive = <2400>; hfront-porch = <48>; hback-porch = <84>; diff 
--git a/arch/arm64/boot/dts/rockchip/rk3399-hugsun-x99.dts b/arch/arm64/boot/dts/rockchip/rk3399-hugsun-x99.dts index 0d1f5f9a0de9..c133e8d64b2a 100644 --- a/arch/arm64/boot/dts/rockchip/rk3399-hugsun-x99.dts +++ b/arch/arm64/boot/dts/rockchip/rk3399-hugsun-x99.dts @@ -644,7 +644,7 @@ status = "okay"; u2phy0_host: host-port { - phy-supply = <&vcc5v0_host>; + phy-supply = <&vcc5v0_typec>; status = "okay"; }; @@ -712,7 +712,7 @@ &usbdrd_dwc3_0 { status = "okay"; - dr_mode = "otg"; + dr_mode = "host"; }; &usbdrd3_1 { diff --git a/arch/arm64/boot/dts/rockchip/rk3399-rockpro64.dts b/arch/arm64/boot/dts/rockchip/rk3399-rockpro64.dts index 0401d4ec1f45..e544deb61d28 100644 --- a/arch/arm64/boot/dts/rockchip/rk3399-rockpro64.dts +++ b/arch/arm64/boot/dts/rockchip/rk3399-rockpro64.dts @@ -173,7 +173,7 @@ regulator-always-on; regulator-boot-on; regulator-min-microvolt = <800000>; - regulator-max-microvolt = <1400000>; + regulator-max-microvolt = <1700000>; vin-supply = <&vcc5v0_sys>; }; }; @@ -247,8 +247,8 @@ rk808: pmic@1b { compatible = "rockchip,rk808"; reg = <0x1b>; - interrupt-parent = <&gpio1>; - interrupts = <21 IRQ_TYPE_LEVEL_LOW>; + interrupt-parent = <&gpio3>; + interrupts = <10 IRQ_TYPE_LEVEL_LOW>; #clock-cells = <1>; clock-output-names = "xin32k", "rk808-clkout2"; pinctrl-names = "default"; @@ -574,7 +574,7 @@ pmic { pmic_int_l: pmic-int-l { - rockchip,pins = <1 RK_PC5 RK_FUNC_GPIO &pcfg_pull_up>; + rockchip,pins = <3 RK_PB2 RK_FUNC_GPIO &pcfg_pull_up>; }; vsel1_gpio: vsel1-gpio { @@ -624,7 +624,6 @@ &sdmmc { bus-width = <4>; - cap-mmc-highspeed; cap-sd-highspeed; cd-gpios = <&gpio0 7 GPIO_ACTIVE_LOW>; disable-wp; @@ -636,8 +635,7 @@ &sdhci { bus-width = <8>; - mmc-hs400-1_8v; - mmc-hs400-enhanced-strobe; + mmc-hs200-1_8v; non-removable; status = "okay"; }; diff --git a/arch/arm64/configs/defconfig b/arch/arm64/configs/defconfig index 8e05c39eab08..c9a867ac32d4 100644 --- a/arch/arm64/configs/defconfig +++ b/arch/arm64/configs/defconfig @@ -723,7 +723,7 @@ CONFIG_TEGRA_IOMMU_SMMU=y CONFIG_ARM_SMMU=y CONFIG_ARM_SMMU_V3=y CONFIG_QCOM_IOMMU=y -CONFIG_REMOTEPROC=m +CONFIG_REMOTEPROC=y CONFIG_QCOM_Q6V5_MSS=m CONFIG_QCOM_Q6V5_PAS=m CONFIG_QCOM_SYSMON=m diff --git a/arch/arm64/crypto/Kconfig b/arch/arm64/crypto/Kconfig index 4922c4451e7c..b8eb0453123d 100644 --- a/arch/arm64/crypto/Kconfig +++ b/arch/arm64/crypto/Kconfig @@ -86,7 +86,7 @@ config CRYPTO_AES_ARM64_CE_CCM config CRYPTO_AES_ARM64_CE_BLK tristate "AES in ECB/CBC/CTR/XTS modes using ARMv8 Crypto Extensions" depends on KERNEL_MODE_NEON - select CRYPTO_BLKCIPHER + select CRYPTO_SKCIPHER select CRYPTO_AES_ARM64_CE select CRYPTO_AES_ARM64 select CRYPTO_SIMD @@ -94,7 +94,7 @@ config CRYPTO_AES_ARM64_CE_BLK config CRYPTO_AES_ARM64_NEON_BLK tristate "AES in ECB/CBC/CTR/XTS modes using NEON instructions" depends on KERNEL_MODE_NEON - select CRYPTO_BLKCIPHER + select CRYPTO_SKCIPHER select CRYPTO_AES_ARM64 select CRYPTO_LIB_AES select CRYPTO_SIMD @@ -102,8 +102,15 @@ config CRYPTO_AES_ARM64_NEON_BLK config CRYPTO_CHACHA20_NEON tristate "ChaCha20, XChaCha20, and XChaCha12 stream ciphers using NEON instructions" depends on KERNEL_MODE_NEON - select CRYPTO_BLKCIPHER - select CRYPTO_CHACHA20 + select CRYPTO_SKCIPHER + select CRYPTO_LIB_CHACHA_GENERIC + select CRYPTO_ARCH_HAVE_LIB_CHACHA + +config CRYPTO_POLY1305_NEON + tristate "Poly1305 hash function using scalar or NEON instructions" + depends on KERNEL_MODE_NEON + select CRYPTO_HASH + select CRYPTO_ARCH_HAVE_LIB_POLY1305 config CRYPTO_NHPOLY1305_NEON tristate "NHPoly1305 hash function using 
NEON instructions (for Adiantum)" @@ -113,7 +120,7 @@ config CRYPTO_NHPOLY1305_NEON config CRYPTO_AES_ARM64_BS tristate "AES in ECB/CBC/CTR/XTS modes using bit-sliced NEON algorithm" depends on KERNEL_MODE_NEON - select CRYPTO_BLKCIPHER + select CRYPTO_SKCIPHER select CRYPTO_AES_ARM64_NEON_BLK select CRYPTO_AES_ARM64 select CRYPTO_LIB_AES diff --git a/arch/arm64/crypto/Makefile b/arch/arm64/crypto/Makefile index 0435f2a0610e..d0901e610df3 100644 --- a/arch/arm64/crypto/Makefile +++ b/arch/arm64/crypto/Makefile @@ -50,6 +50,10 @@ sha512-arm64-y := sha512-glue.o sha512-core.o obj-$(CONFIG_CRYPTO_CHACHA20_NEON) += chacha-neon.o chacha-neon-y := chacha-neon-core.o chacha-neon-glue.o +obj-$(CONFIG_CRYPTO_POLY1305_NEON) += poly1305-neon.o +poly1305-neon-y := poly1305-core.o poly1305-glue.o +AFLAGS_poly1305-core.o += -Dpoly1305_init=poly1305_init_arm64 + obj-$(CONFIG_CRYPTO_NHPOLY1305_NEON) += nhpoly1305-neon.o nhpoly1305-neon-y := nh-neon-core.o nhpoly1305-neon-glue.o @@ -68,11 +72,15 @@ ifdef REGENERATE_ARM64_CRYPTO quiet_cmd_perlasm = PERLASM $@ cmd_perlasm = $(PERL) $(<) void $(@) +$(src)/poly1305-core.S_shipped: $(src)/poly1305-armv8.pl + $(call cmd,perlasm) + $(src)/sha256-core.S_shipped: $(src)/sha512-armv8.pl $(call cmd,perlasm) $(src)/sha512-core.S_shipped: $(src)/sha512-armv8.pl $(call cmd,perlasm) + endif -clean-files += sha256-core.S sha512-core.S +clean-files += poly1305-core.S sha256-core.S sha512-core.S diff --git a/arch/arm64/crypto/aes-neonbs-glue.c b/arch/arm64/crypto/aes-neonbs-glue.c index ea873b8904c4..e3e27349a9fe 100644 --- a/arch/arm64/crypto/aes-neonbs-glue.c +++ b/arch/arm64/crypto/aes-neonbs-glue.c @@ -384,7 +384,7 @@ static int __xts_crypt(struct skcipher_request *req, bool encrypt, goto xts_tail; kernel_neon_end(); - skcipher_walk_done(&walk, nbytes); + err = skcipher_walk_done(&walk, nbytes); } if (err || likely(!tail)) diff --git a/arch/arm64/crypto/chacha-neon-glue.c b/arch/arm64/crypto/chacha-neon-glue.c index 1495d2b18518..b08029d7bde6 100644 --- a/arch/arm64/crypto/chacha-neon-glue.c +++ b/arch/arm64/crypto/chacha-neon-glue.c @@ -1,5 +1,5 @@ /* - * ARM NEON accelerated ChaCha and XChaCha stream ciphers, + * ARM NEON and scalar accelerated ChaCha and XChaCha stream ciphers, * including ChaCha20 (RFC7539) * * Copyright (C) 2016 - 2017 Linaro, Ltd. 
<ard.biesheuvel@linaro.org> @@ -20,9 +20,10 @@ */ #include <crypto/algapi.h> -#include <crypto/chacha.h> +#include <crypto/internal/chacha.h> #include <crypto/internal/simd.h> #include <crypto/internal/skcipher.h> +#include <linux/jump_label.h> #include <linux/kernel.h> #include <linux/module.h> @@ -36,6 +37,8 @@ asmlinkage void chacha_4block_xor_neon(u32 *state, u8 *dst, const u8 *src, int nrounds, int bytes); asmlinkage void hchacha_block_neon(const u32 *state, u32 *out, int nrounds); +static __ro_after_init DEFINE_STATIC_KEY_FALSE(have_neon); + static void chacha_doneon(u32 *state, u8 *dst, const u8 *src, int bytes, int nrounds) { @@ -59,6 +62,37 @@ static void chacha_doneon(u32 *state, u8 *dst, const u8 *src, } } +void hchacha_block_arch(const u32 *state, u32 *stream, int nrounds) +{ + if (!static_branch_likely(&have_neon) || !crypto_simd_usable()) { + hchacha_block_generic(state, stream, nrounds); + } else { + kernel_neon_begin(); + hchacha_block_neon(state, stream, nrounds); + kernel_neon_end(); + } +} +EXPORT_SYMBOL(hchacha_block_arch); + +void chacha_init_arch(u32 *state, const u32 *key, const u8 *iv) +{ + chacha_init_generic(state, key, iv); +} +EXPORT_SYMBOL(chacha_init_arch); + +void chacha_crypt_arch(u32 *state, u8 *dst, const u8 *src, unsigned int bytes, + int nrounds) +{ + if (!static_branch_likely(&have_neon) || bytes <= CHACHA_BLOCK_SIZE || + !crypto_simd_usable()) + return chacha_crypt_generic(state, dst, src, bytes, nrounds); + + kernel_neon_begin(); + chacha_doneon(state, dst, src, bytes, nrounds); + kernel_neon_end(); +} +EXPORT_SYMBOL(chacha_crypt_arch); + static int chacha_neon_stream_xor(struct skcipher_request *req, const struct chacha_ctx *ctx, const u8 *iv) { @@ -68,7 +102,7 @@ static int chacha_neon_stream_xor(struct skcipher_request *req, err = skcipher_walk_virt(&walk, req, false); - crypto_chacha_init(state, ctx, iv); + chacha_init_generic(state, ctx->key, iv); while (walk.nbytes > 0) { unsigned int nbytes = walk.nbytes; @@ -76,10 +110,17 @@ static int chacha_neon_stream_xor(struct skcipher_request *req, if (nbytes < walk.total) nbytes = rounddown(nbytes, walk.stride); - kernel_neon_begin(); - chacha_doneon(state, walk.dst.virt.addr, walk.src.virt.addr, - nbytes, ctx->nrounds); - kernel_neon_end(); + if (!static_branch_likely(&have_neon) || + !crypto_simd_usable()) { + chacha_crypt_generic(state, walk.dst.virt.addr, + walk.src.virt.addr, nbytes, + ctx->nrounds); + } else { + kernel_neon_begin(); + chacha_doneon(state, walk.dst.virt.addr, + walk.src.virt.addr, nbytes, ctx->nrounds); + kernel_neon_end(); + } err = skcipher_walk_done(&walk, walk.nbytes - nbytes); } @@ -91,9 +132,6 @@ static int chacha_neon(struct skcipher_request *req) struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); struct chacha_ctx *ctx = crypto_skcipher_ctx(tfm); - if (req->cryptlen <= CHACHA_BLOCK_SIZE || !crypto_simd_usable()) - return crypto_chacha_crypt(req); - return chacha_neon_stream_xor(req, ctx, req->iv); } @@ -105,14 +143,8 @@ static int xchacha_neon(struct skcipher_request *req) u32 state[16]; u8 real_iv[16]; - if (req->cryptlen <= CHACHA_BLOCK_SIZE || !crypto_simd_usable()) - return crypto_xchacha_crypt(req); - - crypto_chacha_init(state, ctx, req->iv); - - kernel_neon_begin(); - hchacha_block_neon(state, subctx.key, ctx->nrounds); - kernel_neon_end(); + chacha_init_generic(state, ctx->key, req->iv); + hchacha_block_arch(state, subctx.key, ctx->nrounds); subctx.nrounds = ctx->nrounds; memcpy(&real_iv[0], req->iv + 24, 8); @@ -134,7 +166,7 @@ static struct skcipher_alg 
algs[] = { .ivsize = CHACHA_IV_SIZE, .chunksize = CHACHA_BLOCK_SIZE, .walksize = 5 * CHACHA_BLOCK_SIZE, - .setkey = crypto_chacha20_setkey, + .setkey = chacha20_setkey, .encrypt = chacha_neon, .decrypt = chacha_neon, }, { @@ -150,7 +182,7 @@ static struct skcipher_alg algs[] = { .ivsize = XCHACHA_IV_SIZE, .chunksize = CHACHA_BLOCK_SIZE, .walksize = 5 * CHACHA_BLOCK_SIZE, - .setkey = crypto_chacha20_setkey, + .setkey = chacha20_setkey, .encrypt = xchacha_neon, .decrypt = xchacha_neon, }, { @@ -166,7 +198,7 @@ static struct skcipher_alg algs[] = { .ivsize = XCHACHA_IV_SIZE, .chunksize = CHACHA_BLOCK_SIZE, .walksize = 5 * CHACHA_BLOCK_SIZE, - .setkey = crypto_chacha12_setkey, + .setkey = chacha12_setkey, .encrypt = xchacha_neon, .decrypt = xchacha_neon, } @@ -175,14 +207,17 @@ static struct skcipher_alg algs[] = { static int __init chacha_simd_mod_init(void) { if (!cpu_have_named_feature(ASIMD)) - return -ENODEV; + return 0; + + static_branch_enable(&have_neon); return crypto_register_skciphers(algs, ARRAY_SIZE(algs)); } static void __exit chacha_simd_mod_fini(void) { - crypto_unregister_skciphers(algs, ARRAY_SIZE(algs)); + if (cpu_have_named_feature(ASIMD)) + crypto_unregister_skciphers(algs, ARRAY_SIZE(algs)); } module_init(chacha_simd_mod_init); diff --git a/arch/arm64/crypto/ghash-ce-core.S b/arch/arm64/crypto/ghash-ce-core.S index 410e8afcf5a7..a791c4adf8e6 100644 --- a/arch/arm64/crypto/ghash-ce-core.S +++ b/arch/arm64/crypto/ghash-ce-core.S @@ -13,8 +13,8 @@ T1 .req v2 T2 .req v3 MASK .req v4 - XL .req v5 - XM .req v6 + XM .req v5 + XL .req v6 XH .req v7 IN1 .req v7 @@ -358,20 +358,37 @@ ENTRY(pmull_ghash_update_p8) __pmull_ghash p8 ENDPROC(pmull_ghash_update_p8) - KS0 .req v12 - KS1 .req v13 - INP0 .req v14 - INP1 .req v15 - - .macro load_round_keys, rounds, rk - cmp \rounds, #12 - blo 2222f /* 128 bits */ - beq 1111f /* 192 bits */ - ld1 {v17.4s-v18.4s}, [\rk], #32 -1111: ld1 {v19.4s-v20.4s}, [\rk], #32 -2222: ld1 {v21.4s-v24.4s}, [\rk], #64 - ld1 {v25.4s-v28.4s}, [\rk], #64 - ld1 {v29.4s-v31.4s}, [\rk] + KS0 .req v8 + KS1 .req v9 + KS2 .req v10 + KS3 .req v11 + + INP0 .req v21 + INP1 .req v22 + INP2 .req v23 + INP3 .req v24 + + K0 .req v25 + K1 .req v26 + K2 .req v27 + K3 .req v28 + K4 .req v12 + K5 .req v13 + K6 .req v4 + K7 .req v5 + K8 .req v14 + K9 .req v15 + KK .req v29 + KL .req v30 + KM .req v31 + + .macro load_round_keys, rounds, rk, tmp + add \tmp, \rk, #64 + ld1 {K0.4s-K3.4s}, [\rk] + ld1 {K4.4s-K5.4s}, [\tmp] + add \tmp, \rk, \rounds, lsl #4 + sub \tmp, \tmp, #32 + ld1 {KK.4s-KM.4s}, [\tmp] .endm .macro enc_round, state, key @@ -379,197 +396,367 @@ ENDPROC(pmull_ghash_update_p8) aesmc \state\().16b, \state\().16b .endm - .macro enc_block, state, rounds - cmp \rounds, #12 - b.lo 2222f /* 128 bits */ - b.eq 1111f /* 192 bits */ - enc_round \state, v17 - enc_round \state, v18 -1111: enc_round \state, v19 - enc_round \state, v20 -2222: .irp key, v21, v22, v23, v24, v25, v26, v27, v28, v29 + .macro enc_qround, s0, s1, s2, s3, key + enc_round \s0, \key + enc_round \s1, \key + enc_round \s2, \key + enc_round \s3, \key + .endm + + .macro enc_block, state, rounds, rk, tmp + add \tmp, \rk, #96 + ld1 {K6.4s-K7.4s}, [\tmp], #32 + .irp key, K0, K1, K2, K3, K4 K5 enc_round \state, \key .endr - aese \state\().16b, v30.16b - eor \state\().16b, \state\().16b, v31.16b + + tbnz \rounds, #2, .Lnot128_\@ +.Lout256_\@: + enc_round \state, K6 + enc_round \state, K7 + +.Lout192_\@: + enc_round \state, KK + aese \state\().16b, KL.16b + eor \state\().16b, \state\().16b, KM.16b + + .subsection 1 
+.Lnot128_\@: + ld1 {K8.4s-K9.4s}, [\tmp], #32 + enc_round \state, K6 + enc_round \state, K7 + ld1 {K6.4s-K7.4s}, [\tmp] + enc_round \state, K8 + enc_round \state, K9 + tbz \rounds, #1, .Lout192_\@ + b .Lout256_\@ + .previous .endm + .align 6 .macro pmull_gcm_do_crypt, enc - ld1 {SHASH.2d}, [x4], #16 - ld1 {HH.2d}, [x4] - ld1 {XL.2d}, [x1] - ldr x8, [x5, #8] // load lower counter + stp x29, x30, [sp, #-32]! + mov x29, sp + str x19, [sp, #24] + + load_round_keys x7, x6, x8 + + ld1 {SHASH.2d}, [x3], #16 + ld1 {HH.2d-HH4.2d}, [x3] - movi MASK.16b, #0xe1 trn1 SHASH2.2d, SHASH.2d, HH.2d trn2 T1.2d, SHASH.2d, HH.2d -CPU_LE( rev x8, x8 ) - shl MASK.2d, MASK.2d, #57 eor SHASH2.16b, SHASH2.16b, T1.16b - .if \enc == 1 - ldr x10, [sp] - ld1 {KS0.16b-KS1.16b}, [x10] - .endif + trn1 HH34.2d, HH3.2d, HH4.2d + trn2 T1.2d, HH3.2d, HH4.2d + eor HH34.16b, HH34.16b, T1.16b - cbnz x6, 4f + ld1 {XL.2d}, [x4] -0: ld1 {INP0.16b-INP1.16b}, [x3], #32 + cbz x0, 3f // tag only? - rev x9, x8 - add x11, x8, #1 - add x8, x8, #2 + ldr w8, [x5, #12] // load lower counter +CPU_LE( rev w8, w8 ) - .if \enc == 1 - eor INP0.16b, INP0.16b, KS0.16b // encrypt input - eor INP1.16b, INP1.16b, KS1.16b +0: mov w9, #4 // max blocks per round + add x10, x0, #0xf + lsr x10, x10, #4 // remaining blocks + + subs x0, x0, #64 + csel w9, w10, w9, mi + add w8, w8, w9 + + bmi 1f + ld1 {INP0.16b-INP3.16b}, [x2], #64 + .subsection 1 + /* + * Populate the four input registers right to left with up to 63 bytes + * of data, using overlapping loads to avoid branches. + * + * INP0 INP1 INP2 INP3 + * 1 byte | | | |x | + * 16 bytes | | | |xxxxxxxx| + * 17 bytes | | |xxxxxxxx|x | + * 47 bytes | |xxxxxxxx|xxxxxxxx|xxxxxxx | + * etc etc + * + * Note that this code may read up to 15 bytes before the start of + * the input. It is up to the calling code to ensure this is safe if + * this happens in the first iteration of the loop (i.e., when the + * input size is < 16 bytes) + */ +1: mov x15, #16 + ands x19, x0, #0xf + csel x19, x19, x15, ne + adr_l x17, .Lpermute_table + 16 + + sub x11, x15, x19 + add x12, x17, x11 + sub x17, x17, x11 + ld1 {T1.16b}, [x12] + sub x10, x1, x11 + sub x11, x2, x11 + + cmp x0, #-16 + csel x14, x15, xzr, gt + cmp x0, #-32 + csel x15, x15, xzr, gt + cmp x0, #-48 + csel x16, x19, xzr, gt + csel x1, x1, x10, gt + csel x2, x2, x11, gt + + ld1 {INP0.16b}, [x2], x14 + ld1 {INP1.16b}, [x2], x15 + ld1 {INP2.16b}, [x2], x16 + ld1 {INP3.16b}, [x2] + tbl INP3.16b, {INP3.16b}, T1.16b + b 2f + .previous + +2: .if \enc == 0 + bl pmull_gcm_ghash_4x .endif - ld1 {KS0.8b}, [x5] // load upper counter - rev x11, x11 - sub w0, w0, #2 - mov KS1.8b, KS0.8b - ins KS0.d[1], x9 // set lower counter - ins KS1.d[1], x11 + bl pmull_gcm_enc_4x - rev64 T1.16b, INP1.16b + tbnz x0, #63, 6f + st1 {INP0.16b-INP3.16b}, [x1], #64 + .if \enc == 1 + bl pmull_gcm_ghash_4x + .endif + bne 0b - cmp w7, #12 - b.ge 2f // AES-192/256? +3: ldp x19, x10, [sp, #24] + cbz x10, 5f // output tag? 
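Note: the comment above describes how the assembly gathers a ragged tail with overlapping loads. The C glue code later in this patch uses the complementary trick: a final fragment shorter than one AES block is right-aligned into a 16-byte stack buffer so the core routine only ever consumes whole blocks. A standalone sketch of that idea follows; process_block() is a stand-in name, not a function from the patch.

/* Right-align a partial final block in a zeroed 16-byte buffer,
 * mirroring the "memcpy(buf + sizeof(buf) - nbytes, src, nbytes)"
 * pattern used by the GCM glue code in this patch.
 */
#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define BLOCK_SIZE 16

static void process_block(const uint8_t block[BLOCK_SIZE])
{
	/* stand-in for the real 16-byte block routine */
	for (int i = 0; i < BLOCK_SIZE; i++)
		printf("%02x", block[i]);
	printf("\n");
}

static void process_tail(const uint8_t *src, size_t nbytes)
{
	uint8_t buf[BLOCK_SIZE] = { 0 };

	/* nbytes < BLOCK_SIZE: the data ends up flush with the end of
	 * the block, matching the layout the overlapping loads build.
	 */
	memcpy(buf + sizeof(buf) - nbytes, src, nbytes);
	process_block(buf);
}

int main(void)
{
	process_tail((const uint8_t *)"tail", 4);
	return 0;
}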
-1: enc_round KS0, v21 - ext IN1.16b, T1.16b, T1.16b, #8 + ld1 {INP3.16b}, [x10] // load lengths[] + mov w9, #1 + bl pmull_gcm_ghash_4x - enc_round KS1, v21 - pmull2 XH2.1q, SHASH.2d, IN1.2d // a1 * b1 + mov w11, #(0x1 << 24) // BE '1U' + ld1 {KS0.16b}, [x5] + mov KS0.s[3], w11 - enc_round KS0, v22 - eor T1.16b, T1.16b, IN1.16b + enc_block KS0, x7, x6, x12 - enc_round KS1, v22 - pmull XL2.1q, SHASH.1d, IN1.1d // a0 * b0 + ext XL.16b, XL.16b, XL.16b, #8 + rev64 XL.16b, XL.16b + eor XL.16b, XL.16b, KS0.16b + st1 {XL.16b}, [x10] // store tag - enc_round KS0, v23 - pmull XM2.1q, SHASH2.1d, T1.1d // (a1 + a0)(b1 + b0) +4: ldp x29, x30, [sp], #32 + ret - enc_round KS1, v23 - rev64 T1.16b, INP0.16b - ext T2.16b, XL.16b, XL.16b, #8 +5: +CPU_LE( rev w8, w8 ) + str w8, [x5, #12] // store lower counter + st1 {XL.2d}, [x4] + b 4b + +6: ld1 {T1.16b-T2.16b}, [x17], #32 // permute vectors + sub x17, x17, x19, lsl #1 + + cmp w9, #1 + beq 7f + .subsection 1 +7: ld1 {INP2.16b}, [x1] + tbx INP2.16b, {INP3.16b}, T1.16b + mov INP3.16b, INP2.16b + b 8f + .previous + + st1 {INP0.16b}, [x1], x14 + st1 {INP1.16b}, [x1], x15 + st1 {INP2.16b}, [x1], x16 + tbl INP3.16b, {INP3.16b}, T1.16b + tbx INP3.16b, {INP2.16b}, T2.16b +8: st1 {INP3.16b}, [x1] - enc_round KS0, v24 - ext IN1.16b, T1.16b, T1.16b, #8 - eor T1.16b, T1.16b, T2.16b + .if \enc == 1 + ld1 {T1.16b}, [x17] + tbl INP3.16b, {INP3.16b}, T1.16b // clear non-data bits + bl pmull_gcm_ghash_4x + .endif + b 3b + .endm - enc_round KS1, v24 - eor XL.16b, XL.16b, IN1.16b + /* + * void pmull_gcm_encrypt(int blocks, u8 dst[], const u8 src[], + * struct ghash_key const *k, u64 dg[], u8 ctr[], + * int rounds, u8 tag) + */ +ENTRY(pmull_gcm_encrypt) + pmull_gcm_do_crypt 1 +ENDPROC(pmull_gcm_encrypt) - enc_round KS0, v25 - eor T1.16b, T1.16b, XL.16b + /* + * void pmull_gcm_decrypt(int blocks, u8 dst[], const u8 src[], + * struct ghash_key const *k, u64 dg[], u8 ctr[], + * int rounds, u8 tag) + */ +ENTRY(pmull_gcm_decrypt) + pmull_gcm_do_crypt 0 +ENDPROC(pmull_gcm_decrypt) - enc_round KS1, v25 - pmull2 XH.1q, HH.2d, XL.2d // a1 * b1 +pmull_gcm_ghash_4x: + movi MASK.16b, #0xe1 + shl MASK.2d, MASK.2d, #57 - enc_round KS0, v26 - pmull XL.1q, HH.1d, XL.1d // a0 * b0 + rev64 T1.16b, INP0.16b + rev64 T2.16b, INP1.16b + rev64 TT3.16b, INP2.16b + rev64 TT4.16b, INP3.16b - enc_round KS1, v26 - pmull2 XM.1q, SHASH2.2d, T1.2d // (a1 + a0)(b1 + b0) + ext XL.16b, XL.16b, XL.16b, #8 - enc_round KS0, v27 - eor XL.16b, XL.16b, XL2.16b - eor XH.16b, XH.16b, XH2.16b + tbz w9, #2, 0f // <4 blocks? + .subsection 1 +0: movi XH2.16b, #0 + movi XM2.16b, #0 + movi XL2.16b, #0 - enc_round KS1, v27 - eor XM.16b, XM.16b, XM2.16b - ext T1.16b, XL.16b, XH.16b, #8 + tbz w9, #0, 1f // 2 blocks? + tbz w9, #1, 2f // 1 block? 
- enc_round KS0, v28 - eor T2.16b, XL.16b, XH.16b - eor XM.16b, XM.16b, T1.16b + eor T2.16b, T2.16b, XL.16b + ext T1.16b, T2.16b, T2.16b, #8 + b .Lgh3 - enc_round KS1, v28 - eor XM.16b, XM.16b, T2.16b +1: eor TT3.16b, TT3.16b, XL.16b + ext T2.16b, TT3.16b, TT3.16b, #8 + b .Lgh2 - enc_round KS0, v29 - pmull T2.1q, XL.1d, MASK.1d +2: eor TT4.16b, TT4.16b, XL.16b + ext IN1.16b, TT4.16b, TT4.16b, #8 + b .Lgh1 + .previous - enc_round KS1, v29 - mov XH.d[0], XM.d[1] - mov XM.d[1], XL.d[0] + eor T1.16b, T1.16b, XL.16b + ext IN1.16b, T1.16b, T1.16b, #8 - aese KS0.16b, v30.16b - eor XL.16b, XM.16b, T2.16b + pmull2 XH2.1q, HH4.2d, IN1.2d // a1 * b1 + eor T1.16b, T1.16b, IN1.16b + pmull XL2.1q, HH4.1d, IN1.1d // a0 * b0 + pmull2 XM2.1q, HH34.2d, T1.2d // (a1 + a0)(b1 + b0) - aese KS1.16b, v30.16b - ext T2.16b, XL.16b, XL.16b, #8 + ext T1.16b, T2.16b, T2.16b, #8 +.Lgh3: eor T2.16b, T2.16b, T1.16b + pmull2 XH.1q, HH3.2d, T1.2d // a1 * b1 + pmull XL.1q, HH3.1d, T1.1d // a0 * b0 + pmull XM.1q, HH34.1d, T2.1d // (a1 + a0)(b1 + b0) - eor KS0.16b, KS0.16b, v31.16b - pmull XL.1q, XL.1d, MASK.1d - eor T2.16b, T2.16b, XH.16b + eor XH2.16b, XH2.16b, XH.16b + eor XL2.16b, XL2.16b, XL.16b + eor XM2.16b, XM2.16b, XM.16b - eor KS1.16b, KS1.16b, v31.16b - eor XL.16b, XL.16b, T2.16b + ext T2.16b, TT3.16b, TT3.16b, #8 +.Lgh2: eor TT3.16b, TT3.16b, T2.16b + pmull2 XH.1q, HH.2d, T2.2d // a1 * b1 + pmull XL.1q, HH.1d, T2.1d // a0 * b0 + pmull2 XM.1q, SHASH2.2d, TT3.2d // (a1 + a0)(b1 + b0) - .if \enc == 0 - eor INP0.16b, INP0.16b, KS0.16b - eor INP1.16b, INP1.16b, KS1.16b - .endif + eor XH2.16b, XH2.16b, XH.16b + eor XL2.16b, XL2.16b, XL.16b + eor XM2.16b, XM2.16b, XM.16b - st1 {INP0.16b-INP1.16b}, [x2], #32 + ext IN1.16b, TT4.16b, TT4.16b, #8 +.Lgh1: eor TT4.16b, TT4.16b, IN1.16b + pmull XL.1q, SHASH.1d, IN1.1d // a0 * b0 + pmull2 XH.1q, SHASH.2d, IN1.2d // a1 * b1 + pmull XM.1q, SHASH2.1d, TT4.1d // (a1 + a0)(b1 + b0) - cbnz w0, 0b + eor XH.16b, XH.16b, XH2.16b + eor XL.16b, XL.16b, XL2.16b + eor XM.16b, XM.16b, XM2.16b -CPU_LE( rev x8, x8 ) - st1 {XL.2d}, [x1] - str x8, [x5, #8] // store lower counter + eor T2.16b, XL.16b, XH.16b + ext T1.16b, XL.16b, XH.16b, #8 + eor XM.16b, XM.16b, T2.16b - .if \enc == 1 - st1 {KS0.16b-KS1.16b}, [x10] - .endif + __pmull_reduce_p64 + + eor T2.16b, T2.16b, XH.16b + eor XL.16b, XL.16b, T2.16b ret +ENDPROC(pmull_gcm_ghash_4x) + +pmull_gcm_enc_4x: + ld1 {KS0.16b}, [x5] // load upper counter + sub w10, w8, #4 + sub w11, w8, #3 + sub w12, w8, #2 + sub w13, w8, #1 + rev w10, w10 + rev w11, w11 + rev w12, w12 + rev w13, w13 + mov KS1.16b, KS0.16b + mov KS2.16b, KS0.16b + mov KS3.16b, KS0.16b + ins KS0.s[3], w10 // set lower counter + ins KS1.s[3], w11 + ins KS2.s[3], w12 + ins KS3.s[3], w13 + + add x10, x6, #96 // round key pointer + ld1 {K6.4s-K7.4s}, [x10], #32 + .irp key, K0, K1, K2, K3, K4, K5 + enc_qround KS0, KS1, KS2, KS3, \key + .endr -2: b.eq 3f // AES-192? 
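Note: the pmull_gcm_ghash_4x routine above folds four blocks into the GHASH state per iteration by multiplying with precomputed key powers (the SHASH/HH/HH3/HH4 tables) instead of chaining four dependent multiplies: Y' = (Y xor X1)*H^4 xor X2*H^3 xor X3*H^2 xor X4*H. The sketch below spells out that identity with the kernel's generic gf128mul helpers; it is my illustration of the math, not code from this patch, and the h_pow[] ordering (H^1 in h_pow[0] up to H^4 in h_pow[3]) is an assumption of the sketch.

/* Aggregated GHASH over four blocks using precomputed key powers.
 * gf128mul_lle(a, b) computes a = a * b in GHASH's bit ordering.
 */
#include <crypto/b128ops.h>
#include <crypto/gf128mul.h>

static void ghash_4blocks(be128 *y, const be128 h_pow[4], const be128 x[4])
{
	be128 t1 = *y, t2 = x[1], t3 = x[2], t4 = x[3];

	be128_xor(&t1, &t1, &x[0]);	/* (Y ^ X1)  */
	gf128mul_lle(&t1, &h_pow[3]);	/* ... * H^4 */
	gf128mul_lle(&t2, &h_pow[2]);	/* X2  * H^3 */
	gf128mul_lle(&t3, &h_pow[1]);	/* X3  * H^2 */
	gf128mul_lle(&t4, &h_pow[0]);	/* X4  * H   */

	be128_xor(y, &t1, &t2);
	be128_xor(y, y, &t3);
	be128_xor(y, y, &t4);
}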
- enc_round KS0, v17 - enc_round KS1, v17 - enc_round KS0, v18 - enc_round KS1, v18 -3: enc_round KS0, v19 - enc_round KS1, v19 - enc_round KS0, v20 - enc_round KS1, v20 - b 1b + tbnz x7, #2, .Lnot128 + .subsection 1 +.Lnot128: + ld1 {K8.4s-K9.4s}, [x10], #32 + .irp key, K6, K7 + enc_qround KS0, KS1, KS2, KS3, \key + .endr + ld1 {K6.4s-K7.4s}, [x10] + .irp key, K8, K9 + enc_qround KS0, KS1, KS2, KS3, \key + .endr + tbz x7, #1, .Lout192 + b .Lout256 + .previous -4: load_round_keys w7, x6 - b 0b - .endm +.Lout256: + .irp key, K6, K7 + enc_qround KS0, KS1, KS2, KS3, \key + .endr - /* - * void pmull_gcm_encrypt(int blocks, u64 dg[], u8 dst[], const u8 src[], - * struct ghash_key const *k, u8 ctr[], - * int rounds, u8 ks[]) - */ -ENTRY(pmull_gcm_encrypt) - pmull_gcm_do_crypt 1 -ENDPROC(pmull_gcm_encrypt) +.Lout192: + enc_qround KS0, KS1, KS2, KS3, KK - /* - * void pmull_gcm_decrypt(int blocks, u64 dg[], u8 dst[], const u8 src[], - * struct ghash_key const *k, u8 ctr[], - * int rounds) - */ -ENTRY(pmull_gcm_decrypt) - pmull_gcm_do_crypt 0 -ENDPROC(pmull_gcm_decrypt) + aese KS0.16b, KL.16b + aese KS1.16b, KL.16b + aese KS2.16b, KL.16b + aese KS3.16b, KL.16b + + eor KS0.16b, KS0.16b, KM.16b + eor KS1.16b, KS1.16b, KM.16b + eor KS2.16b, KS2.16b, KM.16b + eor KS3.16b, KS3.16b, KM.16b + + eor INP0.16b, INP0.16b, KS0.16b + eor INP1.16b, INP1.16b, KS1.16b + eor INP2.16b, INP2.16b, KS2.16b + eor INP3.16b, INP3.16b, KS3.16b - /* - * void pmull_gcm_encrypt_block(u8 dst[], u8 src[], u8 rk[], int rounds) - */ -ENTRY(pmull_gcm_encrypt_block) - cbz x2, 0f - load_round_keys w3, x2 -0: ld1 {v0.16b}, [x1] - enc_block v0, w3 - st1 {v0.16b}, [x0] ret -ENDPROC(pmull_gcm_encrypt_block) +ENDPROC(pmull_gcm_enc_4x) + + .section ".rodata", "a" + .align 6 +.Lpermute_table: + .byte 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff + .byte 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff + .byte 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7 + .byte 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf + .byte 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff + .byte 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff + .byte 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7 + .byte 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf + .previous diff --git a/arch/arm64/crypto/ghash-ce-glue.c b/arch/arm64/crypto/ghash-ce-glue.c index 70b1469783f9..522cf004ce65 100644 --- a/arch/arm64/crypto/ghash-ce-glue.c +++ b/arch/arm64/crypto/ghash-ce-glue.c @@ -58,17 +58,15 @@ asmlinkage void pmull_ghash_update_p8(int blocks, u64 dg[], const char *src, struct ghash_key const *k, const char *head); -asmlinkage void pmull_gcm_encrypt(int blocks, u64 dg[], u8 dst[], - const u8 src[], struct ghash_key const *k, +asmlinkage void pmull_gcm_encrypt(int bytes, u8 dst[], const u8 src[], + struct ghash_key const *k, u64 dg[], u8 ctr[], u32 const rk[], int rounds, - u8 ks[]); + u8 tag[]); -asmlinkage void pmull_gcm_decrypt(int blocks, u64 dg[], u8 dst[], - const u8 src[], struct ghash_key const *k, - u8 ctr[], u32 const rk[], int rounds); - -asmlinkage void pmull_gcm_encrypt_block(u8 dst[], u8 const src[], - u32 const rk[], int rounds); +asmlinkage void pmull_gcm_decrypt(int bytes, u8 dst[], const u8 src[], + struct ghash_key const *k, u64 dg[], + u8 ctr[], u32 const rk[], int rounds, + u8 tag[]); static int ghash_init(struct shash_desc *desc) { @@ -85,7 +83,7 @@ static void ghash_do_update(int blocks, u64 dg[], const char *src, struct ghash_key const *k, const char *head)) { - if (likely(crypto_simd_usable())) { + if (likely(crypto_simd_usable() && simd_update)) { kernel_neon_begin(); simd_update(blocks, 
dg, src, key, head); kernel_neon_end(); @@ -398,136 +396,112 @@ static void gcm_calculate_auth_mac(struct aead_request *req, u64 dg[]) } } -static void gcm_final(struct aead_request *req, struct gcm_aes_ctx *ctx, - u64 dg[], u8 tag[], int cryptlen) -{ - u8 mac[AES_BLOCK_SIZE]; - u128 lengths; - - lengths.a = cpu_to_be64(req->assoclen * 8); - lengths.b = cpu_to_be64(cryptlen * 8); - - ghash_do_update(1, dg, (void *)&lengths, &ctx->ghash_key, NULL, - pmull_ghash_update_p64); - - put_unaligned_be64(dg[1], mac); - put_unaligned_be64(dg[0], mac + 8); - - crypto_xor(tag, mac, AES_BLOCK_SIZE); -} - static int gcm_encrypt(struct aead_request *req) { struct crypto_aead *aead = crypto_aead_reqtfm(req); struct gcm_aes_ctx *ctx = crypto_aead_ctx(aead); + int nrounds = num_rounds(&ctx->aes_key); struct skcipher_walk walk; + u8 buf[AES_BLOCK_SIZE]; u8 iv[AES_BLOCK_SIZE]; - u8 ks[2 * AES_BLOCK_SIZE]; - u8 tag[AES_BLOCK_SIZE]; u64 dg[2] = {}; - int nrounds = num_rounds(&ctx->aes_key); + u128 lengths; + u8 *tag; int err; + lengths.a = cpu_to_be64(req->assoclen * 8); + lengths.b = cpu_to_be64(req->cryptlen * 8); + if (req->assoclen) gcm_calculate_auth_mac(req, dg); memcpy(iv, req->iv, GCM_IV_SIZE); - put_unaligned_be32(1, iv + GCM_IV_SIZE); + put_unaligned_be32(2, iv + GCM_IV_SIZE); err = skcipher_walk_aead_encrypt(&walk, req, false); - if (likely(crypto_simd_usable() && walk.total >= 2 * AES_BLOCK_SIZE)) { - u32 const *rk = NULL; - - kernel_neon_begin(); - pmull_gcm_encrypt_block(tag, iv, ctx->aes_key.key_enc, nrounds); - put_unaligned_be32(2, iv + GCM_IV_SIZE); - pmull_gcm_encrypt_block(ks, iv, NULL, nrounds); - put_unaligned_be32(3, iv + GCM_IV_SIZE); - pmull_gcm_encrypt_block(ks + AES_BLOCK_SIZE, iv, NULL, nrounds); - put_unaligned_be32(4, iv + GCM_IV_SIZE); - + if (likely(crypto_simd_usable())) { do { - int blocks = walk.nbytes / (2 * AES_BLOCK_SIZE) * 2; + const u8 *src = walk.src.virt.addr; + u8 *dst = walk.dst.virt.addr; + int nbytes = walk.nbytes; + + tag = (u8 *)&lengths; - if (rk) - kernel_neon_begin(); + if (unlikely(nbytes > 0 && nbytes < AES_BLOCK_SIZE)) { + src = dst = memcpy(buf + sizeof(buf) - nbytes, + src, nbytes); + } else if (nbytes < walk.total) { + nbytes &= ~(AES_BLOCK_SIZE - 1); + tag = NULL; + } - pmull_gcm_encrypt(blocks, dg, walk.dst.virt.addr, - walk.src.virt.addr, &ctx->ghash_key, - iv, rk, nrounds, ks); + kernel_neon_begin(); + pmull_gcm_encrypt(nbytes, dst, src, &ctx->ghash_key, dg, + iv, ctx->aes_key.key_enc, nrounds, + tag); kernel_neon_end(); - err = skcipher_walk_done(&walk, - walk.nbytes % (2 * AES_BLOCK_SIZE)); + if (unlikely(!nbytes)) + break; - rk = ctx->aes_key.key_enc; - } while (walk.nbytes >= 2 * AES_BLOCK_SIZE); - } else { - aes_encrypt(&ctx->aes_key, tag, iv); - put_unaligned_be32(2, iv + GCM_IV_SIZE); + if (unlikely(nbytes > 0 && nbytes < AES_BLOCK_SIZE)) + memcpy(walk.dst.virt.addr, + buf + sizeof(buf) - nbytes, nbytes); - while (walk.nbytes >= (2 * AES_BLOCK_SIZE)) { - const int blocks = - walk.nbytes / (2 * AES_BLOCK_SIZE) * 2; + err = skcipher_walk_done(&walk, walk.nbytes - nbytes); + } while (walk.nbytes); + } else { + while (walk.nbytes >= AES_BLOCK_SIZE) { + int blocks = walk.nbytes / AES_BLOCK_SIZE; + const u8 *src = walk.src.virt.addr; u8 *dst = walk.dst.virt.addr; - u8 *src = walk.src.virt.addr; int remaining = blocks; do { - aes_encrypt(&ctx->aes_key, ks, iv); - crypto_xor_cpy(dst, src, ks, AES_BLOCK_SIZE); + aes_encrypt(&ctx->aes_key, buf, iv); + crypto_xor_cpy(dst, src, buf, AES_BLOCK_SIZE); crypto_inc(iv, AES_BLOCK_SIZE); dst += AES_BLOCK_SIZE; 
src += AES_BLOCK_SIZE; } while (--remaining > 0); - ghash_do_update(blocks, dg, - walk.dst.virt.addr, &ctx->ghash_key, - NULL, pmull_ghash_update_p64); + ghash_do_update(blocks, dg, walk.dst.virt.addr, + &ctx->ghash_key, NULL, NULL); err = skcipher_walk_done(&walk, - walk.nbytes % (2 * AES_BLOCK_SIZE)); - } - if (walk.nbytes) { - aes_encrypt(&ctx->aes_key, ks, iv); - if (walk.nbytes > AES_BLOCK_SIZE) { - crypto_inc(iv, AES_BLOCK_SIZE); - aes_encrypt(&ctx->aes_key, ks + AES_BLOCK_SIZE, iv); - } + walk.nbytes % AES_BLOCK_SIZE); } - } - /* handle the tail */ - if (walk.nbytes) { - u8 buf[GHASH_BLOCK_SIZE]; - unsigned int nbytes = walk.nbytes; - u8 *dst = walk.dst.virt.addr; - u8 *head = NULL; + /* handle the tail */ + if (walk.nbytes) { + aes_encrypt(&ctx->aes_key, buf, iv); - crypto_xor_cpy(walk.dst.virt.addr, walk.src.virt.addr, ks, - walk.nbytes); + crypto_xor_cpy(walk.dst.virt.addr, walk.src.virt.addr, + buf, walk.nbytes); - if (walk.nbytes > GHASH_BLOCK_SIZE) { - head = dst; - dst += GHASH_BLOCK_SIZE; - nbytes %= GHASH_BLOCK_SIZE; + memcpy(buf, walk.dst.virt.addr, walk.nbytes); + memset(buf + walk.nbytes, 0, sizeof(buf) - walk.nbytes); } - memcpy(buf, dst, nbytes); - memset(buf + nbytes, 0, GHASH_BLOCK_SIZE - nbytes); - ghash_do_update(!!nbytes, dg, buf, &ctx->ghash_key, head, - pmull_ghash_update_p64); + tag = (u8 *)&lengths; + ghash_do_update(1, dg, tag, &ctx->ghash_key, + walk.nbytes ? buf : NULL, NULL); - err = skcipher_walk_done(&walk, 0); + if (walk.nbytes) + err = skcipher_walk_done(&walk, 0); + + put_unaligned_be64(dg[1], tag); + put_unaligned_be64(dg[0], tag + 8); + put_unaligned_be32(1, iv + GCM_IV_SIZE); + aes_encrypt(&ctx->aes_key, iv, iv); + crypto_xor(tag, iv, AES_BLOCK_SIZE); } if (err) return err; - gcm_final(req, ctx, dg, tag, req->cryptlen); - /* copy authtag to end of dst */ scatterwalk_map_and_copy(tag, req->dst, req->assoclen + req->cryptlen, crypto_aead_authsize(aead), 1); @@ -540,75 +514,65 @@ static int gcm_decrypt(struct aead_request *req) struct crypto_aead *aead = crypto_aead_reqtfm(req); struct gcm_aes_ctx *ctx = crypto_aead_ctx(aead); unsigned int authsize = crypto_aead_authsize(aead); + int nrounds = num_rounds(&ctx->aes_key); struct skcipher_walk walk; - u8 iv[2 * AES_BLOCK_SIZE]; - u8 tag[AES_BLOCK_SIZE]; - u8 buf[2 * GHASH_BLOCK_SIZE]; + u8 buf[AES_BLOCK_SIZE]; + u8 iv[AES_BLOCK_SIZE]; u64 dg[2] = {}; - int nrounds = num_rounds(&ctx->aes_key); + u128 lengths; + u8 *tag; int err; + lengths.a = cpu_to_be64(req->assoclen * 8); + lengths.b = cpu_to_be64((req->cryptlen - authsize) * 8); + if (req->assoclen) gcm_calculate_auth_mac(req, dg); memcpy(iv, req->iv, GCM_IV_SIZE); - put_unaligned_be32(1, iv + GCM_IV_SIZE); + put_unaligned_be32(2, iv + GCM_IV_SIZE); err = skcipher_walk_aead_decrypt(&walk, req, false); - if (likely(crypto_simd_usable() && walk.total >= 2 * AES_BLOCK_SIZE)) { - u32 const *rk = NULL; - - kernel_neon_begin(); - pmull_gcm_encrypt_block(tag, iv, ctx->aes_key.key_enc, nrounds); - put_unaligned_be32(2, iv + GCM_IV_SIZE); - + if (likely(crypto_simd_usable())) { do { - int blocks = walk.nbytes / (2 * AES_BLOCK_SIZE) * 2; - int rem = walk.total - blocks * AES_BLOCK_SIZE; - - if (rk) - kernel_neon_begin(); - - pmull_gcm_decrypt(blocks, dg, walk.dst.virt.addr, - walk.src.virt.addr, &ctx->ghash_key, - iv, rk, nrounds); - - /* check if this is the final iteration of the loop */ - if (rem < (2 * AES_BLOCK_SIZE)) { - u8 *iv2 = iv + AES_BLOCK_SIZE; - - if (rem > AES_BLOCK_SIZE) { - memcpy(iv2, iv, AES_BLOCK_SIZE); - crypto_inc(iv2, AES_BLOCK_SIZE); - } 
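Note: the rewritten GCM glue code here starts the payload counter at 2 (put_unaligned_be32(2, iv + GCM_IV_SIZE)) and reserves counter value 1 for the block that masks the authentication tag, which is produced at the end with put_unaligned_be32(1, ...), aes_encrypt() and crypto_xor(). The fragment below is a compact sketch of that final step using only helpers that already appear in this patch; gcm_make_tag() is an illustrative name, not a function added here.

/* Tag masking in GCM: J0 = IV || be32(1) is encrypted and XORed over
 * the GHASH result; counters 2, 3, ... generate the payload keystream.
 */
#include <crypto/aes.h>
#include <crypto/algapi.h>		/* crypto_xor() */
#include <linux/string.h>
#include <linux/types.h>
#include <asm/unaligned.h>

#define GCM_IV_SIZE	12

static void gcm_make_tag(const struct crypto_aes_ctx *aes,
			 const u8 nonce[GCM_IV_SIZE],
			 u8 ghash[AES_BLOCK_SIZE])	/* in: GHASH, out: tag */
{
	u8 j0[AES_BLOCK_SIZE];

	memcpy(j0, nonce, GCM_IV_SIZE);
	put_unaligned_be32(1, j0 + GCM_IV_SIZE);	/* counter value 1 */
	aes_encrypt(aes, j0, j0);			/* E_K(J0)         */
	crypto_xor(ghash, j0, AES_BLOCK_SIZE);		/* tag = GHASH ^ E_K(J0) */
}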
+ const u8 *src = walk.src.virt.addr; + u8 *dst = walk.dst.virt.addr; + int nbytes = walk.nbytes; - pmull_gcm_encrypt_block(iv, iv, NULL, nrounds); + tag = (u8 *)&lengths; - if (rem > AES_BLOCK_SIZE) - pmull_gcm_encrypt_block(iv2, iv2, NULL, - nrounds); + if (unlikely(nbytes > 0 && nbytes < AES_BLOCK_SIZE)) { + src = dst = memcpy(buf + sizeof(buf) - nbytes, + src, nbytes); + } else if (nbytes < walk.total) { + nbytes &= ~(AES_BLOCK_SIZE - 1); + tag = NULL; } + kernel_neon_begin(); + pmull_gcm_decrypt(nbytes, dst, src, &ctx->ghash_key, dg, + iv, ctx->aes_key.key_enc, nrounds, + tag); kernel_neon_end(); - err = skcipher_walk_done(&walk, - walk.nbytes % (2 * AES_BLOCK_SIZE)); + if (unlikely(!nbytes)) + break; - rk = ctx->aes_key.key_enc; - } while (walk.nbytes >= 2 * AES_BLOCK_SIZE); - } else { - aes_encrypt(&ctx->aes_key, tag, iv); - put_unaligned_be32(2, iv + GCM_IV_SIZE); + if (unlikely(nbytes > 0 && nbytes < AES_BLOCK_SIZE)) + memcpy(walk.dst.virt.addr, + buf + sizeof(buf) - nbytes, nbytes); - while (walk.nbytes >= (2 * AES_BLOCK_SIZE)) { - int blocks = walk.nbytes / (2 * AES_BLOCK_SIZE) * 2; + err = skcipher_walk_done(&walk, walk.nbytes - nbytes); + } while (walk.nbytes); + } else { + while (walk.nbytes >= AES_BLOCK_SIZE) { + int blocks = walk.nbytes / AES_BLOCK_SIZE; + const u8 *src = walk.src.virt.addr; u8 *dst = walk.dst.virt.addr; - u8 *src = walk.src.virt.addr; ghash_do_update(blocks, dg, walk.src.virt.addr, - &ctx->ghash_key, NULL, - pmull_ghash_update_p64); + &ctx->ghash_key, NULL, NULL); do { aes_encrypt(&ctx->aes_key, buf, iv); @@ -620,49 +584,38 @@ static int gcm_decrypt(struct aead_request *req) } while (--blocks > 0); err = skcipher_walk_done(&walk, - walk.nbytes % (2 * AES_BLOCK_SIZE)); + walk.nbytes % AES_BLOCK_SIZE); } - if (walk.nbytes) { - if (walk.nbytes > AES_BLOCK_SIZE) { - u8 *iv2 = iv + AES_BLOCK_SIZE; - - memcpy(iv2, iv, AES_BLOCK_SIZE); - crypto_inc(iv2, AES_BLOCK_SIZE); - aes_encrypt(&ctx->aes_key, iv2, iv2); - } - aes_encrypt(&ctx->aes_key, iv, iv); + /* handle the tail */ + if (walk.nbytes) { + memcpy(buf, walk.src.virt.addr, walk.nbytes); + memset(buf + walk.nbytes, 0, sizeof(buf) - walk.nbytes); } - } - /* handle the tail */ - if (walk.nbytes) { - const u8 *src = walk.src.virt.addr; - const u8 *head = NULL; - unsigned int nbytes = walk.nbytes; + tag = (u8 *)&lengths; + ghash_do_update(1, dg, tag, &ctx->ghash_key, + walk.nbytes ? 
buf : NULL, NULL); - if (walk.nbytes > GHASH_BLOCK_SIZE) { - head = src; - src += GHASH_BLOCK_SIZE; - nbytes %= GHASH_BLOCK_SIZE; - } + if (walk.nbytes) { + aes_encrypt(&ctx->aes_key, buf, iv); - memcpy(buf, src, nbytes); - memset(buf + nbytes, 0, GHASH_BLOCK_SIZE - nbytes); - ghash_do_update(!!nbytes, dg, buf, &ctx->ghash_key, head, - pmull_ghash_update_p64); + crypto_xor_cpy(walk.dst.virt.addr, walk.src.virt.addr, + buf, walk.nbytes); - crypto_xor_cpy(walk.dst.virt.addr, walk.src.virt.addr, iv, - walk.nbytes); + err = skcipher_walk_done(&walk, 0); + } - err = skcipher_walk_done(&walk, 0); + put_unaligned_be64(dg[1], tag); + put_unaligned_be64(dg[0], tag + 8); + put_unaligned_be32(1, iv + GCM_IV_SIZE); + aes_encrypt(&ctx->aes_key, iv, iv); + crypto_xor(tag, iv, AES_BLOCK_SIZE); } if (err) return err; - gcm_final(req, ctx, dg, tag, req->cryptlen - authsize); - /* compare calculated auth tag with the stored one */ scatterwalk_map_and_copy(buf, req->src, req->assoclen + req->cryptlen - authsize, @@ -675,7 +628,7 @@ static int gcm_decrypt(struct aead_request *req) static struct aead_alg gcm_aes_alg = { .ivsize = GCM_IV_SIZE, - .chunksize = 2 * AES_BLOCK_SIZE, + .chunksize = AES_BLOCK_SIZE, .maxauthsize = AES_BLOCK_SIZE, .setkey = gcm_setkey, .setauthsize = gcm_setauthsize, diff --git a/arch/arm64/crypto/poly1305-armv8.pl b/arch/arm64/crypto/poly1305-armv8.pl new file mode 100644 index 000000000000..6e5576d19af8 --- /dev/null +++ b/arch/arm64/crypto/poly1305-armv8.pl @@ -0,0 +1,913 @@ +#!/usr/bin/env perl +# SPDX-License-Identifier: GPL-1.0+ OR BSD-3-Clause +# +# ==================================================================== +# Written by Andy Polyakov, @dot-asm, initially for the OpenSSL +# project. +# ==================================================================== +# +# This module implements Poly1305 hash for ARMv8. +# +# June 2015 +# +# Numbers are cycles per processed byte with poly1305_blocks alone. +# +# IALU/gcc-4.9 NEON +# +# Apple A7 1.86/+5% 0.72 +# Cortex-A53 2.69/+58% 1.47 +# Cortex-A57 2.70/+7% 1.14 +# Denver 1.64/+50% 1.18(*) +# X-Gene 2.13/+68% 2.27 +# Mongoose 1.77/+75% 1.12 +# Kryo 2.70/+55% 1.13 +# ThunderX2 1.17/+95% 1.36 +# +# (*) estimate based on resources availability is less than 1.0, +# i.e. 
measured result is worse than expected, presumably binary +# translator is not almighty; + +$flavour=shift; +$output=shift; + +if ($flavour && $flavour ne "void") { + $0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1; + ( $xlate="${dir}arm-xlate.pl" and -f $xlate ) or + ( $xlate="${dir}../../perlasm/arm-xlate.pl" and -f $xlate) or + die "can't locate arm-xlate.pl"; + + open STDOUT,"| \"$^X\" $xlate $flavour $output"; +} else { + open STDOUT,">$output"; +} + +my ($ctx,$inp,$len,$padbit) = map("x$_",(0..3)); +my ($mac,$nonce)=($inp,$len); + +my ($h0,$h1,$h2,$r0,$r1,$s1,$t0,$t1,$d0,$d1,$d2) = map("x$_",(4..14)); + +$code.=<<___; +#ifndef __KERNEL__ +# include "arm_arch.h" +.extern OPENSSL_armcap_P +#endif + +.text + +// forward "declarations" are required for Apple +.globl poly1305_blocks +.globl poly1305_emit + +.globl poly1305_init +.type poly1305_init,%function +.align 5 +poly1305_init: + cmp $inp,xzr + stp xzr,xzr,[$ctx] // zero hash value + stp xzr,xzr,[$ctx,#16] // [along with is_base2_26] + + csel x0,xzr,x0,eq + b.eq .Lno_key + +#ifndef __KERNEL__ + adrp x17,OPENSSL_armcap_P + ldr w17,[x17,#:lo12:OPENSSL_armcap_P] +#endif + + ldp $r0,$r1,[$inp] // load key + mov $s1,#0xfffffffc0fffffff + movk $s1,#0x0fff,lsl#48 +#ifdef __AARCH64EB__ + rev $r0,$r0 // flip bytes + rev $r1,$r1 +#endif + and $r0,$r0,$s1 // &=0ffffffc0fffffff + and $s1,$s1,#-4 + and $r1,$r1,$s1 // &=0ffffffc0ffffffc + mov w#$s1,#-1 + stp $r0,$r1,[$ctx,#32] // save key value + str w#$s1,[$ctx,#48] // impossible key power value + +#ifndef __KERNEL__ + tst w17,#ARMV7_NEON + + adr $d0,.Lpoly1305_blocks + adr $r0,.Lpoly1305_blocks_neon + adr $d1,.Lpoly1305_emit + + csel $d0,$d0,$r0,eq + +# ifdef __ILP32__ + stp w#$d0,w#$d1,[$len] +# else + stp $d0,$d1,[$len] +# endif +#endif + mov x0,#1 +.Lno_key: + ret +.size poly1305_init,.-poly1305_init + +.type poly1305_blocks,%function +.align 5 +poly1305_blocks: +.Lpoly1305_blocks: + ands $len,$len,#-16 + b.eq .Lno_data + + ldp $h0,$h1,[$ctx] // load hash value + ldp $h2,x17,[$ctx,#16] // [along with is_base2_26] + ldp $r0,$r1,[$ctx,#32] // load key value + +#ifdef __AARCH64EB__ + lsr $d0,$h0,#32 + mov w#$d1,w#$h0 + lsr $d2,$h1,#32 + mov w15,w#$h1 + lsr x16,$h2,#32 +#else + mov w#$d0,w#$h0 + lsr $d1,$h0,#32 + mov w#$d2,w#$h1 + lsr x15,$h1,#32 + mov w16,w#$h2 +#endif + + add $d0,$d0,$d1,lsl#26 // base 2^26 -> base 2^64 + lsr $d1,$d2,#12 + adds $d0,$d0,$d2,lsl#52 + add $d1,$d1,x15,lsl#14 + adc $d1,$d1,xzr + lsr $d2,x16,#24 + adds $d1,$d1,x16,lsl#40 + adc $d2,$d2,xzr + + cmp x17,#0 // is_base2_26? 
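Note: the "base 2^26 -> base 2^64" sequence just above converts the five 26-bit limbs kept by the NEON code back into the two 64-bit limbs (plus a few carry bits) used by the scalar code. Below is a standalone C rendering of that conversion for reference; it is illustrative only (the kernel keeps this in assembly) and uses a 128-bit accumulator in place of the adds/adc carry chain.

/* Convert a Poly1305 accumulator from five 26-bit limbs (base 2^26)
 * to the base 2^64 form d0 + d1*2^64 + d2*2^128.
 */
#include <stdint.h>

static void poly1305_26_to_64(const uint32_t h[5], uint64_t d[3])
{
	unsigned __int128 acc;

	/* bits 0..63: h0 + h1*2^26 + the low 12 bits of h2, shifted up */
	acc  = (unsigned __int128)h[0];
	acc += (unsigned __int128)h[1] << 26;
	acc += (unsigned __int128)h[2] << 52;
	d[0] = (uint64_t)acc;

	/* bits 64..127: carry (includes h2 >> 12) + h3*2^14 + h4*2^40 */
	acc >>= 64;
	acc += (unsigned __int128)h[3] << 14;
	acc += (unsigned __int128)h[4] << 40;
	d[1] = (uint64_t)acc;

	/* bits 128..: essentially h4 >> 24 plus carries */
	d[2] = (uint64_t)(acc >> 64);
}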
+ add $s1,$r1,$r1,lsr#2 // s1 = r1 + (r1 >> 2) + csel $h0,$h0,$d0,eq // choose between radixes + csel $h1,$h1,$d1,eq + csel $h2,$h2,$d2,eq + +.Loop: + ldp $t0,$t1,[$inp],#16 // load input + sub $len,$len,#16 +#ifdef __AARCH64EB__ + rev $t0,$t0 + rev $t1,$t1 +#endif + adds $h0,$h0,$t0 // accumulate input + adcs $h1,$h1,$t1 + + mul $d0,$h0,$r0 // h0*r0 + adc $h2,$h2,$padbit + umulh $d1,$h0,$r0 + + mul $t0,$h1,$s1 // h1*5*r1 + umulh $t1,$h1,$s1 + + adds $d0,$d0,$t0 + mul $t0,$h0,$r1 // h0*r1 + adc $d1,$d1,$t1 + umulh $d2,$h0,$r1 + + adds $d1,$d1,$t0 + mul $t0,$h1,$r0 // h1*r0 + adc $d2,$d2,xzr + umulh $t1,$h1,$r0 + + adds $d1,$d1,$t0 + mul $t0,$h2,$s1 // h2*5*r1 + adc $d2,$d2,$t1 + mul $t1,$h2,$r0 // h2*r0 + + adds $d1,$d1,$t0 + adc $d2,$d2,$t1 + + and $t0,$d2,#-4 // final reduction + and $h2,$d2,#3 + add $t0,$t0,$d2,lsr#2 + adds $h0,$d0,$t0 + adcs $h1,$d1,xzr + adc $h2,$h2,xzr + + cbnz $len,.Loop + + stp $h0,$h1,[$ctx] // store hash value + stp $h2,xzr,[$ctx,#16] // [and clear is_base2_26] + +.Lno_data: + ret +.size poly1305_blocks,.-poly1305_blocks + +.type poly1305_emit,%function +.align 5 +poly1305_emit: +.Lpoly1305_emit: + ldp $h0,$h1,[$ctx] // load hash base 2^64 + ldp $h2,$r0,[$ctx,#16] // [along with is_base2_26] + ldp $t0,$t1,[$nonce] // load nonce + +#ifdef __AARCH64EB__ + lsr $d0,$h0,#32 + mov w#$d1,w#$h0 + lsr $d2,$h1,#32 + mov w15,w#$h1 + lsr x16,$h2,#32 +#else + mov w#$d0,w#$h0 + lsr $d1,$h0,#32 + mov w#$d2,w#$h1 + lsr x15,$h1,#32 + mov w16,w#$h2 +#endif + + add $d0,$d0,$d1,lsl#26 // base 2^26 -> base 2^64 + lsr $d1,$d2,#12 + adds $d0,$d0,$d2,lsl#52 + add $d1,$d1,x15,lsl#14 + adc $d1,$d1,xzr + lsr $d2,x16,#24 + adds $d1,$d1,x16,lsl#40 + adc $d2,$d2,xzr + + cmp $r0,#0 // is_base2_26? + csel $h0,$h0,$d0,eq // choose between radixes + csel $h1,$h1,$d1,eq + csel $h2,$h2,$d2,eq + + adds $d0,$h0,#5 // compare to modulus + adcs $d1,$h1,xzr + adc $d2,$h2,xzr + + tst $d2,#-4 // see if it's carried/borrowed + + csel $h0,$h0,$d0,eq + csel $h1,$h1,$d1,eq + +#ifdef __AARCH64EB__ + ror $t0,$t0,#32 // flip nonce words + ror $t1,$t1,#32 +#endif + adds $h0,$h0,$t0 // accumulate nonce + adc $h1,$h1,$t1 +#ifdef __AARCH64EB__ + rev $h0,$h0 // flip output bytes + rev $h1,$h1 +#endif + stp $h0,$h1,[$mac] // write result + + ret +.size poly1305_emit,.-poly1305_emit +___ +my ($R0,$R1,$S1,$R2,$S2,$R3,$S3,$R4,$S4) = map("v$_.4s",(0..8)); +my ($IN01_0,$IN01_1,$IN01_2,$IN01_3,$IN01_4) = map("v$_.2s",(9..13)); +my ($IN23_0,$IN23_1,$IN23_2,$IN23_3,$IN23_4) = map("v$_.2s",(14..18)); +my ($ACC0,$ACC1,$ACC2,$ACC3,$ACC4) = map("v$_.2d",(19..23)); +my ($H0,$H1,$H2,$H3,$H4) = map("v$_.2s",(24..28)); +my ($T0,$T1,$MASK) = map("v$_",(29..31)); + +my ($in2,$zeros)=("x16","x17"); +my $is_base2_26 = $zeros; # borrow + +$code.=<<___; +.type poly1305_mult,%function +.align 5 +poly1305_mult: + mul $d0,$h0,$r0 // h0*r0 + umulh $d1,$h0,$r0 + + mul $t0,$h1,$s1 // h1*5*r1 + umulh $t1,$h1,$s1 + + adds $d0,$d0,$t0 + mul $t0,$h0,$r1 // h0*r1 + adc $d1,$d1,$t1 + umulh $d2,$h0,$r1 + + adds $d1,$d1,$t0 + mul $t0,$h1,$r0 // h1*r0 + adc $d2,$d2,xzr + umulh $t1,$h1,$r0 + + adds $d1,$d1,$t0 + mul $t0,$h2,$s1 // h2*5*r1 + adc $d2,$d2,$t1 + mul $t1,$h2,$r0 // h2*r0 + + adds $d1,$d1,$t0 + adc $d2,$d2,$t1 + + and $t0,$d2,#-4 // final reduction + and $h2,$d2,#3 + add $t0,$t0,$d2,lsr#2 + adds $h0,$d0,$t0 + adcs $h1,$d1,xzr + adc $h2,$h2,xzr + + ret +.size poly1305_mult,.-poly1305_mult + +.type poly1305_splat,%function +.align 4 +poly1305_splat: + and x12,$h0,#0x03ffffff // base 2^64 -> base 2^26 + ubfx x13,$h0,#26,#26 + extr x14,$h1,$h0,#52 
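Note: the "final reduction" steps above rely on 2^130 == 5 (mod 2^130 - 5): writing d2 = 4*q + r, the q*2^130 part folds back in as 5*q = (d2 & ~3) + (d2 >> 2), which is exactly what the and/add pair with #-4 and lsr#2 computes. A standalone C sketch of that step follows; it is not code from the patch, and as in the assembly it assumes d2 is only a few bits wide so the 64-bit additions cannot overflow.

/* Partial reduction of d0 + d1*2^64 + d2*2^128 modulo 2^130 - 5. */
#include <stdint.h>

static void poly1305_partial_reduce(uint64_t d0, uint64_t d1, uint64_t d2,
				    uint64_t h[3])
{
	uint64_t fold = (d2 & ~(uint64_t)3) + (d2 >> 2);	/* 5*q = 4*q + q */
	unsigned __int128 acc;

	acc  = (unsigned __int128)d0 + fold;
	h[0] = (uint64_t)acc;
	acc  = (acc >> 64) + d1;
	h[1] = (uint64_t)acc;
	h[2] = (uint64_t)(acc >> 64) + (d2 & 3);		/* r + carry */
}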
+ and x14,x14,#0x03ffffff + ubfx x15,$h1,#14,#26 + extr x16,$h2,$h1,#40 + + str w12,[$ctx,#16*0] // r0 + add w12,w13,w13,lsl#2 // r1*5 + str w13,[$ctx,#16*1] // r1 + add w13,w14,w14,lsl#2 // r2*5 + str w12,[$ctx,#16*2] // s1 + str w14,[$ctx,#16*3] // r2 + add w14,w15,w15,lsl#2 // r3*5 + str w13,[$ctx,#16*4] // s2 + str w15,[$ctx,#16*5] // r3 + add w15,w16,w16,lsl#2 // r4*5 + str w14,[$ctx,#16*6] // s3 + str w16,[$ctx,#16*7] // r4 + str w15,[$ctx,#16*8] // s4 + + ret +.size poly1305_splat,.-poly1305_splat + +#ifdef __KERNEL__ +.globl poly1305_blocks_neon +#endif +.type poly1305_blocks_neon,%function +.align 5 +poly1305_blocks_neon: +.Lpoly1305_blocks_neon: + ldr $is_base2_26,[$ctx,#24] + cmp $len,#128 + b.lo .Lpoly1305_blocks + + .inst 0xd503233f // paciasp + stp x29,x30,[sp,#-80]! + add x29,sp,#0 + + stp d8,d9,[sp,#16] // meet ABI requirements + stp d10,d11,[sp,#32] + stp d12,d13,[sp,#48] + stp d14,d15,[sp,#64] + + cbz $is_base2_26,.Lbase2_64_neon + + ldp w10,w11,[$ctx] // load hash value base 2^26 + ldp w12,w13,[$ctx,#8] + ldr w14,[$ctx,#16] + + tst $len,#31 + b.eq .Leven_neon + + ldp $r0,$r1,[$ctx,#32] // load key value + + add $h0,x10,x11,lsl#26 // base 2^26 -> base 2^64 + lsr $h1,x12,#12 + adds $h0,$h0,x12,lsl#52 + add $h1,$h1,x13,lsl#14 + adc $h1,$h1,xzr + lsr $h2,x14,#24 + adds $h1,$h1,x14,lsl#40 + adc $d2,$h2,xzr // can be partially reduced... + + ldp $d0,$d1,[$inp],#16 // load input + sub $len,$len,#16 + add $s1,$r1,$r1,lsr#2 // s1 = r1 + (r1 >> 2) + +#ifdef __AARCH64EB__ + rev $d0,$d0 + rev $d1,$d1 +#endif + adds $h0,$h0,$d0 // accumulate input + adcs $h1,$h1,$d1 + adc $h2,$h2,$padbit + + bl poly1305_mult + + and x10,$h0,#0x03ffffff // base 2^64 -> base 2^26 + ubfx x11,$h0,#26,#26 + extr x12,$h1,$h0,#52 + and x12,x12,#0x03ffffff + ubfx x13,$h1,#14,#26 + extr x14,$h2,$h1,#40 + + b .Leven_neon + +.align 4 +.Lbase2_64_neon: + ldp $r0,$r1,[$ctx,#32] // load key value + + ldp $h0,$h1,[$ctx] // load hash value base 2^64 + ldr $h2,[$ctx,#16] + + tst $len,#31 + b.eq .Linit_neon + + ldp $d0,$d1,[$inp],#16 // load input + sub $len,$len,#16 + add $s1,$r1,$r1,lsr#2 // s1 = r1 + (r1 >> 2) +#ifdef __AARCH64EB__ + rev $d0,$d0 + rev $d1,$d1 +#endif + adds $h0,$h0,$d0 // accumulate input + adcs $h1,$h1,$d1 + adc $h2,$h2,$padbit + + bl poly1305_mult + +.Linit_neon: + ldr w17,[$ctx,#48] // first table element + and x10,$h0,#0x03ffffff // base 2^64 -> base 2^26 + ubfx x11,$h0,#26,#26 + extr x12,$h1,$h0,#52 + and x12,x12,#0x03ffffff + ubfx x13,$h1,#14,#26 + extr x14,$h2,$h1,#40 + + cmp w17,#-1 // is value impossible? 
+ b.ne .Leven_neon + + fmov ${H0},x10 + fmov ${H1},x11 + fmov ${H2},x12 + fmov ${H3},x13 + fmov ${H4},x14 + + ////////////////////////////////// initialize r^n table + mov $h0,$r0 // r^1 + add $s1,$r1,$r1,lsr#2 // s1 = r1 + (r1 >> 2) + mov $h1,$r1 + mov $h2,xzr + add $ctx,$ctx,#48+12 + bl poly1305_splat + + bl poly1305_mult // r^2 + sub $ctx,$ctx,#4 + bl poly1305_splat + + bl poly1305_mult // r^3 + sub $ctx,$ctx,#4 + bl poly1305_splat + + bl poly1305_mult // r^4 + sub $ctx,$ctx,#4 + bl poly1305_splat + sub $ctx,$ctx,#48 // restore original $ctx + b .Ldo_neon + +.align 4 +.Leven_neon: + fmov ${H0},x10 + fmov ${H1},x11 + fmov ${H2},x12 + fmov ${H3},x13 + fmov ${H4},x14 + +.Ldo_neon: + ldp x8,x12,[$inp,#32] // inp[2:3] + subs $len,$len,#64 + ldp x9,x13,[$inp,#48] + add $in2,$inp,#96 + adr $zeros,.Lzeros + + lsl $padbit,$padbit,#24 + add x15,$ctx,#48 + +#ifdef __AARCH64EB__ + rev x8,x8 + rev x12,x12 + rev x9,x9 + rev x13,x13 +#endif + and x4,x8,#0x03ffffff // base 2^64 -> base 2^26 + and x5,x9,#0x03ffffff + ubfx x6,x8,#26,#26 + ubfx x7,x9,#26,#26 + add x4,x4,x5,lsl#32 // bfi x4,x5,#32,#32 + extr x8,x12,x8,#52 + extr x9,x13,x9,#52 + add x6,x6,x7,lsl#32 // bfi x6,x7,#32,#32 + fmov $IN23_0,x4 + and x8,x8,#0x03ffffff + and x9,x9,#0x03ffffff + ubfx x10,x12,#14,#26 + ubfx x11,x13,#14,#26 + add x12,$padbit,x12,lsr#40 + add x13,$padbit,x13,lsr#40 + add x8,x8,x9,lsl#32 // bfi x8,x9,#32,#32 + fmov $IN23_1,x6 + add x10,x10,x11,lsl#32 // bfi x10,x11,#32,#32 + add x12,x12,x13,lsl#32 // bfi x12,x13,#32,#32 + fmov $IN23_2,x8 + fmov $IN23_3,x10 + fmov $IN23_4,x12 + + ldp x8,x12,[$inp],#16 // inp[0:1] + ldp x9,x13,[$inp],#48 + + ld1 {$R0,$R1,$S1,$R2},[x15],#64 + ld1 {$S2,$R3,$S3,$R4},[x15],#64 + ld1 {$S4},[x15] + +#ifdef __AARCH64EB__ + rev x8,x8 + rev x12,x12 + rev x9,x9 + rev x13,x13 +#endif + and x4,x8,#0x03ffffff // base 2^64 -> base 2^26 + and x5,x9,#0x03ffffff + ubfx x6,x8,#26,#26 + ubfx x7,x9,#26,#26 + add x4,x4,x5,lsl#32 // bfi x4,x5,#32,#32 + extr x8,x12,x8,#52 + extr x9,x13,x9,#52 + add x6,x6,x7,lsl#32 // bfi x6,x7,#32,#32 + fmov $IN01_0,x4 + and x8,x8,#0x03ffffff + and x9,x9,#0x03ffffff + ubfx x10,x12,#14,#26 + ubfx x11,x13,#14,#26 + add x12,$padbit,x12,lsr#40 + add x13,$padbit,x13,lsr#40 + add x8,x8,x9,lsl#32 // bfi x8,x9,#32,#32 + fmov $IN01_1,x6 + add x10,x10,x11,lsl#32 // bfi x10,x11,#32,#32 + add x12,x12,x13,lsl#32 // bfi x12,x13,#32,#32 + movi $MASK.2d,#-1 + fmov $IN01_2,x8 + fmov $IN01_3,x10 + fmov $IN01_4,x12 + ushr $MASK.2d,$MASK.2d,#38 + + b.ls .Lskip_loop + +.align 4 +.Loop_neon: + //////////////////////////////////////////////////////////////// + // ((inp[0]*r^4+inp[2]*r^2+inp[4])*r^4+inp[6]*r^2 + // ((inp[1]*r^4+inp[3]*r^2+inp[5])*r^3+inp[7]*r + // \___________________/ + // ((inp[0]*r^4+inp[2]*r^2+inp[4])*r^4+inp[6]*r^2+inp[8])*r^2 + // ((inp[1]*r^4+inp[3]*r^2+inp[5])*r^4+inp[7]*r^2+inp[9])*r + // \___________________/ \____________________/ + // + // Note that we start with inp[2:3]*r^2. This is because it + // doesn't depend on reduction in previous iteration. 
+ //////////////////////////////////////////////////////////////// + // d4 = h0*r4 + h1*r3 + h2*r2 + h3*r1 + h4*r0 + // d3 = h0*r3 + h1*r2 + h2*r1 + h3*r0 + h4*5*r4 + // d2 = h0*r2 + h1*r1 + h2*r0 + h3*5*r4 + h4*5*r3 + // d1 = h0*r1 + h1*r0 + h2*5*r4 + h3*5*r3 + h4*5*r2 + // d0 = h0*r0 + h1*5*r4 + h2*5*r3 + h3*5*r2 + h4*5*r1 + + subs $len,$len,#64 + umull $ACC4,$IN23_0,${R4}[2] + csel $in2,$zeros,$in2,lo + umull $ACC3,$IN23_0,${R3}[2] + umull $ACC2,$IN23_0,${R2}[2] + ldp x8,x12,[$in2],#16 // inp[2:3] (or zero) + umull $ACC1,$IN23_0,${R1}[2] + ldp x9,x13,[$in2],#48 + umull $ACC0,$IN23_0,${R0}[2] +#ifdef __AARCH64EB__ + rev x8,x8 + rev x12,x12 + rev x9,x9 + rev x13,x13 +#endif + + umlal $ACC4,$IN23_1,${R3}[2] + and x4,x8,#0x03ffffff // base 2^64 -> base 2^26 + umlal $ACC3,$IN23_1,${R2}[2] + and x5,x9,#0x03ffffff + umlal $ACC2,$IN23_1,${R1}[2] + ubfx x6,x8,#26,#26 + umlal $ACC1,$IN23_1,${R0}[2] + ubfx x7,x9,#26,#26 + umlal $ACC0,$IN23_1,${S4}[2] + add x4,x4,x5,lsl#32 // bfi x4,x5,#32,#32 + + umlal $ACC4,$IN23_2,${R2}[2] + extr x8,x12,x8,#52 + umlal $ACC3,$IN23_2,${R1}[2] + extr x9,x13,x9,#52 + umlal $ACC2,$IN23_2,${R0}[2] + add x6,x6,x7,lsl#32 // bfi x6,x7,#32,#32 + umlal $ACC1,$IN23_2,${S4}[2] + fmov $IN23_0,x4 + umlal $ACC0,$IN23_2,${S3}[2] + and x8,x8,#0x03ffffff + + umlal $ACC4,$IN23_3,${R1}[2] + and x9,x9,#0x03ffffff + umlal $ACC3,$IN23_3,${R0}[2] + ubfx x10,x12,#14,#26 + umlal $ACC2,$IN23_3,${S4}[2] + ubfx x11,x13,#14,#26 + umlal $ACC1,$IN23_3,${S3}[2] + add x8,x8,x9,lsl#32 // bfi x8,x9,#32,#32 + umlal $ACC0,$IN23_3,${S2}[2] + fmov $IN23_1,x6 + + add $IN01_2,$IN01_2,$H2 + add x12,$padbit,x12,lsr#40 + umlal $ACC4,$IN23_4,${R0}[2] + add x13,$padbit,x13,lsr#40 + umlal $ACC3,$IN23_4,${S4}[2] + add x10,x10,x11,lsl#32 // bfi x10,x11,#32,#32 + umlal $ACC2,$IN23_4,${S3}[2] + add x12,x12,x13,lsl#32 // bfi x12,x13,#32,#32 + umlal $ACC1,$IN23_4,${S2}[2] + fmov $IN23_2,x8 + umlal $ACC0,$IN23_4,${S1}[2] + fmov $IN23_3,x10 + + //////////////////////////////////////////////////////////////// + // (hash+inp[0:1])*r^4 and accumulate + + add $IN01_0,$IN01_0,$H0 + fmov $IN23_4,x12 + umlal $ACC3,$IN01_2,${R1}[0] + ldp x8,x12,[$inp],#16 // inp[0:1] + umlal $ACC0,$IN01_2,${S3}[0] + ldp x9,x13,[$inp],#48 + umlal $ACC4,$IN01_2,${R2}[0] + umlal $ACC1,$IN01_2,${S4}[0] + umlal $ACC2,$IN01_2,${R0}[0] +#ifdef __AARCH64EB__ + rev x8,x8 + rev x12,x12 + rev x9,x9 + rev x13,x13 +#endif + + add $IN01_1,$IN01_1,$H1 + umlal $ACC3,$IN01_0,${R3}[0] + umlal $ACC4,$IN01_0,${R4}[0] + and x4,x8,#0x03ffffff // base 2^64 -> base 2^26 + umlal $ACC2,$IN01_0,${R2}[0] + and x5,x9,#0x03ffffff + umlal $ACC0,$IN01_0,${R0}[0] + ubfx x6,x8,#26,#26 + umlal $ACC1,$IN01_0,${R1}[0] + ubfx x7,x9,#26,#26 + + add $IN01_3,$IN01_3,$H3 + add x4,x4,x5,lsl#32 // bfi x4,x5,#32,#32 + umlal $ACC3,$IN01_1,${R2}[0] + extr x8,x12,x8,#52 + umlal $ACC4,$IN01_1,${R3}[0] + extr x9,x13,x9,#52 + umlal $ACC0,$IN01_1,${S4}[0] + add x6,x6,x7,lsl#32 // bfi x6,x7,#32,#32 + umlal $ACC2,$IN01_1,${R1}[0] + fmov $IN01_0,x4 + umlal $ACC1,$IN01_1,${R0}[0] + and x8,x8,#0x03ffffff + + add $IN01_4,$IN01_4,$H4 + and x9,x9,#0x03ffffff + umlal $ACC3,$IN01_3,${R0}[0] + ubfx x10,x12,#14,#26 + umlal $ACC0,$IN01_3,${S2}[0] + ubfx x11,x13,#14,#26 + umlal $ACC4,$IN01_3,${R1}[0] + add x8,x8,x9,lsl#32 // bfi x8,x9,#32,#32 + umlal $ACC1,$IN01_3,${S3}[0] + fmov $IN01_1,x6 + umlal $ACC2,$IN01_3,${S4}[0] + add x12,$padbit,x12,lsr#40 + + umlal $ACC3,$IN01_4,${S4}[0] + add x13,$padbit,x13,lsr#40 + umlal $ACC0,$IN01_4,${S1}[0] + add x10,x10,x11,lsl#32 // bfi x10,x11,#32,#32 + umlal 
$ACC4,$IN01_4,${R0}[0] + add x12,x12,x13,lsl#32 // bfi x12,x13,#32,#32 + umlal $ACC1,$IN01_4,${S2}[0] + fmov $IN01_2,x8 + umlal $ACC2,$IN01_4,${S3}[0] + fmov $IN01_3,x10 + fmov $IN01_4,x12 + + ///////////////////////////////////////////////////////////////// + // lazy reduction as discussed in "NEON crypto" by D.J. Bernstein + // and P. Schwabe + // + // [see discussion in poly1305-armv4 module] + + ushr $T0.2d,$ACC3,#26 + xtn $H3,$ACC3 + ushr $T1.2d,$ACC0,#26 + and $ACC0,$ACC0,$MASK.2d + add $ACC4,$ACC4,$T0.2d // h3 -> h4 + bic $H3,#0xfc,lsl#24 // &=0x03ffffff + add $ACC1,$ACC1,$T1.2d // h0 -> h1 + + ushr $T0.2d,$ACC4,#26 + xtn $H4,$ACC4 + ushr $T1.2d,$ACC1,#26 + xtn $H1,$ACC1 + bic $H4,#0xfc,lsl#24 + add $ACC2,$ACC2,$T1.2d // h1 -> h2 + + add $ACC0,$ACC0,$T0.2d + shl $T0.2d,$T0.2d,#2 + shrn $T1.2s,$ACC2,#26 + xtn $H2,$ACC2 + add $ACC0,$ACC0,$T0.2d // h4 -> h0 + bic $H1,#0xfc,lsl#24 + add $H3,$H3,$T1.2s // h2 -> h3 + bic $H2,#0xfc,lsl#24 + + shrn $T0.2s,$ACC0,#26 + xtn $H0,$ACC0 + ushr $T1.2s,$H3,#26 + bic $H3,#0xfc,lsl#24 + bic $H0,#0xfc,lsl#24 + add $H1,$H1,$T0.2s // h0 -> h1 + add $H4,$H4,$T1.2s // h3 -> h4 + + b.hi .Loop_neon + +.Lskip_loop: + dup $IN23_2,${IN23_2}[0] + add $IN01_2,$IN01_2,$H2 + + //////////////////////////////////////////////////////////////// + // multiply (inp[0:1]+hash) or inp[2:3] by r^2:r^1 + + adds $len,$len,#32 + b.ne .Long_tail + + dup $IN23_2,${IN01_2}[0] + add $IN23_0,$IN01_0,$H0 + add $IN23_3,$IN01_3,$H3 + add $IN23_1,$IN01_1,$H1 + add $IN23_4,$IN01_4,$H4 + +.Long_tail: + dup $IN23_0,${IN23_0}[0] + umull2 $ACC0,$IN23_2,${S3} + umull2 $ACC3,$IN23_2,${R1} + umull2 $ACC4,$IN23_2,${R2} + umull2 $ACC2,$IN23_2,${R0} + umull2 $ACC1,$IN23_2,${S4} + + dup $IN23_1,${IN23_1}[0] + umlal2 $ACC0,$IN23_0,${R0} + umlal2 $ACC2,$IN23_0,${R2} + umlal2 $ACC3,$IN23_0,${R3} + umlal2 $ACC4,$IN23_0,${R4} + umlal2 $ACC1,$IN23_0,${R1} + + dup $IN23_3,${IN23_3}[0] + umlal2 $ACC0,$IN23_1,${S4} + umlal2 $ACC3,$IN23_1,${R2} + umlal2 $ACC2,$IN23_1,${R1} + umlal2 $ACC4,$IN23_1,${R3} + umlal2 $ACC1,$IN23_1,${R0} + + dup $IN23_4,${IN23_4}[0] + umlal2 $ACC3,$IN23_3,${R0} + umlal2 $ACC4,$IN23_3,${R1} + umlal2 $ACC0,$IN23_3,${S2} + umlal2 $ACC1,$IN23_3,${S3} + umlal2 $ACC2,$IN23_3,${S4} + + umlal2 $ACC3,$IN23_4,${S4} + umlal2 $ACC0,$IN23_4,${S1} + umlal2 $ACC4,$IN23_4,${R0} + umlal2 $ACC1,$IN23_4,${S2} + umlal2 $ACC2,$IN23_4,${S3} + + b.eq .Lshort_tail + + //////////////////////////////////////////////////////////////// + // (hash+inp[0:1])*r^4:r^3 and accumulate + + add $IN01_0,$IN01_0,$H0 + umlal $ACC3,$IN01_2,${R1} + umlal $ACC0,$IN01_2,${S3} + umlal $ACC4,$IN01_2,${R2} + umlal $ACC1,$IN01_2,${S4} + umlal $ACC2,$IN01_2,${R0} + + add $IN01_1,$IN01_1,$H1 + umlal $ACC3,$IN01_0,${R3} + umlal $ACC0,$IN01_0,${R0} + umlal $ACC4,$IN01_0,${R4} + umlal $ACC1,$IN01_0,${R1} + umlal $ACC2,$IN01_0,${R2} + + add $IN01_3,$IN01_3,$H3 + umlal $ACC3,$IN01_1,${R2} + umlal $ACC0,$IN01_1,${S4} + umlal $ACC4,$IN01_1,${R3} + umlal $ACC1,$IN01_1,${R0} + umlal $ACC2,$IN01_1,${R1} + + add $IN01_4,$IN01_4,$H4 + umlal $ACC3,$IN01_3,${R0} + umlal $ACC0,$IN01_3,${S2} + umlal $ACC4,$IN01_3,${R1} + umlal $ACC1,$IN01_3,${S3} + umlal $ACC2,$IN01_3,${S4} + + umlal $ACC3,$IN01_4,${S4} + umlal $ACC0,$IN01_4,${S1} + umlal $ACC4,$IN01_4,${R0} + umlal $ACC1,$IN01_4,${S2} + umlal $ACC2,$IN01_4,${S3} + +.Lshort_tail: + //////////////////////////////////////////////////////////////// + // horizontal add + + addp $ACC3,$ACC3,$ACC3 + ldp d8,d9,[sp,#16] // meet ABI requirements + addp $ACC0,$ACC0,$ACC0 + ldp d10,d11,[sp,#32] + addp 
$ACC4,$ACC4,$ACC4 + ldp d12,d13,[sp,#48] + addp $ACC1,$ACC1,$ACC1 + ldp d14,d15,[sp,#64] + addp $ACC2,$ACC2,$ACC2 + ldr x30,[sp,#8] + .inst 0xd50323bf // autiasp + + //////////////////////////////////////////////////////////////// + // lazy reduction, but without narrowing + + ushr $T0.2d,$ACC3,#26 + and $ACC3,$ACC3,$MASK.2d + ushr $T1.2d,$ACC0,#26 + and $ACC0,$ACC0,$MASK.2d + + add $ACC4,$ACC4,$T0.2d // h3 -> h4 + add $ACC1,$ACC1,$T1.2d // h0 -> h1 + + ushr $T0.2d,$ACC4,#26 + and $ACC4,$ACC4,$MASK.2d + ushr $T1.2d,$ACC1,#26 + and $ACC1,$ACC1,$MASK.2d + add $ACC2,$ACC2,$T1.2d // h1 -> h2 + + add $ACC0,$ACC0,$T0.2d + shl $T0.2d,$T0.2d,#2 + ushr $T1.2d,$ACC2,#26 + and $ACC2,$ACC2,$MASK.2d + add $ACC0,$ACC0,$T0.2d // h4 -> h0 + add $ACC3,$ACC3,$T1.2d // h2 -> h3 + + ushr $T0.2d,$ACC0,#26 + and $ACC0,$ACC0,$MASK.2d + ushr $T1.2d,$ACC3,#26 + and $ACC3,$ACC3,$MASK.2d + add $ACC1,$ACC1,$T0.2d // h0 -> h1 + add $ACC4,$ACC4,$T1.2d // h3 -> h4 + + //////////////////////////////////////////////////////////////// + // write the result, can be partially reduced + + st4 {$ACC0,$ACC1,$ACC2,$ACC3}[0],[$ctx],#16 + mov x4,#1 + st1 {$ACC4}[0],[$ctx] + str x4,[$ctx,#8] // set is_base2_26 + + ldr x29,[sp],#80 + ret +.size poly1305_blocks_neon,.-poly1305_blocks_neon + +.align 5 +.Lzeros: +.long 0,0,0,0,0,0,0,0 +.asciz "Poly1305 for ARMv8, CRYPTOGAMS by \@dot-asm" +.align 2 +#if !defined(__KERNEL__) && !defined(_WIN64) +.comm OPENSSL_armcap_P,4,4 +.hidden OPENSSL_armcap_P +#endif +___ + +foreach (split("\n",$code)) { + s/\b(shrn\s+v[0-9]+)\.[24]d/$1.2s/ or + s/\b(fmov\s+)v([0-9]+)[^,]*,\s*x([0-9]+)/$1d$2,x$3/ or + (m/\bdup\b/ and (s/\.[24]s/.2d/g or 1)) or + (m/\b(eor|and)/ and (s/\.[248][sdh]/.16b/g or 1)) or + (m/\bum(ul|la)l\b/ and (s/\.4s/.2s/g or 1)) or + (m/\bum(ul|la)l2\b/ and (s/\.2s/.4s/g or 1)) or + (m/\bst[1-4]\s+{[^}]+}\[/ and (s/\.[24]d/.s/g or 1)); + + s/\.[124]([sd])\[/.$1\[/; + s/w#x([0-9]+)/w$1/g; + + print $_,"\n"; +} +close STDOUT; diff --git a/arch/arm64/crypto/poly1305-core.S_shipped b/arch/arm64/crypto/poly1305-core.S_shipped new file mode 100644 index 000000000000..8d1c4e420ccd --- /dev/null +++ b/arch/arm64/crypto/poly1305-core.S_shipped @@ -0,0 +1,835 @@ +#ifndef __KERNEL__ +# include "arm_arch.h" +.extern OPENSSL_armcap_P +#endif + +.text + +// forward "declarations" are required for Apple +.globl poly1305_blocks +.globl poly1305_emit + +.globl poly1305_init +.type poly1305_init,%function +.align 5 +poly1305_init: + cmp x1,xzr + stp xzr,xzr,[x0] // zero hash value + stp xzr,xzr,[x0,#16] // [along with is_base2_26] + + csel x0,xzr,x0,eq + b.eq .Lno_key + +#ifndef __KERNEL__ + adrp x17,OPENSSL_armcap_P + ldr w17,[x17,#:lo12:OPENSSL_armcap_P] +#endif + + ldp x7,x8,[x1] // load key + mov x9,#0xfffffffc0fffffff + movk x9,#0x0fff,lsl#48 +#ifdef __AARCH64EB__ + rev x7,x7 // flip bytes + rev x8,x8 +#endif + and x7,x7,x9 // &=0ffffffc0fffffff + and x9,x9,#-4 + and x8,x8,x9 // &=0ffffffc0ffffffc + mov w9,#-1 + stp x7,x8,[x0,#32] // save key value + str w9,[x0,#48] // impossible key power value + +#ifndef __KERNEL__ + tst w17,#ARMV7_NEON + + adr x12,.Lpoly1305_blocks + adr x7,.Lpoly1305_blocks_neon + adr x13,.Lpoly1305_emit + + csel x12,x12,x7,eq + +# ifdef __ILP32__ + stp w12,w13,[x2] +# else + stp x12,x13,[x2] +# endif +#endif + mov x0,#1 +.Lno_key: + ret +.size poly1305_init,.-poly1305_init + +.type poly1305_blocks,%function +.align 5 +poly1305_blocks: +.Lpoly1305_blocks: + ands x2,x2,#-16 + b.eq .Lno_data + + ldp x4,x5,[x0] // load hash value + ldp x6,x17,[x0,#16] // [along with is_base2_26] + 
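As an aside on the poly1305_init routine shown above: the mov/movk pair builds the two clamp masks applied to the r half of the key, and the __AARCH64EB__ rev instructions simply give the same little-endian reading on big-endian builds. Written out in plain C, and assuming kernel-style u64 and get_unaligned_le64() purely for illustration, the clamp is:

        u64 r0, r1;

        /* sketch of the key clamp done by the and instructions above */
        r0 = get_unaligned_le64(key + 0) & 0x0ffffffc0fffffffULL;
        r1 = get_unaligned_le64(key + 8) & 0x0ffffffc0ffffffcULL;

This is the standard Poly1305 clamp: the top four bits of every fourth key byte and the low two bits of bytes 4, 8 and 12 are forced to zero before r is used as the multiplier.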
ldp x7,x8,[x0,#32] // load key value + +#ifdef __AARCH64EB__ + lsr x12,x4,#32 + mov w13,w4 + lsr x14,x5,#32 + mov w15,w5 + lsr x16,x6,#32 +#else + mov w12,w4 + lsr x13,x4,#32 + mov w14,w5 + lsr x15,x5,#32 + mov w16,w6 +#endif + + add x12,x12,x13,lsl#26 // base 2^26 -> base 2^64 + lsr x13,x14,#12 + adds x12,x12,x14,lsl#52 + add x13,x13,x15,lsl#14 + adc x13,x13,xzr + lsr x14,x16,#24 + adds x13,x13,x16,lsl#40 + adc x14,x14,xzr + + cmp x17,#0 // is_base2_26? + add x9,x8,x8,lsr#2 // s1 = r1 + (r1 >> 2) + csel x4,x4,x12,eq // choose between radixes + csel x5,x5,x13,eq + csel x6,x6,x14,eq + +.Loop: + ldp x10,x11,[x1],#16 // load input + sub x2,x2,#16 +#ifdef __AARCH64EB__ + rev x10,x10 + rev x11,x11 +#endif + adds x4,x4,x10 // accumulate input + adcs x5,x5,x11 + + mul x12,x4,x7 // h0*r0 + adc x6,x6,x3 + umulh x13,x4,x7 + + mul x10,x5,x9 // h1*5*r1 + umulh x11,x5,x9 + + adds x12,x12,x10 + mul x10,x4,x8 // h0*r1 + adc x13,x13,x11 + umulh x14,x4,x8 + + adds x13,x13,x10 + mul x10,x5,x7 // h1*r0 + adc x14,x14,xzr + umulh x11,x5,x7 + + adds x13,x13,x10 + mul x10,x6,x9 // h2*5*r1 + adc x14,x14,x11 + mul x11,x6,x7 // h2*r0 + + adds x13,x13,x10 + adc x14,x14,x11 + + and x10,x14,#-4 // final reduction + and x6,x14,#3 + add x10,x10,x14,lsr#2 + adds x4,x12,x10 + adcs x5,x13,xzr + adc x6,x6,xzr + + cbnz x2,.Loop + + stp x4,x5,[x0] // store hash value + stp x6,xzr,[x0,#16] // [and clear is_base2_26] + +.Lno_data: + ret +.size poly1305_blocks,.-poly1305_blocks + +.type poly1305_emit,%function +.align 5 +poly1305_emit: +.Lpoly1305_emit: + ldp x4,x5,[x0] // load hash base 2^64 + ldp x6,x7,[x0,#16] // [along with is_base2_26] + ldp x10,x11,[x2] // load nonce + +#ifdef __AARCH64EB__ + lsr x12,x4,#32 + mov w13,w4 + lsr x14,x5,#32 + mov w15,w5 + lsr x16,x6,#32 +#else + mov w12,w4 + lsr x13,x4,#32 + mov w14,w5 + lsr x15,x5,#32 + mov w16,w6 +#endif + + add x12,x12,x13,lsl#26 // base 2^26 -> base 2^64 + lsr x13,x14,#12 + adds x12,x12,x14,lsl#52 + add x13,x13,x15,lsl#14 + adc x13,x13,xzr + lsr x14,x16,#24 + adds x13,x13,x16,lsl#40 + adc x14,x14,xzr + + cmp x7,#0 // is_base2_26? 
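Stepping back to the mul/umulh ladder in the .Loop above (and in poly1305_mult further down): it computes h = (h * r) mod 2^130 - 5, with the r1 column pre-scaled as s1 = r1 + (r1 >> 2) and the bits at and above 2^130 folded back in via 2^130 == 5. A minimal C sketch of the same arithmetic, assuming __int128 support and kernel-style u64; the helper name is made up and not part of the patch:

        typedef unsigned __int128 u128;         /* assumption: __int128 is available */

        /* illustrative only: h[0..2] is the accumulator in radix 2^64 (h[2] small),
         * r0:r1 the clamped key, s1 = r1 + (r1 >> 2) */
        static void poly1305_mult_sketch(u64 h[3], u64 r0, u64 r1, u64 s1)
        {
                u128 d0 = (u128)h[0] * r0 + (u128)h[1] * s1;       /* h0*r0 + h1*5*r1 */
                u128 d1 = (u128)h[0] * r1 + (u128)h[1] * r0 +
                          (u128)h[2] * s1 + (u64)(d0 >> 64);       /* ... + h2*5*r1   */
                u64  d2 = h[2] * r0 + (u64)(d1 >> 64);             /* h2*r0 is small  */
                u64  c  = (d2 & ~3ULL) + (d2 >> 2);                /* 5 * (d2 >> 2)   */

                h[0] = (u64)d0 + c;                                /* fold 2^130 == 5 */
                h[1] = (u64)d1 + (h[0] < c);                       /* propagate carry */
                h[2] = (d2 & 3) + (h[1] < (u64)d1);
        }

The assembly keeps the same three 64-bit columns and performs the identical "final reduction" with the and/add/adds/adcs/adc group; the result is only partially reduced, which is all the following iterations need.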
+ csel x4,x4,x12,eq // choose between radixes + csel x5,x5,x13,eq + csel x6,x6,x14,eq + + adds x12,x4,#5 // compare to modulus + adcs x13,x5,xzr + adc x14,x6,xzr + + tst x14,#-4 // see if it's carried/borrowed + + csel x4,x4,x12,eq + csel x5,x5,x13,eq + +#ifdef __AARCH64EB__ + ror x10,x10,#32 // flip nonce words + ror x11,x11,#32 +#endif + adds x4,x4,x10 // accumulate nonce + adc x5,x5,x11 +#ifdef __AARCH64EB__ + rev x4,x4 // flip output bytes + rev x5,x5 +#endif + stp x4,x5,[x1] // write result + + ret +.size poly1305_emit,.-poly1305_emit +.type poly1305_mult,%function +.align 5 +poly1305_mult: + mul x12,x4,x7 // h0*r0 + umulh x13,x4,x7 + + mul x10,x5,x9 // h1*5*r1 + umulh x11,x5,x9 + + adds x12,x12,x10 + mul x10,x4,x8 // h0*r1 + adc x13,x13,x11 + umulh x14,x4,x8 + + adds x13,x13,x10 + mul x10,x5,x7 // h1*r0 + adc x14,x14,xzr + umulh x11,x5,x7 + + adds x13,x13,x10 + mul x10,x6,x9 // h2*5*r1 + adc x14,x14,x11 + mul x11,x6,x7 // h2*r0 + + adds x13,x13,x10 + adc x14,x14,x11 + + and x10,x14,#-4 // final reduction + and x6,x14,#3 + add x10,x10,x14,lsr#2 + adds x4,x12,x10 + adcs x5,x13,xzr + adc x6,x6,xzr + + ret +.size poly1305_mult,.-poly1305_mult + +.type poly1305_splat,%function +.align 4 +poly1305_splat: + and x12,x4,#0x03ffffff // base 2^64 -> base 2^26 + ubfx x13,x4,#26,#26 + extr x14,x5,x4,#52 + and x14,x14,#0x03ffffff + ubfx x15,x5,#14,#26 + extr x16,x6,x5,#40 + + str w12,[x0,#16*0] // r0 + add w12,w13,w13,lsl#2 // r1*5 + str w13,[x0,#16*1] // r1 + add w13,w14,w14,lsl#2 // r2*5 + str w12,[x0,#16*2] // s1 + str w14,[x0,#16*3] // r2 + add w14,w15,w15,lsl#2 // r3*5 + str w13,[x0,#16*4] // s2 + str w15,[x0,#16*5] // r3 + add w15,w16,w16,lsl#2 // r4*5 + str w14,[x0,#16*6] // s3 + str w16,[x0,#16*7] // r4 + str w15,[x0,#16*8] // s4 + + ret +.size poly1305_splat,.-poly1305_splat + +#ifdef __KERNEL__ +.globl poly1305_blocks_neon +#endif +.type poly1305_blocks_neon,%function +.align 5 +poly1305_blocks_neon: +.Lpoly1305_blocks_neon: + ldr x17,[x0,#24] + cmp x2,#128 + b.lo .Lpoly1305_blocks + + .inst 0xd503233f // paciasp + stp x29,x30,[sp,#-80]! + add x29,sp,#0 + + stp d8,d9,[sp,#16] // meet ABI requirements + stp d10,d11,[sp,#32] + stp d12,d13,[sp,#48] + stp d14,d15,[sp,#64] + + cbz x17,.Lbase2_64_neon + + ldp w10,w11,[x0] // load hash value base 2^26 + ldp w12,w13,[x0,#8] + ldr w14,[x0,#16] + + tst x2,#31 + b.eq .Leven_neon + + ldp x7,x8,[x0,#32] // load key value + + add x4,x10,x11,lsl#26 // base 2^26 -> base 2^64 + lsr x5,x12,#12 + adds x4,x4,x12,lsl#52 + add x5,x5,x13,lsl#14 + adc x5,x5,xzr + lsr x6,x14,#24 + adds x5,x5,x14,lsl#40 + adc x14,x6,xzr // can be partially reduced... 
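poly1305_splat above packs the accumulator into five 26-bit limbs (along with the *5 multiples the NEON code consumes), and the add/lsr/adds/adc run just shown performs the reverse conversion when re-entering the scalar path. A C sketch of both directions follows; the helper names are illustrative and kernel-style u32/u64 types are assumed:

        /* radix 2^64 -> 2^26, as the and/ubfx/extr sequence in poly1305_splat */
        static void radix64_to_26(u64 h0, u64 h1, u64 h2, u32 r[5])
        {
                r[0] = h0 & 0x03ffffff;                          /* bits   0..25  */
                r[1] = (h0 >> 26) & 0x03ffffff;                  /* bits  26..51  */
                r[2] = ((h0 >> 52) | (h1 << 12)) & 0x03ffffff;   /* bits  52..77  */
                r[3] = (h1 >> 14) & 0x03ffffff;                  /* bits  78..103 */
                r[4] = (u32)((h1 >> 40) | (h2 << 24));           /* bits 104..129 */
        }

        /* radix 2^26 -> 2^64, as the add/lsr/adds/adc run just above; the asm
         * propagates carries because limbs written back by the NEON code may
         * exceed 26 bits ("can be partially reduced") */
        static void radix26_to_64(const u32 r[5], u64 *h0, u64 *h1, u64 *h2)
        {
                *h0 = (u64)r[0] + ((u64)r[1] << 26) + ((u64)r[2] << 52);
                *h1 = ((u64)r[2] >> 12) + ((u64)r[3] << 14) + ((u64)r[4] << 40);
                *h2 = (u64)r[4] >> 24;
                /* the carries of the two additions above feed *h1 and *h2 in the asm */
        }

poly1305_splat also stores r_i*5 next to each limb (the add wN,wM,wM,lsl#2 instructions), so the vector loop never has to rescale on the fly.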
+ + ldp x12,x13,[x1],#16 // load input + sub x2,x2,#16 + add x9,x8,x8,lsr#2 // s1 = r1 + (r1 >> 2) + +#ifdef __AARCH64EB__ + rev x12,x12 + rev x13,x13 +#endif + adds x4,x4,x12 // accumulate input + adcs x5,x5,x13 + adc x6,x6,x3 + + bl poly1305_mult + + and x10,x4,#0x03ffffff // base 2^64 -> base 2^26 + ubfx x11,x4,#26,#26 + extr x12,x5,x4,#52 + and x12,x12,#0x03ffffff + ubfx x13,x5,#14,#26 + extr x14,x6,x5,#40 + + b .Leven_neon + +.align 4 +.Lbase2_64_neon: + ldp x7,x8,[x0,#32] // load key value + + ldp x4,x5,[x0] // load hash value base 2^64 + ldr x6,[x0,#16] + + tst x2,#31 + b.eq .Linit_neon + + ldp x12,x13,[x1],#16 // load input + sub x2,x2,#16 + add x9,x8,x8,lsr#2 // s1 = r1 + (r1 >> 2) +#ifdef __AARCH64EB__ + rev x12,x12 + rev x13,x13 +#endif + adds x4,x4,x12 // accumulate input + adcs x5,x5,x13 + adc x6,x6,x3 + + bl poly1305_mult + +.Linit_neon: + ldr w17,[x0,#48] // first table element + and x10,x4,#0x03ffffff // base 2^64 -> base 2^26 + ubfx x11,x4,#26,#26 + extr x12,x5,x4,#52 + and x12,x12,#0x03ffffff + ubfx x13,x5,#14,#26 + extr x14,x6,x5,#40 + + cmp w17,#-1 // is value impossible? + b.ne .Leven_neon + + fmov d24,x10 + fmov d25,x11 + fmov d26,x12 + fmov d27,x13 + fmov d28,x14 + + ////////////////////////////////// initialize r^n table + mov x4,x7 // r^1 + add x9,x8,x8,lsr#2 // s1 = r1 + (r1 >> 2) + mov x5,x8 + mov x6,xzr + add x0,x0,#48+12 + bl poly1305_splat + + bl poly1305_mult // r^2 + sub x0,x0,#4 + bl poly1305_splat + + bl poly1305_mult // r^3 + sub x0,x0,#4 + bl poly1305_splat + + bl poly1305_mult // r^4 + sub x0,x0,#4 + bl poly1305_splat + sub x0,x0,#48 // restore original x0 + b .Ldo_neon + +.align 4 +.Leven_neon: + fmov d24,x10 + fmov d25,x11 + fmov d26,x12 + fmov d27,x13 + fmov d28,x14 + +.Ldo_neon: + ldp x8,x12,[x1,#32] // inp[2:3] + subs x2,x2,#64 + ldp x9,x13,[x1,#48] + add x16,x1,#96 + adr x17,.Lzeros + + lsl x3,x3,#24 + add x15,x0,#48 + +#ifdef __AARCH64EB__ + rev x8,x8 + rev x12,x12 + rev x9,x9 + rev x13,x13 +#endif + and x4,x8,#0x03ffffff // base 2^64 -> base 2^26 + and x5,x9,#0x03ffffff + ubfx x6,x8,#26,#26 + ubfx x7,x9,#26,#26 + add x4,x4,x5,lsl#32 // bfi x4,x5,#32,#32 + extr x8,x12,x8,#52 + extr x9,x13,x9,#52 + add x6,x6,x7,lsl#32 // bfi x6,x7,#32,#32 + fmov d14,x4 + and x8,x8,#0x03ffffff + and x9,x9,#0x03ffffff + ubfx x10,x12,#14,#26 + ubfx x11,x13,#14,#26 + add x12,x3,x12,lsr#40 + add x13,x3,x13,lsr#40 + add x8,x8,x9,lsl#32 // bfi x8,x9,#32,#32 + fmov d15,x6 + add x10,x10,x11,lsl#32 // bfi x10,x11,#32,#32 + add x12,x12,x13,lsl#32 // bfi x12,x13,#32,#32 + fmov d16,x8 + fmov d17,x10 + fmov d18,x12 + + ldp x8,x12,[x1],#16 // inp[0:1] + ldp x9,x13,[x1],#48 + + ld1 {v0.4s,v1.4s,v2.4s,v3.4s},[x15],#64 + ld1 {v4.4s,v5.4s,v6.4s,v7.4s},[x15],#64 + ld1 {v8.4s},[x15] + +#ifdef __AARCH64EB__ + rev x8,x8 + rev x12,x12 + rev x9,x9 + rev x13,x13 +#endif + and x4,x8,#0x03ffffff // base 2^64 -> base 2^26 + and x5,x9,#0x03ffffff + ubfx x6,x8,#26,#26 + ubfx x7,x9,#26,#26 + add x4,x4,x5,lsl#32 // bfi x4,x5,#32,#32 + extr x8,x12,x8,#52 + extr x9,x13,x9,#52 + add x6,x6,x7,lsl#32 // bfi x6,x7,#32,#32 + fmov d9,x4 + and x8,x8,#0x03ffffff + and x9,x9,#0x03ffffff + ubfx x10,x12,#14,#26 + ubfx x11,x13,#14,#26 + add x12,x3,x12,lsr#40 + add x13,x3,x13,lsr#40 + add x8,x8,x9,lsl#32 // bfi x8,x9,#32,#32 + fmov d10,x6 + add x10,x10,x11,lsl#32 // bfi x10,x11,#32,#32 + add x12,x12,x13,lsl#32 // bfi x12,x13,#32,#32 + movi v31.2d,#-1 + fmov d11,x8 + fmov d12,x10 + fmov d13,x12 + ushr v31.2d,v31.2d,#38 + + b.ls .Lskip_loop + +.align 4 +.Loop_neon: + 
//////////////////////////////////////////////////////////////// + // ((inp[0]*r^4+inp[2]*r^2+inp[4])*r^4+inp[6]*r^2 + // ((inp[1]*r^4+inp[3]*r^2+inp[5])*r^3+inp[7]*r + // ___________________/ + // ((inp[0]*r^4+inp[2]*r^2+inp[4])*r^4+inp[6]*r^2+inp[8])*r^2 + // ((inp[1]*r^4+inp[3]*r^2+inp[5])*r^4+inp[7]*r^2+inp[9])*r + // ___________________/ ____________________/ + // + // Note that we start with inp[2:3]*r^2. This is because it + // doesn't depend on reduction in previous iteration. + //////////////////////////////////////////////////////////////// + // d4 = h0*r4 + h1*r3 + h2*r2 + h3*r1 + h4*r0 + // d3 = h0*r3 + h1*r2 + h2*r1 + h3*r0 + h4*5*r4 + // d2 = h0*r2 + h1*r1 + h2*r0 + h3*5*r4 + h4*5*r3 + // d1 = h0*r1 + h1*r0 + h2*5*r4 + h3*5*r3 + h4*5*r2 + // d0 = h0*r0 + h1*5*r4 + h2*5*r3 + h3*5*r2 + h4*5*r1 + + subs x2,x2,#64 + umull v23.2d,v14.2s,v7.s[2] + csel x16,x17,x16,lo + umull v22.2d,v14.2s,v5.s[2] + umull v21.2d,v14.2s,v3.s[2] + ldp x8,x12,[x16],#16 // inp[2:3] (or zero) + umull v20.2d,v14.2s,v1.s[2] + ldp x9,x13,[x16],#48 + umull v19.2d,v14.2s,v0.s[2] +#ifdef __AARCH64EB__ + rev x8,x8 + rev x12,x12 + rev x9,x9 + rev x13,x13 +#endif + + umlal v23.2d,v15.2s,v5.s[2] + and x4,x8,#0x03ffffff // base 2^64 -> base 2^26 + umlal v22.2d,v15.2s,v3.s[2] + and x5,x9,#0x03ffffff + umlal v21.2d,v15.2s,v1.s[2] + ubfx x6,x8,#26,#26 + umlal v20.2d,v15.2s,v0.s[2] + ubfx x7,x9,#26,#26 + umlal v19.2d,v15.2s,v8.s[2] + add x4,x4,x5,lsl#32 // bfi x4,x5,#32,#32 + + umlal v23.2d,v16.2s,v3.s[2] + extr x8,x12,x8,#52 + umlal v22.2d,v16.2s,v1.s[2] + extr x9,x13,x9,#52 + umlal v21.2d,v16.2s,v0.s[2] + add x6,x6,x7,lsl#32 // bfi x6,x7,#32,#32 + umlal v20.2d,v16.2s,v8.s[2] + fmov d14,x4 + umlal v19.2d,v16.2s,v6.s[2] + and x8,x8,#0x03ffffff + + umlal v23.2d,v17.2s,v1.s[2] + and x9,x9,#0x03ffffff + umlal v22.2d,v17.2s,v0.s[2] + ubfx x10,x12,#14,#26 + umlal v21.2d,v17.2s,v8.s[2] + ubfx x11,x13,#14,#26 + umlal v20.2d,v17.2s,v6.s[2] + add x8,x8,x9,lsl#32 // bfi x8,x9,#32,#32 + umlal v19.2d,v17.2s,v4.s[2] + fmov d15,x6 + + add v11.2s,v11.2s,v26.2s + add x12,x3,x12,lsr#40 + umlal v23.2d,v18.2s,v0.s[2] + add x13,x3,x13,lsr#40 + umlal v22.2d,v18.2s,v8.s[2] + add x10,x10,x11,lsl#32 // bfi x10,x11,#32,#32 + umlal v21.2d,v18.2s,v6.s[2] + add x12,x12,x13,lsl#32 // bfi x12,x13,#32,#32 + umlal v20.2d,v18.2s,v4.s[2] + fmov d16,x8 + umlal v19.2d,v18.2s,v2.s[2] + fmov d17,x10 + + //////////////////////////////////////////////////////////////// + // (hash+inp[0:1])*r^4 and accumulate + + add v9.2s,v9.2s,v24.2s + fmov d18,x12 + umlal v22.2d,v11.2s,v1.s[0] + ldp x8,x12,[x1],#16 // inp[0:1] + umlal v19.2d,v11.2s,v6.s[0] + ldp x9,x13,[x1],#48 + umlal v23.2d,v11.2s,v3.s[0] + umlal v20.2d,v11.2s,v8.s[0] + umlal v21.2d,v11.2s,v0.s[0] +#ifdef __AARCH64EB__ + rev x8,x8 + rev x12,x12 + rev x9,x9 + rev x13,x13 +#endif + + add v10.2s,v10.2s,v25.2s + umlal v22.2d,v9.2s,v5.s[0] + umlal v23.2d,v9.2s,v7.s[0] + and x4,x8,#0x03ffffff // base 2^64 -> base 2^26 + umlal v21.2d,v9.2s,v3.s[0] + and x5,x9,#0x03ffffff + umlal v19.2d,v9.2s,v0.s[0] + ubfx x6,x8,#26,#26 + umlal v20.2d,v9.2s,v1.s[0] + ubfx x7,x9,#26,#26 + + add v12.2s,v12.2s,v27.2s + add x4,x4,x5,lsl#32 // bfi x4,x5,#32,#32 + umlal v22.2d,v10.2s,v3.s[0] + extr x8,x12,x8,#52 + umlal v23.2d,v10.2s,v5.s[0] + extr x9,x13,x9,#52 + umlal v19.2d,v10.2s,v8.s[0] + add x6,x6,x7,lsl#32 // bfi x6,x7,#32,#32 + umlal v21.2d,v10.2s,v1.s[0] + fmov d9,x4 + umlal v20.2d,v10.2s,v0.s[0] + and x8,x8,#0x03ffffff + + add v13.2s,v13.2s,v28.2s + and x9,x9,#0x03ffffff + umlal v22.2d,v12.2s,v0.s[0] + ubfx x10,x12,#14,#26 
+ umlal v19.2d,v12.2s,v4.s[0] + ubfx x11,x13,#14,#26 + umlal v23.2d,v12.2s,v1.s[0] + add x8,x8,x9,lsl#32 // bfi x8,x9,#32,#32 + umlal v20.2d,v12.2s,v6.s[0] + fmov d10,x6 + umlal v21.2d,v12.2s,v8.s[0] + add x12,x3,x12,lsr#40 + + umlal v22.2d,v13.2s,v8.s[0] + add x13,x3,x13,lsr#40 + umlal v19.2d,v13.2s,v2.s[0] + add x10,x10,x11,lsl#32 // bfi x10,x11,#32,#32 + umlal v23.2d,v13.2s,v0.s[0] + add x12,x12,x13,lsl#32 // bfi x12,x13,#32,#32 + umlal v20.2d,v13.2s,v4.s[0] + fmov d11,x8 + umlal v21.2d,v13.2s,v6.s[0] + fmov d12,x10 + fmov d13,x12 + + ///////////////////////////////////////////////////////////////// + // lazy reduction as discussed in "NEON crypto" by D.J. Bernstein + // and P. Schwabe + // + // [see discussion in poly1305-armv4 module] + + ushr v29.2d,v22.2d,#26 + xtn v27.2s,v22.2d + ushr v30.2d,v19.2d,#26 + and v19.16b,v19.16b,v31.16b + add v23.2d,v23.2d,v29.2d // h3 -> h4 + bic v27.2s,#0xfc,lsl#24 // &=0x03ffffff + add v20.2d,v20.2d,v30.2d // h0 -> h1 + + ushr v29.2d,v23.2d,#26 + xtn v28.2s,v23.2d + ushr v30.2d,v20.2d,#26 + xtn v25.2s,v20.2d + bic v28.2s,#0xfc,lsl#24 + add v21.2d,v21.2d,v30.2d // h1 -> h2 + + add v19.2d,v19.2d,v29.2d + shl v29.2d,v29.2d,#2 + shrn v30.2s,v21.2d,#26 + xtn v26.2s,v21.2d + add v19.2d,v19.2d,v29.2d // h4 -> h0 + bic v25.2s,#0xfc,lsl#24 + add v27.2s,v27.2s,v30.2s // h2 -> h3 + bic v26.2s,#0xfc,lsl#24 + + shrn v29.2s,v19.2d,#26 + xtn v24.2s,v19.2d + ushr v30.2s,v27.2s,#26 + bic v27.2s,#0xfc,lsl#24 + bic v24.2s,#0xfc,lsl#24 + add v25.2s,v25.2s,v29.2s // h0 -> h1 + add v28.2s,v28.2s,v30.2s // h3 -> h4 + + b.hi .Loop_neon + +.Lskip_loop: + dup v16.2d,v16.d[0] + add v11.2s,v11.2s,v26.2s + + //////////////////////////////////////////////////////////////// + // multiply (inp[0:1]+hash) or inp[2:3] by r^2:r^1 + + adds x2,x2,#32 + b.ne .Long_tail + + dup v16.2d,v11.d[0] + add v14.2s,v9.2s,v24.2s + add v17.2s,v12.2s,v27.2s + add v15.2s,v10.2s,v25.2s + add v18.2s,v13.2s,v28.2s + +.Long_tail: + dup v14.2d,v14.d[0] + umull2 v19.2d,v16.4s,v6.4s + umull2 v22.2d,v16.4s,v1.4s + umull2 v23.2d,v16.4s,v3.4s + umull2 v21.2d,v16.4s,v0.4s + umull2 v20.2d,v16.4s,v8.4s + + dup v15.2d,v15.d[0] + umlal2 v19.2d,v14.4s,v0.4s + umlal2 v21.2d,v14.4s,v3.4s + umlal2 v22.2d,v14.4s,v5.4s + umlal2 v23.2d,v14.4s,v7.4s + umlal2 v20.2d,v14.4s,v1.4s + + dup v17.2d,v17.d[0] + umlal2 v19.2d,v15.4s,v8.4s + umlal2 v22.2d,v15.4s,v3.4s + umlal2 v21.2d,v15.4s,v1.4s + umlal2 v23.2d,v15.4s,v5.4s + umlal2 v20.2d,v15.4s,v0.4s + + dup v18.2d,v18.d[0] + umlal2 v22.2d,v17.4s,v0.4s + umlal2 v23.2d,v17.4s,v1.4s + umlal2 v19.2d,v17.4s,v4.4s + umlal2 v20.2d,v17.4s,v6.4s + umlal2 v21.2d,v17.4s,v8.4s + + umlal2 v22.2d,v18.4s,v8.4s + umlal2 v19.2d,v18.4s,v2.4s + umlal2 v23.2d,v18.4s,v0.4s + umlal2 v20.2d,v18.4s,v4.4s + umlal2 v21.2d,v18.4s,v6.4s + + b.eq .Lshort_tail + + //////////////////////////////////////////////////////////////// + // (hash+inp[0:1])*r^4:r^3 and accumulate + + add v9.2s,v9.2s,v24.2s + umlal v22.2d,v11.2s,v1.2s + umlal v19.2d,v11.2s,v6.2s + umlal v23.2d,v11.2s,v3.2s + umlal v20.2d,v11.2s,v8.2s + umlal v21.2d,v11.2s,v0.2s + + add v10.2s,v10.2s,v25.2s + umlal v22.2d,v9.2s,v5.2s + umlal v19.2d,v9.2s,v0.2s + umlal v23.2d,v9.2s,v7.2s + umlal v20.2d,v9.2s,v1.2s + umlal v21.2d,v9.2s,v3.2s + + add v12.2s,v12.2s,v27.2s + umlal v22.2d,v10.2s,v3.2s + umlal v19.2d,v10.2s,v8.2s + umlal v23.2d,v10.2s,v5.2s + umlal v20.2d,v10.2s,v0.2s + umlal v21.2d,v10.2s,v1.2s + + add v13.2s,v13.2s,v28.2s + umlal v22.2d,v12.2s,v0.2s + umlal v19.2d,v12.2s,v4.2s + umlal v23.2d,v12.2s,v1.2s + umlal v20.2d,v12.2s,v6.2s + umlal 
v21.2d,v12.2s,v8.2s + + umlal v22.2d,v13.2s,v8.2s + umlal v19.2d,v13.2s,v2.2s + umlal v23.2d,v13.2s,v0.2s + umlal v20.2d,v13.2s,v4.2s + umlal v21.2d,v13.2s,v6.2s + +.Lshort_tail: + //////////////////////////////////////////////////////////////// + // horizontal add + + addp v22.2d,v22.2d,v22.2d + ldp d8,d9,[sp,#16] // meet ABI requirements + addp v19.2d,v19.2d,v19.2d + ldp d10,d11,[sp,#32] + addp v23.2d,v23.2d,v23.2d + ldp d12,d13,[sp,#48] + addp v20.2d,v20.2d,v20.2d + ldp d14,d15,[sp,#64] + addp v21.2d,v21.2d,v21.2d + ldr x30,[sp,#8] + .inst 0xd50323bf // autiasp + + //////////////////////////////////////////////////////////////// + // lazy reduction, but without narrowing + + ushr v29.2d,v22.2d,#26 + and v22.16b,v22.16b,v31.16b + ushr v30.2d,v19.2d,#26 + and v19.16b,v19.16b,v31.16b + + add v23.2d,v23.2d,v29.2d // h3 -> h4 + add v20.2d,v20.2d,v30.2d // h0 -> h1 + + ushr v29.2d,v23.2d,#26 + and v23.16b,v23.16b,v31.16b + ushr v30.2d,v20.2d,#26 + and v20.16b,v20.16b,v31.16b + add v21.2d,v21.2d,v30.2d // h1 -> h2 + + add v19.2d,v19.2d,v29.2d + shl v29.2d,v29.2d,#2 + ushr v30.2d,v21.2d,#26 + and v21.16b,v21.16b,v31.16b + add v19.2d,v19.2d,v29.2d // h4 -> h0 + add v22.2d,v22.2d,v30.2d // h2 -> h3 + + ushr v29.2d,v19.2d,#26 + and v19.16b,v19.16b,v31.16b + ushr v30.2d,v22.2d,#26 + and v22.16b,v22.16b,v31.16b + add v20.2d,v20.2d,v29.2d // h0 -> h1 + add v23.2d,v23.2d,v30.2d // h3 -> h4 + + //////////////////////////////////////////////////////////////// + // write the result, can be partially reduced + + st4 {v19.s,v20.s,v21.s,v22.s}[0],[x0],#16 + mov x4,#1 + st1 {v23.s}[0],[x0] + str x4,[x0,#8] // set is_base2_26 + + ldr x29,[sp],#80 + ret +.size poly1305_blocks_neon,.-poly1305_blocks_neon + +.align 5 +.Lzeros: +.long 0,0,0,0,0,0,0,0 +.asciz "Poly1305 for ARMv8, CRYPTOGAMS by @dot-asm" +.align 2 +#if !defined(__KERNEL__) && !defined(_WIN64) +.comm OPENSSL_armcap_P,4,4 +.hidden OPENSSL_armcap_P +#endif diff --git a/arch/arm64/crypto/poly1305-glue.c b/arch/arm64/crypto/poly1305-glue.c new file mode 100644 index 000000000000..dd843d0ee83a --- /dev/null +++ b/arch/arm64/crypto/poly1305-glue.c @@ -0,0 +1,237 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * OpenSSL/Cryptogams accelerated Poly1305 transform for arm64 + * + * Copyright (C) 2019 Linaro Ltd. 
<ard.biesheuvel@linaro.org> + */ + +#include <asm/hwcap.h> +#include <asm/neon.h> +#include <asm/simd.h> +#include <asm/unaligned.h> +#include <crypto/algapi.h> +#include <crypto/internal/hash.h> +#include <crypto/internal/poly1305.h> +#include <crypto/internal/simd.h> +#include <linux/cpufeature.h> +#include <linux/crypto.h> +#include <linux/jump_label.h> +#include <linux/module.h> + +asmlinkage void poly1305_init_arm64(void *state, const u8 *key); +asmlinkage void poly1305_blocks(void *state, const u8 *src, u32 len, u32 hibit); +asmlinkage void poly1305_blocks_neon(void *state, const u8 *src, u32 len, u32 hibit); +asmlinkage void poly1305_emit(void *state, __le32 *digest, const u32 *nonce); + +static __ro_after_init DEFINE_STATIC_KEY_FALSE(have_neon); + +void poly1305_init_arch(struct poly1305_desc_ctx *dctx, const u8 *key) +{ + poly1305_init_arm64(&dctx->h, key); + dctx->s[0] = get_unaligned_le32(key + 16); + dctx->s[1] = get_unaligned_le32(key + 20); + dctx->s[2] = get_unaligned_le32(key + 24); + dctx->s[3] = get_unaligned_le32(key + 28); + dctx->buflen = 0; +} +EXPORT_SYMBOL(poly1305_init_arch); + +static int neon_poly1305_init(struct shash_desc *desc) +{ + struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc); + + dctx->buflen = 0; + dctx->rset = 0; + dctx->sset = false; + + return 0; +} + +static void neon_poly1305_blocks(struct poly1305_desc_ctx *dctx, const u8 *src, + u32 len, u32 hibit, bool do_neon) +{ + if (unlikely(!dctx->sset)) { + if (!dctx->rset) { + poly1305_init_arch(dctx, src); + src += POLY1305_BLOCK_SIZE; + len -= POLY1305_BLOCK_SIZE; + dctx->rset = 1; + } + if (len >= POLY1305_BLOCK_SIZE) { + dctx->s[0] = get_unaligned_le32(src + 0); + dctx->s[1] = get_unaligned_le32(src + 4); + dctx->s[2] = get_unaligned_le32(src + 8); + dctx->s[3] = get_unaligned_le32(src + 12); + src += POLY1305_BLOCK_SIZE; + len -= POLY1305_BLOCK_SIZE; + dctx->sset = true; + } + if (len < POLY1305_BLOCK_SIZE) + return; + } + + len &= ~(POLY1305_BLOCK_SIZE - 1); + + if (static_branch_likely(&have_neon) && likely(do_neon)) + poly1305_blocks_neon(&dctx->h, src, len, hibit); + else + poly1305_blocks(&dctx->h, src, len, hibit); +} + +static void neon_poly1305_do_update(struct poly1305_desc_ctx *dctx, + const u8 *src, u32 len, bool do_neon) +{ + if (unlikely(dctx->buflen)) { + u32 bytes = min(len, POLY1305_BLOCK_SIZE - dctx->buflen); + + memcpy(dctx->buf + dctx->buflen, src, bytes); + src += bytes; + len -= bytes; + dctx->buflen += bytes; + + if (dctx->buflen == POLY1305_BLOCK_SIZE) { + neon_poly1305_blocks(dctx, dctx->buf, + POLY1305_BLOCK_SIZE, 1, false); + dctx->buflen = 0; + } + } + + if (likely(len >= POLY1305_BLOCK_SIZE)) { + neon_poly1305_blocks(dctx, src, len, 1, do_neon); + src += round_down(len, POLY1305_BLOCK_SIZE); + len %= POLY1305_BLOCK_SIZE; + } + + if (unlikely(len)) { + dctx->buflen = len; + memcpy(dctx->buf, src, len); + } +} + +static int neon_poly1305_update(struct shash_desc *desc, + const u8 *src, unsigned int srclen) +{ + bool do_neon = crypto_simd_usable() && srclen > 128; + struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc); + + if (static_branch_likely(&have_neon) && do_neon) + kernel_neon_begin(); + neon_poly1305_do_update(dctx, src, srclen, do_neon); + if (static_branch_likely(&have_neon) && do_neon) + kernel_neon_end(); + return 0; +} + +void poly1305_update_arch(struct poly1305_desc_ctx *dctx, const u8 *src, + unsigned int nbytes) +{ + if (unlikely(dctx->buflen)) { + u32 bytes = min(nbytes, POLY1305_BLOCK_SIZE - dctx->buflen); + + memcpy(dctx->buf + dctx->buflen, src, 
bytes); + src += bytes; + nbytes -= bytes; + dctx->buflen += bytes; + + if (dctx->buflen == POLY1305_BLOCK_SIZE) { + poly1305_blocks(&dctx->h, dctx->buf, POLY1305_BLOCK_SIZE, 1); + dctx->buflen = 0; + } + } + + if (likely(nbytes >= POLY1305_BLOCK_SIZE)) { + unsigned int len = round_down(nbytes, POLY1305_BLOCK_SIZE); + + if (static_branch_likely(&have_neon) && crypto_simd_usable()) { + kernel_neon_begin(); + poly1305_blocks_neon(&dctx->h, src, len, 1); + kernel_neon_end(); + } else { + poly1305_blocks(&dctx->h, src, len, 1); + } + src += len; + nbytes %= POLY1305_BLOCK_SIZE; + } + + if (unlikely(nbytes)) { + dctx->buflen = nbytes; + memcpy(dctx->buf, src, nbytes); + } +} +EXPORT_SYMBOL(poly1305_update_arch); + +void poly1305_final_arch(struct poly1305_desc_ctx *dctx, u8 *dst) +{ + __le32 digest[4]; + u64 f = 0; + + if (unlikely(dctx->buflen)) { + dctx->buf[dctx->buflen++] = 1; + memset(dctx->buf + dctx->buflen, 0, + POLY1305_BLOCK_SIZE - dctx->buflen); + poly1305_blocks(&dctx->h, dctx->buf, POLY1305_BLOCK_SIZE, 0); + } + + poly1305_emit(&dctx->h, digest, dctx->s); + + /* mac = (h + s) % (2^128) */ + f = (f >> 32) + le32_to_cpu(digest[0]); + put_unaligned_le32(f, dst); + f = (f >> 32) + le32_to_cpu(digest[1]); + put_unaligned_le32(f, dst + 4); + f = (f >> 32) + le32_to_cpu(digest[2]); + put_unaligned_le32(f, dst + 8); + f = (f >> 32) + le32_to_cpu(digest[3]); + put_unaligned_le32(f, dst + 12); + + *dctx = (struct poly1305_desc_ctx){}; +} +EXPORT_SYMBOL(poly1305_final_arch); + +static int neon_poly1305_final(struct shash_desc *desc, u8 *dst) +{ + struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc); + + if (unlikely(!dctx->sset)) + return -ENOKEY; + + poly1305_final_arch(dctx, dst); + return 0; +} + +static struct shash_alg neon_poly1305_alg = { + .init = neon_poly1305_init, + .update = neon_poly1305_update, + .final = neon_poly1305_final, + .digestsize = POLY1305_DIGEST_SIZE, + .descsize = sizeof(struct poly1305_desc_ctx), + + .base.cra_name = "poly1305", + .base.cra_driver_name = "poly1305-neon", + .base.cra_priority = 200, + .base.cra_blocksize = POLY1305_BLOCK_SIZE, + .base.cra_module = THIS_MODULE, +}; + +static int __init neon_poly1305_mod_init(void) +{ + if (!cpu_have_named_feature(ASIMD)) + return 0; + + static_branch_enable(&have_neon); + + return crypto_register_shash(&neon_poly1305_alg); +} + +static void __exit neon_poly1305_mod_exit(void) +{ + if (cpu_have_named_feature(ASIMD)) + crypto_unregister_shash(&neon_poly1305_alg); +} + +module_init(neon_poly1305_mod_init); +module_exit(neon_poly1305_mod_exit); + +MODULE_LICENSE("GPL v2"); +MODULE_ALIAS_CRYPTO("poly1305"); +MODULE_ALIAS_CRYPTO("poly1305-neon"); diff --git a/arch/arm64/include/asm/asm-uaccess.h b/arch/arm64/include/asm/asm-uaccess.h index f74909ba29bd..f68a0e64482a 100644 --- a/arch/arm64/include/asm/asm-uaccess.h +++ b/arch/arm64/include/asm/asm-uaccess.h @@ -58,30 +58,4 @@ alternative_else_nop_endif .endm #endif -/* - * These macros are no-ops when UAO is present. - */ - .macro uaccess_disable_not_uao, tmp1, tmp2 - uaccess_ttbr0_disable \tmp1, \tmp2 -alternative_if ARM64_ALT_PAN_NOT_UAO - SET_PSTATE_PAN(1) -alternative_else_nop_endif - .endm - - .macro uaccess_enable_not_uao, tmp1, tmp2, tmp3 - uaccess_ttbr0_enable \tmp1, \tmp2, \tmp3 -alternative_if ARM64_ALT_PAN_NOT_UAO - SET_PSTATE_PAN(0) -alternative_else_nop_endif - .endm - -/* - * Remove the address tag from a virtual address, if present. 
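Returning briefly to the Poly1305 glue above: it exposes the accelerated transform both as a "poly1305-neon" shash and through the *_arch hooks of the kernel's Poly1305 library interface. Assuming the poly1305_init/update/final wrappers from <crypto/poly1305.h> introduced by this series (which dispatch to the arch implementations here), a one-shot MAC computation would look roughly like the sketch below; the function name is hypothetical:

        #include <crypto/poly1305.h>

        /* sketch only: compute a Poly1305 tag via the library interface */
        static void poly1305_mac_example(const u8 key[POLY1305_KEY_SIZE],
                                         const u8 *msg, unsigned int len,
                                         u8 mac[POLY1305_DIGEST_SIZE])
        {
                struct poly1305_desc_ctx ctx;

                poly1305_init(&ctx, key);       /* resolves to poly1305_init_arch() here */
                poly1305_update(&ctx, msg, len);
                poly1305_final(&ctx, mac);
        }

The shash path is reached through the normal crypto API instead, and only engages NEON for updates larger than 128 bytes when SIMD is usable.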
- */ - .macro clear_address_tag, dst, addr - tst \addr, #(1 << 55) - bic \dst, \addr, #(0xff << 56) - csel \dst, \dst, \addr, eq - .endm - #endif diff --git a/arch/arm64/include/asm/atomic_lse.h b/arch/arm64/include/asm/atomic_lse.h index c6bd87d2915b..574808b9df4c 100644 --- a/arch/arm64/include/asm/atomic_lse.h +++ b/arch/arm64/include/asm/atomic_lse.h @@ -321,7 +321,8 @@ static inline s64 __lse_atomic64_dec_if_positive(atomic64_t *v) } #define __CMPXCHG_CASE(w, sfx, name, sz, mb, cl...) \ -static inline u##sz __lse__cmpxchg_case_##name##sz(volatile void *ptr, \ +static __always_inline u##sz \ +__lse__cmpxchg_case_##name##sz(volatile void *ptr, \ u##sz old, \ u##sz new) \ { \ @@ -362,7 +363,8 @@ __CMPXCHG_CASE(x, , mb_, 64, al, "memory") #undef __CMPXCHG_CASE #define __CMPXCHG_DBL(name, mb, cl...) \ -static inline long __lse__cmpxchg_double##name(unsigned long old1, \ +static __always_inline long \ +__lse__cmpxchg_double##name(unsigned long old1, \ unsigned long old2, \ unsigned long new1, \ unsigned long new2, \ diff --git a/arch/arm64/include/asm/barrier.h b/arch/arm64/include/asm/barrier.h index e0e2b1946f42..7d9cc5ec4971 100644 --- a/arch/arm64/include/asm/barrier.h +++ b/arch/arm64/include/asm/barrier.h @@ -29,6 +29,18 @@ SB_BARRIER_INSN"nop\n", \ ARM64_HAS_SB)) +#ifdef CONFIG_ARM64_PSEUDO_NMI +#define pmr_sync() \ + do { \ + extern struct static_key_false gic_pmr_sync; \ + \ + if (static_branch_unlikely(&gic_pmr_sync)) \ + dsb(sy); \ + } while(0) +#else +#define pmr_sync() do {} while (0) +#endif + #define mb() dsb(sy) #define rmb() dsb(ld) #define wmb() dsb(st) diff --git a/arch/arm64/include/asm/cache.h b/arch/arm64/include/asm/cache.h index 43da6dd29592..806e9dc2a852 100644 --- a/arch/arm64/include/asm/cache.h +++ b/arch/arm64/include/asm/cache.h @@ -11,6 +11,7 @@ #define CTR_L1IP_MASK 3 #define CTR_DMINLINE_SHIFT 16 #define CTR_IMINLINE_SHIFT 0 +#define CTR_IMINLINE_MASK 0xf #define CTR_ERG_SHIFT 20 #define CTR_CWG_SHIFT 24 #define CTR_CWG_MASK 15 @@ -18,7 +19,7 @@ #define CTR_DIC_SHIFT 29 #define CTR_CACHE_MINLINE_MASK \ - (0xf << CTR_DMINLINE_SHIFT | 0xf << CTR_IMINLINE_SHIFT) + (0xf << CTR_DMINLINE_SHIFT | CTR_IMINLINE_MASK << CTR_IMINLINE_SHIFT) #define CTR_L1IP(ctr) (((ctr) >> CTR_L1IP_SHIFT) & CTR_L1IP_MASK) diff --git a/arch/arm64/include/asm/cpucaps.h b/arch/arm64/include/asm/cpucaps.h index f19fe4b9acc4..b92683871119 100644 --- a/arch/arm64/include/asm/cpucaps.h +++ b/arch/arm64/include/asm/cpucaps.h @@ -52,7 +52,11 @@ #define ARM64_HAS_IRQ_PRIO_MASKING 42 #define ARM64_HAS_DCPODP 43 #define ARM64_WORKAROUND_1463225 44 +#define ARM64_WORKAROUND_CAVIUM_TX2_219_TVM 45 +#define ARM64_WORKAROUND_CAVIUM_TX2_219_PRFM 46 +#define ARM64_WORKAROUND_1542419 47 +#define ARM64_WORKAROUND_1319367 48 -#define ARM64_NCAPS 45 +#define ARM64_NCAPS 49 #endif /* __ASM_CPUCAPS_H */ diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h index 9cde5d2e768f..4261d55e8506 100644 --- a/arch/arm64/include/asm/cpufeature.h +++ b/arch/arm64/include/asm/cpufeature.h @@ -659,6 +659,20 @@ static inline u32 id_aa64mmfr0_parange_to_phys_shift(int parange) default: return CONFIG_ARM64_PA_BITS; } } + +/* Check whether hardware update of the Access flag is supported */ +static inline bool cpu_has_hw_af(void) +{ + u64 mmfr1; + + if (!IS_ENABLED(CONFIG_ARM64_HW_AFDBM)) + return false; + + mmfr1 = read_cpuid(ID_AA64MMFR1_EL1); + return cpuid_feature_extract_unsigned_field(mmfr1, + ID_AA64MMFR1_HADBS_SHIFT); +} + #endif /* __ASSEMBLY__ */ #endif diff --git 
a/arch/arm64/include/asm/cputype.h b/arch/arm64/include/asm/cputype.h index b1454d117cd2..aca07c2f6e6e 100644 --- a/arch/arm64/include/asm/cputype.h +++ b/arch/arm64/include/asm/cputype.h @@ -79,6 +79,7 @@ #define CAVIUM_CPU_PART_THUNDERX_83XX 0x0A3 #define CAVIUM_CPU_PART_THUNDERX2 0x0AF +#define BRCM_CPU_PART_BRAHMA_B53 0x100 #define BRCM_CPU_PART_VULCAN 0x516 #define QCOM_CPU_PART_FALKOR_V1 0x800 @@ -105,6 +106,7 @@ #define MIDR_THUNDERX_81XX MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_THUNDERX_81XX) #define MIDR_THUNDERX_83XX MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_THUNDERX_83XX) #define MIDR_CAVIUM_THUNDERX2 MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_THUNDERX2) +#define MIDR_BRAHMA_B53 MIDR_CPU_MODEL(ARM_CPU_IMP_BRCM, BRCM_CPU_PART_BRAHMA_B53) #define MIDR_BRCM_VULCAN MIDR_CPU_MODEL(ARM_CPU_IMP_BRCM, BRCM_CPU_PART_VULCAN) #define MIDR_QCOM_FALKOR_V1 MIDR_CPU_MODEL(ARM_CPU_IMP_QCOM, QCOM_CPU_PART_FALKOR_V1) #define MIDR_QCOM_FALKOR MIDR_CPU_MODEL(ARM_CPU_IMP_QCOM, QCOM_CPU_PART_FALKOR) diff --git a/arch/arm64/include/asm/daifflags.h b/arch/arm64/include/asm/daifflags.h index 063c964af705..72acd2db167f 100644 --- a/arch/arm64/include/asm/daifflags.h +++ b/arch/arm64/include/asm/daifflags.h @@ -8,7 +8,9 @@ #include <linux/irqflags.h> #include <asm/arch_gicv3.h> +#include <asm/barrier.h> #include <asm/cpufeature.h> +#include <asm/ptrace.h> #define DAIF_PROCCTX 0 #define DAIF_PROCCTX_NOIRQ PSR_I_BIT @@ -65,7 +67,7 @@ static inline void local_daif_restore(unsigned long flags) if (system_uses_irq_prio_masking()) { gic_write_pmr(GIC_PRIO_IRQON); - dsb(sy); + pmr_sync(); } } else if (system_uses_irq_prio_masking()) { u64 pmr; @@ -109,4 +111,19 @@ static inline void local_daif_restore(unsigned long flags) trace_hardirqs_off(); } +/* + * Called by synchronous exception handlers to restore the DAIF bits that were + * modified by taking an exception. + */ +static inline void local_daif_inherit(struct pt_regs *regs) +{ + unsigned long flags = regs->pstate & DAIF_MASK; + + /* + * We can't use local_daif_restore(regs->pstate) here as + * system_has_prio_mask_debugging() won't restore the I bit if it can + * use the pmr instead. 
+ */ + write_sysreg(flags, daif); +} #endif diff --git a/arch/arm64/include/asm/exception.h b/arch/arm64/include/asm/exception.h index a17393ff6677..4d5f3b5f50cd 100644 --- a/arch/arm64/include/asm/exception.h +++ b/arch/arm64/include/asm/exception.h @@ -8,14 +8,15 @@ #define __ASM_EXCEPTION_H #include <asm/esr.h> +#include <asm/kprobes.h> +#include <asm/ptrace.h> #include <linux/interrupt.h> -#define __exception __attribute__((section(".exception.text"))) #ifdef CONFIG_FUNCTION_GRAPH_TRACER #define __exception_irq_entry __irq_entry #else -#define __exception_irq_entry __exception +#define __exception_irq_entry __kprobes #endif static inline u32 disr_to_esr(u64 disr) @@ -31,5 +32,22 @@ static inline u32 disr_to_esr(u64 disr) } asmlinkage void enter_from_user_mode(void); +void do_mem_abort(unsigned long addr, unsigned int esr, struct pt_regs *regs); +void do_sp_pc_abort(unsigned long addr, unsigned int esr, struct pt_regs *regs); +void do_undefinstr(struct pt_regs *regs); +asmlinkage void bad_mode(struct pt_regs *regs, int reason, unsigned int esr); +void do_debug_exception(unsigned long addr_if_watchpoint, unsigned int esr, + struct pt_regs *regs); +void do_fpsimd_acc(unsigned int esr, struct pt_regs *regs); +void do_sve_acc(unsigned int esr, struct pt_regs *regs); +void do_fpsimd_exc(unsigned int esr, struct pt_regs *regs); +void do_sysinstr(unsigned int esr, struct pt_regs *regs); +void do_sp_pc_abort(unsigned long addr, unsigned int esr, struct pt_regs *regs); +void bad_el0_sync(struct pt_regs *regs, int reason, unsigned int esr); +void do_cp15instr(unsigned int esr, struct pt_regs *regs); +void el0_svc_handler(struct pt_regs *regs); +void el0_svc_compat_handler(struct pt_regs *regs); +void do_el0_ia_bp_hardening(unsigned long addr, unsigned int esr, + struct pt_regs *regs); #endif /* __ASM_EXCEPTION_H */ diff --git a/arch/arm64/include/asm/ftrace.h b/arch/arm64/include/asm/ftrace.h index d48667b04c41..91fa4baa1a93 100644 --- a/arch/arm64/include/asm/ftrace.h +++ b/arch/arm64/include/asm/ftrace.h @@ -11,9 +11,20 @@ #include <asm/insn.h> #define HAVE_FUNCTION_GRAPH_FP_TEST + +#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS +#define ARCH_SUPPORTS_FTRACE_OPS 1 +#else #define MCOUNT_ADDR ((unsigned long)_mcount) +#endif + +/* The BL at the callsite's adjusted rec->ip */ #define MCOUNT_INSN_SIZE AARCH64_INSN_SIZE +#define FTRACE_PLT_IDX 0 +#define FTRACE_REGS_PLT_IDX 1 +#define NR_FTRACE_PLTS 2 + /* * Currently, gcc tends to save the link register after the local variables * on the stack. This causes the max stack tracer to report the function @@ -44,12 +55,24 @@ extern void return_to_handler(void); static inline unsigned long ftrace_call_adjust(unsigned long addr) { /* + * Adjust addr to point at the BL in the callsite. + * See ftrace_init_nop() for the callsite sequence. + */ + if (IS_ENABLED(CONFIG_DYNAMIC_FTRACE_WITH_REGS)) + return addr + AARCH64_INSN_SIZE; + /* * addr is the address of the mcount call instruction. * recordmcount does the necessary offset calculation. 
*/ return addr; } +#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS +struct dyn_ftrace; +int ftrace_init_nop(struct module *mod, struct dyn_ftrace *rec); +#define ftrace_init_nop ftrace_init_nop +#endif + #define ftrace_return_address(n) return_address(n) /* diff --git a/arch/arm64/include/asm/insn.h b/arch/arm64/include/asm/insn.h index 39e7780bedd6..bb313dde58a4 100644 --- a/arch/arm64/include/asm/insn.h +++ b/arch/arm64/include/asm/insn.h @@ -440,6 +440,9 @@ u32 aarch64_insn_gen_logical_shifted_reg(enum aarch64_insn_register dst, int shift, enum aarch64_insn_variant variant, enum aarch64_insn_logic_type type); +u32 aarch64_insn_gen_move_reg(enum aarch64_insn_register dst, + enum aarch64_insn_register src, + enum aarch64_insn_variant variant); u32 aarch64_insn_gen_logical_immediate(enum aarch64_insn_logic_type type, enum aarch64_insn_variant variant, enum aarch64_insn_register Rn, diff --git a/arch/arm64/include/asm/irqflags.h b/arch/arm64/include/asm/irqflags.h index 1a59f0ed1ae3..aa4b6521ef14 100644 --- a/arch/arm64/include/asm/irqflags.h +++ b/arch/arm64/include/asm/irqflags.h @@ -6,6 +6,7 @@ #define __ASM_IRQFLAGS_H #include <asm/alternative.h> +#include <asm/barrier.h> #include <asm/ptrace.h> #include <asm/sysreg.h> @@ -34,14 +35,14 @@ static inline void arch_local_irq_enable(void) } asm volatile(ALTERNATIVE( - "msr daifclr, #2 // arch_local_irq_enable\n" - "nop", - __msr_s(SYS_ICC_PMR_EL1, "%0") - "dsb sy", + "msr daifclr, #2 // arch_local_irq_enable", + __msr_s(SYS_ICC_PMR_EL1, "%0"), ARM64_HAS_IRQ_PRIO_MASKING) : : "r" ((unsigned long) GIC_PRIO_IRQON) : "memory"); + + pmr_sync(); } static inline void arch_local_irq_disable(void) @@ -116,14 +117,14 @@ static inline unsigned long arch_local_irq_save(void) static inline void arch_local_irq_restore(unsigned long flags) { asm volatile(ALTERNATIVE( - "msr daif, %0\n" - "nop", - __msr_s(SYS_ICC_PMR_EL1, "%0") - "dsb sy", - ARM64_HAS_IRQ_PRIO_MASKING) + "msr daif, %0", + __msr_s(SYS_ICC_PMR_EL1, "%0"), + ARM64_HAS_IRQ_PRIO_MASKING) : : "r" (flags) : "memory"); + + pmr_sync(); } #endif /* __ASM_IRQFLAGS_H */ diff --git a/arch/arm64/include/asm/kvm_arm.h b/arch/arm64/include/asm/kvm_arm.h index ddf9d762ac62..6e5d839f42b5 100644 --- a/arch/arm64/include/asm/kvm_arm.h +++ b/arch/arm64/include/asm/kvm_arm.h @@ -61,7 +61,6 @@ * RW: 64bit by default, can be overridden for 32bit VMs * TAC: Trap ACTLR * TSC: Trap SMC - * TVM: Trap VM ops (until M+C set in SCTLR_EL1) * TSW: Trap cache operations by set/way * TWE: Trap WFE * TWI: Trap WFI @@ -74,7 +73,7 @@ * SWIO: Turn set/way invalidates into set/way clean+invalidate */ #define HCR_GUEST_FLAGS (HCR_TSC | HCR_TSW | HCR_TWE | HCR_TWI | HCR_VM | \ - HCR_TVM | HCR_BSU_IS | HCR_FB | HCR_TAC | \ + HCR_BSU_IS | HCR_FB | HCR_TAC | \ HCR_AMO | HCR_SWIO | HCR_TIDCP | HCR_RW | HCR_TLOR | \ HCR_FMO | HCR_IMO) #define HCR_VIRT_EXCP_MASK (HCR_VSE | HCR_VI | HCR_VF) diff --git a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h index d69c1efc63e7..5efe5ca8fecf 100644 --- a/arch/arm64/include/asm/kvm_emulate.h +++ b/arch/arm64/include/asm/kvm_emulate.h @@ -53,8 +53,18 @@ static inline void vcpu_reset_hcr(struct kvm_vcpu *vcpu) /* trap error record accesses */ vcpu->arch.hcr_el2 |= HCR_TERR; } - if (cpus_have_const_cap(ARM64_HAS_STAGE2_FWB)) + + if (cpus_have_const_cap(ARM64_HAS_STAGE2_FWB)) { vcpu->arch.hcr_el2 |= HCR_FWB; + } else { + /* + * For non-FWB CPUs, we trap VM ops (HCR_EL2.TVM) until M+C + * get set in SCTLR_EL1 such that we can detect when the guest + * MMU gets turned on and do the 
necessary cache maintenance + * then. + */ + vcpu->arch.hcr_el2 |= HCR_TVM; + } if (test_bit(KVM_ARM_VCPU_EL1_32BIT, vcpu->arch.features)) vcpu->arch.hcr_el2 &= ~HCR_RW; @@ -77,14 +87,19 @@ static inline unsigned long *vcpu_hcr(struct kvm_vcpu *vcpu) return (unsigned long *)&vcpu->arch.hcr_el2; } -static inline void vcpu_clear_wfe_traps(struct kvm_vcpu *vcpu) +static inline void vcpu_clear_wfx_traps(struct kvm_vcpu *vcpu) { vcpu->arch.hcr_el2 &= ~HCR_TWE; + if (atomic_read(&vcpu->arch.vgic_cpu.vgic_v3.its_vpe.vlpi_count)) + vcpu->arch.hcr_el2 &= ~HCR_TWI; + else + vcpu->arch.hcr_el2 |= HCR_TWI; } -static inline void vcpu_set_wfe_traps(struct kvm_vcpu *vcpu) +static inline void vcpu_set_wfx_traps(struct kvm_vcpu *vcpu) { vcpu->arch.hcr_el2 |= HCR_TWE; + vcpu->arch.hcr_el2 |= HCR_TWI; } static inline void vcpu_ptrauth_enable(struct kvm_vcpu *vcpu) @@ -258,6 +273,11 @@ static inline bool kvm_vcpu_dabt_isvalid(const struct kvm_vcpu *vcpu) return !!(kvm_vcpu_get_hsr(vcpu) & ESR_ELx_ISV); } +static inline unsigned long kvm_vcpu_dabt_iss_nisv_sanitized(const struct kvm_vcpu *vcpu) +{ + return kvm_vcpu_get_hsr(vcpu) & (ESR_ELx_CM | ESR_ELx_WNR | ESR_ELx_FSC); +} + static inline bool kvm_vcpu_dabt_issext(const struct kvm_vcpu *vcpu) { return !!(kvm_vcpu_get_hsr(vcpu) & ESR_ELx_SSE); diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index f656169db8c3..c61260cf63c5 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -44,6 +44,7 @@ KVM_ARCH_REQ_FLAGS(0, KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP) #define KVM_REQ_IRQ_PENDING KVM_ARCH_REQ(1) #define KVM_REQ_VCPU_RESET KVM_ARCH_REQ(2) +#define KVM_REQ_RECORD_STEAL KVM_ARCH_REQ(3) DECLARE_STATIC_KEY_FALSE(userspace_irqchip_in_use); @@ -83,6 +84,14 @@ struct kvm_arch { /* Mandated version of PSCI */ u32 psci_version; + + /* + * If we encounter a data abort without valid instruction syndrome + * information, report this to user space. User space can (and + * should) opt in to this feature if KVM_CAP_ARM_NISV_TO_USER is + * supported. + */ + bool return_nisv_io_abort_to_user; }; #define KVM_NR_MEM_OBJS 40 @@ -338,6 +347,13 @@ struct kvm_vcpu_arch { /* True when deferrable sysregs are loaded on the physical CPU, * see kvm_vcpu_load_sysregs and kvm_vcpu_put_sysregs. 
*/ bool sysregs_loaded_on_cpu; + + /* Guest PV state */ + struct { + u64 steal; + u64 last_steal; + gpa_t base; + } steal; }; /* Pointer to the vcpu's SVE FFR for sve_{save,load}_state() */ @@ -478,6 +494,27 @@ void handle_exit_early(struct kvm_vcpu *vcpu, struct kvm_run *run, int kvm_perf_init(void); int kvm_perf_teardown(void); +long kvm_hypercall_pv_features(struct kvm_vcpu *vcpu); +gpa_t kvm_init_stolen_time(struct kvm_vcpu *vcpu); +void kvm_update_stolen_time(struct kvm_vcpu *vcpu); + +int kvm_arm_pvtime_set_attr(struct kvm_vcpu *vcpu, + struct kvm_device_attr *attr); +int kvm_arm_pvtime_get_attr(struct kvm_vcpu *vcpu, + struct kvm_device_attr *attr); +int kvm_arm_pvtime_has_attr(struct kvm_vcpu *vcpu, + struct kvm_device_attr *attr); + +static inline void kvm_arm_pvtime_vcpu_init(struct kvm_vcpu_arch *vcpu_arch) +{ + vcpu_arch->steal.base = GPA_INVALID; +} + +static inline bool kvm_arm_is_pvtime_enabled(struct kvm_vcpu_arch *vcpu_arch) +{ + return (vcpu_arch->steal.base != GPA_INVALID); +} + void kvm_set_sei_esr(struct kvm_vcpu *vcpu, u64 syndrome); struct kvm_vcpu *kvm_mpidr_to_vcpu(struct kvm *kvm, unsigned long mpidr); @@ -600,8 +637,7 @@ static inline void kvm_arm_vhe_guest_enter(void) * local_daif_mask() already sets GIC_PRIO_PSR_I_SET, we just need a * dsb to ensure the redistributor is forwards EL2 IRQs to the CPU. */ - if (system_uses_irq_prio_masking()) - dsb(sy); + pmr_sync(); } static inline void kvm_arm_vhe_guest_exit(void) diff --git a/arch/arm64/include/asm/kvm_hyp.h b/arch/arm64/include/asm/kvm_hyp.h index 86825aa20852..97f21cc66657 100644 --- a/arch/arm64/include/asm/kvm_hyp.h +++ b/arch/arm64/include/asm/kvm_hyp.h @@ -47,30 +47,6 @@ #define read_sysreg_el2(r) read_sysreg_elx(r, _EL2, _EL1) #define write_sysreg_el2(v,r) write_sysreg_elx(v, r, _EL2, _EL1) -/** - * hyp_alternate_select - Generates patchable code sequences that are - * used to switch between two implementations of a function, depending - * on the availability of a feature. - * - * @fname: a symbol name that will be defined as a function returning a - * function pointer whose type will match @orig and @alt - * @orig: A pointer to the default function, as returned by @fname when - * @cond doesn't hold - * @alt: A pointer to the alternate function, as returned by @fname - * when @cond holds - * @cond: a CPU feature (as described in asm/cpufeature.h) - */ -#define hyp_alternate_select(fname, orig, alt, cond) \ -typeof(orig) * __hyp_text fname(void) \ -{ \ - typeof(alt) *val = orig; \ - asm volatile(ALTERNATIVE("nop \n", \ - "mov %0, %1 \n", \ - cond) \ - : "+r" (val) : "r" (alt)); \ - return val; \ -} - int __vgic_v2_perform_cpuif_access(struct kvm_vcpu *vcpu); void __vgic_v3_save_state(struct kvm_vcpu *vcpu); diff --git a/arch/arm64/include/asm/memory.h b/arch/arm64/include/asm/memory.h index b61b50bf68b1..a4f9ca5479b0 100644 --- a/arch/arm64/include/asm/memory.h +++ b/arch/arm64/include/asm/memory.h @@ -69,12 +69,6 @@ #define KERNEL_START _text #define KERNEL_END _end -#ifdef CONFIG_ARM64_VA_BITS_52 -#define MAX_USER_VA_BITS 52 -#else -#define MAX_USER_VA_BITS VA_BITS -#endif - /* * Generic and tag-based KASAN require 1/8th and 1/16th of the kernel virtual * address space for the shadow region respectively. They can bloat the stack @@ -215,12 +209,18 @@ static inline unsigned long kaslr_offset(void) * up with a tagged userland pointer. Clear the tag to get a sane pointer to * pass on to access_ok(), for instance. 
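The memory.h change below narrows untagged_addr() so that only TTBR0 (userspace) addresses have their tag byte cleared, while TTBR1 (kernel) addresses pass through unchanged. A stand-alone C model of the new behaviour, with a hypothetical helper name and kernel-style u64/s64 types assumed:

        /* hypothetical stand-alone model of the new untagged_addr() behaviour */
        static inline u64 untagged_addr_sketch(u64 addr)
        {
                /* sign_extend64(addr, 55): copy bit 55 (the TTBR select bit)
                 * into bits 56..63 */
                u64 ext = (u64)(((s64)(addr << 8)) >> 8);

                /*
                 * TTBR0 (bit 55 clear): the tag byte is masked off;
                 * TTBR1 (bit 55 set):   the address is returned unchanged.
                 */
                return addr & ext;
        }

This is why __tag_reset() is switched to the old sign-extending __untagged_addr(): KASAN_SW_TAGS still needs to strip tags from kernel pointers, but generic code calling untagged_addr() only ever wants user pointers sanitised.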
*/ -#define untagged_addr(addr) \ +#define __untagged_addr(addr) \ ((__force __typeof__(addr))sign_extend64((__force u64)(addr), 55)) +#define untagged_addr(addr) ({ \ + u64 __addr = (__force u64)addr; \ + __addr &= __untagged_addr(__addr); \ + (__force __typeof__(addr))__addr; \ +}) + #ifdef CONFIG_KASAN_SW_TAGS #define __tag_shifted(tag) ((u64)(tag) << 56) -#define __tag_reset(addr) untagged_addr(addr) +#define __tag_reset(addr) __untagged_addr(addr) #define __tag_get(addr) (__u8)((u64)(addr) >> 56) #else #define __tag_shifted(tag) 0UL diff --git a/arch/arm64/include/asm/module.h b/arch/arm64/include/asm/module.h index f80e13cbf8ec..1e93de68c044 100644 --- a/arch/arm64/include/asm/module.h +++ b/arch/arm64/include/asm/module.h @@ -21,7 +21,7 @@ struct mod_arch_specific { struct mod_plt_sec init; /* for CONFIG_DYNAMIC_FTRACE */ - struct plt_entry *ftrace_trampoline; + struct plt_entry *ftrace_trampolines; }; #endif diff --git a/arch/arm64/include/asm/paravirt.h b/arch/arm64/include/asm/paravirt.h index 799d9dd6f7cc..cf3a0fd7c1a7 100644 --- a/arch/arm64/include/asm/paravirt.h +++ b/arch/arm64/include/asm/paravirt.h @@ -21,6 +21,13 @@ static inline u64 paravirt_steal_clock(int cpu) { return pv_ops.time.steal_clock(cpu); } -#endif + +int __init pv_time_init(void); + +#else + +#define pv_time_init() do {} while (0) + +#endif // CONFIG_PARAVIRT #endif diff --git a/arch/arm64/include/asm/pgtable-hwdef.h b/arch/arm64/include/asm/pgtable-hwdef.h index 3df60f97da1f..d9fbd433cc17 100644 --- a/arch/arm64/include/asm/pgtable-hwdef.h +++ b/arch/arm64/include/asm/pgtable-hwdef.h @@ -69,7 +69,7 @@ #define PGDIR_SHIFT ARM64_HW_PGTABLE_LEVEL_SHIFT(4 - CONFIG_PGTABLE_LEVELS) #define PGDIR_SIZE (_AC(1, UL) << PGDIR_SHIFT) #define PGDIR_MASK (~(PGDIR_SIZE-1)) -#define PTRS_PER_PGD (1 << (MAX_USER_VA_BITS - PGDIR_SHIFT)) +#define PTRS_PER_PGD (1 << (VA_BITS - PGDIR_SHIFT)) /* * Section address mask and size definitions. 
diff --git a/arch/arm64/include/asm/pgtable-prot.h b/arch/arm64/include/asm/pgtable-prot.h index 9a21b84536f2..8dc6c5cdabe6 100644 --- a/arch/arm64/include/asm/pgtable-prot.h +++ b/arch/arm64/include/asm/pgtable-prot.h @@ -32,11 +32,11 @@ #define PROT_DEFAULT (_PROT_DEFAULT | PTE_MAYBE_NG) #define PROT_SECT_DEFAULT (_PROT_SECT_DEFAULT | PMD_MAYBE_NG) -#define PROT_DEVICE_nGnRnE (PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_DIRTY | PTE_WRITE | PTE_ATTRINDX(MT_DEVICE_nGnRnE)) -#define PROT_DEVICE_nGnRE (PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_DIRTY | PTE_WRITE | PTE_ATTRINDX(MT_DEVICE_nGnRE)) -#define PROT_NORMAL_NC (PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_DIRTY | PTE_WRITE | PTE_ATTRINDX(MT_NORMAL_NC)) -#define PROT_NORMAL_WT (PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_DIRTY | PTE_WRITE | PTE_ATTRINDX(MT_NORMAL_WT)) -#define PROT_NORMAL (PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_DIRTY | PTE_WRITE | PTE_ATTRINDX(MT_NORMAL)) +#define PROT_DEVICE_nGnRnE (PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_WRITE | PTE_ATTRINDX(MT_DEVICE_nGnRnE)) +#define PROT_DEVICE_nGnRE (PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_WRITE | PTE_ATTRINDX(MT_DEVICE_nGnRE)) +#define PROT_NORMAL_NC (PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_WRITE | PTE_ATTRINDX(MT_NORMAL_NC)) +#define PROT_NORMAL_WT (PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_WRITE | PTE_ATTRINDX(MT_NORMAL_WT)) +#define PROT_NORMAL (PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_WRITE | PTE_ATTRINDX(MT_NORMAL)) #define PROT_SECT_DEVICE_nGnRE (PROT_SECT_DEFAULT | PMD_SECT_PXN | PMD_SECT_UXN | PMD_ATTRINDX(MT_DEVICE_nGnRE)) #define PROT_SECT_NORMAL (PROT_SECT_DEFAULT | PMD_SECT_PXN | PMD_SECT_UXN | PMD_ATTRINDX(MT_NORMAL)) @@ -80,8 +80,9 @@ #define PAGE_S2_DEVICE __pgprot(_PROT_DEFAULT | PAGE_S2_MEMATTR(DEVICE_nGnRE) | PTE_S2_RDONLY | PTE_S2_XN) #define PAGE_NONE __pgprot(((_PAGE_DEFAULT) & ~PTE_VALID) | PTE_PROT_NONE | PTE_RDONLY | PTE_NG | PTE_PXN | PTE_UXN) -#define PAGE_SHARED __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_NG | PTE_PXN | PTE_UXN | PTE_WRITE) -#define PAGE_SHARED_EXEC __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_NG | PTE_PXN | PTE_WRITE) +/* shared+writable pages are clean by default, hence PTE_RDONLY|PTE_WRITE */ +#define PAGE_SHARED __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_RDONLY | PTE_NG | PTE_PXN | PTE_UXN | PTE_WRITE) +#define PAGE_SHARED_EXEC __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_RDONLY | PTE_NG | PTE_PXN | PTE_WRITE) #define PAGE_READONLY __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_RDONLY | PTE_NG | PTE_PXN | PTE_UXN) #define PAGE_READONLY_EXEC __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_RDONLY | PTE_NG | PTE_PXN) #define PAGE_EXECONLY __pgprot(_PAGE_DEFAULT | PTE_RDONLY | PTE_NG | PTE_PXN) diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h index 7576df00eb50..5d15b4735a0e 100644 --- a/arch/arm64/include/asm/pgtable.h +++ b/arch/arm64/include/asm/pgtable.h @@ -17,7 +17,7 @@ * VMALLOC range. 
* * VMALLOC_START: beginning of the kernel vmalloc space - * VMALLOC_END: extends to the available space below vmmemmap, PCI I/O space + * VMALLOC_END: extends to the available space below vmemmap, PCI I/O space * and fixed mappings */ #define VMALLOC_START (MODULES_END) @@ -283,23 +283,6 @@ static inline void set_pte_at(struct mm_struct *mm, unsigned long addr, set_pte(ptep, pte); } -#define __HAVE_ARCH_PTE_SAME -static inline int pte_same(pte_t pte_a, pte_t pte_b) -{ - pteval_t lhs, rhs; - - lhs = pte_val(pte_a); - rhs = pte_val(pte_b); - - if (pte_present(pte_a)) - lhs &= ~PTE_RDONLY; - - if (pte_present(pte_b)) - rhs &= ~PTE_RDONLY; - - return (lhs == rhs); -} - /* * Huge pte definitions. */ @@ -876,15 +859,26 @@ static inline void update_mmu_cache(struct vm_area_struct *vma, #define update_mmu_cache_pmd(vma, address, pmd) do { } while (0) -#define kc_vaddr_to_offset(v) ((v) & ~PAGE_END) -#define kc_offset_to_vaddr(o) ((o) | PAGE_END) - #ifdef CONFIG_ARM64_PA_BITS_52 #define phys_to_ttbr(addr) (((addr) | ((addr) >> 46)) & TTBR_BADDR_MASK_52) #else #define phys_to_ttbr(addr) (addr) #endif +/* + * On arm64 without hardware Access Flag, copying from user will fail because + * the pte is old and cannot be marked young. So we always end up with zeroed + * page after fork() + CoW for pfn mappings. We don't always have a + * hardware-managed access flag on arm64. + */ +static inline bool arch_faults_on_old_pte(void) +{ + WARN_ON(preemptible()); + + return !cpu_has_hw_af(); +} +#define arch_faults_on_old_pte arch_faults_on_old_pte + #endif /* !__ASSEMBLY__ */ #endif /* __ASM_PGTABLE_H */ diff --git a/arch/arm64/include/asm/processor.h b/arch/arm64/include/asm/processor.h index 5623685c7d13..5ba63204d078 100644 --- a/arch/arm64/include/asm/processor.h +++ b/arch/arm64/include/asm/processor.h @@ -9,7 +9,7 @@ #define __ASM_PROCESSOR_H #define KERNEL_DS UL(-1) -#define USER_DS ((UL(1) << MAX_USER_VA_BITS) - 1) +#define USER_DS ((UL(1) << VA_BITS) - 1) /* * On arm64 systems, unaligned accesses by the CPU are cheap, and so there is @@ -26,10 +26,12 @@ #include <linux/init.h> #include <linux/stddef.h> #include <linux/string.h> +#include <linux/thread_info.h> #include <asm/alternative.h> #include <asm/cpufeature.h> #include <asm/hw_breakpoint.h> +#include <asm/kasan.h> #include <asm/lse.h> #include <asm/pgtable-hwdef.h> #include <asm/pointer_auth.h> @@ -214,6 +216,18 @@ static inline void start_thread(struct pt_regs *regs, unsigned long pc, regs->sp = sp; } +static inline bool is_ttbr0_addr(unsigned long addr) +{ + /* entry assembly clears tags for TTBR0 addrs */ + return addr < TASK_SIZE; +} + +static inline bool is_ttbr1_addr(unsigned long addr) +{ + /* TTBR1 addresses may have a tag if KASAN_SW_TAGS is in use */ + return arch_kasan_reset_tag(addr) >= PAGE_OFFSET; +} + #ifdef CONFIG_COMPAT static inline void compat_start_thread(struct pt_regs *regs, unsigned long pc, unsigned long sp) diff --git a/arch/arm64/include/asm/pvclock-abi.h b/arch/arm64/include/asm/pvclock-abi.h new file mode 100644 index 000000000000..c4f1c0a0789c --- /dev/null +++ b/arch/arm64/include/asm/pvclock-abi.h @@ -0,0 +1,17 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright (C) 2019 Arm Ltd. 
*/ + +#ifndef __ASM_PVCLOCK_ABI_H +#define __ASM_PVCLOCK_ABI_H + +/* The below structure is defined in ARM DEN0057A */ + +struct pvclock_vcpu_stolen_time { + __le32 revision; + __le32 attributes; + __le64 stolen_time; + /* Structure must be 64 byte aligned, pad to that size */ + u8 padding[48]; +} __packed; + +#endif diff --git a/arch/arm64/include/asm/syscall_wrapper.h b/arch/arm64/include/asm/syscall_wrapper.h index 06d880b3526c..b383b4802a7b 100644 --- a/arch/arm64/include/asm/syscall_wrapper.h +++ b/arch/arm64/include/asm/syscall_wrapper.h @@ -66,24 +66,18 @@ struct pt_regs; } \ static inline long __do_sys##name(__MAP(x,__SC_DECL,__VA_ARGS__)) -#ifndef SYSCALL_DEFINE0 #define SYSCALL_DEFINE0(sname) \ SYSCALL_METADATA(_##sname, 0); \ asmlinkage long __arm64_sys_##sname(const struct pt_regs *__unused); \ ALLOW_ERROR_INJECTION(__arm64_sys_##sname, ERRNO); \ asmlinkage long __arm64_sys_##sname(const struct pt_regs *__unused) -#endif -#ifndef COND_SYSCALL #define COND_SYSCALL(name) \ asmlinkage long __weak __arm64_sys_##name(const struct pt_regs *regs) \ { \ return sys_ni_syscall(); \ } -#endif -#ifndef SYS_NI #define SYS_NI(name) SYSCALL_ALIAS(__arm64_sys_##name, sys_ni_posix_timers); -#endif #endif /* __ASM_SYSCALL_WRAPPER_H */ diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h index 972d196c7714..6e919fafb43d 100644 --- a/arch/arm64/include/asm/sysreg.h +++ b/arch/arm64/include/asm/sysreg.h @@ -212,7 +212,7 @@ #define SYS_FAR_EL1 sys_reg(3, 0, 6, 0, 0) #define SYS_PAR_EL1 sys_reg(3, 0, 7, 4, 0) -#define SYS_PAR_EL1_F BIT(1) +#define SYS_PAR_EL1_F BIT(0) #define SYS_PAR_EL1_FST GENMASK(6, 1) /*** Statistical Profiling Extension ***/ diff --git a/arch/arm64/include/asm/traps.h b/arch/arm64/include/asm/traps.h index 59690613ac31..cee5928e1b7d 100644 --- a/arch/arm64/include/asm/traps.h +++ b/arch/arm64/include/asm/traps.h @@ -42,16 +42,6 @@ static inline int __in_irqentry_text(unsigned long ptr) ptr < (unsigned long)&__irqentry_text_end; } -static inline int in_exception_text(unsigned long ptr) -{ - int in; - - in = ptr >= (unsigned long)&__exception_text_start && - ptr < (unsigned long)&__exception_text_end; - - return in ? 
: __in_irqentry_text(ptr); -} - static inline int in_entry_text(unsigned long ptr) { return ptr >= (unsigned long)&__entry_text_start && diff --git a/arch/arm64/include/asm/uaccess.h b/arch/arm64/include/asm/uaccess.h index 097d6bfac0b7..127712b0b970 100644 --- a/arch/arm64/include/asm/uaccess.h +++ b/arch/arm64/include/asm/uaccess.h @@ -378,20 +378,34 @@ do { \ extern unsigned long __must_check __arch_copy_from_user(void *to, const void __user *from, unsigned long n); #define raw_copy_from_user(to, from, n) \ ({ \ - __arch_copy_from_user((to), __uaccess_mask_ptr(from), (n)); \ + unsigned long __acfu_ret; \ + uaccess_enable_not_uao(); \ + __acfu_ret = __arch_copy_from_user((to), \ + __uaccess_mask_ptr(from), (n)); \ + uaccess_disable_not_uao(); \ + __acfu_ret; \ }) extern unsigned long __must_check __arch_copy_to_user(void __user *to, const void *from, unsigned long n); #define raw_copy_to_user(to, from, n) \ ({ \ - __arch_copy_to_user(__uaccess_mask_ptr(to), (from), (n)); \ + unsigned long __actu_ret; \ + uaccess_enable_not_uao(); \ + __actu_ret = __arch_copy_to_user(__uaccess_mask_ptr(to), \ + (from), (n)); \ + uaccess_disable_not_uao(); \ + __actu_ret; \ }) extern unsigned long __must_check __arch_copy_in_user(void __user *to, const void __user *from, unsigned long n); #define raw_copy_in_user(to, from, n) \ ({ \ - __arch_copy_in_user(__uaccess_mask_ptr(to), \ - __uaccess_mask_ptr(from), (n)); \ + unsigned long __aciu_ret; \ + uaccess_enable_not_uao(); \ + __aciu_ret = __arch_copy_in_user(__uaccess_mask_ptr(to), \ + __uaccess_mask_ptr(from), (n)); \ + uaccess_disable_not_uao(); \ + __aciu_ret; \ }) #define INLINE_COPY_TO_USER @@ -400,8 +414,11 @@ extern unsigned long __must_check __arch_copy_in_user(void __user *to, const voi extern unsigned long __must_check __arch_clear_user(void __user *to, unsigned long n); static inline unsigned long __must_check __clear_user(void __user *to, unsigned long n) { - if (access_ok(to, n)) + if (access_ok(to, n)) { + uaccess_enable_not_uao(); n = __arch_clear_user(__uaccess_mask_ptr(to), n); + uaccess_disable_not_uao(); + } return n; } #define clear_user __clear_user diff --git a/arch/arm64/include/asm/vdso/compat_barrier.h b/arch/arm64/include/asm/vdso/compat_barrier.h index fb60a88b5ed4..3fd8fd6d8fc2 100644 --- a/arch/arm64/include/asm/vdso/compat_barrier.h +++ b/arch/arm64/include/asm/vdso/compat_barrier.h @@ -20,7 +20,7 @@ #define dmb(option) __asm__ __volatile__ ("dmb " #option : : : "memory") -#if __LINUX_ARM_ARCH__ >= 8 +#if __LINUX_ARM_ARCH__ >= 8 && defined(CONFIG_AS_DMB_ISHLD) #define aarch32_smp_mb() dmb(ish) #define aarch32_smp_rmb() dmb(ishld) #define aarch32_smp_wmb() dmb(ishst) diff --git a/arch/arm64/include/asm/vdso/vsyscall.h b/arch/arm64/include/asm/vdso/vsyscall.h index 0c731bfc7c8c..0c20a7c1bee5 100644 --- a/arch/arm64/include/asm/vdso/vsyscall.h +++ b/arch/arm64/include/asm/vdso/vsyscall.h @@ -31,13 +31,6 @@ int __arm64_get_clock_mode(struct timekeeper *tk) #define __arch_get_clock_mode __arm64_get_clock_mode static __always_inline -int __arm64_use_vsyscall(struct vdso_data *vdata) -{ - return !vdata[CS_HRES_COARSE].clock_mode; -} -#define __arch_use_vsyscall __arm64_use_vsyscall - -static __always_inline void __arm64_update_vsyscall(struct vdso_data *vdata, struct timekeeper *tk) { vdata[CS_HRES_COARSE].mask = VDSO_PRECISION_MASK; diff --git a/arch/arm64/include/asm/vdso_datapage.h b/arch/arm64/include/asm/vdso_datapage.h deleted file mode 100644 index 1f38bf330a6e..000000000000 --- a/arch/arm64/include/asm/vdso_datapage.h +++ 
/dev/null @@ -1,33 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Copyright (C) 2012 ARM Limited - */ -#ifndef __ASM_VDSO_DATAPAGE_H -#define __ASM_VDSO_DATAPAGE_H - -#ifndef __ASSEMBLY__ - -struct vdso_data { - __u64 cs_cycle_last; /* Timebase at clocksource init */ - __u64 raw_time_sec; /* Raw time */ - __u64 raw_time_nsec; - __u64 xtime_clock_sec; /* Kernel time */ - __u64 xtime_clock_nsec; - __u64 xtime_coarse_sec; /* Coarse time */ - __u64 xtime_coarse_nsec; - __u64 wtm_clock_sec; /* Wall to monotonic time */ - __u64 wtm_clock_nsec; - __u32 tb_seq_count; /* Timebase sequence counter */ - /* cs_* members must be adjacent and in this order (ldp accesses) */ - __u32 cs_mono_mult; /* NTP-adjusted clocksource multiplier */ - __u32 cs_shift; /* Clocksource shift (mono = raw) */ - __u32 cs_raw_mult; /* Raw clocksource multiplier */ - __u32 tz_minuteswest; /* Whacky timezone stuff */ - __u32 tz_dsttime; - __u32 use_syscall; - __u32 hrtimer_res; -}; - -#endif /* !__ASSEMBLY__ */ - -#endif /* __ASM_VDSO_DATAPAGE_H */ diff --git a/arch/arm64/include/asm/xen/xen-ops.h b/arch/arm64/include/asm/xen/xen-ops.h deleted file mode 100644 index e6e784051932..000000000000 --- a/arch/arm64/include/asm/xen/xen-ops.h +++ /dev/null @@ -1,7 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _ASM_XEN_OPS_H -#define _ASM_XEN_OPS_H - -void xen_efi_runtime_setup(void); - -#endif /* _ASM_XEN_OPS_H */ diff --git a/arch/arm64/include/uapi/asm/kvm.h b/arch/arm64/include/uapi/asm/kvm.h index 67c21f9bdbad..820e5751ada7 100644 --- a/arch/arm64/include/uapi/asm/kvm.h +++ b/arch/arm64/include/uapi/asm/kvm.h @@ -164,8 +164,9 @@ struct kvm_vcpu_events { struct { __u8 serror_pending; __u8 serror_has_esr; + __u8 ext_dabt_pending; /* Align it to 8 bytes */ - __u8 pad[6]; + __u8 pad[5]; __u64 serror_esr; } exception; __u32 reserved[12]; @@ -323,6 +324,8 @@ struct kvm_vcpu_events { #define KVM_ARM_VCPU_TIMER_CTRL 1 #define KVM_ARM_VCPU_TIMER_IRQ_VTIMER 0 #define KVM_ARM_VCPU_TIMER_IRQ_PTIMER 1 +#define KVM_ARM_VCPU_PVTIME_CTRL 2 +#define KVM_ARM_VCPU_PVTIME_IPA 0 /* KVM_IRQ_LINE irq field index values */ #define KVM_ARM_IRQ_VCPU2_SHIFT 28 diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile index 478491f07b4f..fc6488660f64 100644 --- a/arch/arm64/kernel/Makefile +++ b/arch/arm64/kernel/Makefile @@ -13,9 +13,9 @@ CFLAGS_REMOVE_return_address.o = $(CC_FLAGS_FTRACE) # Object file lists. 
obj-y := debug-monitors.o entry.o irq.o fpsimd.o \ - entry-fpsimd.o process.o ptrace.o setup.o signal.o \ - sys.o stacktrace.o time.o traps.o io.o vdso.o \ - hyp-stub.o psci.o cpu_ops.o insn.o \ + entry-common.o entry-fpsimd.o process.o ptrace.o \ + setup.o signal.o sys.o stacktrace.o time.o traps.o \ + io.o vdso.o hyp-stub.o psci.o cpu_ops.o insn.o \ return_address.o cpuinfo.o cpu_errata.o \ cpufeature.o alternative.o cacheinfo.o \ smp.o smp_spin_table.o topology.o smccc-call.o \ diff --git a/arch/arm64/kernel/armv8_deprecated.c b/arch/arm64/kernel/armv8_deprecated.c index 2ec09debc2bb..ca158be21f83 100644 --- a/arch/arm64/kernel/armv8_deprecated.c +++ b/arch/arm64/kernel/armv8_deprecated.c @@ -174,6 +174,9 @@ static void __init register_insn_emulation(struct insn_emulation_ops *ops) struct insn_emulation *insn; insn = kzalloc(sizeof(*insn), GFP_KERNEL); + if (!insn) + return; + insn->ops = ops; insn->min = INSN_UNDEF; @@ -233,6 +236,8 @@ static void __init register_insn_emulation_sysctl(void) insns_sysctl = kcalloc(nr_insn_emulated + 1, sizeof(*sysctl), GFP_KERNEL); + if (!insns_sysctl) + return; raw_spin_lock_irqsave(&insn_emulation_lock, flags); list_for_each_entry(insn, &insn_emulation, node) { diff --git a/arch/arm64/kernel/asm-offsets.c b/arch/arm64/kernel/asm-offsets.c index 214685760e1c..a5bdce8af65b 100644 --- a/arch/arm64/kernel/asm-offsets.c +++ b/arch/arm64/kernel/asm-offsets.c @@ -56,6 +56,7 @@ int main(void) DEFINE(S_X24, offsetof(struct pt_regs, regs[24])); DEFINE(S_X26, offsetof(struct pt_regs, regs[26])); DEFINE(S_X28, offsetof(struct pt_regs, regs[28])); + DEFINE(S_FP, offsetof(struct pt_regs, regs[29])); DEFINE(S_LR, offsetof(struct pt_regs, regs[30])); DEFINE(S_SP, offsetof(struct pt_regs, sp)); DEFINE(S_PSTATE, offsetof(struct pt_regs, pstate)); diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c index 1e43ba5c79b7..6a09ca7644ea 100644 --- a/arch/arm64/kernel/cpu_errata.c +++ b/arch/arm64/kernel/cpu_errata.c @@ -6,12 +6,12 @@ */ #include <linux/arm-smccc.h> -#include <linux/psci.h> #include <linux/types.h> #include <linux/cpu.h> #include <asm/cpu.h> #include <asm/cputype.h> #include <asm/cpufeature.h> +#include <asm/smp_plat.h> static bool __maybe_unused is_affected_midr_range(const struct arm64_cpu_capabilities *entry, int scope) @@ -87,13 +87,21 @@ has_mismatched_cache_type(const struct arm64_cpu_capabilities *entry, } static void -cpu_enable_trap_ctr_access(const struct arm64_cpu_capabilities *__unused) +cpu_enable_trap_ctr_access(const struct arm64_cpu_capabilities *cap) { u64 mask = arm64_ftr_reg_ctrel0.strict_mask; + bool enable_uct_trap = false; /* Trap CTR_EL0 access on this CPU, only if it has a mismatch */ if ((read_cpuid_cachetype() & mask) != (arm64_ftr_reg_ctrel0.sys_val & mask)) + enable_uct_trap = true; + + /* ... or if the system is affected by an erratum */ + if (cap->capability == ARM64_WORKAROUND_1542419) + enable_uct_trap = true; + + if (enable_uct_trap) sysreg_clear_set(sctlr_el1, SCTLR_EL1_UCT, 0); } @@ -128,8 +136,8 @@ static void install_bp_hardening_cb(bp_hardening_cb_t fn, int cpu, slot = -1; /* - * enable_smccc_arch_workaround_1() passes NULL for the hyp_vecs - * start/end if we're a guest. Skip the hyp-vectors work. + * detect_harden_bp_fw() passes NULL for the hyp_vecs start/end if + * we're a guest. Skip the hyp-vectors work. 
*/ if (!hyp_vecs_start) { __this_cpu_write(bp_hardening_data.fn, fn); @@ -166,9 +174,7 @@ static void install_bp_hardening_cb(bp_hardening_cb_t fn, } #endif /* CONFIG_KVM_INDIRECT_VECTORS */ -#include <uapi/linux/psci.h> #include <linux/arm-smccc.h> -#include <linux/psci.h> static void call_smc_arch_workaround_1(void) { @@ -212,43 +218,31 @@ static int detect_harden_bp_fw(void) struct arm_smccc_res res; u32 midr = read_cpuid_id(); - if (psci_ops.smccc_version == SMCCC_VERSION_1_0) + arm_smccc_1_1_invoke(ARM_SMCCC_ARCH_FEATURES_FUNC_ID, + ARM_SMCCC_ARCH_WORKAROUND_1, &res); + + switch ((int)res.a0) { + case 1: + /* Firmware says we're just fine */ + return 0; + case 0: + break; + default: return -1; + } - switch (psci_ops.conduit) { - case PSCI_CONDUIT_HVC: - arm_smccc_1_1_hvc(ARM_SMCCC_ARCH_FEATURES_FUNC_ID, - ARM_SMCCC_ARCH_WORKAROUND_1, &res); - switch ((int)res.a0) { - case 1: - /* Firmware says we're just fine */ - return 0; - case 0: - cb = call_hvc_arch_workaround_1; - /* This is a guest, no need to patch KVM vectors */ - smccc_start = NULL; - smccc_end = NULL; - break; - default: - return -1; - } + switch (arm_smccc_1_1_get_conduit()) { + case SMCCC_CONDUIT_HVC: + cb = call_hvc_arch_workaround_1; + /* This is a guest, no need to patch KVM vectors */ + smccc_start = NULL; + smccc_end = NULL; break; - case PSCI_CONDUIT_SMC: - arm_smccc_1_1_smc(ARM_SMCCC_ARCH_FEATURES_FUNC_ID, - ARM_SMCCC_ARCH_WORKAROUND_1, &res); - switch ((int)res.a0) { - case 1: - /* Firmware says we're just fine */ - return 0; - case 0: - cb = call_smc_arch_workaround_1; - smccc_start = __smccc_workaround_1_smc_start; - smccc_end = __smccc_workaround_1_smc_end; - break; - default: - return -1; - } + case SMCCC_CONDUIT_SMC: + cb = call_smc_arch_workaround_1; + smccc_start = __smccc_workaround_1_smc_start; + smccc_end = __smccc_workaround_1_smc_end; break; default: @@ -308,11 +302,11 @@ void __init arm64_update_smccc_conduit(struct alt_instr *alt, BUG_ON(nr_inst != 1); - switch (psci_ops.conduit) { - case PSCI_CONDUIT_HVC: + switch (arm_smccc_1_1_get_conduit()) { + case SMCCC_CONDUIT_HVC: insn = aarch64_insn_get_hvc_value(); break; - case PSCI_CONDUIT_SMC: + case SMCCC_CONDUIT_SMC: insn = aarch64_insn_get_smc_value(); break; default: @@ -338,6 +332,8 @@ void __init arm64_enable_wa2_handling(struct alt_instr *alt, void arm64_set_ssbd_mitigation(bool state) { + int conduit; + if (!IS_ENABLED(CONFIG_ARM64_SSBD)) { pr_info_once("SSBD disabled by kernel configuration\n"); return; @@ -351,19 +347,10 @@ void arm64_set_ssbd_mitigation(bool state) return; } - switch (psci_ops.conduit) { - case PSCI_CONDUIT_HVC: - arm_smccc_1_1_hvc(ARM_SMCCC_ARCH_WORKAROUND_2, state, NULL); - break; + conduit = arm_smccc_1_1_invoke(ARM_SMCCC_ARCH_WORKAROUND_2, state, + NULL); - case PSCI_CONDUIT_SMC: - arm_smccc_1_1_smc(ARM_SMCCC_ARCH_WORKAROUND_2, state, NULL); - break; - - default: - WARN_ON_ONCE(1); - break; - } + WARN_ON_ONCE(conduit == SMCCC_CONDUIT_NONE); } static bool has_ssbd_mitigation(const struct arm64_cpu_capabilities *entry, @@ -373,6 +360,7 @@ static bool has_ssbd_mitigation(const struct arm64_cpu_capabilities *entry, bool required = true; s32 val; bool this_cpu_safe = false; + int conduit; WARN_ON(scope != SCOPE_LOCAL_CPU || preemptible()); @@ -390,25 +378,10 @@ static bool has_ssbd_mitigation(const struct arm64_cpu_capabilities *entry, goto out_printmsg; } - if (psci_ops.smccc_version == SMCCC_VERSION_1_0) { - ssbd_state = ARM64_SSBD_UNKNOWN; - if (!this_cpu_safe) - __ssb_safe = false; - return false; - } - - switch 
(psci_ops.conduit) { - case PSCI_CONDUIT_HVC: - arm_smccc_1_1_hvc(ARM_SMCCC_ARCH_FEATURES_FUNC_ID, - ARM_SMCCC_ARCH_WORKAROUND_2, &res); - break; - - case PSCI_CONDUIT_SMC: - arm_smccc_1_1_smc(ARM_SMCCC_ARCH_FEATURES_FUNC_ID, - ARM_SMCCC_ARCH_WORKAROUND_2, &res); - break; + conduit = arm_smccc_1_1_invoke(ARM_SMCCC_ARCH_FEATURES_FUNC_ID, + ARM_SMCCC_ARCH_WORKAROUND_2, &res); - default: + if (conduit == SMCCC_CONDUIT_NONE) { ssbd_state = ARM64_SSBD_UNKNOWN; if (!this_cpu_safe) __ssb_safe = false; @@ -488,6 +461,7 @@ static const struct midr_range arm64_ssb_cpus[] = { MIDR_ALL_VERSIONS(MIDR_CORTEX_A35), MIDR_ALL_VERSIONS(MIDR_CORTEX_A53), MIDR_ALL_VERSIONS(MIDR_CORTEX_A55), + MIDR_ALL_VERSIONS(MIDR_BRAHMA_B53), {}, }; @@ -572,6 +546,7 @@ static const struct midr_range spectre_v2_safe_list[] = { MIDR_ALL_VERSIONS(MIDR_CORTEX_A35), MIDR_ALL_VERSIONS(MIDR_CORTEX_A53), MIDR_ALL_VERSIONS(MIDR_CORTEX_A55), + MIDR_ALL_VERSIONS(MIDR_BRAHMA_B53), { /* sentinel */ } }; @@ -623,9 +598,45 @@ check_branch_predictor(const struct arm64_cpu_capabilities *entry, int scope) return (need_wa > 0); } -#ifdef CONFIG_HARDEN_EL2_VECTORS +static const __maybe_unused struct midr_range tx2_family_cpus[] = { + MIDR_ALL_VERSIONS(MIDR_BRCM_VULCAN), + MIDR_ALL_VERSIONS(MIDR_CAVIUM_THUNDERX2), + {}, +}; -static const struct midr_range arm64_harden_el2_vectors[] = { +static bool __maybe_unused +needs_tx2_tvm_workaround(const struct arm64_cpu_capabilities *entry, + int scope) +{ + int i; + + if (!is_affected_midr_range_list(entry, scope) || + !is_hyp_mode_available()) + return false; + + for_each_possible_cpu(i) { + if (MPIDR_AFFINITY_LEVEL(cpu_logical_map(i), 0) != 0) + return true; + } + + return false; +} + +static bool __maybe_unused +has_neoverse_n1_erratum_1542419(const struct arm64_cpu_capabilities *entry, + int scope) +{ + u32 midr = read_cpuid_id(); + bool has_dic = read_cpuid_cachetype() & BIT(CTR_DIC_SHIFT); + const struct midr_range range = MIDR_ALL_VERSIONS(MIDR_NEOVERSE_N1); + + WARN_ON(scope != SCOPE_LOCAL_CPU || preemptible()); + return is_midr_in_range(midr, &range) && has_dic; +} + +#if defined(CONFIG_HARDEN_EL2_VECTORS) || defined(CONFIG_ARM64_ERRATUM_1319367) + +static const struct midr_range ca57_a72[] = { MIDR_ALL_VERSIONS(MIDR_CORTEX_A57), MIDR_ALL_VERSIONS(MIDR_CORTEX_A72), {}, @@ -634,17 +645,23 @@ static const struct midr_range arm64_harden_el2_vectors[] = { #endif #ifdef CONFIG_ARM64_WORKAROUND_REPEAT_TLBI - -static const struct midr_range arm64_repeat_tlbi_cpus[] = { +static const struct arm64_cpu_capabilities arm64_repeat_tlbi_list[] = { #ifdef CONFIG_QCOM_FALKOR_ERRATUM_1009 - MIDR_RANGE(MIDR_QCOM_FALKOR_V1, 0, 0, 0, 0), + { + ERRATA_MIDR_REV(MIDR_QCOM_FALKOR_V1, 0, 0) + }, + { + .midr_range.model = MIDR_QCOM_KRYO, + .matches = is_kryo_midr, + }, #endif #ifdef CONFIG_ARM64_ERRATUM_1286807 - MIDR_RANGE(MIDR_CORTEX_A76, 0, 0, 3, 0), + { + ERRATA_MIDR_RANGE(MIDR_CORTEX_A76, 0, 0, 3, 0), + }, #endif {}, }; - #endif #ifdef CONFIG_CAVIUM_ERRATUM_27456 @@ -712,6 +729,33 @@ static const struct midr_range erratum_1418040_list[] = { }; #endif +#ifdef CONFIG_ARM64_ERRATUM_845719 +static const struct midr_range erratum_845719_list[] = { + /* Cortex-A53 r0p[01234] */ + MIDR_REV_RANGE(MIDR_CORTEX_A53, 0, 0, 4), + /* Brahma-B53 r0p[0] */ + MIDR_REV(MIDR_BRAHMA_B53, 0, 0), + {}, +}; +#endif + +#ifdef CONFIG_ARM64_ERRATUM_843419 +static const struct arm64_cpu_capabilities erratum_843419_list[] = { + { + /* Cortex-A53 r0p[01234] */ + .matches = is_affected_midr_range, + ERRATA_MIDR_REV_RANGE(MIDR_CORTEX_A53, 0, 0, 
4), + MIDR_FIXED(0x4, BIT(8)), + }, + { + /* Brahma-B53 r0p[0] */ + .matches = is_affected_midr_range, + ERRATA_MIDR_REV(MIDR_BRAHMA_B53, 0, 0), + }, + {}, +}; +#endif + const struct arm64_cpu_capabilities arm64_errata[] = { #ifdef CONFIG_ARM64_WORKAROUND_CLEAN_CACHE { @@ -743,19 +787,18 @@ const struct arm64_cpu_capabilities arm64_errata[] = { #endif #ifdef CONFIG_ARM64_ERRATUM_843419 { - /* Cortex-A53 r0p[01234] */ .desc = "ARM erratum 843419", .capability = ARM64_WORKAROUND_843419, - ERRATA_MIDR_REV_RANGE(MIDR_CORTEX_A53, 0, 0, 4), - MIDR_FIXED(0x4, BIT(8)), + .type = ARM64_CPUCAP_LOCAL_CPU_ERRATUM, + .matches = cpucap_multi_entry_cap_matches, + .match_list = erratum_843419_list, }, #endif #ifdef CONFIG_ARM64_ERRATUM_845719 { - /* Cortex-A53 r0p[01234] */ .desc = "ARM erratum 845719", .capability = ARM64_WORKAROUND_845719, - ERRATA_MIDR_REV_RANGE(MIDR_CORTEX_A53, 0, 0, 4), + ERRATA_MIDR_RANGE_LIST(erratum_845719_list), }, #endif #ifdef CONFIG_CAVIUM_ERRATUM_23154 @@ -791,6 +834,7 @@ const struct arm64_cpu_capabilities arm64_errata[] = { { .desc = "Qualcomm Technologies Falkor/Kryo erratum 1003", .capability = ARM64_WORKAROUND_QCOM_FALKOR_E1003, + .type = ARM64_CPUCAP_LOCAL_CPU_ERRATUM, .matches = cpucap_multi_entry_cap_matches, .match_list = qcom_erratum_1003_list, }, @@ -799,7 +843,9 @@ const struct arm64_cpu_capabilities arm64_errata[] = { { .desc = "Qualcomm erratum 1009, ARM erratum 1286807", .capability = ARM64_WORKAROUND_REPEAT_TLBI, - ERRATA_MIDR_RANGE_LIST(arm64_repeat_tlbi_cpus), + .type = ARM64_CPUCAP_LOCAL_CPU_ERRATUM, + .matches = cpucap_multi_entry_cap_matches, + .match_list = arm64_repeat_tlbi_list, }, #endif #ifdef CONFIG_ARM64_ERRATUM_858921 @@ -819,7 +865,7 @@ const struct arm64_cpu_capabilities arm64_errata[] = { { .desc = "EL2 vector hardening", .capability = ARM64_HARDEN_EL2_VECTORS, - ERRATA_MIDR_RANGE_LIST(arm64_harden_el2_vectors), + ERRATA_MIDR_RANGE_LIST(ca57_a72), }, #endif { @@ -852,6 +898,36 @@ const struct arm64_cpu_capabilities arm64_errata[] = { .matches = has_cortex_a76_erratum_1463225, }, #endif +#ifdef CONFIG_CAVIUM_TX2_ERRATUM_219 + { + .desc = "Cavium ThunderX2 erratum 219 (KVM guest sysreg trapping)", + .capability = ARM64_WORKAROUND_CAVIUM_TX2_219_TVM, + ERRATA_MIDR_RANGE_LIST(tx2_family_cpus), + .matches = needs_tx2_tvm_workaround, + }, + { + .desc = "Cavium ThunderX2 erratum 219 (PRFM removal)", + .capability = ARM64_WORKAROUND_CAVIUM_TX2_219_PRFM, + ERRATA_MIDR_RANGE_LIST(tx2_family_cpus), + }, +#endif +#ifdef CONFIG_ARM64_ERRATUM_1542419 + { + /* we depend on the firmware portion for correctness */ + .desc = "ARM erratum 1542419 (kernel portion)", + .capability = ARM64_WORKAROUND_1542419, + .type = ARM64_CPUCAP_LOCAL_CPU_ERRATUM, + .matches = has_neoverse_n1_erratum_1542419, + .cpu_enable = cpu_enable_trap_ctr_access, + }, +#endif +#ifdef CONFIG_ARM64_ERRATUM_1319367 + { + .desc = "ARM erratum 1319367", + .capability = ARM64_WORKAROUND_1319367, + ERRATA_MIDR_RANGE_LIST(ca57_a72), + }, +#endif { } }; diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index 9323bcc40a58..04cf64e9f0c9 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -136,6 +136,7 @@ static const struct arm64_ftr_bits ftr_id_aa64isar0[] = { static const struct arm64_ftr_bits ftr_id_aa64isar1[] = { ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR1_SB_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR1_FRINTTS_SHIFT, 4, 0), 
ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_PTR_AUTH), FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR1_GPI_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_PTR_AUTH), @@ -175,11 +176,16 @@ static const struct arm64_ftr_bits ftr_id_aa64pfr1[] = { }; static const struct arm64_ftr_bits ftr_id_aa64zfr0[] = { - ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ZFR0_SM4_SHIFT, 4, 0), - ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ZFR0_SHA3_SHIFT, 4, 0), - ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ZFR0_BITPERM_SHIFT, 4, 0), - ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ZFR0_AES_SHIFT, 4, 0), - ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ZFR0_SVEVER_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SVE), + FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ZFR0_SM4_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SVE), + FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ZFR0_SHA3_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SVE), + FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ZFR0_BITPERM_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SVE), + FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ZFR0_AES_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SVE), + FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ZFR0_SVEVER_SHIFT, 4, 0), ARM64_FTR_END, }; @@ -976,6 +982,7 @@ static bool unmap_kernel_at_el0(const struct arm64_cpu_capabilities *entry, MIDR_ALL_VERSIONS(MIDR_CORTEX_A72), MIDR_ALL_VERSIONS(MIDR_CORTEX_A73), MIDR_ALL_VERSIONS(MIDR_HISI_TSV110), + MIDR_ALL_VERSIONS(MIDR_NVIDIA_CARMEL), { /* sentinel */ } }; char const *str = "kpti command line option"; diff --git a/arch/arm64/kernel/cpuinfo.c b/arch/arm64/kernel/cpuinfo.c index 05933c065732..56bba746da1c 100644 --- a/arch/arm64/kernel/cpuinfo.c +++ b/arch/arm64/kernel/cpuinfo.c @@ -329,7 +329,7 @@ static void __cpuinfo_store_cpu(struct cpuinfo_arm64 *info) info->reg_cntfrq = arch_timer_get_cntfrq(); /* * Use the effective value of the CTR_EL0 than the raw value - * exposed by the CPU. CTR_E0.IDC field value must be interpreted + * exposed by the CPU. CTR_EL0.IDC field value must be interpreted * with the CLIDR_EL1 fields to avoid triggering false warnings * when there is a mismatch across the CPUs. Keep track of the * effective value of the CTR_EL0 in our internal records for diff --git a/arch/arm64/kernel/entry-common.c b/arch/arm64/kernel/entry-common.c new file mode 100644 index 000000000000..5dce5e56995a --- /dev/null +++ b/arch/arm64/kernel/entry-common.c @@ -0,0 +1,332 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Exception handling code + * + * Copyright (C) 2019 ARM Ltd. 
+ */ + +#include <linux/context_tracking.h> +#include <linux/ptrace.h> +#include <linux/thread_info.h> + +#include <asm/cpufeature.h> +#include <asm/daifflags.h> +#include <asm/esr.h> +#include <asm/exception.h> +#include <asm/kprobes.h> +#include <asm/mmu.h> +#include <asm/sysreg.h> + +static void notrace el1_abort(struct pt_regs *regs, unsigned long esr) +{ + unsigned long far = read_sysreg(far_el1); + + local_daif_inherit(regs); + far = untagged_addr(far); + do_mem_abort(far, esr, regs); +} +NOKPROBE_SYMBOL(el1_abort); + +static void notrace el1_pc(struct pt_regs *regs, unsigned long esr) +{ + unsigned long far = read_sysreg(far_el1); + + local_daif_inherit(regs); + do_sp_pc_abort(far, esr, regs); +} +NOKPROBE_SYMBOL(el1_pc); + +static void el1_undef(struct pt_regs *regs) +{ + local_daif_inherit(regs); + do_undefinstr(regs); +} +NOKPROBE_SYMBOL(el1_undef); + +static void el1_inv(struct pt_regs *regs, unsigned long esr) +{ + local_daif_inherit(regs); + bad_mode(regs, 0, esr); +} +NOKPROBE_SYMBOL(el1_inv); + +static void notrace el1_dbg(struct pt_regs *regs, unsigned long esr) +{ + unsigned long far = read_sysreg(far_el1); + + /* + * The CPU masked interrupts, and we are leaving them masked during + * do_debug_exception(). Update PMR as if we had called + * local_daif_mask(). + */ + if (system_uses_irq_prio_masking()) + gic_write_pmr(GIC_PRIO_IRQON | GIC_PRIO_PSR_I_SET); + + do_debug_exception(far, esr, regs); +} +NOKPROBE_SYMBOL(el1_dbg); + +asmlinkage void notrace el1_sync_handler(struct pt_regs *regs) +{ + unsigned long esr = read_sysreg(esr_el1); + + switch (ESR_ELx_EC(esr)) { + case ESR_ELx_EC_DABT_CUR: + case ESR_ELx_EC_IABT_CUR: + el1_abort(regs, esr); + break; + /* + * We don't handle ESR_ELx_EC_SP_ALIGN, since we will have hit a + * recursive exception when trying to push the initial pt_regs. + */ + case ESR_ELx_EC_PC_ALIGN: + el1_pc(regs, esr); + break; + case ESR_ELx_EC_SYS64: + case ESR_ELx_EC_UNKNOWN: + el1_undef(regs); + break; + case ESR_ELx_EC_BREAKPT_CUR: + case ESR_ELx_EC_SOFTSTP_CUR: + case ESR_ELx_EC_WATCHPT_CUR: + case ESR_ELx_EC_BRK64: + el1_dbg(regs, esr); + break; + default: + el1_inv(regs, esr); + }; +} +NOKPROBE_SYMBOL(el1_sync_handler); + +static void notrace el0_da(struct pt_regs *regs, unsigned long esr) +{ + unsigned long far = read_sysreg(far_el1); + + user_exit_irqoff(); + local_daif_restore(DAIF_PROCCTX); + far = untagged_addr(far); + do_mem_abort(far, esr, regs); +} +NOKPROBE_SYMBOL(el0_da); + +static void notrace el0_ia(struct pt_regs *regs, unsigned long esr) +{ + unsigned long far = read_sysreg(far_el1); + + /* + * We've taken an instruction abort from userspace and not yet + * re-enabled IRQs. If the address is a kernel address, apply + * BP hardening prior to enabling IRQs and pre-emption.
+ */ + if (!is_ttbr0_addr(far)) + arm64_apply_bp_hardening(); + + user_exit_irqoff(); + local_daif_restore(DAIF_PROCCTX); + do_mem_abort(far, esr, regs); +} +NOKPROBE_SYMBOL(el0_ia); + +static void notrace el0_fpsimd_acc(struct pt_regs *regs, unsigned long esr) +{ + user_exit_irqoff(); + local_daif_restore(DAIF_PROCCTX); + do_fpsimd_acc(esr, regs); +} +NOKPROBE_SYMBOL(el0_fpsimd_acc); + +static void notrace el0_sve_acc(struct pt_regs *regs, unsigned long esr) +{ + user_exit_irqoff(); + local_daif_restore(DAIF_PROCCTX); + do_sve_acc(esr, regs); +} +NOKPROBE_SYMBOL(el0_sve_acc); + +static void notrace el0_fpsimd_exc(struct pt_regs *regs, unsigned long esr) +{ + user_exit_irqoff(); + local_daif_restore(DAIF_PROCCTX); + do_fpsimd_exc(esr, regs); +} +NOKPROBE_SYMBOL(el0_fpsimd_exc); + +static void notrace el0_sys(struct pt_regs *regs, unsigned long esr) +{ + user_exit_irqoff(); + local_daif_restore(DAIF_PROCCTX); + do_sysinstr(esr, regs); +} +NOKPROBE_SYMBOL(el0_sys); + +static void notrace el0_pc(struct pt_regs *regs, unsigned long esr) +{ + unsigned long far = read_sysreg(far_el1); + + if (!is_ttbr0_addr(instruction_pointer(regs))) + arm64_apply_bp_hardening(); + + user_exit_irqoff(); + local_daif_restore(DAIF_PROCCTX); + do_sp_pc_abort(far, esr, regs); +} +NOKPROBE_SYMBOL(el0_pc); + +static void notrace el0_sp(struct pt_regs *regs, unsigned long esr) +{ + user_exit_irqoff(); + local_daif_restore(DAIF_PROCCTX_NOIRQ); + do_sp_pc_abort(regs->sp, esr, regs); +} +NOKPROBE_SYMBOL(el0_sp); + +static void notrace el0_undef(struct pt_regs *regs) +{ + user_exit_irqoff(); + local_daif_restore(DAIF_PROCCTX); + do_undefinstr(regs); +} +NOKPROBE_SYMBOL(el0_undef); + +static void notrace el0_inv(struct pt_regs *regs, unsigned long esr) +{ + user_exit_irqoff(); + local_daif_restore(DAIF_PROCCTX); + bad_el0_sync(regs, 0, esr); +} +NOKPROBE_SYMBOL(el0_inv); + +static void notrace el0_dbg(struct pt_regs *regs, unsigned long esr) +{ + /* Only watchpoints write FAR_EL1, otherwise its UNKNOWN */ + unsigned long far = read_sysreg(far_el1); + + if (system_uses_irq_prio_masking()) + gic_write_pmr(GIC_PRIO_IRQON | GIC_PRIO_PSR_I_SET); + + user_exit_irqoff(); + do_debug_exception(far, esr, regs); + local_daif_restore(DAIF_PROCCTX_NOIRQ); +} +NOKPROBE_SYMBOL(el0_dbg); + +static void notrace el0_svc(struct pt_regs *regs) +{ + if (system_uses_irq_prio_masking()) + gic_write_pmr(GIC_PRIO_IRQON | GIC_PRIO_PSR_I_SET); + + el0_svc_handler(regs); +} +NOKPROBE_SYMBOL(el0_svc); + +asmlinkage void notrace el0_sync_handler(struct pt_regs *regs) +{ + unsigned long esr = read_sysreg(esr_el1); + + switch (ESR_ELx_EC(esr)) { + case ESR_ELx_EC_SVC64: + el0_svc(regs); + break; + case ESR_ELx_EC_DABT_LOW: + el0_da(regs, esr); + break; + case ESR_ELx_EC_IABT_LOW: + el0_ia(regs, esr); + break; + case ESR_ELx_EC_FP_ASIMD: + el0_fpsimd_acc(regs, esr); + break; + case ESR_ELx_EC_SVE: + el0_sve_acc(regs, esr); + break; + case ESR_ELx_EC_FP_EXC64: + el0_fpsimd_exc(regs, esr); + break; + case ESR_ELx_EC_SYS64: + case ESR_ELx_EC_WFx: + el0_sys(regs, esr); + break; + case ESR_ELx_EC_SP_ALIGN: + el0_sp(regs, esr); + break; + case ESR_ELx_EC_PC_ALIGN: + el0_pc(regs, esr); + break; + case ESR_ELx_EC_UNKNOWN: + el0_undef(regs); + break; + case ESR_ELx_EC_BREAKPT_LOW: + case ESR_ELx_EC_SOFTSTP_LOW: + case ESR_ELx_EC_WATCHPT_LOW: + case ESR_ELx_EC_BRK64: + el0_dbg(regs, esr); + break; + default: + el0_inv(regs, esr); + } +} +NOKPROBE_SYMBOL(el0_sync_handler); + +#ifdef CONFIG_COMPAT +static void notrace el0_cp15(struct pt_regs *regs, unsigned long esr) 
+{ + user_exit_irqoff(); + local_daif_restore(DAIF_PROCCTX); + do_cp15instr(esr, regs); +} +NOKPROBE_SYMBOL(el0_cp15); + +static void notrace el0_svc_compat(struct pt_regs *regs) +{ + if (system_uses_irq_prio_masking()) + gic_write_pmr(GIC_PRIO_IRQON | GIC_PRIO_PSR_I_SET); + + el0_svc_compat_handler(regs); +} +NOKPROBE_SYMBOL(el0_svc_compat); + +asmlinkage void notrace el0_sync_compat_handler(struct pt_regs *regs) +{ + unsigned long esr = read_sysreg(esr_el1); + + switch (ESR_ELx_EC(esr)) { + case ESR_ELx_EC_SVC32: + el0_svc_compat(regs); + break; + case ESR_ELx_EC_DABT_LOW: + el0_da(regs, esr); + break; + case ESR_ELx_EC_IABT_LOW: + el0_ia(regs, esr); + break; + case ESR_ELx_EC_FP_ASIMD: + el0_fpsimd_acc(regs, esr); + break; + case ESR_ELx_EC_FP_EXC32: + el0_fpsimd_exc(regs, esr); + break; + case ESR_ELx_EC_PC_ALIGN: + el0_pc(regs, esr); + break; + case ESR_ELx_EC_UNKNOWN: + case ESR_ELx_EC_CP14_MR: + case ESR_ELx_EC_CP14_LS: + case ESR_ELx_EC_CP14_64: + el0_undef(regs); + break; + case ESR_ELx_EC_CP15_32: + case ESR_ELx_EC_CP15_64: + el0_cp15(regs, esr); + break; + case ESR_ELx_EC_BREAKPT_LOW: + case ESR_ELx_EC_SOFTSTP_LOW: + case ESR_ELx_EC_WATCHPT_LOW: + case ESR_ELx_EC_BKPT32: + el0_dbg(regs, esr); + break; + default: + el0_inv(regs, esr); + } +} +NOKPROBE_SYMBOL(el0_sync_compat_handler); +#endif /* CONFIG_COMPAT */ diff --git a/arch/arm64/kernel/entry-ftrace.S b/arch/arm64/kernel/entry-ftrace.S index 33d003d80121..4fe1514fcbfd 100644 --- a/arch/arm64/kernel/entry-ftrace.S +++ b/arch/arm64/kernel/entry-ftrace.S @@ -7,10 +7,137 @@ */ #include <linux/linkage.h> +#include <asm/asm-offsets.h> #include <asm/assembler.h> #include <asm/ftrace.h> #include <asm/insn.h> +#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS +/* + * Due to -fpatchable-function-entry=2, the compiler has placed two NOPs before + * the regular function prologue. For an enabled callsite, ftrace_init_nop() and + * ftrace_make_call() have patched those NOPs to: + * + * MOV X9, LR + * BL <entry> + * + * ... where <entry> is either ftrace_caller or ftrace_regs_caller. + * + * Each instrumented function follows the AAPCS, so here x0-x8 and x19-x30 are + * live, and x9-x18 are safe to clobber. + * + * We save the callsite's context into a pt_regs before invoking any ftrace + * callbacks. So that we can get a sensible backtrace, we create a stack record + * for the callsite and the ftrace entry assembly. This is not sufficient for + * reliable stacktrace: until we create the callsite stack record, its caller + * is missing from the LR and existing chain of frame records. 
+ */ + .macro ftrace_regs_entry, allregs=0 + /* Make room for pt_regs, plus a callee frame */ + sub sp, sp, #(S_FRAME_SIZE + 16) + + /* Save function arguments (and x9 for simplicity) */ + stp x0, x1, [sp, #S_X0] + stp x2, x3, [sp, #S_X2] + stp x4, x5, [sp, #S_X4] + stp x6, x7, [sp, #S_X6] + stp x8, x9, [sp, #S_X8] + + /* Optionally save the callee-saved registers, always save the FP */ + .if \allregs == 1 + stp x10, x11, [sp, #S_X10] + stp x12, x13, [sp, #S_X12] + stp x14, x15, [sp, #S_X14] + stp x16, x17, [sp, #S_X16] + stp x18, x19, [sp, #S_X18] + stp x20, x21, [sp, #S_X20] + stp x22, x23, [sp, #S_X22] + stp x24, x25, [sp, #S_X24] + stp x26, x27, [sp, #S_X26] + stp x28, x29, [sp, #S_X28] + .else + str x29, [sp, #S_FP] + .endif + + /* Save the callsite's SP and LR */ + add x10, sp, #(S_FRAME_SIZE + 16) + stp x9, x10, [sp, #S_LR] + + /* Save the PC after the ftrace callsite */ + str x30, [sp, #S_PC] + + /* Create a frame record for the callsite above pt_regs */ + stp x29, x9, [sp, #S_FRAME_SIZE] + add x29, sp, #S_FRAME_SIZE + + /* Create our frame record within pt_regs. */ + stp x29, x30, [sp, #S_STACKFRAME] + add x29, sp, #S_STACKFRAME + .endm + +ENTRY(ftrace_regs_caller) + ftrace_regs_entry 1 + b ftrace_common +ENDPROC(ftrace_regs_caller) + +ENTRY(ftrace_caller) + ftrace_regs_entry 0 + b ftrace_common +ENDPROC(ftrace_caller) + +ENTRY(ftrace_common) + sub x0, x30, #AARCH64_INSN_SIZE // ip (callsite's BL insn) + mov x1, x9 // parent_ip (callsite's LR) + ldr_l x2, function_trace_op // op + mov x3, sp // regs + +GLOBAL(ftrace_call) + bl ftrace_stub + +#ifdef CONFIG_FUNCTION_GRAPH_TRACER +GLOBAL(ftrace_graph_call) // ftrace_graph_caller(); + nop // If enabled, this will be replaced + // "b ftrace_graph_caller" +#endif + +/* + * At the callsite x0-x8 and x19-x30 were live. Any C code will have preserved + * x19-x29 per the AAPCS, and we created frame records upon entry, so we need + * to restore x0-x8, x29, and x30. 
+ */ +ftrace_common_return: + /* Restore function arguments */ + ldp x0, x1, [sp] + ldp x2, x3, [sp, #S_X2] + ldp x4, x5, [sp, #S_X4] + ldp x6, x7, [sp, #S_X6] + ldr x8, [sp, #S_X8] + + /* Restore the callsite's FP, LR, PC */ + ldr x29, [sp, #S_FP] + ldr x30, [sp, #S_LR] + ldr x9, [sp, #S_PC] + + /* Restore the callsite's SP */ + add sp, sp, #S_FRAME_SIZE + 16 + + ret x9 +ENDPROC(ftrace_common) + +#ifdef CONFIG_FUNCTION_GRAPH_TRACER +ENTRY(ftrace_graph_caller) + ldr x0, [sp, #S_PC] + sub x0, x0, #AARCH64_INSN_SIZE // ip (callsite's BL insn) + add x1, sp, #S_LR // parent_ip (callsite's LR) + ldr x2, [sp, #S_FRAME_SIZE] // parent fp (callsite's FP) + bl prepare_ftrace_return + b ftrace_common_return +ENDPROC(ftrace_graph_caller) +#else +#endif + +#else /* CONFIG_DYNAMIC_FTRACE_WITH_REGS */ + /* * Gcc with -pg will put the following code in the beginning of each function: * mov x0, x30 @@ -160,11 +287,6 @@ GLOBAL(ftrace_graph_call) // ftrace_graph_caller(); mcount_exit ENDPROC(ftrace_caller) -#endif /* CONFIG_DYNAMIC_FTRACE */ - -ENTRY(ftrace_stub) - ret -ENDPROC(ftrace_stub) #ifdef CONFIG_FUNCTION_GRAPH_TRACER /* @@ -184,7 +306,15 @@ ENTRY(ftrace_graph_caller) mcount_exit ENDPROC(ftrace_graph_caller) +#endif /* CONFIG_FUNCTION_GRAPH_TRACER */ +#endif /* CONFIG_DYNAMIC_FTRACE */ +#endif /* CONFIG_DYNAMIC_FTRACE_WITH_REGS */ + +ENTRY(ftrace_stub) + ret +ENDPROC(ftrace_stub) +#ifdef CONFIG_FUNCTION_GRAPH_TRACER /* * void return_to_handler(void) * diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S index 84a822748c84..583f71abbe98 100644 --- a/arch/arm64/kernel/entry.S +++ b/arch/arm64/kernel/entry.S @@ -269,8 +269,10 @@ alternative_else_nop_endif alternative_if ARM64_HAS_IRQ_PRIO_MASKING ldr x20, [sp, #S_PMR_SAVE] msr_s SYS_ICC_PMR_EL1, x20 - /* Ensure priority change is seen by redistributor */ - dsb sy + mrs_s x21, SYS_ICC_CTLR_EL1 + tbz x21, #6, .L__skip_pmr_sync\@ // Check for ICC_CTLR_EL1.PMHE + dsb sy // Ensure priority change is seen by redistributor +.L__skip_pmr_sync\@: alternative_else_nop_endif ldp x21, x22, [sp, #S_PC] // load ELR, SPSR @@ -578,76 +580,9 @@ ENDPROC(el1_error_invalid) .align 6 el1_sync: kernel_entry 1 - mrs x1, esr_el1 // read the syndrome register - lsr x24, x1, #ESR_ELx_EC_SHIFT // exception class - cmp x24, #ESR_ELx_EC_DABT_CUR // data abort in EL1 - b.eq el1_da - cmp x24, #ESR_ELx_EC_IABT_CUR // instruction abort in EL1 - b.eq el1_ia - cmp x24, #ESR_ELx_EC_SYS64 // configurable trap - b.eq el1_undef - cmp x24, #ESR_ELx_EC_PC_ALIGN // pc alignment exception - b.eq el1_pc - cmp x24, #ESR_ELx_EC_UNKNOWN // unknown exception in EL1 - b.eq el1_undef - cmp x24, #ESR_ELx_EC_BREAKPT_CUR // debug exception in EL1 - b.ge el1_dbg - b el1_inv - -el1_ia: - /* - * Fall through to the Data abort case - */ -el1_da: - /* - * Data abort handling - */ - mrs x3, far_el1 - inherit_daif pstate=x23, tmp=x2 - clear_address_tag x0, x3 - mov x2, sp // struct pt_regs - bl do_mem_abort - - kernel_exit 1 -el1_pc: - /* - * PC alignment exception handling. We don't handle SP alignment faults, - * since we will have hit a recursive exception when trying to push the - * initial pt_regs. 
- */ - mrs x0, far_el1 - inherit_daif pstate=x23, tmp=x2 - mov x2, sp - bl do_sp_pc_abort - ASM_BUG() -el1_undef: - /* - * Undefined instruction - */ - inherit_daif pstate=x23, tmp=x2 mov x0, sp - bl do_undefinstr + bl el1_sync_handler kernel_exit 1 -el1_dbg: - /* - * Debug exception handling - */ - cmp x24, #ESR_ELx_EC_BRK64 // if BRK64 - cinc x24, x24, eq // set bit '0' - tbz x24, #0, el1_inv // EL1 only - gic_prio_kentry_setup tmp=x3 - mrs x0, far_el1 - mov x2, sp // struct pt_regs - bl do_debug_exception - kernel_exit 1 -el1_inv: - // TODO: add support for undefined instructions in kernel mode - inherit_daif pstate=x23, tmp=x2 - mov x0, sp - mov x2, x1 - mov x1, #BAD_SYNC - bl bad_mode - ASM_BUG() ENDPROC(el1_sync) .align 6 @@ -680,7 +615,7 @@ alternative_if ARM64_HAS_IRQ_PRIO_MASKING orr x24, x24, x0 alternative_else_nop_endif cbnz x24, 1f // preempt count != 0 || NMI return path - bl preempt_schedule_irq // irq en/disable is done inside + bl arm64_preempt_schedule_irq // irq en/disable is done inside 1: #endif @@ -714,70 +649,18 @@ ENDPROC(el1_irq) .align 6 el0_sync: kernel_entry 0 - mrs x25, esr_el1 // read the syndrome register - lsr x24, x25, #ESR_ELx_EC_SHIFT // exception class - cmp x24, #ESR_ELx_EC_SVC64 // SVC in 64-bit state - b.eq el0_svc - cmp x24, #ESR_ELx_EC_DABT_LOW // data abort in EL0 - b.eq el0_da - cmp x24, #ESR_ELx_EC_IABT_LOW // instruction abort in EL0 - b.eq el0_ia - cmp x24, #ESR_ELx_EC_FP_ASIMD // FP/ASIMD access - b.eq el0_fpsimd_acc - cmp x24, #ESR_ELx_EC_SVE // SVE access - b.eq el0_sve_acc - cmp x24, #ESR_ELx_EC_FP_EXC64 // FP/ASIMD exception - b.eq el0_fpsimd_exc - cmp x24, #ESR_ELx_EC_SYS64 // configurable trap - ccmp x24, #ESR_ELx_EC_WFx, #4, ne - b.eq el0_sys - cmp x24, #ESR_ELx_EC_SP_ALIGN // stack alignment exception - b.eq el0_sp - cmp x24, #ESR_ELx_EC_PC_ALIGN // pc alignment exception - b.eq el0_pc - cmp x24, #ESR_ELx_EC_UNKNOWN // unknown exception in EL0 - b.eq el0_undef - cmp x24, #ESR_ELx_EC_BREAKPT_LOW // debug exception in EL0 - b.ge el0_dbg - b el0_inv + mov x0, sp + bl el0_sync_handler + b ret_to_user #ifdef CONFIG_COMPAT .align 6 el0_sync_compat: kernel_entry 0, 32 - mrs x25, esr_el1 // read the syndrome register - lsr x24, x25, #ESR_ELx_EC_SHIFT // exception class - cmp x24, #ESR_ELx_EC_SVC32 // SVC in 32-bit state - b.eq el0_svc_compat - cmp x24, #ESR_ELx_EC_DABT_LOW // data abort in EL0 - b.eq el0_da - cmp x24, #ESR_ELx_EC_IABT_LOW // instruction abort in EL0 - b.eq el0_ia - cmp x24, #ESR_ELx_EC_FP_ASIMD // FP/ASIMD access - b.eq el0_fpsimd_acc - cmp x24, #ESR_ELx_EC_FP_EXC32 // FP/ASIMD exception - b.eq el0_fpsimd_exc - cmp x24, #ESR_ELx_EC_PC_ALIGN // pc alignment exception - b.eq el0_pc - cmp x24, #ESR_ELx_EC_UNKNOWN // unknown exception in EL0 - b.eq el0_undef - cmp x24, #ESR_ELx_EC_CP15_32 // CP15 MRC/MCR trap - b.eq el0_cp15 - cmp x24, #ESR_ELx_EC_CP15_64 // CP15 MRRC/MCRR trap - b.eq el0_cp15 - cmp x24, #ESR_ELx_EC_CP14_MR // CP14 MRC/MCR trap - b.eq el0_undef - cmp x24, #ESR_ELx_EC_CP14_LS // CP14 LDC/STC trap - b.eq el0_undef - cmp x24, #ESR_ELx_EC_CP14_64 // CP14 MRRC/MCRR trap - b.eq el0_undef - cmp x24, #ESR_ELx_EC_BREAKPT_LOW // debug exception in EL0 - b.ge el0_dbg - b el0_inv -el0_svc_compat: mov x0, sp - bl el0_svc_compat_handler + bl el0_sync_compat_handler b ret_to_user +ENDPROC(el0_sync) .align 6 el0_irq_compat: @@ -787,140 +670,8 @@ el0_irq_compat: el0_error_compat: kernel_entry 0, 32 b el0_error_naked - -el0_cp15: - /* - * Trapped CP15 (MRC, MCR, MRRC, MCRR) instructions - */ - ct_user_exit_irqoff - enable_daif - 
mov x0, x25 - mov x1, sp - bl do_cp15instr - b ret_to_user #endif -el0_da: - /* - * Data abort handling - */ - mrs x26, far_el1 - ct_user_exit_irqoff - enable_daif - clear_address_tag x0, x26 - mov x1, x25 - mov x2, sp - bl do_mem_abort - b ret_to_user -el0_ia: - /* - * Instruction abort handling - */ - mrs x26, far_el1 - gic_prio_kentry_setup tmp=x0 - ct_user_exit_irqoff - enable_da_f -#ifdef CONFIG_TRACE_IRQFLAGS - bl trace_hardirqs_off -#endif - mov x0, x26 - mov x1, x25 - mov x2, sp - bl do_el0_ia_bp_hardening - b ret_to_user -el0_fpsimd_acc: - /* - * Floating Point or Advanced SIMD access - */ - ct_user_exit_irqoff - enable_daif - mov x0, x25 - mov x1, sp - bl do_fpsimd_acc - b ret_to_user -el0_sve_acc: - /* - * Scalable Vector Extension access - */ - ct_user_exit_irqoff - enable_daif - mov x0, x25 - mov x1, sp - bl do_sve_acc - b ret_to_user -el0_fpsimd_exc: - /* - * Floating Point, Advanced SIMD or SVE exception - */ - ct_user_exit_irqoff - enable_daif - mov x0, x25 - mov x1, sp - bl do_fpsimd_exc - b ret_to_user -el0_sp: - ldr x26, [sp, #S_SP] - b el0_sp_pc -el0_pc: - mrs x26, far_el1 -el0_sp_pc: - /* - * Stack or PC alignment exception handling - */ - gic_prio_kentry_setup tmp=x0 - ct_user_exit_irqoff - enable_da_f -#ifdef CONFIG_TRACE_IRQFLAGS - bl trace_hardirqs_off -#endif - mov x0, x26 - mov x1, x25 - mov x2, sp - bl do_sp_pc_abort - b ret_to_user -el0_undef: - /* - * Undefined instruction - */ - ct_user_exit_irqoff - enable_daif - mov x0, sp - bl do_undefinstr - b ret_to_user -el0_sys: - /* - * System instructions, for trapped cache maintenance instructions - */ - ct_user_exit_irqoff - enable_daif - mov x0, x25 - mov x1, sp - bl do_sysinstr - b ret_to_user -el0_dbg: - /* - * Debug exception handling - */ - tbnz x24, #0, el0_inv // EL0 only - mrs x24, far_el1 - gic_prio_kentry_setup tmp=x3 - ct_user_exit_irqoff - mov x0, x24 - mov x1, x25 - mov x2, sp - bl do_debug_exception - enable_da_f - b ret_to_user -el0_inv: - ct_user_exit_irqoff - enable_daif - mov x0, sp - mov x1, #BAD_SYNC - mov x2, x25 - bl bad_el0_sync - b ret_to_user -ENDPROC(el0_sync) - .align 6 el0_irq: kernel_entry 0 @@ -998,17 +749,6 @@ finish_ret_to_user: kernel_exit 0 ENDPROC(ret_to_user) -/* - * SVC handler. - */ - .align 6 -el0_svc: - gic_prio_kentry_setup tmp=x1 - mov x0, sp - bl el0_svc_handler - b ret_to_user -ENDPROC(el0_svc) - .popsection // .entry.text #ifdef CONFIG_UNMAP_KERNEL_AT_EL0 @@ -1070,7 +810,9 @@ alternative_insn isb, nop, ARM64_WORKAROUND_QCOM_FALKOR_E1003 #else ldr x30, =vectors #endif +alternative_if_not ARM64_WORKAROUND_CAVIUM_TX2_219_PRFM prfm plil1strm, [x30, #(1b - tramp_vectors)] +alternative_else_nop_endif msr vbar_el1, x30 add x30, x30, #(1b - tramp_vectors) isb diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c index 37d3912cfe06..3eb338f14386 100644 --- a/arch/arm64/kernel/fpsimd.c +++ b/arch/arm64/kernel/fpsimd.c @@ -920,7 +920,7 @@ void fpsimd_release_task(struct task_struct *dead_task) * would have disabled the SVE access trap for userspace during * ret_to_user, making an SVE access trap impossible in that case. */ -asmlinkage void do_sve_acc(unsigned int esr, struct pt_regs *regs) +void do_sve_acc(unsigned int esr, struct pt_regs *regs) { /* Even if we chose not to use SVE, the hardware could still trap: */ if (unlikely(!system_supports_sve()) || WARN_ON(is_compat_task())) { @@ -947,7 +947,7 @@ asmlinkage void do_sve_acc(unsigned int esr, struct pt_regs *regs) /* * Trapped FP/ASIMD access. 
*/ -asmlinkage void do_fpsimd_acc(unsigned int esr, struct pt_regs *regs) +void do_fpsimd_acc(unsigned int esr, struct pt_regs *regs) { /* TODO: implement lazy context saving/restoring */ WARN_ON(1); @@ -956,7 +956,7 @@ asmlinkage void do_fpsimd_acc(unsigned int esr, struct pt_regs *regs) /* * Raise a SIGFPE for the current process. */ -asmlinkage void do_fpsimd_exc(unsigned int esr, struct pt_regs *regs) +void do_fpsimd_exc(unsigned int esr, struct pt_regs *regs) { unsigned int si_code = FPE_FLTUNK; diff --git a/arch/arm64/kernel/ftrace.c b/arch/arm64/kernel/ftrace.c index 171773257974..8618faa82e6d 100644 --- a/arch/arm64/kernel/ftrace.c +++ b/arch/arm64/kernel/ftrace.c @@ -62,6 +62,19 @@ int ftrace_update_ftrace_func(ftrace_func_t func) return ftrace_modify_code(pc, 0, new, false); } +static struct plt_entry *get_ftrace_plt(struct module *mod, unsigned long addr) +{ +#ifdef CONFIG_ARM64_MODULE_PLTS + struct plt_entry *plt = mod->arch.ftrace_trampolines; + + if (addr == FTRACE_ADDR) + return &plt[FTRACE_PLT_IDX]; + if (addr == FTRACE_REGS_ADDR && IS_ENABLED(CONFIG_DYNAMIC_FTRACE_WITH_REGS)) + return &plt[FTRACE_REGS_PLT_IDX]; +#endif + return NULL; +} + /* * Turn on the call to ftrace_caller() in instrumented function */ @@ -72,9 +85,11 @@ int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr) long offset = (long)pc - (long)addr; if (offset < -SZ_128M || offset >= SZ_128M) { -#ifdef CONFIG_ARM64_MODULE_PLTS - struct plt_entry trampoline, *dst; struct module *mod; + struct plt_entry *plt; + + if (!IS_ENABLED(CONFIG_ARM64_MODULE_PLTS)) + return -EINVAL; /* * On kernels that support module PLTs, the offset between the @@ -93,43 +108,13 @@ int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr) if (WARN_ON(!mod)) return -EINVAL; - /* - * There is only one ftrace trampoline per module. For now, - * this is not a problem since on arm64, all dynamic ftrace - * invocations are routed via ftrace_caller(). This will need - * to be revisited if support for multiple ftrace entry points - * is added in the future, but for now, the pr_err() below - * deals with a theoretical issue only. - * - * Note that PLTs are place relative, and plt_entries_equal() - * checks whether they point to the same target. Here, we need - * to check if the actual opcodes are in fact identical, - * regardless of the offset in memory so use memcmp() instead. - */ - dst = mod->arch.ftrace_trampoline; - trampoline = get_plt_entry(addr, dst); - if (memcmp(dst, &trampoline, sizeof(trampoline))) { - if (plt_entry_is_initialized(dst)) { - pr_err("ftrace: far branches to multiple entry points unsupported inside a single module\n"); - return -EINVAL; - } - - /* point the trampoline to our ftrace entry point */ - module_disable_ro(mod); - *dst = trampoline; - module_enable_ro(mod, true); - - /* - * Ensure updated trampoline is visible to instruction - * fetch before we patch in the branch.
- */ - __flush_icache_range((unsigned long)&dst[0], - (unsigned long)&dst[1]); + plt = get_ftrace_plt(mod, addr); + if (!plt) { + pr_err("ftrace: no module PLT for %ps\n", (void *)addr); + return -EINVAL; } - addr = (unsigned long)dst; -#else /* CONFIG_ARM64_MODULE_PLTS */ - return -EINVAL; -#endif /* CONFIG_ARM64_MODULE_PLTS */ + + addr = (unsigned long)plt; } old = aarch64_insn_gen_nop(); @@ -138,6 +123,55 @@ int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr) return ftrace_modify_code(pc, old, new, true); } +#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS +int ftrace_modify_call(struct dyn_ftrace *rec, unsigned long old_addr, + unsigned long addr) +{ + unsigned long pc = rec->ip; + u32 old, new; + + old = aarch64_insn_gen_branch_imm(pc, old_addr, + AARCH64_INSN_BRANCH_LINK); + new = aarch64_insn_gen_branch_imm(pc, addr, AARCH64_INSN_BRANCH_LINK); + + return ftrace_modify_code(pc, old, new, true); +} + +/* + * The compiler has inserted two NOPs before the regular function prologue. + * All instrumented functions follow the AAPCS, so x0-x8 and x19-x30 are live, + * and x9-x18 are free for our use. + * + * At runtime we want to be able to swing a single NOP <-> BL to enable or + * disable the ftrace call. The BL requires us to save the original LR value, + * so here we insert a <MOV X9, LR> over the first NOP so the instructions + * before the regular prologue are: + * + * | Compiled | Disabled | Enabled | + * +----------+------------+------------+ + * | NOP | MOV X9, LR | MOV X9, LR | + * | NOP | NOP | BL <entry> | + * + * The LR value will be recovered by ftrace_regs_entry, and restored into LR + * before returning to the regular function prologue. When a function is not + * being traced, the MOV is not harmful given x9 is not live per the AAPCS. + * + * Note: ftrace_process_locs() has pre-adjusted rec->ip to be the address of + * the BL. 
+ */ +int ftrace_init_nop(struct module *mod, struct dyn_ftrace *rec) +{ + unsigned long pc = rec->ip - AARCH64_INSN_SIZE; + u32 old, new; + + old = aarch64_insn_gen_nop(); + new = aarch64_insn_gen_move_reg(AARCH64_INSN_REG_9, + AARCH64_INSN_REG_LR, + AARCH64_INSN_VARIANT_64BIT); + return ftrace_modify_code(pc, old, new, true); +} +#endif + /* * Turn off the call to ftrace_caller() in instrumented function */ @@ -150,9 +184,11 @@ int ftrace_make_nop(struct module *mod, struct dyn_ftrace *rec, long offset = (long)pc - (long)addr; if (offset < -SZ_128M || offset >= SZ_128M) { -#ifdef CONFIG_ARM64_MODULE_PLTS u32 replaced; + if (!IS_ENABLED(CONFIG_ARM64_MODULE_PLTS)) + return -EINVAL; + /* * 'mod' is only set at module load time, but if we end up * dealing with an out-of-range condition, we can assume it @@ -183,9 +219,6 @@ int ftrace_make_nop(struct module *mod, struct dyn_ftrace *rec, return -EINVAL; validate = false; -#else /* CONFIG_ARM64_MODULE_PLTS */ - return -EINVAL; -#endif /* CONFIG_ARM64_MODULE_PLTS */ } else { old = aarch64_insn_gen_branch_imm(pc, addr, AARCH64_INSN_BRANCH_LINK); diff --git a/arch/arm64/kernel/hibernate.c b/arch/arm64/kernel/hibernate.c index e0a7fce0e01c..a96b2921d22c 100644 --- a/arch/arm64/kernel/hibernate.c +++ b/arch/arm64/kernel/hibernate.c @@ -201,6 +201,7 @@ static int create_safe_exec_page(void *src_start, size_t length, gfp_t mask) { int rc = 0; + pgd_t *trans_pgd; pgd_t *pgdp; pud_t *pudp; pmd_t *pmdp; @@ -215,7 +216,13 @@ static int create_safe_exec_page(void *src_start, size_t length, memcpy((void *)dst, src_start, length); __flush_icache_range(dst, dst + length); - pgdp = pgd_offset_raw(allocator(mask), dst_addr); + trans_pgd = allocator(mask); + if (!trans_pgd) { + rc = -ENOMEM; + goto out; + } + + pgdp = pgd_offset_raw(trans_pgd, dst_addr); if (pgd_none(READ_ONCE(*pgdp))) { pudp = allocator(mask); if (!pudp) { diff --git a/arch/arm64/kernel/hw_breakpoint.c b/arch/arm64/kernel/hw_breakpoint.c index 38ee1514cd9c..0b727edf4104 100644 --- a/arch/arm64/kernel/hw_breakpoint.c +++ b/arch/arm64/kernel/hw_breakpoint.c @@ -51,7 +51,7 @@ int hw_breakpoint_slots(int type) case TYPE_DATA: return get_num_wrps(); default: - pr_warning("unknown slot type: %d\n", type); + pr_warn("unknown slot type: %d\n", type); return 0; } } @@ -112,7 +112,7 @@ static u64 read_wb_reg(int reg, int n) GEN_READ_WB_REG_CASES(AARCH64_DBG_REG_WVR, AARCH64_DBG_REG_NAME_WVR, val); GEN_READ_WB_REG_CASES(AARCH64_DBG_REG_WCR, AARCH64_DBG_REG_NAME_WCR, val); default: - pr_warning("attempt to read from unknown breakpoint register %d\n", n); + pr_warn("attempt to read from unknown breakpoint register %d\n", n); } return val; @@ -127,7 +127,7 @@ static void write_wb_reg(int reg, int n, u64 val) GEN_WRITE_WB_REG_CASES(AARCH64_DBG_REG_WVR, AARCH64_DBG_REG_NAME_WVR, val); GEN_WRITE_WB_REG_CASES(AARCH64_DBG_REG_WCR, AARCH64_DBG_REG_NAME_WCR, val); default: - pr_warning("attempt to write to unknown breakpoint register %d\n", n); + pr_warn("attempt to write to unknown breakpoint register %d\n", n); } isb(); } @@ -145,7 +145,7 @@ static enum dbg_active_el debug_exception_level(int privilege) case AARCH64_BREAKPOINT_EL1: return DBG_ACTIVE_EL1; default: - pr_warning("invalid breakpoint privilege level %d\n", privilege); + pr_warn("invalid breakpoint privilege level %d\n", privilege); return -EINVAL; } } diff --git a/arch/arm64/kernel/insn.c b/arch/arm64/kernel/insn.c index d801a7094076..513b29c3e735 100644 --- a/arch/arm64/kernel/insn.c +++ b/arch/arm64/kernel/insn.c @@ -1268,6 +1268,19 @@ u32 
aarch64_insn_gen_logical_shifted_reg(enum aarch64_insn_register dst, return aarch64_insn_encode_immediate(AARCH64_INSN_IMM_6, insn, shift); } +/* + * MOV (register) is architecturally an alias of ORR (shifted register) where + * MOV <*d>, <*m> is equivalent to ORR <*d>, <*ZR>, <*m> + */ +u32 aarch64_insn_gen_move_reg(enum aarch64_insn_register dst, + enum aarch64_insn_register src, + enum aarch64_insn_variant variant) +{ + return aarch64_insn_gen_logical_shifted_reg(dst, AARCH64_INSN_REG_ZR, + src, 0, variant, + AARCH64_INSN_LOGIC_ORR); +} + u32 aarch64_insn_gen_adr(unsigned long pc, unsigned long addr, enum aarch64_insn_register reg, enum aarch64_insn_adr_type type) diff --git a/arch/arm64/kernel/kaslr.c b/arch/arm64/kernel/kaslr.c index 416f537bf614..2a11a962e571 100644 --- a/arch/arm64/kernel/kaslr.c +++ b/arch/arm64/kernel/kaslr.c @@ -19,6 +19,14 @@ #include <asm/pgtable.h> #include <asm/sections.h> +enum kaslr_status { + KASLR_ENABLED, + KASLR_DISABLED_CMDLINE, + KASLR_DISABLED_NO_SEED, + KASLR_DISABLED_FDT_REMAP, +}; + +static enum kaslr_status __initdata kaslr_status; u64 __ro_after_init module_alloc_base; u16 __initdata memstart_offset_seed; @@ -91,15 +99,15 @@ u64 __init kaslr_early_init(u64 dt_phys) */ early_fixmap_init(); fdt = fixmap_remap_fdt(dt_phys, &size, PAGE_KERNEL); - if (!fdt) + if (!fdt) { + kaslr_status = KASLR_DISABLED_FDT_REMAP; return 0; + } /* * Retrieve (and wipe) the seed from the FDT */ seed = get_kaslr_seed(fdt); - if (!seed) - return 0; /* * Check if 'nokaslr' appears on the command line, and @@ -107,8 +115,15 @@ u64 __init kaslr_early_init(u64 dt_phys) */ cmdline = kaslr_get_cmdline(fdt); str = strstr(cmdline, "nokaslr"); - if (str == cmdline || (str > cmdline && *(str - 1) == ' ')) + if (str == cmdline || (str > cmdline && *(str - 1) == ' ')) { + kaslr_status = KASLR_DISABLED_CMDLINE; + return 0; + } + + if (!seed) { + kaslr_status = KASLR_DISABLED_NO_SEED; return 0; + } /* * OK, so we are proceeding with KASLR enabled. 
Calculate a suitable @@ -170,3 +185,24 @@ u64 __init kaslr_early_init(u64 dt_phys) return offset; } + +static int __init kaslr_init(void) +{ + switch (kaslr_status) { + case KASLR_ENABLED: + pr_info("KASLR enabled\n"); + break; + case KASLR_DISABLED_CMDLINE: + pr_info("KASLR disabled on command line\n"); + break; + case KASLR_DISABLED_NO_SEED: + pr_warn("KASLR disabled due to lack of seed\n"); + break; + case KASLR_DISABLED_FDT_REMAP: + pr_warn("KASLR disabled due to FDT remapping failure\n"); + break; + } + + return 0; +} +core_initcall(kaslr_init) diff --git a/arch/arm64/kernel/module-plts.c b/arch/arm64/kernel/module-plts.c index b182442b87a3..65b08a74aec6 100644 --- a/arch/arm64/kernel/module-plts.c +++ b/arch/arm64/kernel/module-plts.c @@ -4,6 +4,7 @@ */ #include <linux/elf.h> +#include <linux/ftrace.h> #include <linux/kernel.h> #include <linux/module.h> #include <linux/sort.h> @@ -330,7 +331,7 @@ int module_frob_arch_sections(Elf_Ehdr *ehdr, Elf_Shdr *sechdrs, tramp->sh_type = SHT_NOBITS; tramp->sh_flags = SHF_EXECINSTR | SHF_ALLOC; tramp->sh_addralign = __alignof__(struct plt_entry); - tramp->sh_size = sizeof(struct plt_entry); + tramp->sh_size = NR_FTRACE_PLTS * sizeof(struct plt_entry); } return 0; diff --git a/arch/arm64/kernel/module.c b/arch/arm64/kernel/module.c index 03ff15bffbb6..1cd1a4d0ed30 100644 --- a/arch/arm64/kernel/module.c +++ b/arch/arm64/kernel/module.c @@ -9,6 +9,7 @@ #include <linux/bitops.h> #include <linux/elf.h> +#include <linux/ftrace.h> #include <linux/gfp.h> #include <linux/kasan.h> #include <linux/kernel.h> @@ -470,22 +471,58 @@ overflow: return -ENOEXEC; } -int module_finalize(const Elf_Ehdr *hdr, - const Elf_Shdr *sechdrs, - struct module *me) +static const Elf_Shdr *find_section(const Elf_Ehdr *hdr, + const Elf_Shdr *sechdrs, + const char *name) { const Elf_Shdr *s, *se; const char *secstrs = (void *)hdr + sechdrs[hdr->e_shstrndx].sh_offset; for (s = sechdrs, se = sechdrs + hdr->e_shnum; s < se; s++) { - if (strcmp(".altinstructions", secstrs + s->sh_name) == 0) - apply_alternatives_module((void *)s->sh_addr, s->sh_size); -#ifdef CONFIG_ARM64_MODULE_PLTS - if (IS_ENABLED(CONFIG_DYNAMIC_FTRACE) && - !strcmp(".text.ftrace_trampoline", secstrs + s->sh_name)) - me->arch.ftrace_trampoline = (void *)s->sh_addr; -#endif + if (strcmp(name, secstrs + s->sh_name) == 0) + return s; } + return NULL; +} + +static inline void __init_plt(struct plt_entry *plt, unsigned long addr) +{ + *plt = get_plt_entry(addr, plt); +} + +static int module_init_ftrace_plt(const Elf_Ehdr *hdr, + const Elf_Shdr *sechdrs, + struct module *mod) +{ +#if defined(CONFIG_ARM64_MODULE_PLTS) && defined(CONFIG_DYNAMIC_FTRACE) + const Elf_Shdr *s; + struct plt_entry *plts; + + s = find_section(hdr, sechdrs, ".text.ftrace_trampoline"); + if (!s) + return -ENOEXEC; + + plts = (void *)s->sh_addr; + + __init_plt(&plts[FTRACE_PLT_IDX], FTRACE_ADDR); + + if (IS_ENABLED(CONFIG_DYNAMIC_FTRACE_WITH_REGS)) + __init_plt(&plts[FTRACE_REGS_PLT_IDX], FTRACE_REGS_ADDR); + + mod->arch.ftrace_trampolines = plts; +#endif return 0; } + +int module_finalize(const Elf_Ehdr *hdr, + const Elf_Shdr *sechdrs, + struct module *me) +{ + const Elf_Shdr *s; + s = find_section(hdr, sechdrs, ".altinstructions"); + if (s) + apply_alternatives_module((void *)s->sh_addr, s->sh_size); + + return module_init_ftrace_plt(hdr, sechdrs, me); +} diff --git a/arch/arm64/kernel/paravirt.c b/arch/arm64/kernel/paravirt.c index 4cfed91fe256..1ef702b0be2d 100644 --- a/arch/arm64/kernel/paravirt.c +++ b/arch/arm64/kernel/paravirt.c @@ -6,13 
+6,153 @@ * Author: Stefano Stabellini <stefano.stabellini@eu.citrix.com> */ +#define pr_fmt(fmt) "arm-pv: " fmt + +#include <linux/arm-smccc.h> +#include <linux/cpuhotplug.h> #include <linux/export.h> +#include <linux/io.h> #include <linux/jump_label.h> +#include <linux/printk.h> +#include <linux/psci.h> +#include <linux/reboot.h> +#include <linux/slab.h> #include <linux/types.h> + #include <asm/paravirt.h> +#include <asm/pvclock-abi.h> +#include <asm/smp_plat.h> struct static_key paravirt_steal_enabled; struct static_key paravirt_steal_rq_enabled; struct paravirt_patch_template pv_ops; EXPORT_SYMBOL_GPL(pv_ops); + +struct pv_time_stolen_time_region { + struct pvclock_vcpu_stolen_time *kaddr; +}; + +static DEFINE_PER_CPU(struct pv_time_stolen_time_region, stolen_time_region); + +static bool steal_acc = true; +static int __init parse_no_stealacc(char *arg) +{ + steal_acc = false; + return 0; +} + +early_param("no-steal-acc", parse_no_stealacc); + +/* return stolen time in ns by asking the hypervisor */ +static u64 pv_steal_clock(int cpu) +{ + struct pv_time_stolen_time_region *reg; + + reg = per_cpu_ptr(&stolen_time_region, cpu); + if (!reg->kaddr) { + pr_warn_once("stolen time enabled but not configured for cpu %d\n", + cpu); + return 0; + } + + return le64_to_cpu(READ_ONCE(reg->kaddr->stolen_time)); +} + +static int stolen_time_dying_cpu(unsigned int cpu) +{ + struct pv_time_stolen_time_region *reg; + + reg = this_cpu_ptr(&stolen_time_region); + if (!reg->kaddr) + return 0; + + memunmap(reg->kaddr); + memset(reg, 0, sizeof(*reg)); + + return 0; +} + +static int init_stolen_time_cpu(unsigned int cpu) +{ + struct pv_time_stolen_time_region *reg; + struct arm_smccc_res res; + + reg = this_cpu_ptr(&stolen_time_region); + + arm_smccc_1_1_invoke(ARM_SMCCC_HV_PV_TIME_ST, &res); + + if (res.a0 == SMCCC_RET_NOT_SUPPORTED) + return -EINVAL; + + reg->kaddr = memremap(res.a0, + sizeof(struct pvclock_vcpu_stolen_time), + MEMREMAP_WB); + + if (!reg->kaddr) { + pr_warn("Failed to map stolen time data structure\n"); + return -ENOMEM; + } + + if (le32_to_cpu(reg->kaddr->revision) != 0 || + le32_to_cpu(reg->kaddr->attributes) != 0) { + pr_warn_once("Unexpected revision or attributes in stolen time data\n"); + return -ENXIO; + } + + return 0; +} + +static int pv_time_init_stolen_time(void) +{ + int ret; + + ret = cpuhp_setup_state(CPUHP_AP_ARM_KVMPV_STARTING, + "hypervisor/arm/pvtime:starting", + init_stolen_time_cpu, stolen_time_dying_cpu); + if (ret < 0) + return ret; + return 0; +} + +static bool has_pv_steal_clock(void) +{ + struct arm_smccc_res res; + + /* To detect the presence of PV time support we require SMCCC 1.1+ */ + if (psci_ops.smccc_version < SMCCC_VERSION_1_1) + return false; + + arm_smccc_1_1_invoke(ARM_SMCCC_ARCH_FEATURES_FUNC_ID, + ARM_SMCCC_HV_PV_TIME_FEATURES, &res); + + if (res.a0 != SMCCC_RET_SUCCESS) + return false; + + arm_smccc_1_1_invoke(ARM_SMCCC_HV_PV_TIME_FEATURES, + ARM_SMCCC_HV_PV_TIME_ST, &res); + + return (res.a0 == SMCCC_RET_SUCCESS); +} + +int __init pv_time_init(void) +{ + int ret; + + if (!has_pv_steal_clock()) + return 0; + + ret = pv_time_init_stolen_time(); + if (ret) + return ret; + + pv_ops.time.steal_clock = pv_steal_clock; + + static_key_slow_inc(¶virt_steal_enabled); + if (steal_acc) + static_key_slow_inc(¶virt_steal_rq_enabled); + + pr_info("using stolen time PV\n"); + + return 0; +} diff --git a/arch/arm64/kernel/perf_event.c b/arch/arm64/kernel/perf_event.c index a0b4f1bca491..e40b65645c86 100644 --- a/arch/arm64/kernel/perf_event.c +++ 
b/arch/arm64/kernel/perf_event.c @@ -158,133 +158,74 @@ armv8pmu_events_sysfs_show(struct device *dev, return sprintf(page, "event=0x%03llx\n", pmu_attr->id); } -#define ARMV8_EVENT_ATTR(name, config) \ - PMU_EVENT_ATTR(name, armv8_event_attr_##name, \ - config, armv8pmu_events_sysfs_show) - -ARMV8_EVENT_ATTR(sw_incr, ARMV8_PMUV3_PERFCTR_SW_INCR); -ARMV8_EVENT_ATTR(l1i_cache_refill, ARMV8_PMUV3_PERFCTR_L1I_CACHE_REFILL); -ARMV8_EVENT_ATTR(l1i_tlb_refill, ARMV8_PMUV3_PERFCTR_L1I_TLB_REFILL); -ARMV8_EVENT_ATTR(l1d_cache_refill, ARMV8_PMUV3_PERFCTR_L1D_CACHE_REFILL); -ARMV8_EVENT_ATTR(l1d_cache, ARMV8_PMUV3_PERFCTR_L1D_CACHE); -ARMV8_EVENT_ATTR(l1d_tlb_refill, ARMV8_PMUV3_PERFCTR_L1D_TLB_REFILL); -ARMV8_EVENT_ATTR(ld_retired, ARMV8_PMUV3_PERFCTR_LD_RETIRED); -ARMV8_EVENT_ATTR(st_retired, ARMV8_PMUV3_PERFCTR_ST_RETIRED); -ARMV8_EVENT_ATTR(inst_retired, ARMV8_PMUV3_PERFCTR_INST_RETIRED); -ARMV8_EVENT_ATTR(exc_taken, ARMV8_PMUV3_PERFCTR_EXC_TAKEN); -ARMV8_EVENT_ATTR(exc_return, ARMV8_PMUV3_PERFCTR_EXC_RETURN); -ARMV8_EVENT_ATTR(cid_write_retired, ARMV8_PMUV3_PERFCTR_CID_WRITE_RETIRED); -ARMV8_EVENT_ATTR(pc_write_retired, ARMV8_PMUV3_PERFCTR_PC_WRITE_RETIRED); -ARMV8_EVENT_ATTR(br_immed_retired, ARMV8_PMUV3_PERFCTR_BR_IMMED_RETIRED); -ARMV8_EVENT_ATTR(br_return_retired, ARMV8_PMUV3_PERFCTR_BR_RETURN_RETIRED); -ARMV8_EVENT_ATTR(unaligned_ldst_retired, ARMV8_PMUV3_PERFCTR_UNALIGNED_LDST_RETIRED); -ARMV8_EVENT_ATTR(br_mis_pred, ARMV8_PMUV3_PERFCTR_BR_MIS_PRED); -ARMV8_EVENT_ATTR(cpu_cycles, ARMV8_PMUV3_PERFCTR_CPU_CYCLES); -ARMV8_EVENT_ATTR(br_pred, ARMV8_PMUV3_PERFCTR_BR_PRED); -ARMV8_EVENT_ATTR(mem_access, ARMV8_PMUV3_PERFCTR_MEM_ACCESS); -ARMV8_EVENT_ATTR(l1i_cache, ARMV8_PMUV3_PERFCTR_L1I_CACHE); -ARMV8_EVENT_ATTR(l1d_cache_wb, ARMV8_PMUV3_PERFCTR_L1D_CACHE_WB); -ARMV8_EVENT_ATTR(l2d_cache, ARMV8_PMUV3_PERFCTR_L2D_CACHE); -ARMV8_EVENT_ATTR(l2d_cache_refill, ARMV8_PMUV3_PERFCTR_L2D_CACHE_REFILL); -ARMV8_EVENT_ATTR(l2d_cache_wb, ARMV8_PMUV3_PERFCTR_L2D_CACHE_WB); -ARMV8_EVENT_ATTR(bus_access, ARMV8_PMUV3_PERFCTR_BUS_ACCESS); -ARMV8_EVENT_ATTR(memory_error, ARMV8_PMUV3_PERFCTR_MEMORY_ERROR); -ARMV8_EVENT_ATTR(inst_spec, ARMV8_PMUV3_PERFCTR_INST_SPEC); -ARMV8_EVENT_ATTR(ttbr_write_retired, ARMV8_PMUV3_PERFCTR_TTBR_WRITE_RETIRED); -ARMV8_EVENT_ATTR(bus_cycles, ARMV8_PMUV3_PERFCTR_BUS_CYCLES); -/* Don't expose the chain event in /sys, since it's useless in isolation */ -ARMV8_EVENT_ATTR(l1d_cache_allocate, ARMV8_PMUV3_PERFCTR_L1D_CACHE_ALLOCATE); -ARMV8_EVENT_ATTR(l2d_cache_allocate, ARMV8_PMUV3_PERFCTR_L2D_CACHE_ALLOCATE); -ARMV8_EVENT_ATTR(br_retired, ARMV8_PMUV3_PERFCTR_BR_RETIRED); -ARMV8_EVENT_ATTR(br_mis_pred_retired, ARMV8_PMUV3_PERFCTR_BR_MIS_PRED_RETIRED); -ARMV8_EVENT_ATTR(stall_frontend, ARMV8_PMUV3_PERFCTR_STALL_FRONTEND); -ARMV8_EVENT_ATTR(stall_backend, ARMV8_PMUV3_PERFCTR_STALL_BACKEND); -ARMV8_EVENT_ATTR(l1d_tlb, ARMV8_PMUV3_PERFCTR_L1D_TLB); -ARMV8_EVENT_ATTR(l1i_tlb, ARMV8_PMUV3_PERFCTR_L1I_TLB); -ARMV8_EVENT_ATTR(l2i_cache, ARMV8_PMUV3_PERFCTR_L2I_CACHE); -ARMV8_EVENT_ATTR(l2i_cache_refill, ARMV8_PMUV3_PERFCTR_L2I_CACHE_REFILL); -ARMV8_EVENT_ATTR(l3d_cache_allocate, ARMV8_PMUV3_PERFCTR_L3D_CACHE_ALLOCATE); -ARMV8_EVENT_ATTR(l3d_cache_refill, ARMV8_PMUV3_PERFCTR_L3D_CACHE_REFILL); -ARMV8_EVENT_ATTR(l3d_cache, ARMV8_PMUV3_PERFCTR_L3D_CACHE); -ARMV8_EVENT_ATTR(l3d_cache_wb, ARMV8_PMUV3_PERFCTR_L3D_CACHE_WB); -ARMV8_EVENT_ATTR(l2d_tlb_refill, ARMV8_PMUV3_PERFCTR_L2D_TLB_REFILL); -ARMV8_EVENT_ATTR(l2i_tlb_refill, ARMV8_PMUV3_PERFCTR_L2I_TLB_REFILL); -ARMV8_EVENT_ATTR(l2d_tlb, 
ARMV8_PMUV3_PERFCTR_L2D_TLB); -ARMV8_EVENT_ATTR(l2i_tlb, ARMV8_PMUV3_PERFCTR_L2I_TLB); -ARMV8_EVENT_ATTR(remote_access, ARMV8_PMUV3_PERFCTR_REMOTE_ACCESS); -ARMV8_EVENT_ATTR(ll_cache, ARMV8_PMUV3_PERFCTR_LL_CACHE); -ARMV8_EVENT_ATTR(ll_cache_miss, ARMV8_PMUV3_PERFCTR_LL_CACHE_MISS); -ARMV8_EVENT_ATTR(dtlb_walk, ARMV8_PMUV3_PERFCTR_DTLB_WALK); -ARMV8_EVENT_ATTR(itlb_walk, ARMV8_PMUV3_PERFCTR_ITLB_WALK); -ARMV8_EVENT_ATTR(ll_cache_rd, ARMV8_PMUV3_PERFCTR_LL_CACHE_RD); -ARMV8_EVENT_ATTR(ll_cache_miss_rd, ARMV8_PMUV3_PERFCTR_LL_CACHE_MISS_RD); -ARMV8_EVENT_ATTR(remote_access_rd, ARMV8_PMUV3_PERFCTR_REMOTE_ACCESS_RD); -ARMV8_EVENT_ATTR(sample_pop, ARMV8_SPE_PERFCTR_SAMPLE_POP); -ARMV8_EVENT_ATTR(sample_feed, ARMV8_SPE_PERFCTR_SAMPLE_FEED); -ARMV8_EVENT_ATTR(sample_filtrate, ARMV8_SPE_PERFCTR_SAMPLE_FILTRATE); -ARMV8_EVENT_ATTR(sample_collision, ARMV8_SPE_PERFCTR_SAMPLE_COLLISION); +#define ARMV8_EVENT_ATTR(name, config) \ + (&((struct perf_pmu_events_attr) { \ + .attr = __ATTR(name, 0444, armv8pmu_events_sysfs_show, NULL), \ + .id = config, \ + }).attr.attr) static struct attribute *armv8_pmuv3_event_attrs[] = { - &armv8_event_attr_sw_incr.attr.attr, - &armv8_event_attr_l1i_cache_refill.attr.attr, - &armv8_event_attr_l1i_tlb_refill.attr.attr, - &armv8_event_attr_l1d_cache_refill.attr.attr, - &armv8_event_attr_l1d_cache.attr.attr, - &armv8_event_attr_l1d_tlb_refill.attr.attr, - &armv8_event_attr_ld_retired.attr.attr, - &armv8_event_attr_st_retired.attr.attr, - &armv8_event_attr_inst_retired.attr.attr, - &armv8_event_attr_exc_taken.attr.attr, - &armv8_event_attr_exc_return.attr.attr, - &armv8_event_attr_cid_write_retired.attr.attr, - &armv8_event_attr_pc_write_retired.attr.attr, - &armv8_event_attr_br_immed_retired.attr.attr, - &armv8_event_attr_br_return_retired.attr.attr, - &armv8_event_attr_unaligned_ldst_retired.attr.attr, - &armv8_event_attr_br_mis_pred.attr.attr, - &armv8_event_attr_cpu_cycles.attr.attr, - &armv8_event_attr_br_pred.attr.attr, - &armv8_event_attr_mem_access.attr.attr, - &armv8_event_attr_l1i_cache.attr.attr, - &armv8_event_attr_l1d_cache_wb.attr.attr, - &armv8_event_attr_l2d_cache.attr.attr, - &armv8_event_attr_l2d_cache_refill.attr.attr, - &armv8_event_attr_l2d_cache_wb.attr.attr, - &armv8_event_attr_bus_access.attr.attr, - &armv8_event_attr_memory_error.attr.attr, - &armv8_event_attr_inst_spec.attr.attr, - &armv8_event_attr_ttbr_write_retired.attr.attr, - &armv8_event_attr_bus_cycles.attr.attr, - &armv8_event_attr_l1d_cache_allocate.attr.attr, - &armv8_event_attr_l2d_cache_allocate.attr.attr, - &armv8_event_attr_br_retired.attr.attr, - &armv8_event_attr_br_mis_pred_retired.attr.attr, - &armv8_event_attr_stall_frontend.attr.attr, - &armv8_event_attr_stall_backend.attr.attr, - &armv8_event_attr_l1d_tlb.attr.attr, - &armv8_event_attr_l1i_tlb.attr.attr, - &armv8_event_attr_l2i_cache.attr.attr, - &armv8_event_attr_l2i_cache_refill.attr.attr, - &armv8_event_attr_l3d_cache_allocate.attr.attr, - &armv8_event_attr_l3d_cache_refill.attr.attr, - &armv8_event_attr_l3d_cache.attr.attr, - &armv8_event_attr_l3d_cache_wb.attr.attr, - &armv8_event_attr_l2d_tlb_refill.attr.attr, - &armv8_event_attr_l2i_tlb_refill.attr.attr, - &armv8_event_attr_l2d_tlb.attr.attr, - &armv8_event_attr_l2i_tlb.attr.attr, - &armv8_event_attr_remote_access.attr.attr, - &armv8_event_attr_ll_cache.attr.attr, - &armv8_event_attr_ll_cache_miss.attr.attr, - &armv8_event_attr_dtlb_walk.attr.attr, - &armv8_event_attr_itlb_walk.attr.attr, - &armv8_event_attr_ll_cache_rd.attr.attr, - 
&armv8_event_attr_ll_cache_miss_rd.attr.attr, - &armv8_event_attr_remote_access_rd.attr.attr, - &armv8_event_attr_sample_pop.attr.attr, - &armv8_event_attr_sample_feed.attr.attr, - &armv8_event_attr_sample_filtrate.attr.attr, - &armv8_event_attr_sample_collision.attr.attr, + ARMV8_EVENT_ATTR(sw_incr, ARMV8_PMUV3_PERFCTR_SW_INCR), + ARMV8_EVENT_ATTR(l1i_cache_refill, ARMV8_PMUV3_PERFCTR_L1I_CACHE_REFILL), + ARMV8_EVENT_ATTR(l1i_tlb_refill, ARMV8_PMUV3_PERFCTR_L1I_TLB_REFILL), + ARMV8_EVENT_ATTR(l1d_cache_refill, ARMV8_PMUV3_PERFCTR_L1D_CACHE_REFILL), + ARMV8_EVENT_ATTR(l1d_cache, ARMV8_PMUV3_PERFCTR_L1D_CACHE), + ARMV8_EVENT_ATTR(l1d_tlb_refill, ARMV8_PMUV3_PERFCTR_L1D_TLB_REFILL), + ARMV8_EVENT_ATTR(ld_retired, ARMV8_PMUV3_PERFCTR_LD_RETIRED), + ARMV8_EVENT_ATTR(st_retired, ARMV8_PMUV3_PERFCTR_ST_RETIRED), + ARMV8_EVENT_ATTR(inst_retired, ARMV8_PMUV3_PERFCTR_INST_RETIRED), + ARMV8_EVENT_ATTR(exc_taken, ARMV8_PMUV3_PERFCTR_EXC_TAKEN), + ARMV8_EVENT_ATTR(exc_return, ARMV8_PMUV3_PERFCTR_EXC_RETURN), + ARMV8_EVENT_ATTR(cid_write_retired, ARMV8_PMUV3_PERFCTR_CID_WRITE_RETIRED), + ARMV8_EVENT_ATTR(pc_write_retired, ARMV8_PMUV3_PERFCTR_PC_WRITE_RETIRED), + ARMV8_EVENT_ATTR(br_immed_retired, ARMV8_PMUV3_PERFCTR_BR_IMMED_RETIRED), + ARMV8_EVENT_ATTR(br_return_retired, ARMV8_PMUV3_PERFCTR_BR_RETURN_RETIRED), + ARMV8_EVENT_ATTR(unaligned_ldst_retired, ARMV8_PMUV3_PERFCTR_UNALIGNED_LDST_RETIRED), + ARMV8_EVENT_ATTR(br_mis_pred, ARMV8_PMUV3_PERFCTR_BR_MIS_PRED), + ARMV8_EVENT_ATTR(cpu_cycles, ARMV8_PMUV3_PERFCTR_CPU_CYCLES), + ARMV8_EVENT_ATTR(br_pred, ARMV8_PMUV3_PERFCTR_BR_PRED), + ARMV8_EVENT_ATTR(mem_access, ARMV8_PMUV3_PERFCTR_MEM_ACCESS), + ARMV8_EVENT_ATTR(l1i_cache, ARMV8_PMUV3_PERFCTR_L1I_CACHE), + ARMV8_EVENT_ATTR(l1d_cache_wb, ARMV8_PMUV3_PERFCTR_L1D_CACHE_WB), + ARMV8_EVENT_ATTR(l2d_cache, ARMV8_PMUV3_PERFCTR_L2D_CACHE), + ARMV8_EVENT_ATTR(l2d_cache_refill, ARMV8_PMUV3_PERFCTR_L2D_CACHE_REFILL), + ARMV8_EVENT_ATTR(l2d_cache_wb, ARMV8_PMUV3_PERFCTR_L2D_CACHE_WB), + ARMV8_EVENT_ATTR(bus_access, ARMV8_PMUV3_PERFCTR_BUS_ACCESS), + ARMV8_EVENT_ATTR(memory_error, ARMV8_PMUV3_PERFCTR_MEMORY_ERROR), + ARMV8_EVENT_ATTR(inst_spec, ARMV8_PMUV3_PERFCTR_INST_SPEC), + ARMV8_EVENT_ATTR(ttbr_write_retired, ARMV8_PMUV3_PERFCTR_TTBR_WRITE_RETIRED), + ARMV8_EVENT_ATTR(bus_cycles, ARMV8_PMUV3_PERFCTR_BUS_CYCLES), + /* Don't expose the chain event in /sys, since it's useless in isolation */ + ARMV8_EVENT_ATTR(l1d_cache_allocate, ARMV8_PMUV3_PERFCTR_L1D_CACHE_ALLOCATE), + ARMV8_EVENT_ATTR(l2d_cache_allocate, ARMV8_PMUV3_PERFCTR_L2D_CACHE_ALLOCATE), + ARMV8_EVENT_ATTR(br_retired, ARMV8_PMUV3_PERFCTR_BR_RETIRED), + ARMV8_EVENT_ATTR(br_mis_pred_retired, ARMV8_PMUV3_PERFCTR_BR_MIS_PRED_RETIRED), + ARMV8_EVENT_ATTR(stall_frontend, ARMV8_PMUV3_PERFCTR_STALL_FRONTEND), + ARMV8_EVENT_ATTR(stall_backend, ARMV8_PMUV3_PERFCTR_STALL_BACKEND), + ARMV8_EVENT_ATTR(l1d_tlb, ARMV8_PMUV3_PERFCTR_L1D_TLB), + ARMV8_EVENT_ATTR(l1i_tlb, ARMV8_PMUV3_PERFCTR_L1I_TLB), + ARMV8_EVENT_ATTR(l2i_cache, ARMV8_PMUV3_PERFCTR_L2I_CACHE), + ARMV8_EVENT_ATTR(l2i_cache_refill, ARMV8_PMUV3_PERFCTR_L2I_CACHE_REFILL), + ARMV8_EVENT_ATTR(l3d_cache_allocate, ARMV8_PMUV3_PERFCTR_L3D_CACHE_ALLOCATE), + ARMV8_EVENT_ATTR(l3d_cache_refill, ARMV8_PMUV3_PERFCTR_L3D_CACHE_REFILL), + ARMV8_EVENT_ATTR(l3d_cache, ARMV8_PMUV3_PERFCTR_L3D_CACHE), + ARMV8_EVENT_ATTR(l3d_cache_wb, ARMV8_PMUV3_PERFCTR_L3D_CACHE_WB), + ARMV8_EVENT_ATTR(l2d_tlb_refill, ARMV8_PMUV3_PERFCTR_L2D_TLB_REFILL), + ARMV8_EVENT_ATTR(l2i_tlb_refill, ARMV8_PMUV3_PERFCTR_L2I_TLB_REFILL), + 
ARMV8_EVENT_ATTR(l2d_tlb, ARMV8_PMUV3_PERFCTR_L2D_TLB), + ARMV8_EVENT_ATTR(l2i_tlb, ARMV8_PMUV3_PERFCTR_L2I_TLB), + ARMV8_EVENT_ATTR(remote_access, ARMV8_PMUV3_PERFCTR_REMOTE_ACCESS), + ARMV8_EVENT_ATTR(ll_cache, ARMV8_PMUV3_PERFCTR_LL_CACHE), + ARMV8_EVENT_ATTR(ll_cache_miss, ARMV8_PMUV3_PERFCTR_LL_CACHE_MISS), + ARMV8_EVENT_ATTR(dtlb_walk, ARMV8_PMUV3_PERFCTR_DTLB_WALK), + ARMV8_EVENT_ATTR(itlb_walk, ARMV8_PMUV3_PERFCTR_ITLB_WALK), + ARMV8_EVENT_ATTR(ll_cache_rd, ARMV8_PMUV3_PERFCTR_LL_CACHE_RD), + ARMV8_EVENT_ATTR(ll_cache_miss_rd, ARMV8_PMUV3_PERFCTR_LL_CACHE_MISS_RD), + ARMV8_EVENT_ATTR(remote_access_rd, ARMV8_PMUV3_PERFCTR_REMOTE_ACCESS_RD), + ARMV8_EVENT_ATTR(sample_pop, ARMV8_SPE_PERFCTR_SAMPLE_POP), + ARMV8_EVENT_ATTR(sample_feed, ARMV8_SPE_PERFCTR_SAMPLE_FEED), + ARMV8_EVENT_ATTR(sample_filtrate, ARMV8_SPE_PERFCTR_SAMPLE_FILTRATE), + ARMV8_EVENT_ATTR(sample_collision, ARMV8_SPE_PERFCTR_SAMPLE_COLLISION), NULL, }; diff --git a/arch/arm64/kernel/probes/kprobes.c b/arch/arm64/kernel/probes/kprobes.c index c4452827419b..d1c95dcf1d78 100644 --- a/arch/arm64/kernel/probes/kprobes.c +++ b/arch/arm64/kernel/probes/kprobes.c @@ -455,10 +455,6 @@ int __init arch_populate_kprobe_blacklist(void) (unsigned long)__irqentry_text_end); if (ret) return ret; - ret = kprobe_add_area_blacklist((unsigned long)__exception_text_start, - (unsigned long)__exception_text_end); - if (ret) - return ret; ret = kprobe_add_area_blacklist((unsigned long)__idmap_text_start, (unsigned long)__idmap_text_end); if (ret) diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c index a47462def04b..71f788cd2b18 100644 --- a/arch/arm64/kernel/process.c +++ b/arch/arm64/kernel/process.c @@ -17,6 +17,7 @@ #include <linux/sched/task.h> #include <linux/sched/task_stack.h> #include <linux/kernel.h> +#include <linux/lockdep.h> #include <linux/mm.h> #include <linux/stddef.h> #include <linux/sysctl.h> @@ -44,6 +45,7 @@ #include <asm/alternative.h> #include <asm/arch_gicv3.h> #include <asm/compat.h> +#include <asm/cpufeature.h> #include <asm/cacheflush.h> #include <asm/exec.h> #include <asm/fpsimd.h> @@ -332,22 +334,27 @@ void arch_release_task_struct(struct task_struct *tsk) fpsimd_release_task(tsk); } -/* - * src and dst may temporarily have aliased sve_state after task_struct - * is copied. We cannot fix this properly here, because src may have - * live SVE state and dst's thread_info may not exist yet, so tweaking - * either src's or dst's TIF_SVE is not safe. - * - * The unaliasing is done in copy_thread() instead. This works because - * dst is not schedulable or traceable until both of these functions - * have been called. - */ int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src) { if (current->mm) fpsimd_preserve_current_state(); *dst = *src; + /* We rely on the above assignment to initialize dst's thread_flags: */ + BUILD_BUG_ON(!IS_ENABLED(CONFIG_THREAD_INFO_IN_TASK)); + + /* + * Detach src's sve_state (if any) from dst so that it does not + * get erroneously used or freed prematurely. dst's sve_state + * will be allocated on demand later on if dst uses SVE. + * For consistency, also clear TIF_SVE here: this could be done + * later in copy_process(), but to avoid tripping up future + * maintainers it is best not to leave TIF_SVE and sve_state in + * an inconsistent state, even temporarily. 
+ */ + dst->thread.sve_state = NULL; + clear_tsk_thread_flag(dst, TIF_SVE); + return 0; } @@ -361,13 +368,6 @@ int copy_thread(unsigned long clone_flags, unsigned long stack_start, memset(&p->thread.cpu_context, 0, sizeof(struct cpu_context)); /* - * Unalias p->thread.sve_state (if any) from the parent task - * and disable discard SVE state for p: - */ - clear_tsk_thread_flag(p, TIF_SVE); - p->thread.sve_state = NULL; - - /* * In case p was allocated the same task_struct pointer as some * other recently-exited task, make sure p is disassociated from * any cpu that may have run that now-exited task recently. @@ -633,3 +633,19 @@ static int __init tagged_addr_init(void) core_initcall(tagged_addr_init); #endif /* CONFIG_ARM64_TAGGED_ADDR_ABI */ + +asmlinkage void __sched arm64_preempt_schedule_irq(void) +{ + lockdep_assert_irqs_disabled(); + + /* + * Preempting a task from an IRQ means we leave copies of PSTATE + * on the stack. cpufeature's enable calls may modify PSTATE, but + * resuming one of these preempted tasks would undo those changes. + * + * Only allow a task to be preempted once cpufeatures have been + * enabled. + */ + if (static_branch_likely(&arm64_const_caps_ready)) + preempt_schedule_irq(); +} diff --git a/arch/arm64/kernel/psci.c b/arch/arm64/kernel/psci.c index c9f72b2665f1..43ae4e0c968f 100644 --- a/arch/arm64/kernel/psci.c +++ b/arch/arm64/kernel/psci.c @@ -81,7 +81,8 @@ static void cpu_psci_cpu_die(unsigned int cpu) static int cpu_psci_cpu_kill(unsigned int cpu) { - int err, i; + int err; + unsigned long start, end; if (!psci_ops.affinity_info) return 0; @@ -91,16 +92,18 @@ static int cpu_psci_cpu_kill(unsigned int cpu) * while it is dying. So, try again a few times. */ - for (i = 0; i < 10; i++) { + start = jiffies; + end = start + msecs_to_jiffies(100); + do { err = psci_ops.affinity_info(cpu_logical_map(cpu), 0); if (err == PSCI_0_2_AFFINITY_LEVEL_OFF) { - pr_info("CPU%d killed.\n", cpu); + pr_info("CPU%d killed (polled %d ms)\n", cpu, + jiffies_to_msecs(jiffies - start)); return 0; } - msleep(10); - pr_info("Retrying again to check for CPU kill\n"); - } + usleep_range(100, 1000); + } while (time_before(jiffies, end)); pr_warn("CPU%d may not have shut down cleanly (AFFINITY_INFO reports %d)\n", cpu, err); diff --git a/arch/arm64/kernel/sdei.c b/arch/arm64/kernel/sdei.c index ea94cf8f9dc6..d6259dac62b6 100644 --- a/arch/arm64/kernel/sdei.c +++ b/arch/arm64/kernel/sdei.c @@ -2,6 +2,7 @@ // Copyright (C) 2017 Arm Ltd. #define pr_fmt(fmt) "sdei: " fmt +#include <linux/arm-smccc.h> #include <linux/arm_sdei.h> #include <linux/hardirq.h> #include <linux/irqflags.h> @@ -161,7 +162,7 @@ unsigned long sdei_arch_get_entry_point(int conduit) return 0; } - sdei_exit_mode = (conduit == CONDUIT_HVC) ? SDEI_EXIT_HVC : SDEI_EXIT_SMC; + sdei_exit_mode = (conduit == SMCCC_CONDUIT_HVC) ? 
SDEI_EXIT_HVC : SDEI_EXIT_SMC; #ifdef CONFIG_UNMAP_KERNEL_AT_EL0 if (arm64_kernel_unmapped_at_el0()) { diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c index dc9fe879c279..ab149bcc3dc7 100644 --- a/arch/arm64/kernel/smp.c +++ b/arch/arm64/kernel/smp.c @@ -345,8 +345,7 @@ void __cpu_die(unsigned int cpu) */ err = op_cpu_kill(cpu); if (err) - pr_warn("CPU%d may not have shut down cleanly: %d\n", - cpu, err); + pr_warn("CPU%d may not have shut down cleanly: %d\n", cpu, err); } /* @@ -976,8 +975,8 @@ void smp_send_stop(void) udelay(1); if (num_online_cpus() > 1) - pr_warning("SMP: failed to stop secondary CPUs %*pbl\n", - cpumask_pr_args(cpu_online_mask)); + pr_warn("SMP: failed to stop secondary CPUs %*pbl\n", + cpumask_pr_args(cpu_online_mask)); sdei_mask_local_cpu(); } @@ -1017,8 +1016,8 @@ void crash_smp_send_stop(void) udelay(1); if (atomic_read(&waiting_for_crash_ipi) > 0) - pr_warning("SMP: failed to stop secondary CPUs %*pbl\n", - cpumask_pr_args(&mask)); + pr_warn("SMP: failed to stop secondary CPUs %*pbl\n", + cpumask_pr_args(&mask)); sdei_mask_local_cpu(); } diff --git a/arch/arm64/kernel/sys_compat.c b/arch/arm64/kernel/sys_compat.c index f1cb64959427..3c18c2454089 100644 --- a/arch/arm64/kernel/sys_compat.c +++ b/arch/arm64/kernel/sys_compat.c @@ -8,6 +8,7 @@ */ #include <linux/compat.h> +#include <linux/cpufeature.h> #include <linux/personality.h> #include <linux/sched.h> #include <linux/sched/signal.h> @@ -17,6 +18,7 @@ #include <asm/cacheflush.h> #include <asm/system_misc.h> +#include <asm/tlbflush.h> #include <asm/unistd.h> static long @@ -30,6 +32,15 @@ __do_compat_cache_op(unsigned long start, unsigned long end) if (fatal_signal_pending(current)) return 0; + if (cpus_have_const_cap(ARM64_WORKAROUND_1542419)) { + /* + * The workaround requires an inner-shareable tlbi. + * We pick the reserved-ASID to minimise the impact. 
+ */ + __tlbi(aside1is, __TLBI_VADDR(0, 0)); + dsb(ish); + } + ret = __flush_cache_user_range(start, start + chunk); if (ret) return ret; diff --git a/arch/arm64/kernel/syscall.c b/arch/arm64/kernel/syscall.c index 871c739f060a..9a9d98a443fc 100644 --- a/arch/arm64/kernel/syscall.c +++ b/arch/arm64/kernel/syscall.c @@ -154,14 +154,14 @@ static inline void sve_user_discard(void) sve_user_disable(); } -asmlinkage void el0_svc_handler(struct pt_regs *regs) +void el0_svc_handler(struct pt_regs *regs) { sve_user_discard(); el0_svc_common(regs, regs->regs[8], __NR_syscalls, sys_call_table); } #ifdef CONFIG_COMPAT -asmlinkage void el0_svc_compat_handler(struct pt_regs *regs) +void el0_svc_compat_handler(struct pt_regs *regs) { el0_svc_common(regs, regs->regs[7], __NR_compat_syscalls, compat_sys_call_table); diff --git a/arch/arm64/kernel/time.c b/arch/arm64/kernel/time.c index 0b2946414dc9..73f06d4b3aae 100644 --- a/arch/arm64/kernel/time.c +++ b/arch/arm64/kernel/time.c @@ -30,6 +30,7 @@ #include <asm/thread_info.h> #include <asm/stacktrace.h> +#include <asm/paravirt.h> unsigned long profile_pc(struct pt_regs *regs) { @@ -65,4 +66,6 @@ void __init time_init(void) /* Calibrate the delay loop directly */ lpj_fine = arch_timer_rate / HZ; + + pv_time_init(); } diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c index 34739e80211b..73caf35c2262 100644 --- a/arch/arm64/kernel/traps.c +++ b/arch/arm64/kernel/traps.c @@ -35,6 +35,7 @@ #include <asm/debug-monitors.h> #include <asm/esr.h> #include <asm/insn.h> +#include <asm/kprobes.h> #include <asm/traps.h> #include <asm/smp.h> #include <asm/stack_pointer.h> @@ -393,7 +394,7 @@ void arm64_notify_segfault(unsigned long addr) force_signal_inject(SIGSEGV, code, addr); } -asmlinkage void __exception do_undefinstr(struct pt_regs *regs) +void do_undefinstr(struct pt_regs *regs) { /* check for AArch32 breakpoint instructions */ if (!aarch32_break_handler(regs)) @@ -405,6 +406,7 @@ asmlinkage void __exception do_undefinstr(struct pt_regs *regs) BUG_ON(!user_mode(regs)); force_signal_inject(SIGILL, ILL_ILLOPC, regs->pc); } +NOKPROBE_SYMBOL(do_undefinstr); #define __user_cache_maint(insn, address, res) \ if (address >= user_addr_max()) { \ @@ -470,6 +472,15 @@ static void ctr_read_handler(unsigned int esr, struct pt_regs *regs) int rt = ESR_ELx_SYS64_ISS_RT(esr); unsigned long val = arm64_ftr_reg_user_value(&arm64_ftr_reg_ctrel0); + if (cpus_have_const_cap(ARM64_WORKAROUND_1542419)) { + /* Hide DIC so that we can trap the unnecessary maintenance...*/ + val &= ~BIT(CTR_DIC_SHIFT); + + /* ... and fake IminLine to reduce the number of traps. 
*/ + val &= ~CTR_IMINLINE_MASK; + val |= (PAGE_SHIFT - 2) & CTR_IMINLINE_MASK; + } + pt_regs_write_reg(regs, rt, val); arm64_skip_faulting_instruction(regs, AARCH64_INSN_SIZE); @@ -667,7 +678,7 @@ static const struct sys64_hook cp15_64_hooks[] = { {}, }; -asmlinkage void __exception do_cp15instr(unsigned int esr, struct pt_regs *regs) +void do_cp15instr(unsigned int esr, struct pt_regs *regs) { const struct sys64_hook *hook, *hook_base; @@ -705,9 +716,10 @@ asmlinkage void __exception do_cp15instr(unsigned int esr, struct pt_regs *regs) */ do_undefinstr(regs); } +NOKPROBE_SYMBOL(do_cp15instr); #endif -asmlinkage void __exception do_sysinstr(unsigned int esr, struct pt_regs *regs) +void do_sysinstr(unsigned int esr, struct pt_regs *regs) { const struct sys64_hook *hook; @@ -724,6 +736,7 @@ asmlinkage void __exception do_sysinstr(unsigned int esr, struct pt_regs *regs) */ do_undefinstr(regs); } +NOKPROBE_SYMBOL(do_sysinstr); static const char *esr_class_str[] = { [0 ... ESR_ELx_EC_MAX] = "UNRECOGNIZED EC", @@ -793,7 +806,7 @@ asmlinkage void bad_mode(struct pt_regs *regs, int reason, unsigned int esr) * bad_el0_sync handles unexpected, but potentially recoverable synchronous * exceptions taken from EL0. Unlike bad_mode, this returns. */ -asmlinkage void bad_el0_sync(struct pt_regs *regs, int reason, unsigned int esr) +void bad_el0_sync(struct pt_regs *regs, int reason, unsigned int esr) { void __user *pc = (void __user *)instruction_pointer(regs); diff --git a/arch/arm64/kernel/vdso/gettimeofday.S b/arch/arm64/kernel/vdso/gettimeofday.S deleted file mode 100644 index e69de29bb2d1..000000000000 --- a/arch/arm64/kernel/vdso/gettimeofday.S +++ /dev/null diff --git a/arch/arm64/kernel/vdso32/Makefile b/arch/arm64/kernel/vdso32/Makefile index 1fba0776ed40..76b327f88fbb 100644 --- a/arch/arm64/kernel/vdso32/Makefile +++ b/arch/arm64/kernel/vdso32/Makefile @@ -8,15 +8,21 @@ ARCH_REL_TYPE_ABS := R_ARM_JUMP_SLOT|R_ARM_GLOB_DAT|R_ARM_ABS32 include $(srctree)/lib/vdso/Makefile -COMPATCC := $(CROSS_COMPILE_COMPAT)gcc +# Same as cc-*option, but using CC_COMPAT instead of CC +ifeq ($(CONFIG_CC_IS_CLANG), y) +CC_COMPAT ?= $(CC) +else +CC_COMPAT ?= $(CROSS_COMPILE_COMPAT)gcc +endif -# Same as cc-*option, but using COMPATCC instead of CC cc32-option = $(call try-run,\ - $(COMPATCC) $(1) -c -x c /dev/null -o "$$TMP",$(1),$(2)) + $(CC_COMPAT) $(1) -c -x c /dev/null -o "$$TMP",$(1),$(2)) cc32-disable-warning = $(call try-run,\ - $(COMPATCC) -W$(strip $(1)) -c -x c /dev/null -o "$$TMP",-Wno-$(strip $(1))) + $(CC_COMPAT) -W$(strip $(1)) -c -x c /dev/null -o "$$TMP",-Wno-$(strip $(1))) cc32-ldoption = $(call try-run,\ - $(COMPATCC) $(1) -nostdlib -x c /dev/null -o "$$TMP",$(1),$(2)) + $(CC_COMPAT) $(1) -nostdlib -x c /dev/null -o "$$TMP",$(1),$(2)) +cc32-as-instr = $(call try-run,\ + printf "%b\n" "$(1)" | $(CC_COMPAT) $(VDSO_AFLAGS) -c -x assembler -o "$$TMP" -,$(2),$(3)) # We cannot use the global flags to compile the vDSO files, the main reason # being that the 32-bit compiler may be older than the main (64-bit) compiler @@ -25,22 +31,21 @@ cc32-ldoption = $(call try-run,\ # arm64 one. # As a result we set our own flags here. 
-# From top-level Makefile -# NOSTDINC_FLAGS -VDSO_CPPFLAGS := -nostdinc -isystem $(shell $(COMPATCC) -print-file-name=include) +# KBUILD_CPPFLAGS and NOSTDINC_FLAGS from top-level Makefile +VDSO_CPPFLAGS := -D__KERNEL__ -nostdinc -isystem $(shell $(CC_COMPAT) -print-file-name=include) VDSO_CPPFLAGS += $(LINUXINCLUDE) -VDSO_CPPFLAGS += $(KBUILD_CPPFLAGS) # Common C and assembly flags # From top-level Makefile VDSO_CAFLAGS := $(VDSO_CPPFLAGS) +ifneq ($(shell $(CC_COMPAT) --version 2>&1 | head -n 1 | grep clang),) +VDSO_CAFLAGS += --target=$(notdir $(CROSS_COMPILE_COMPAT:%-=%)) +endif + VDSO_CAFLAGS += $(call cc32-option,-fno-PIE) ifdef CONFIG_DEBUG_INFO VDSO_CAFLAGS += -g endif -ifeq ($(shell $(CONFIG_SHELL) $(srctree)/scripts/gcc-goto.sh $(COMPATCC)), y) -VDSO_CAFLAGS += -DCC_HAVE_ASM_GOTO -endif # From arm Makefile VDSO_CAFLAGS += $(call cc32-option,-fno-dwarf2-cfi-asm) @@ -55,6 +60,7 @@ endif VDSO_CAFLAGS += -fPIC -fno-builtin -fno-stack-protector VDSO_CAFLAGS += -DDISABLE_BRANCH_PROFILING + # Try to compile for ARMv8. If the compiler is too old and doesn't support it, # fall back to v7. There is no easy way to check for what architecture the code # is being compiled, so define a macro specifying that (see arch/arm/Makefile). @@ -91,6 +97,12 @@ VDSO_CFLAGS += -Wno-int-to-pointer-cast VDSO_AFLAGS := $(VDSO_CAFLAGS) VDSO_AFLAGS += -D__ASSEMBLY__ +# Check for binutils support for dmb ishld +dmbinstr := $(call cc32-as-instr,dmb ishld,-DCONFIG_AS_DMB_ISHLD=1) + +VDSO_CFLAGS += $(dmbinstr) +VDSO_AFLAGS += $(dmbinstr) + VDSO_LDFLAGS := $(VDSO_CPPFLAGS) # From arm vDSO Makefile VDSO_LDFLAGS += -Wl,-Bsymbolic -Wl,--no-undefined -Wl,-soname=linux-vdso.so.1 @@ -159,14 +171,14 @@ quiet_cmd_vdsold_and_vdso_check = LD32 $@ cmd_vdsold_and_vdso_check = $(cmd_vdsold); $(cmd_vdso_check) quiet_cmd_vdsold = LD32 $@ - cmd_vdsold = $(COMPATCC) -Wp,-MD,$(depfile) $(VDSO_LDFLAGS) \ + cmd_vdsold = $(CC_COMPAT) -Wp,-MD,$(depfile) $(VDSO_LDFLAGS) \ -Wl,-T $(filter %.lds,$^) $(filter %.o,$^) -o $@ quiet_cmd_vdsocc = CC32 $@ - cmd_vdsocc = $(COMPATCC) -Wp,-MD,$(depfile) $(VDSO_CFLAGS) -c -o $@ $< + cmd_vdsocc = $(CC_COMPAT) -Wp,-MD,$(depfile) $(VDSO_CFLAGS) -c -o $@ $< quiet_cmd_vdsocc_gettimeofday = CC32 $@ - cmd_vdsocc_gettimeofday = $(COMPATCC) -Wp,-MD,$(depfile) $(VDSO_CFLAGS) $(VDSO_CFLAGS_gettimeofday_o) -c -o $@ $< + cmd_vdsocc_gettimeofday = $(CC_COMPAT) -Wp,-MD,$(depfile) $(VDSO_CFLAGS) $(VDSO_CFLAGS_gettimeofday_o) -c -o $@ $< quiet_cmd_vdsoas = AS32 $@ - cmd_vdsoas = $(COMPATCC) -Wp,-MD,$(depfile) $(VDSO_AFLAGS) -c -o $@ $< + cmd_vdsoas = $(CC_COMPAT) -Wp,-MD,$(depfile) $(VDSO_AFLAGS) -c -o $@ $< quiet_cmd_vdsomunge = MUNGE $@ cmd_vdsomunge = $(obj)/$(munge) $< $@ diff --git a/arch/arm64/kernel/vmlinux.lds.S b/arch/arm64/kernel/vmlinux.lds.S index aa76f7259668..009057517bdd 100644 --- a/arch/arm64/kernel/vmlinux.lds.S +++ b/arch/arm64/kernel/vmlinux.lds.S @@ -111,9 +111,6 @@ SECTIONS } .text : { /* Real text segment */ _stext = .; /* Text and read-only data */ - __exception_text_start = .; - *(.exception.text) - __exception_text_end = .; IRQENTRY_TEXT SOFTIRQENTRY_TEXT ENTRY_TEXT diff --git a/arch/arm64/kvm/Kconfig b/arch/arm64/kvm/Kconfig index a67121d419a2..a475c68cbfec 100644 --- a/arch/arm64/kvm/Kconfig +++ b/arch/arm64/kvm/Kconfig @@ -21,6 +21,8 @@ if VIRTUALIZATION config KVM bool "Kernel-based Virtual Machine (KVM) support" depends on OF + # for TASKSTATS/TASK_DELAY_ACCT: + depends on NET && MULTIUSER select MMU_NOTIFIER select PREEMPT_NOTIFIERS select HAVE_KVM_CPU_RELAX_INTERCEPT @@ -39,6 +41,8 @@ 
config KVM select IRQ_BYPASS_MANAGER select HAVE_KVM_IRQ_BYPASS select HAVE_KVM_VCPU_RUN_PID_CHANGE + select TASKSTATS + select TASK_DELAY_ACCT ---help--- Support hosting virtualized guest machines. We don't support KVM with 16K page tables yet, due to the multiple diff --git a/arch/arm64/kvm/Makefile b/arch/arm64/kvm/Makefile index 3ac1a64d2fb9..5ffbdc39e780 100644 --- a/arch/arm64/kvm/Makefile +++ b/arch/arm64/kvm/Makefile @@ -13,6 +13,8 @@ obj-$(CONFIG_KVM_ARM_HOST) += hyp/ kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/kvm_main.o $(KVM)/coalesced_mmio.o $(KVM)/eventfd.o $(KVM)/vfio.o kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/arm.o $(KVM)/arm/mmu.o $(KVM)/arm/mmio.o kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/psci.o $(KVM)/arm/perf.o +kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/hypercalls.o +kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/pvtime.o kvm-$(CONFIG_KVM_ARM_HOST) += inject_fault.o regmap.o va_layout.o kvm-$(CONFIG_KVM_ARM_HOST) += hyp.o hyp-init.o handle_exit.o diff --git a/arch/arm64/kvm/guest.c b/arch/arm64/kvm/guest.c index dfd626447482..2fff06114a8f 100644 --- a/arch/arm64/kvm/guest.c +++ b/arch/arm64/kvm/guest.c @@ -34,6 +34,10 @@ #define VCPU_STAT(x) { #x, offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU } struct kvm_stats_debugfs_item debugfs_entries[] = { + VCPU_STAT(halt_successful_poll), + VCPU_STAT(halt_attempted_poll), + VCPU_STAT(halt_poll_invalid), + VCPU_STAT(halt_wakeup), VCPU_STAT(hvc_exit_stat), VCPU_STAT(wfe_exit_stat), VCPU_STAT(wfi_exit_stat), @@ -712,6 +716,12 @@ int __kvm_arm_vcpu_get_events(struct kvm_vcpu *vcpu, if (events->exception.serror_pending && events->exception.serror_has_esr) events->exception.serror_esr = vcpu_get_vsesr(vcpu); + /* + * We never return a pending ext_dabt here because we deliver it to + * the virtual CPU directly when setting the event and it's no longer + * 'pending' at this point. 
+ */ + return 0; } @@ -720,6 +730,7 @@ int __kvm_arm_vcpu_set_events(struct kvm_vcpu *vcpu, { bool serror_pending = events->exception.serror_pending; bool has_esr = events->exception.serror_has_esr; + bool ext_dabt_pending = events->exception.ext_dabt_pending; if (serror_pending && has_esr) { if (!cpus_have_const_cap(ARM64_HAS_RAS_EXTN)) @@ -733,6 +744,9 @@ int __kvm_arm_vcpu_set_events(struct kvm_vcpu *vcpu, kvm_inject_vabt(vcpu); } + if (ext_dabt_pending) + kvm_inject_dabt(vcpu, kvm_vcpu_get_hfar(vcpu)); + return 0; } @@ -858,6 +872,9 @@ int kvm_arm_vcpu_arch_set_attr(struct kvm_vcpu *vcpu, case KVM_ARM_VCPU_TIMER_CTRL: ret = kvm_arm_timer_set_attr(vcpu, attr); break; + case KVM_ARM_VCPU_PVTIME_CTRL: + ret = kvm_arm_pvtime_set_attr(vcpu, attr); + break; default: ret = -ENXIO; break; @@ -878,6 +895,9 @@ int kvm_arm_vcpu_arch_get_attr(struct kvm_vcpu *vcpu, case KVM_ARM_VCPU_TIMER_CTRL: ret = kvm_arm_timer_get_attr(vcpu, attr); break; + case KVM_ARM_VCPU_PVTIME_CTRL: + ret = kvm_arm_pvtime_get_attr(vcpu, attr); + break; default: ret = -ENXIO; break; @@ -898,6 +918,9 @@ int kvm_arm_vcpu_arch_has_attr(struct kvm_vcpu *vcpu, case KVM_ARM_VCPU_TIMER_CTRL: ret = kvm_arm_timer_has_attr(vcpu, attr); break; + case KVM_ARM_VCPU_PVTIME_CTRL: + ret = kvm_arm_pvtime_has_attr(vcpu, attr); + break; default: ret = -ENXIO; break; diff --git a/arch/arm64/kvm/handle_exit.c b/arch/arm64/kvm/handle_exit.c index 706cca23f0d2..aacfc55de44c 100644 --- a/arch/arm64/kvm/handle_exit.c +++ b/arch/arm64/kvm/handle_exit.c @@ -11,8 +11,6 @@ #include <linux/kvm.h> #include <linux/kvm_host.h> -#include <kvm/arm_psci.h> - #include <asm/esr.h> #include <asm/exception.h> #include <asm/kvm_asm.h> @@ -22,6 +20,8 @@ #include <asm/debug-monitors.h> #include <asm/traps.h> +#include <kvm/arm_hypercalls.h> + #define CREATE_TRACE_POINTS #include "trace.h" diff --git a/arch/arm64/kvm/hyp/switch.c b/arch/arm64/kvm/hyp/switch.c index bd978ad71936..72fbbd86eb5e 100644 --- a/arch/arm64/kvm/hyp/switch.c +++ b/arch/arm64/kvm/hyp/switch.c @@ -12,7 +12,7 @@ #include <kvm/arm_psci.h> -#include <asm/arch_gicv3.h> +#include <asm/barrier.h> #include <asm/cpufeature.h> #include <asm/kprobes.h> #include <asm/kvm_asm.h> @@ -118,12 +118,29 @@ static void __hyp_text __activate_traps_nvhe(struct kvm_vcpu *vcpu) } write_sysreg(val, cptr_el2); + + if (cpus_have_const_cap(ARM64_WORKAROUND_1319367)) { + struct kvm_cpu_context *ctxt = &vcpu->arch.ctxt; + + isb(); + /* + * At this stage, and thanks to the above isb(), S2 is + * configured and enabled. We can now restore the guest's S1 + * configuration: SCTLR, and only then TCR. + */ + write_sysreg_el1(ctxt->sys_regs[SCTLR_EL1], SYS_SCTLR); + isb(); + write_sysreg_el1(ctxt->sys_regs[TCR_EL1], SYS_TCR); + } } static void __hyp_text __activate_traps(struct kvm_vcpu *vcpu) { u64 hcr = vcpu->arch.hcr_el2; + if (cpus_have_const_cap(ARM64_WORKAROUND_CAVIUM_TX2_219_TVM)) + hcr |= HCR_TVM; + write_sysreg(hcr, hcr_el2); if (cpus_have_const_cap(ARM64_HAS_RAS_EXTN) && (hcr & HCR_VSE)) @@ -156,6 +173,23 @@ static void __hyp_text __deactivate_traps_nvhe(void) { u64 mdcr_el2 = read_sysreg(mdcr_el2); + if (cpus_have_const_cap(ARM64_WORKAROUND_1319367)) { + u64 val; + + /* + * Set the TCR and SCTLR registers in the exact opposite + * sequence as __activate_traps_nvhe (first prevent walks, + * then force the MMU on). A generous sprinkling of isb() + * ensure that things happen in this exact order. 
+ */ + val = read_sysreg_el1(SYS_TCR); + write_sysreg_el1(val | TCR_EPD1_MASK | TCR_EPD0_MASK, SYS_TCR); + isb(); + val = read_sysreg_el1(SYS_SCTLR); + write_sysreg_el1(val | SCTLR_ELx_M, SYS_SCTLR); + isb(); + } + __deactivate_traps_common(); mdcr_el2 &= MDCR_EL2_HPMN_MASK; @@ -174,8 +208,10 @@ static void __hyp_text __deactivate_traps(struct kvm_vcpu *vcpu) * the crucial bit is "On taking a vSError interrupt, * HCR_EL2.VSE is cleared to 0." */ - if (vcpu->arch.hcr_el2 & HCR_VSE) - vcpu->arch.hcr_el2 = read_sysreg(hcr_el2); + if (vcpu->arch.hcr_el2 & HCR_VSE) { + vcpu->arch.hcr_el2 &= ~HCR_VSE; + vcpu->arch.hcr_el2 |= read_sysreg(hcr_el2) & HCR_VSE; + } if (has_vhe()) deactivate_traps_vhe(); @@ -229,20 +265,6 @@ static void __hyp_text __hyp_vgic_restore_state(struct kvm_vcpu *vcpu) } } -static bool __hyp_text __true_value(void) -{ - return true; -} - -static bool __hyp_text __false_value(void) -{ - return false; -} - -static hyp_alternate_select(__check_arm_834220, - __false_value, __true_value, - ARM64_WORKAROUND_834220); - static bool __hyp_text __translate_far_to_hpfar(u64 far, u64 *hpfar) { u64 par, tmp; @@ -298,7 +320,8 @@ static bool __hyp_text __populate_fault_info(struct kvm_vcpu *vcpu) * resolve the IPA using the AT instruction. */ if (!(esr & ESR_ELx_S1PTW) && - (__check_arm_834220()() || (esr & ESR_ELx_FSC_TYPE) == FSC_PERM)) { + (cpus_have_const_cap(ARM64_WORKAROUND_834220) || + (esr & ESR_ELx_FSC_TYPE) == FSC_PERM)) { if (!__translate_far_to_hpfar(far, &hpfar)) return false; } else { @@ -393,6 +416,61 @@ static bool __hyp_text __hyp_handle_fpsimd(struct kvm_vcpu *vcpu) return true; } +static bool __hyp_text handle_tx2_tvm(struct kvm_vcpu *vcpu) +{ + u32 sysreg = esr_sys64_to_sysreg(kvm_vcpu_get_hsr(vcpu)); + int rt = kvm_vcpu_sys_get_rt(vcpu); + u64 val = vcpu_get_reg(vcpu, rt); + + /* + * The normal sysreg handling code expects to see the traps, + * let's not do anything here. + */ + if (vcpu->arch.hcr_el2 & HCR_TVM) + return false; + + switch (sysreg) { + case SYS_SCTLR_EL1: + write_sysreg_el1(val, SYS_SCTLR); + break; + case SYS_TTBR0_EL1: + write_sysreg_el1(val, SYS_TTBR0); + break; + case SYS_TTBR1_EL1: + write_sysreg_el1(val, SYS_TTBR1); + break; + case SYS_TCR_EL1: + write_sysreg_el1(val, SYS_TCR); + break; + case SYS_ESR_EL1: + write_sysreg_el1(val, SYS_ESR); + break; + case SYS_FAR_EL1: + write_sysreg_el1(val, SYS_FAR); + break; + case SYS_AFSR0_EL1: + write_sysreg_el1(val, SYS_AFSR0); + break; + case SYS_AFSR1_EL1: + write_sysreg_el1(val, SYS_AFSR1); + break; + case SYS_MAIR_EL1: + write_sysreg_el1(val, SYS_MAIR); + break; + case SYS_AMAIR_EL1: + write_sysreg_el1(val, SYS_AMAIR); + break; + case SYS_CONTEXTIDR_EL1: + write_sysreg_el1(val, SYS_CONTEXTIDR); + break; + default: + return false; + } + + __kvm_skip_instr(vcpu); + return true; +} + /* * Return true when we were able to fixup the guest exit and should return to * the guest, false when we should restore the host state and return to the @@ -412,6 +490,11 @@ static bool __hyp_text fixup_guest_exit(struct kvm_vcpu *vcpu, u64 *exit_code) if (*exit_code != ARM_EXCEPTION_TRAP) goto exit; + if (cpus_have_const_cap(ARM64_WORKAROUND_CAVIUM_TX2_219_TVM) && + kvm_vcpu_trap_get_class(vcpu) == ESR_ELx_EC_SYS64 && + handle_tx2_tvm(vcpu)) + return true; + /* * We trap the first access to the FP/SIMD to save the host context * and restore the guest context lazily. 
@@ -605,7 +688,7 @@ int __hyp_text __kvm_vcpu_run_nvhe(struct kvm_vcpu *vcpu) */ if (system_uses_irq_prio_masking()) { gic_write_pmr(GIC_PRIO_IRQON | GIC_PRIO_PSR_I_SET); - dsb(sy); + pmr_sync(); } vcpu = kern_hyp_va(vcpu); @@ -618,18 +701,23 @@ int __hyp_text __kvm_vcpu_run_nvhe(struct kvm_vcpu *vcpu) __sysreg_save_state_nvhe(host_ctxt); - __activate_vm(kern_hyp_va(vcpu->kvm)); - __activate_traps(vcpu); - - __hyp_vgic_restore_state(vcpu); - __timer_enable_traps(vcpu); - /* * We must restore the 32-bit state before the sysregs, thanks * to erratum #852523 (Cortex-A57) or #853709 (Cortex-A72). + * + * Also, and in order to be able to deal with erratum #1319537 (A57) + * and #1319367 (A72), we must ensure that all VM-related sysreg are + * restored before we enable S2 translation. */ __sysreg32_restore_state(vcpu); __sysreg_restore_state_nvhe(guest_ctxt); + + __activate_vm(kern_hyp_va(vcpu->kvm)); + __activate_traps(vcpu); + + __hyp_vgic_restore_state(vcpu); + __timer_enable_traps(vcpu); + __debug_switch_to_guest(vcpu); __set_guest_arch_workaround_state(vcpu); diff --git a/arch/arm64/kvm/hyp/sysreg-sr.c b/arch/arm64/kvm/hyp/sysreg-sr.c index 7ddbc849b580..22b8128d19f6 100644 --- a/arch/arm64/kvm/hyp/sysreg-sr.c +++ b/arch/arm64/kvm/hyp/sysreg-sr.c @@ -117,12 +117,26 @@ static void __hyp_text __sysreg_restore_el1_state(struct kvm_cpu_context *ctxt) { write_sysreg(ctxt->sys_regs[MPIDR_EL1], vmpidr_el2); write_sysreg(ctxt->sys_regs[CSSELR_EL1], csselr_el1); - write_sysreg_el1(ctxt->sys_regs[SCTLR_EL1], SYS_SCTLR); + + if (!cpus_have_const_cap(ARM64_WORKAROUND_1319367)) { + write_sysreg_el1(ctxt->sys_regs[SCTLR_EL1], SYS_SCTLR); + write_sysreg_el1(ctxt->sys_regs[TCR_EL1], SYS_TCR); + } else if (!ctxt->__hyp_running_vcpu) { + /* + * Must only be done for guest registers, hence the context + * test. We're coming from the host, so SCTLR.M is already + * set. Pairs with __activate_traps_nvhe(). + */ + write_sysreg_el1((ctxt->sys_regs[TCR_EL1] | + TCR_EPD1_MASK | TCR_EPD0_MASK), + SYS_TCR); + isb(); + } + write_sysreg(ctxt->sys_regs[ACTLR_EL1], actlr_el1); write_sysreg_el1(ctxt->sys_regs[CPACR_EL1], SYS_CPACR); write_sysreg_el1(ctxt->sys_regs[TTBR0_EL1], SYS_TTBR0); write_sysreg_el1(ctxt->sys_regs[TTBR1_EL1], SYS_TTBR1); - write_sysreg_el1(ctxt->sys_regs[TCR_EL1], SYS_TCR); write_sysreg_el1(ctxt->sys_regs[ESR_EL1], SYS_ESR); write_sysreg_el1(ctxt->sys_regs[AFSR0_EL1], SYS_AFSR0); write_sysreg_el1(ctxt->sys_regs[AFSR1_EL1], SYS_AFSR1); @@ -135,6 +149,23 @@ static void __hyp_text __sysreg_restore_el1_state(struct kvm_cpu_context *ctxt) write_sysreg(ctxt->sys_regs[PAR_EL1], par_el1); write_sysreg(ctxt->sys_regs[TPIDR_EL1], tpidr_el1); + if (cpus_have_const_cap(ARM64_WORKAROUND_1319367) && + ctxt->__hyp_running_vcpu) { + /* + * Must only be done for host registers, hence the context + * test. Pairs with __deactivate_traps_nvhe(). + */ + isb(); + /* + * At this stage, and thanks to the above isb(), S2 is + * deconfigured and disabled. We can now restore the host's + * S1 configuration: SCTLR, and only then TCR. 
+ */ + write_sysreg_el1(ctxt->sys_regs[SCTLR_EL1], SYS_SCTLR); + isb(); + write_sysreg_el1(ctxt->sys_regs[TCR_EL1], SYS_TCR); + } + write_sysreg(ctxt->gp_regs.sp_el1, sp_el1); write_sysreg_el1(ctxt->gp_regs.elr_el1, SYS_ELR); write_sysreg_el1(ctxt->gp_regs.spsr[KVM_SPSR_EL1],SYS_SPSR); diff --git a/arch/arm64/kvm/hyp/tlb.c b/arch/arm64/kvm/hyp/tlb.c index c466060b76d6..c2bc17ca6430 100644 --- a/arch/arm64/kvm/hyp/tlb.c +++ b/arch/arm64/kvm/hyp/tlb.c @@ -63,14 +63,34 @@ static void __hyp_text __tlb_switch_to_guest_vhe(struct kvm *kvm, static void __hyp_text __tlb_switch_to_guest_nvhe(struct kvm *kvm, struct tlb_inv_context *cxt) { + if (cpus_have_const_cap(ARM64_WORKAROUND_1319367)) { + u64 val; + + /* + * For CPUs that are affected by ARM 1319367, we need to + * avoid a host Stage-1 walk while we have the guest's + * VMID set in the VTTBR in order to invalidate TLBs. + * We're guaranteed that the S1 MMU is enabled, so we can + * simply set the EPD bits to avoid any further TLB fill. + */ + val = cxt->tcr = read_sysreg_el1(SYS_TCR); + val |= TCR_EPD1_MASK | TCR_EPD0_MASK; + write_sysreg_el1(val, SYS_TCR); + isb(); + } + __load_guest_stage2(kvm); isb(); } -static hyp_alternate_select(__tlb_switch_to_guest, - __tlb_switch_to_guest_nvhe, - __tlb_switch_to_guest_vhe, - ARM64_HAS_VIRT_HOST_EXTN); +static void __hyp_text __tlb_switch_to_guest(struct kvm *kvm, + struct tlb_inv_context *cxt) +{ + if (has_vhe()) + __tlb_switch_to_guest_vhe(kvm, cxt); + else + __tlb_switch_to_guest_nvhe(kvm, cxt); +} static void __hyp_text __tlb_switch_to_host_vhe(struct kvm *kvm, struct tlb_inv_context *cxt) @@ -96,12 +116,23 @@ static void __hyp_text __tlb_switch_to_host_nvhe(struct kvm *kvm, struct tlb_inv_context *cxt) { write_sysreg(0, vttbr_el2); + + if (cpus_have_const_cap(ARM64_WORKAROUND_1319367)) { + /* Ensure write of the host VMID */ + isb(); + /* Restore the host's TCR_EL1 */ + write_sysreg_el1(cxt->tcr, SYS_TCR); + } } -static hyp_alternate_select(__tlb_switch_to_host, - __tlb_switch_to_host_nvhe, - __tlb_switch_to_host_vhe, - ARM64_HAS_VIRT_HOST_EXTN); +static void __hyp_text __tlb_switch_to_host(struct kvm *kvm, + struct tlb_inv_context *cxt) +{ + if (has_vhe()) + __tlb_switch_to_host_vhe(kvm, cxt); + else + __tlb_switch_to_host_nvhe(kvm, cxt); +} void __hyp_text __kvm_tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa) { @@ -111,7 +142,7 @@ void __hyp_text __kvm_tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa) /* Switch to requested VMID */ kvm = kern_hyp_va(kvm); - __tlb_switch_to_guest()(kvm, &cxt); + __tlb_switch_to_guest(kvm, &cxt); /* * We could do so much better if we had the VA as well. 
@@ -154,7 +185,7 @@ void __hyp_text __kvm_tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa) if (!has_vhe() && icache_is_vpipt()) __flush_icache_all(); - __tlb_switch_to_host()(kvm, &cxt); + __tlb_switch_to_host(kvm, &cxt); } void __hyp_text __kvm_tlb_flush_vmid(struct kvm *kvm) @@ -165,13 +196,13 @@ void __hyp_text __kvm_tlb_flush_vmid(struct kvm *kvm) /* Switch to requested VMID */ kvm = kern_hyp_va(kvm); - __tlb_switch_to_guest()(kvm, &cxt); + __tlb_switch_to_guest(kvm, &cxt); __tlbi(vmalls12e1is); dsb(ish); isb(); - __tlb_switch_to_host()(kvm, &cxt); + __tlb_switch_to_host(kvm, &cxt); } void __hyp_text __kvm_tlb_flush_local_vmid(struct kvm_vcpu *vcpu) @@ -180,13 +211,13 @@ void __hyp_text __kvm_tlb_flush_local_vmid(struct kvm_vcpu *vcpu) struct tlb_inv_context cxt; /* Switch to requested VMID */ - __tlb_switch_to_guest()(kvm, &cxt); + __tlb_switch_to_guest(kvm, &cxt); __tlbi(vmalle1); dsb(nsh); isb(); - __tlb_switch_to_host()(kvm, &cxt); + __tlb_switch_to_host(kvm, &cxt); } void __hyp_text __kvm_flush_vm_context(void) diff --git a/arch/arm64/kvm/inject_fault.c b/arch/arm64/kvm/inject_fault.c index a9d25a305af5..ccdb6a051ab2 100644 --- a/arch/arm64/kvm/inject_fault.c +++ b/arch/arm64/kvm/inject_fault.c @@ -109,7 +109,7 @@ static void inject_undef64(struct kvm_vcpu *vcpu) /** * kvm_inject_dabt - inject a data abort into the guest - * @vcpu: The VCPU to receive the undefined exception + * @vcpu: The VCPU to receive the data abort * @addr: The address to report in the DFAR * * It is assumed that this code is called from the VCPU thread and that the @@ -125,7 +125,7 @@ void kvm_inject_dabt(struct kvm_vcpu *vcpu, unsigned long addr) /** * kvm_inject_pabt - inject a prefetch abort into the guest - * @vcpu: The VCPU to receive the undefined exception + * @vcpu: The VCPU to receive the prefetch abort * @addr: The address to report in the DFAR * * It is assumed that this code is called from the VCPU thread and that the diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index 2071260a275b..46822afc57e0 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -632,6 +632,8 @@ static void reset_pmcr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r) */ val = ((pmcr & ~ARMV8_PMU_PMCR_MASK) | (ARMV8_PMU_PMCR_MASK & 0xdecafbad)) & (~ARMV8_PMU_PMCR_E); + if (!system_supports_32bit_el0()) + val |= ARMV8_PMU_PMCR_LC; __vcpu_sys_reg(vcpu, r->reg) = val; } @@ -682,6 +684,8 @@ static bool access_pmcr(struct kvm_vcpu *vcpu, struct sys_reg_params *p, val = __vcpu_sys_reg(vcpu, PMCR_EL0); val &= ~ARMV8_PMU_PMCR_MASK; val |= p->regval & ARMV8_PMU_PMCR_MASK; + if (!system_supports_32bit_el0()) + val |= ARMV8_PMU_PMCR_LC; __vcpu_sys_reg(vcpu, PMCR_EL0) = val; kvm_pmu_handle_pmcr(vcpu, val); kvm_vcpu_pmu_restore_guest(vcpu); diff --git a/arch/arm64/lib/clear_user.S b/arch/arm64/lib/clear_user.S index 10415572e82f..aeafc03e961a 100644 --- a/arch/arm64/lib/clear_user.S +++ b/arch/arm64/lib/clear_user.S @@ -20,7 +20,6 @@ * Alignment fixed up by hardware. 
*/ ENTRY(__arch_clear_user) - uaccess_enable_not_uao x2, x3, x4 mov x2, x1 // save the size for fixup return subs x1, x1, #8 b.mi 2f @@ -40,7 +39,6 @@ uao_user_alternative 9f, strh, sttrh, wzr, x0, 2 b.mi 5f uao_user_alternative 9f, strb, sttrb, wzr, x0, 0 5: mov x0, #0 - uaccess_disable_not_uao x2, x3 ret ENDPROC(__arch_clear_user) EXPORT_SYMBOL(__arch_clear_user) diff --git a/arch/arm64/lib/copy_from_user.S b/arch/arm64/lib/copy_from_user.S index 680e74409ff9..ebb3c06cbb5d 100644 --- a/arch/arm64/lib/copy_from_user.S +++ b/arch/arm64/lib/copy_from_user.S @@ -54,10 +54,8 @@ end .req x5 ENTRY(__arch_copy_from_user) - uaccess_enable_not_uao x3, x4, x5 add end, x0, x2 #include "copy_template.S" - uaccess_disable_not_uao x3, x4 mov x0, #0 // Nothing to copy ret ENDPROC(__arch_copy_from_user) diff --git a/arch/arm64/lib/copy_in_user.S b/arch/arm64/lib/copy_in_user.S index 0bedae3f3792..3d8153a1ebce 100644 --- a/arch/arm64/lib/copy_in_user.S +++ b/arch/arm64/lib/copy_in_user.S @@ -56,10 +56,8 @@ end .req x5 ENTRY(__arch_copy_in_user) - uaccess_enable_not_uao x3, x4, x5 add end, x0, x2 #include "copy_template.S" - uaccess_disable_not_uao x3, x4 mov x0, #0 ret ENDPROC(__arch_copy_in_user) diff --git a/arch/arm64/lib/copy_to_user.S b/arch/arm64/lib/copy_to_user.S index 2d88c736e8f2..357eae2c18eb 100644 --- a/arch/arm64/lib/copy_to_user.S +++ b/arch/arm64/lib/copy_to_user.S @@ -53,10 +53,8 @@ end .req x5 ENTRY(__arch_copy_to_user) - uaccess_enable_not_uao x3, x4, x5 add end, x0, x2 #include "copy_template.S" - uaccess_disable_not_uao x3, x4 mov x0, #0 ret ENDPROC(__arch_copy_to_user) diff --git a/arch/arm64/lib/uaccess_flushcache.c b/arch/arm64/lib/uaccess_flushcache.c index cbfcbe6470a5..bfa30b75b2b8 100644 --- a/arch/arm64/lib/uaccess_flushcache.c +++ b/arch/arm64/lib/uaccess_flushcache.c @@ -28,7 +28,11 @@ void memcpy_page_flushcache(char *to, struct page *page, size_t offset, unsigned long __copy_user_flushcache(void *to, const void __user *from, unsigned long n) { - unsigned long rc = __arch_copy_from_user(to, from, n); + unsigned long rc; + + uaccess_enable_not_uao(); + rc = __arch_copy_from_user(to, from, n); + uaccess_disable_not_uao(); /* See above */ __clean_dcache_area_pop(to, n - rc); diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c index 115d7a0e4b08..077b02a2d4d3 100644 --- a/arch/arm64/mm/fault.c +++ b/arch/arm64/mm/fault.c @@ -32,7 +32,8 @@ #include <asm/daifflags.h> #include <asm/debug-monitors.h> #include <asm/esr.h> -#include <asm/kasan.h> +#include <asm/kprobes.h> +#include <asm/processor.h> #include <asm/sysreg.h> #include <asm/system_misc.h> #include <asm/pgtable.h> @@ -101,16 +102,13 @@ static void mem_abort_decode(unsigned int esr) data_abort_decode(esr); } -static inline bool is_ttbr0_addr(unsigned long addr) +static inline unsigned long mm_to_pgd_phys(struct mm_struct *mm) { - /* entry assembly clears tags for TTBR0 addrs */ - return addr < TASK_SIZE; -} + /* Either init_pg_dir or swapper_pg_dir */ + if (mm == &init_mm) + return __pa_symbol(mm->pgd); -static inline bool is_ttbr1_addr(unsigned long addr) -{ - /* TTBR1 addresses may have a tag if KASAN_SW_TAGS is in use */ - return arch_kasan_reset_tag(addr) >= PAGE_OFFSET; + return (unsigned long)virt_to_phys(mm->pgd); } /* @@ -141,7 +139,7 @@ static void show_pte(unsigned long addr) pr_alert("%s pgtable: %luk pages, %llu-bit VAs, pgdp=%016lx\n", mm == &init_mm ? 
"swapper" : "user", PAGE_SIZE / SZ_1K, - vabits_actual, (unsigned long)virt_to_phys(mm->pgd)); + vabits_actual, mm_to_pgd_phys(mm)); pgdp = pgd_offset(mm, addr); pgd = READ_ONCE(*pgdp); pr_alert("[%016lx] pgd=%016llx", addr, pgd_val(pgd)); @@ -259,14 +257,18 @@ static bool __kprobes is_spurious_el1_translation_fault(unsigned long addr, par = read_sysreg(par_el1); local_irq_restore(flags); + /* + * If we now have a valid translation, treat the translation fault as + * spurious. + */ if (!(par & SYS_PAR_EL1_F)) - return false; + return true; /* * If we got a different type of fault from the AT instruction, * treat the translation fault as spurious. */ - dfsc = FIELD_PREP(SYS_PAR_EL1_FST, par); + dfsc = FIELD_GET(SYS_PAR_EL1_FST, par); return (dfsc & ESR_ELx_FSC_TYPE) != ESR_ELx_FSC_FAULT; } @@ -305,6 +307,8 @@ static void __do_kernel_fault(unsigned long addr, unsigned int esr, if (is_el1_permission_fault(addr, esr, regs)) { if (esr & ESR_ELx_WNR) msg = "write to read-only memory"; + else if (is_el1_instruction_abort(esr)) + msg = "execute from non-executable memory"; else msg = "read from unreadable memory"; } else if (addr < PAGE_SIZE) { @@ -723,8 +727,7 @@ static const struct fault_info fault_info[] = { { do_bad, SIGKILL, SI_KERNEL, "unknown 63" }, }; -asmlinkage void __exception do_mem_abort(unsigned long addr, unsigned int esr, - struct pt_regs *regs) +void do_mem_abort(unsigned long addr, unsigned int esr, struct pt_regs *regs) { const struct fault_info *inf = esr_to_fault_info(esr); @@ -740,43 +743,21 @@ asmlinkage void __exception do_mem_abort(unsigned long addr, unsigned int esr, arm64_notify_die(inf->name, regs, inf->sig, inf->code, (void __user *)addr, esr); } +NOKPROBE_SYMBOL(do_mem_abort); -asmlinkage void __exception do_el0_irq_bp_hardening(void) +void do_el0_irq_bp_hardening(void) { /* PC has already been checked in entry.S */ arm64_apply_bp_hardening(); } +NOKPROBE_SYMBOL(do_el0_irq_bp_hardening); -asmlinkage void __exception do_el0_ia_bp_hardening(unsigned long addr, - unsigned int esr, - struct pt_regs *regs) -{ - /* - * We've taken an instruction abort from userspace and not yet - * re-enabled IRQs. If the address is a kernel address, apply - * BP hardening prior to enabling IRQs and pre-emption. 
- */ - if (!is_ttbr0_addr(addr)) - arm64_apply_bp_hardening(); - - local_daif_restore(DAIF_PROCCTX); - do_mem_abort(addr, esr, regs); -} - - -asmlinkage void __exception do_sp_pc_abort(unsigned long addr, - unsigned int esr, - struct pt_regs *regs) +void do_sp_pc_abort(unsigned long addr, unsigned int esr, struct pt_regs *regs) { - if (user_mode(regs)) { - if (!is_ttbr0_addr(instruction_pointer(regs))) - arm64_apply_bp_hardening(); - local_daif_restore(DAIF_PROCCTX); - } - arm64_notify_die("SP/PC alignment exception", regs, SIGBUS, BUS_ADRALN, (void __user *)addr, esr); } +NOKPROBE_SYMBOL(do_sp_pc_abort); int __init early_brk64(unsigned long addr, unsigned int esr, struct pt_regs *regs); @@ -859,8 +840,7 @@ NOKPROBE_SYMBOL(debug_exception_exit); #ifdef CONFIG_ARM64_ERRATUM_1463225 DECLARE_PER_CPU(int, __in_cortex_a76_erratum_1463225_wa); -static int __exception -cortex_a76_erratum_1463225_debug_handler(struct pt_regs *regs) +static int cortex_a76_erratum_1463225_debug_handler(struct pt_regs *regs) { if (user_mode(regs)) return 0; @@ -879,16 +859,15 @@ cortex_a76_erratum_1463225_debug_handler(struct pt_regs *regs) return 1; } #else -static int __exception -cortex_a76_erratum_1463225_debug_handler(struct pt_regs *regs) +static int cortex_a76_erratum_1463225_debug_handler(struct pt_regs *regs) { return 0; } #endif /* CONFIG_ARM64_ERRATUM_1463225 */ +NOKPROBE_SYMBOL(cortex_a76_erratum_1463225_debug_handler); -asmlinkage void __exception do_debug_exception(unsigned long addr_if_watchpoint, - unsigned int esr, - struct pt_regs *regs) +void do_debug_exception(unsigned long addr_if_watchpoint, unsigned int esr, + struct pt_regs *regs) { const struct fault_info *inf = esr_to_debug_fault_info(esr); unsigned long pc = instruction_pointer(regs); diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c index 45c00a54909c..be9481cdf3b9 100644 --- a/arch/arm64/mm/init.c +++ b/arch/arm64/mm/init.c @@ -20,6 +20,7 @@ #include <linux/sort.h> #include <linux/of.h> #include <linux/of_fdt.h> +#include <linux/dma-direct.h> #include <linux/dma-mapping.h> #include <linux/dma-contiguous.h> #include <linux/efi.h> @@ -41,6 +42,8 @@ #include <asm/tlb.h> #include <asm/alternative.h> +#define ARM64_ZONE_DMA_BITS 30 + /* * We need to be able to catch inadvertent references to memstart_addr * that occur (potentially in generic code) before arm64_memblock_init() @@ -56,7 +59,14 @@ EXPORT_SYMBOL(physvirt_offset); struct page *vmemmap __ro_after_init; EXPORT_SYMBOL(vmemmap); +/* + * We create both ZONE_DMA and ZONE_DMA32. ZONE_DMA covers the first 1G of + * memory as some devices, namely the Raspberry Pi 4, have peripherals with + * this limited view of the memory. ZONE_DMA32 will cover the rest of the 32 + * bit addressable memory area. + */ phys_addr_t arm64_dma_phys_limit __ro_after_init; +static phys_addr_t arm64_dma32_phys_limit __ro_after_init; #ifdef CONFIG_KEXEC_CORE /* @@ -81,7 +91,7 @@ static void __init reserve_crashkernel(void) if (crash_base == 0) { /* Current arm64 boot protocol requires 2MB alignment */ - crash_base = memblock_find_in_range(0, ARCH_LOW_ADDRESS_LIMIT, + crash_base = memblock_find_in_range(0, arm64_dma32_phys_limit, crash_size, SZ_2M); if (crash_base == 0) { pr_warn("cannot allocate crashkernel (size:0x%llx)\n", @@ -169,15 +179,16 @@ static void __init reserve_elfcorehdr(void) { } #endif /* CONFIG_CRASH_DUMP */ + /* - * Return the maximum physical address for ZONE_DMA32 (DMA_BIT_MASK(32)). It - * currently assumes that for memory starting above 4G, 32-bit devices will - * use a DMA offset. 
+ * Return the maximum physical address for a zone with a given address size + * limit. It currently assumes that for memory starting above 4G, 32-bit + * devices will use a DMA offset. */ -static phys_addr_t __init max_zone_dma_phys(void) +static phys_addr_t __init max_zone_phys(unsigned int zone_bits) { - phys_addr_t offset = memblock_start_of_DRAM() & GENMASK_ULL(63, 32); - return min(offset + (1ULL << 32), memblock_end_of_DRAM()); + phys_addr_t offset = memblock_start_of_DRAM() & GENMASK_ULL(63, zone_bits); + return min(offset + (1ULL << zone_bits), memblock_end_of_DRAM()); } #ifdef CONFIG_NUMA @@ -186,8 +197,11 @@ static void __init zone_sizes_init(unsigned long min, unsigned long max) { unsigned long max_zone_pfns[MAX_NR_ZONES] = {0}; +#ifdef CONFIG_ZONE_DMA + max_zone_pfns[ZONE_DMA] = PFN_DOWN(arm64_dma_phys_limit); +#endif #ifdef CONFIG_ZONE_DMA32 - max_zone_pfns[ZONE_DMA32] = PFN_DOWN(max_zone_dma_phys()); + max_zone_pfns[ZONE_DMA32] = PFN_DOWN(arm64_dma32_phys_limit); #endif max_zone_pfns[ZONE_NORMAL] = max; @@ -200,16 +214,21 @@ static void __init zone_sizes_init(unsigned long min, unsigned long max) { struct memblock_region *reg; unsigned long zone_size[MAX_NR_ZONES], zhole_size[MAX_NR_ZONES]; - unsigned long max_dma = min; + unsigned long max_dma32 = min; + unsigned long __maybe_unused max_dma = min; memset(zone_size, 0, sizeof(zone_size)); - /* 4GB maximum for 32-bit only capable devices */ -#ifdef CONFIG_ZONE_DMA32 +#ifdef CONFIG_ZONE_DMA max_dma = PFN_DOWN(arm64_dma_phys_limit); - zone_size[ZONE_DMA32] = max_dma - min; + zone_size[ZONE_DMA] = max_dma - min; + max_dma32 = max_dma; +#endif +#ifdef CONFIG_ZONE_DMA32 + max_dma32 = PFN_DOWN(arm64_dma32_phys_limit); + zone_size[ZONE_DMA32] = max_dma32 - max_dma; #endif - zone_size[ZONE_NORMAL] = max - max_dma; + zone_size[ZONE_NORMAL] = max - max_dma32; memcpy(zhole_size, zone_size, sizeof(zhole_size)); @@ -219,16 +238,22 @@ static void __init zone_sizes_init(unsigned long min, unsigned long max) if (start >= max) continue; - -#ifdef CONFIG_ZONE_DMA32 +#ifdef CONFIG_ZONE_DMA if (start < max_dma) { - unsigned long dma_end = min(end, max_dma); - zhole_size[ZONE_DMA32] -= dma_end - start; + unsigned long dma_end = min_not_zero(end, max_dma); + zhole_size[ZONE_DMA] -= dma_end - start; } #endif - if (end > max_dma) { +#ifdef CONFIG_ZONE_DMA32 + if (start < max_dma32) { + unsigned long dma32_end = min(end, max_dma32); + unsigned long dma32_start = max(start, max_dma); + zhole_size[ZONE_DMA32] -= dma32_end - dma32_start; + } +#endif + if (end > max_dma32) { unsigned long normal_end = min(end, max); - unsigned long normal_start = max(start, max_dma); + unsigned long normal_start = max(start, max_dma32); zhole_size[ZONE_NORMAL] -= normal_end - normal_start; } } @@ -418,11 +443,15 @@ void __init arm64_memblock_init(void) early_init_fdt_scan_reserved_mem(); - /* 4GB maximum for 32-bit only capable devices */ + if (IS_ENABLED(CONFIG_ZONE_DMA)) { + zone_dma_bits = ARM64_ZONE_DMA_BITS; + arm64_dma_phys_limit = max_zone_phys(ARM64_ZONE_DMA_BITS); + } + if (IS_ENABLED(CONFIG_ZONE_DMA32)) - arm64_dma_phys_limit = max_zone_dma_phys(); + arm64_dma32_phys_limit = max_zone_phys(32); else - arm64_dma_phys_limit = PHYS_MASK + 1; + arm64_dma32_phys_limit = PHYS_MASK + 1; reserve_crashkernel(); @@ -430,7 +459,7 @@ void __init arm64_memblock_init(void) high_memory = __va(memblock_end_of_DRAM() - 1) + 1; - dma_contiguous_reserve(arm64_dma_phys_limit); + dma_contiguous_reserve(arm64_dma32_phys_limit); } void __init bootmem_init(void) @@ -534,7 +563,7 @@ 
static void __init free_unused_memmap(void) void __init mem_init(void) { if (swiotlb_force == SWIOTLB_FORCE || - max_pfn > (arm64_dma_phys_limit >> PAGE_SHIFT)) + max_pfn > PFN_DOWN(arm64_dma_phys_limit ? : arm64_dma32_phys_limit)) swiotlb_init(1); else swiotlb_force = SWIOTLB_NO_FORCE; @@ -571,7 +600,7 @@ void free_initmem(void) { free_reserved_area(lm_alias(__init_begin), lm_alias(__init_end), - 0, "unused kernel"); + POISON_FREE_INITMEM, "unused kernel"); /* * Unmap the __init region but leave the VM area in place. This * prevents the region from being reused for kernel modules, which @@ -580,18 +609,6 @@ void free_initmem(void) unmap_kernel_range((u64)__init_begin, (u64)(__init_end - __init_begin)); } -#ifdef CONFIG_BLK_DEV_INITRD -void __init free_initrd_mem(unsigned long start, unsigned long end) -{ - unsigned long aligned_start, aligned_end; - - aligned_start = __virt_to_phys(start) & PAGE_MASK; - aligned_end = PAGE_ALIGN(__virt_to_phys(end)); - memblock_free(aligned_start, aligned_end - aligned_start); - free_reserved_area((void *)start, (void *)end, 0, "initrd"); -} -#endif - /* * Dump out memory limit information on panic. */ diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c index 60c929f3683b..a9f541912289 100644 --- a/arch/arm64/mm/mmu.c +++ b/arch/arm64/mm/mmu.c @@ -338,7 +338,7 @@ static void __create_pgd_mapping(pgd_t *pgdir, phys_addr_t phys, phys_addr_t (*pgtable_alloc)(int), int flags) { - unsigned long addr, length, end, next; + unsigned long addr, end, next; pgd_t *pgdp = pgd_offset_raw(pgdir, virt); /* @@ -350,9 +350,8 @@ static void __create_pgd_mapping(pgd_t *pgdir, phys_addr_t phys, phys &= PAGE_MASK; addr = virt & PAGE_MASK; - length = PAGE_ALIGN(size + (virt & ~PAGE_MASK)); + end = PAGE_ALIGN(virt + size); - end = addr + length; do { next = pgd_addr_end(addr, end); alloc_init_pud(pgdp, addr, next, phys, prot, pgtable_alloc, diff --git a/arch/arm64/xen/Makefile b/arch/arm64/xen/Makefile index a4fc65f3928d..b66215e8658e 100644 --- a/arch/arm64/xen/Makefile +++ b/arch/arm64/xen/Makefile @@ -1,4 +1,3 @@ # SPDX-License-Identifier: GPL-2.0-only xen-arm-y += $(addprefix ../../arm/xen/, enlighten.o grant-table.o p2m.o mm.o) obj-y := xen-arm.o hypercall.o -obj-$(CONFIG_XEN_EFI) += $(addprefix ../../arm/xen/, efi.o) diff --git a/arch/ia64/kernel/setup.c b/arch/ia64/kernel/setup.c index bb320c6d0cc9..c49fcef754de 100644 --- a/arch/ia64/kernel/setup.c +++ b/arch/ia64/kernel/setup.c @@ -289,7 +289,7 @@ static void __init setup_crashkernel(unsigned long total, int *n) } if (!check_crashkernel_memory(base, size)) { - pr_warning("crashkernel: There would be kdump memory " + pr_warn("crashkernel: There would be kdump memory " "at %ld GB but this is unusable because it " "must\nbe below 4 GB. 
Change the memory " "configuration of the machine.\n", diff --git a/arch/m68k/atari/config.c b/arch/m68k/atari/config.c index 73bf5ea9ee1b..7ec3161e8517 100644 --- a/arch/m68k/atari/config.c +++ b/arch/m68k/atari/config.c @@ -869,8 +869,28 @@ static const struct resource atari_scsi_tt_rsrc[] __initconst = { }; #endif +/* + * Falcon IDE interface + */ + +#define FALCON_IDE_BASE 0xfff00000 + +static const struct resource atari_falconide_rsrc[] __initconst = { + { + .flags = IORESOURCE_MEM, + .start = FALCON_IDE_BASE, + .end = FALCON_IDE_BASE + 0x39, + }, + { + .flags = IORESOURCE_IRQ, + .start = IRQ_MFP_FSCSI, + .end = IRQ_MFP_FSCSI, + }, +}; + int __init atari_platform_init(void) { + struct platform_device *pdev; int rv = 0; if (!MACH_IS_ATARI) @@ -912,6 +932,13 @@ int __init atari_platform_init(void) atari_scsi_tt_rsrc, ARRAY_SIZE(atari_scsi_tt_rsrc)); #endif + if (ATARIHW_PRESENT(IDE)) { + pdev = platform_device_register_simple("atari-falcon-ide", -1, + atari_falconide_rsrc, ARRAY_SIZE(atari_falconide_rsrc)); + if (IS_ERR(pdev)) + rv = PTR_ERR(pdev); + } + return rv; } diff --git a/arch/m68k/configs/amiga_defconfig b/arch/m68k/configs/amiga_defconfig index 9a33c1c006a1..619d30d663a2 100644 --- a/arch/m68k/configs/amiga_defconfig +++ b/arch/m68k/configs/amiga_defconfig @@ -355,6 +355,7 @@ CONFIG_DM_SNAPSHOT=m CONFIG_DM_THIN_PROVISIONING=m CONFIG_DM_WRITECACHE=m CONFIG_DM_ERA=m +CONFIG_DM_CLONE=m CONFIG_DM_MIRROR=m CONFIG_DM_RAID=m CONFIG_DM_ZERO=m @@ -420,11 +421,15 @@ CONFIG_INPUT_M68K_BEEP=m # CONFIG_LEGACY_PTYS is not set CONFIG_PRINTER=m # CONFIG_HW_RANDOM is not set +CONFIG_I2C=m +CONFIG_I2C_CHARDEV=m +CONFIG_I2C_ICY=m CONFIG_NTP_PPS=y CONFIG_PPS_CLIENT_LDISC=m CONFIG_PPS_CLIENT_PARPORT=m CONFIG_PTP_1588_CLOCK=m -# CONFIG_HWMON is not set +CONFIG_HWMON=m +CONFIG_SENSORS_LTC2990=m CONFIG_FB=y CONFIG_FB_CIRRUS=y CONFIG_FB_AMIGA=y @@ -432,8 +437,6 @@ CONFIG_FB_AMIGA_OCS=y CONFIG_FB_AMIGA_ECS=y CONFIG_FB_AMIGA_AGA=y CONFIG_FB_FM2=y -# CONFIG_LCD_CLASS_DEVICE is not set -# CONFIG_BACKLIGHT_CLASS_DEVICE is not set CONFIG_FRAMEBUFFER_CONSOLE=y CONFIG_LOGO=y CONFIG_SOUND=m @@ -490,6 +493,7 @@ CONFIG_QNX4FS_FS=m CONFIG_QNX6FS_FS=m CONFIG_SYSV_FS=m CONFIG_UFS_FS=m +CONFIG_EROFS_FS=m CONFIG_NFS_FS=y CONFIG_NFS_V4=m CONFIG_NFS_SWAP=y @@ -560,10 +564,6 @@ CONFIG_CRYPTO_ECDH=m CONFIG_CRYPTO_ECRDSA=m CONFIG_CRYPTO_CHACHA20POLY1305=m CONFIG_CRYPTO_AEGIS128=m -CONFIG_CRYPTO_AEGIS128L=m -CONFIG_CRYPTO_AEGIS256=m -CONFIG_CRYPTO_MORUS640=m -CONFIG_CRYPTO_MORUS1280=m CONFIG_CRYPTO_CFB=m CONFIG_CRYPTO_CTS=m CONFIG_CRYPTO_LRW=m diff --git a/arch/m68k/configs/apollo_defconfig b/arch/m68k/configs/apollo_defconfig index 7fdbc797a05d..caa0558abcdb 100644 --- a/arch/m68k/configs/apollo_defconfig +++ b/arch/m68k/configs/apollo_defconfig @@ -334,6 +334,7 @@ CONFIG_DM_SNAPSHOT=m CONFIG_DM_THIN_PROVISIONING=m CONFIG_DM_WRITECACHE=m CONFIG_DM_ERA=m +CONFIG_DM_CLONE=m CONFIG_DM_MIRROR=m CONFIG_DM_RAID=m CONFIG_DM_ZERO=m @@ -393,8 +394,6 @@ CONFIG_PPS_CLIENT_LDISC=m CONFIG_PTP_1588_CLOCK=m # CONFIG_HWMON is not set CONFIG_FB=y -# CONFIG_LCD_CLASS_DEVICE is not set -# CONFIG_BACKLIGHT_CLASS_DEVICE is not set CONFIG_FRAMEBUFFER_CONSOLE=y CONFIG_LOGO=y # CONFIG_LOGO_LINUX_VGA16 is not set @@ -450,6 +449,7 @@ CONFIG_QNX4FS_FS=m CONFIG_QNX6FS_FS=m CONFIG_SYSV_FS=m CONFIG_UFS_FS=m +CONFIG_EROFS_FS=m CONFIG_NFS_FS=y CONFIG_NFS_V4=m CONFIG_NFS_SWAP=y @@ -520,10 +520,6 @@ CONFIG_CRYPTO_ECDH=m CONFIG_CRYPTO_ECRDSA=m CONFIG_CRYPTO_CHACHA20POLY1305=m CONFIG_CRYPTO_AEGIS128=m -CONFIG_CRYPTO_AEGIS128L=m -CONFIG_CRYPTO_AEGIS256=m 
-CONFIG_CRYPTO_MORUS640=m -CONFIG_CRYPTO_MORUS1280=m CONFIG_CRYPTO_CFB=m CONFIG_CRYPTO_CTS=m CONFIG_CRYPTO_LRW=m diff --git a/arch/m68k/configs/atari_defconfig b/arch/m68k/configs/atari_defconfig index f1763405a539..2551c7e9ac54 100644 --- a/arch/m68k/configs/atari_defconfig +++ b/arch/m68k/configs/atari_defconfig @@ -350,6 +350,7 @@ CONFIG_DM_SNAPSHOT=m CONFIG_DM_THIN_PROVISIONING=m CONFIG_DM_WRITECACHE=m CONFIG_DM_ERA=m +CONFIG_DM_CLONE=m CONFIG_DM_MIRROR=m CONFIG_DM_RAID=m CONFIG_DM_ZERO=m @@ -417,8 +418,6 @@ CONFIG_PTP_1588_CLOCK=m # CONFIG_HWMON is not set CONFIG_FB=y CONFIG_FB_ATARI=y -# CONFIG_LCD_CLASS_DEVICE is not set -# CONFIG_BACKLIGHT_CLASS_DEVICE is not set CONFIG_FRAMEBUFFER_CONSOLE=y CONFIG_LOGO=y CONFIG_SOUND=m @@ -472,6 +471,7 @@ CONFIG_QNX4FS_FS=m CONFIG_QNX6FS_FS=m CONFIG_SYSV_FS=m CONFIG_UFS_FS=m +CONFIG_EROFS_FS=m CONFIG_NFS_FS=y CONFIG_NFS_V4=m CONFIG_NFS_SWAP=y @@ -542,10 +542,6 @@ CONFIG_CRYPTO_ECDH=m CONFIG_CRYPTO_ECRDSA=m CONFIG_CRYPTO_CHACHA20POLY1305=m CONFIG_CRYPTO_AEGIS128=m -CONFIG_CRYPTO_AEGIS128L=m -CONFIG_CRYPTO_AEGIS256=m -CONFIG_CRYPTO_MORUS640=m -CONFIG_CRYPTO_MORUS1280=m CONFIG_CRYPTO_CFB=m CONFIG_CRYPTO_CTS=m CONFIG_CRYPTO_LRW=m diff --git a/arch/m68k/configs/bvme6000_defconfig b/arch/m68k/configs/bvme6000_defconfig index 91154d6acb31..4ffc1e5646d5 100644 --- a/arch/m68k/configs/bvme6000_defconfig +++ b/arch/m68k/configs/bvme6000_defconfig @@ -332,6 +332,7 @@ CONFIG_DM_SNAPSHOT=m CONFIG_DM_THIN_PROVISIONING=m CONFIG_DM_WRITECACHE=m CONFIG_DM_ERA=m +CONFIG_DM_CLONE=m CONFIG_DM_MIRROR=m CONFIG_DM_RAID=m CONFIG_DM_ZERO=m @@ -390,8 +391,6 @@ CONFIG_NTP_PPS=y CONFIG_PPS_CLIENT_LDISC=m CONFIG_PTP_1588_CLOCK=m # CONFIG_HWMON is not set -# CONFIG_LCD_CLASS_DEVICE is not set -# CONFIG_BACKLIGHT_CLASS_DEVICE is not set CONFIG_HID=m CONFIG_HIDRAW=y CONFIG_UHID=m @@ -443,6 +442,7 @@ CONFIG_QNX4FS_FS=m CONFIG_QNX6FS_FS=m CONFIG_SYSV_FS=m CONFIG_UFS_FS=m +CONFIG_EROFS_FS=m CONFIG_NFS_FS=y CONFIG_NFS_V4=m CONFIG_NFS_SWAP=y @@ -513,10 +513,6 @@ CONFIG_CRYPTO_ECDH=m CONFIG_CRYPTO_ECRDSA=m CONFIG_CRYPTO_CHACHA20POLY1305=m CONFIG_CRYPTO_AEGIS128=m -CONFIG_CRYPTO_AEGIS128L=m -CONFIG_CRYPTO_AEGIS256=m -CONFIG_CRYPTO_MORUS640=m -CONFIG_CRYPTO_MORUS1280=m CONFIG_CRYPTO_CFB=m CONFIG_CRYPTO_CTS=m CONFIG_CRYPTO_LRW=m diff --git a/arch/m68k/configs/hp300_defconfig b/arch/m68k/configs/hp300_defconfig index c398c4a94d95..806da3d97ca4 100644 --- a/arch/m68k/configs/hp300_defconfig +++ b/arch/m68k/configs/hp300_defconfig @@ -333,6 +333,7 @@ CONFIG_DM_SNAPSHOT=m CONFIG_DM_THIN_PROVISIONING=m CONFIG_DM_WRITECACHE=m CONFIG_DM_ERA=m +CONFIG_DM_CLONE=m CONFIG_DM_MIRROR=m CONFIG_DM_RAID=m CONFIG_DM_ZERO=m @@ -395,8 +396,6 @@ CONFIG_PPS_CLIENT_LDISC=m CONFIG_PTP_1588_CLOCK=m # CONFIG_HWMON is not set CONFIG_FB=y -# CONFIG_LCD_CLASS_DEVICE is not set -# CONFIG_BACKLIGHT_CLASS_DEVICE is not set CONFIG_FRAMEBUFFER_CONSOLE=y CONFIG_LOGO=y # CONFIG_LOGO_LINUX_MONO is not set @@ -452,6 +451,7 @@ CONFIG_QNX4FS_FS=m CONFIG_QNX6FS_FS=m CONFIG_SYSV_FS=m CONFIG_UFS_FS=m +CONFIG_EROFS_FS=m CONFIG_NFS_FS=y CONFIG_NFS_V4=m CONFIG_NFS_SWAP=y @@ -522,10 +522,6 @@ CONFIG_CRYPTO_ECDH=m CONFIG_CRYPTO_ECRDSA=m CONFIG_CRYPTO_CHACHA20POLY1305=m CONFIG_CRYPTO_AEGIS128=m -CONFIG_CRYPTO_AEGIS128L=m -CONFIG_CRYPTO_AEGIS256=m -CONFIG_CRYPTO_MORUS640=m -CONFIG_CRYPTO_MORUS1280=m CONFIG_CRYPTO_CFB=m CONFIG_CRYPTO_CTS=m CONFIG_CRYPTO_LRW=m diff --git a/arch/m68k/configs/mac_defconfig b/arch/m68k/configs/mac_defconfig index 350d004559be..250da20e291c 100644 --- a/arch/m68k/configs/mac_defconfig +++ 
b/arch/m68k/configs/mac_defconfig @@ -342,6 +342,7 @@ CONFIG_DM_SNAPSHOT=m CONFIG_DM_THIN_PROVISIONING=m CONFIG_DM_WRITECACHE=m CONFIG_DM_ERA=m +CONFIG_DM_CLONE=m CONFIG_DM_MIRROR=m CONFIG_DM_RAID=m CONFIG_DM_ZERO=m @@ -419,8 +420,6 @@ CONFIG_PTP_1588_CLOCK=m CONFIG_FB=y CONFIG_FB_VALKYRIE=y CONFIG_FB_MAC=y -# CONFIG_LCD_CLASS_DEVICE is not set -# CONFIG_BACKLIGHT_CLASS_DEVICE is not set CONFIG_FRAMEBUFFER_CONSOLE=y CONFIG_LOGO=y CONFIG_HID=m @@ -474,6 +473,7 @@ CONFIG_QNX4FS_FS=m CONFIG_QNX6FS_FS=m CONFIG_SYSV_FS=m CONFIG_UFS_FS=m +CONFIG_EROFS_FS=m CONFIG_NFS_FS=y CONFIG_NFS_V4=m CONFIG_NFS_SWAP=y @@ -544,10 +544,6 @@ CONFIG_CRYPTO_ECDH=m CONFIG_CRYPTO_ECRDSA=m CONFIG_CRYPTO_CHACHA20POLY1305=m CONFIG_CRYPTO_AEGIS128=m -CONFIG_CRYPTO_AEGIS128L=m -CONFIG_CRYPTO_AEGIS256=m -CONFIG_CRYPTO_MORUS640=m -CONFIG_CRYPTO_MORUS1280=m CONFIG_CRYPTO_CFB=m CONFIG_CRYPTO_CTS=m CONFIG_CRYPTO_LRW=m diff --git a/arch/m68k/configs/multi_defconfig b/arch/m68k/configs/multi_defconfig index b838dd820348..b764a0368a56 100644 --- a/arch/m68k/configs/multi_defconfig +++ b/arch/m68k/configs/multi_defconfig @@ -386,6 +386,7 @@ CONFIG_DM_SNAPSHOT=m CONFIG_DM_THIN_PROVISIONING=m CONFIG_DM_WRITECACHE=m CONFIG_DM_ERA=m +CONFIG_DM_CLONE=m CONFIG_DM_MIRROR=m CONFIG_DM_RAID=m CONFIG_DM_ZERO=m @@ -480,11 +481,15 @@ CONFIG_SERIAL_PMACZILOG_TTYS=y CONFIG_SERIAL_PMACZILOG_CONSOLE=y CONFIG_PRINTER=m # CONFIG_HW_RANDOM is not set +CONFIG_I2C=m +CONFIG_I2C_CHARDEV=m +CONFIG_I2C_ICY=m CONFIG_NTP_PPS=y CONFIG_PPS_CLIENT_LDISC=m CONFIG_PPS_CLIENT_PARPORT=m CONFIG_PTP_1588_CLOCK=m -# CONFIG_HWMON is not set +CONFIG_HWMON=m +CONFIG_SENSORS_LTC2990=m CONFIG_FB=y CONFIG_FB_CIRRUS=y CONFIG_FB_AMIGA=y @@ -495,8 +500,6 @@ CONFIG_FB_FM2=y CONFIG_FB_ATARI=y CONFIG_FB_VALKYRIE=y CONFIG_FB_MAC=y -# CONFIG_LCD_CLASS_DEVICE is not set -# CONFIG_BACKLIGHT_CLASS_DEVICE is not set CONFIG_FRAMEBUFFER_CONSOLE=y CONFIG_LOGO=y CONFIG_SOUND=m @@ -556,6 +559,7 @@ CONFIG_QNX4FS_FS=m CONFIG_QNX6FS_FS=m CONFIG_SYSV_FS=m CONFIG_UFS_FS=m +CONFIG_EROFS_FS=m CONFIG_NFS_FS=y CONFIG_NFS_V4=m CONFIG_NFS_SWAP=y @@ -626,10 +630,6 @@ CONFIG_CRYPTO_ECDH=m CONFIG_CRYPTO_ECRDSA=m CONFIG_CRYPTO_CHACHA20POLY1305=m CONFIG_CRYPTO_AEGIS128=m -CONFIG_CRYPTO_AEGIS128L=m -CONFIG_CRYPTO_AEGIS256=m -CONFIG_CRYPTO_MORUS640=m -CONFIG_CRYPTO_MORUS1280=m CONFIG_CRYPTO_CFB=m CONFIG_CRYPTO_CTS=m CONFIG_CRYPTO_LRW=m diff --git a/arch/m68k/configs/mvme147_defconfig b/arch/m68k/configs/mvme147_defconfig index 3f8dd61559cf..7800d3a8d46e 100644 --- a/arch/m68k/configs/mvme147_defconfig +++ b/arch/m68k/configs/mvme147_defconfig @@ -331,6 +331,7 @@ CONFIG_DM_SNAPSHOT=m CONFIG_DM_THIN_PROVISIONING=m CONFIG_DM_WRITECACHE=m CONFIG_DM_ERA=m +CONFIG_DM_CLONE=m CONFIG_DM_MIRROR=m CONFIG_DM_RAID=m CONFIG_DM_ZERO=m @@ -389,8 +390,6 @@ CONFIG_NTP_PPS=y CONFIG_PPS_CLIENT_LDISC=m CONFIG_PTP_1588_CLOCK=m # CONFIG_HWMON is not set -# CONFIG_LCD_CLASS_DEVICE is not set -# CONFIG_BACKLIGHT_CLASS_DEVICE is not set CONFIG_HID=m CONFIG_HIDRAW=y CONFIG_UHID=m @@ -442,6 +441,7 @@ CONFIG_QNX4FS_FS=m CONFIG_QNX6FS_FS=m CONFIG_SYSV_FS=m CONFIG_UFS_FS=m +CONFIG_EROFS_FS=m CONFIG_NFS_FS=y CONFIG_NFS_V4=m CONFIG_NFS_SWAP=y @@ -512,10 +512,6 @@ CONFIG_CRYPTO_ECDH=m CONFIG_CRYPTO_ECRDSA=m CONFIG_CRYPTO_CHACHA20POLY1305=m CONFIG_CRYPTO_AEGIS128=m -CONFIG_CRYPTO_AEGIS128L=m -CONFIG_CRYPTO_AEGIS256=m -CONFIG_CRYPTO_MORUS640=m -CONFIG_CRYPTO_MORUS1280=m CONFIG_CRYPTO_CFB=m CONFIG_CRYPTO_CTS=m CONFIG_CRYPTO_LRW=m diff --git a/arch/m68k/configs/mvme16x_defconfig b/arch/m68k/configs/mvme16x_defconfig index 
ae3b2d4f636c..c32dc2d2058d 100644 --- a/arch/m68k/configs/mvme16x_defconfig +++ b/arch/m68k/configs/mvme16x_defconfig @@ -332,6 +332,7 @@ CONFIG_DM_SNAPSHOT=m CONFIG_DM_THIN_PROVISIONING=m CONFIG_DM_WRITECACHE=m CONFIG_DM_ERA=m +CONFIG_DM_CLONE=m CONFIG_DM_MIRROR=m CONFIG_DM_RAID=m CONFIG_DM_ZERO=m @@ -390,8 +391,6 @@ CONFIG_NTP_PPS=y CONFIG_PPS_CLIENT_LDISC=m CONFIG_PTP_1588_CLOCK=m # CONFIG_HWMON is not set -# CONFIG_LCD_CLASS_DEVICE is not set -# CONFIG_BACKLIGHT_CLASS_DEVICE is not set CONFIG_HID=m CONFIG_HIDRAW=y CONFIG_UHID=m @@ -443,6 +442,7 @@ CONFIG_QNX4FS_FS=m CONFIG_QNX6FS_FS=m CONFIG_SYSV_FS=m CONFIG_UFS_FS=m +CONFIG_EROFS_FS=m CONFIG_NFS_FS=y CONFIG_NFS_V4=m CONFIG_NFS_SWAP=y @@ -513,10 +513,6 @@ CONFIG_CRYPTO_ECDH=m CONFIG_CRYPTO_ECRDSA=m CONFIG_CRYPTO_CHACHA20POLY1305=m CONFIG_CRYPTO_AEGIS128=m -CONFIG_CRYPTO_AEGIS128L=m -CONFIG_CRYPTO_AEGIS256=m -CONFIG_CRYPTO_MORUS640=m -CONFIG_CRYPTO_MORUS1280=m CONFIG_CRYPTO_CFB=m CONFIG_CRYPTO_CTS=m CONFIG_CRYPTO_LRW=m diff --git a/arch/m68k/configs/q40_defconfig b/arch/m68k/configs/q40_defconfig index cd61ef14b582..bf0a65ce57e0 100644 --- a/arch/m68k/configs/q40_defconfig +++ b/arch/m68k/configs/q40_defconfig @@ -339,6 +339,7 @@ CONFIG_DM_SNAPSHOT=m CONFIG_DM_THIN_PROVISIONING=m CONFIG_DM_WRITECACHE=m CONFIG_DM_ERA=m +CONFIG_DM_CLONE=m CONFIG_DM_MIRROR=m CONFIG_DM_RAID=m CONFIG_DM_ZERO=m @@ -404,8 +405,6 @@ CONFIG_PPS_CLIENT_PARPORT=m CONFIG_PTP_1588_CLOCK=m # CONFIG_HWMON is not set CONFIG_FB=y -# CONFIG_LCD_CLASS_DEVICE is not set -# CONFIG_BACKLIGHT_CLASS_DEVICE is not set CONFIG_FRAMEBUFFER_CONSOLE=y CONFIG_LOGO=y CONFIG_SOUND=m @@ -461,6 +460,7 @@ CONFIG_QNX4FS_FS=m CONFIG_QNX6FS_FS=m CONFIG_SYSV_FS=m CONFIG_UFS_FS=m +CONFIG_EROFS_FS=m CONFIG_NFS_FS=y CONFIG_NFS_V4=m CONFIG_NFS_SWAP=y @@ -531,10 +531,6 @@ CONFIG_CRYPTO_ECDH=m CONFIG_CRYPTO_ECRDSA=m CONFIG_CRYPTO_CHACHA20POLY1305=m CONFIG_CRYPTO_AEGIS128=m -CONFIG_CRYPTO_AEGIS128L=m -CONFIG_CRYPTO_AEGIS256=m -CONFIG_CRYPTO_MORUS640=m -CONFIG_CRYPTO_MORUS1280=m CONFIG_CRYPTO_CFB=m CONFIG_CRYPTO_CTS=m CONFIG_CRYPTO_LRW=m diff --git a/arch/m68k/configs/sun3_defconfig b/arch/m68k/configs/sun3_defconfig index 151f5371cd3d..5f3cfa2926d2 100644 --- a/arch/m68k/configs/sun3_defconfig +++ b/arch/m68k/configs/sun3_defconfig @@ -329,6 +329,7 @@ CONFIG_DM_SNAPSHOT=m CONFIG_DM_THIN_PROVISIONING=m CONFIG_DM_WRITECACHE=m CONFIG_DM_ERA=m +CONFIG_DM_CLONE=m CONFIG_DM_MIRROR=m CONFIG_DM_RAID=m CONFIG_DM_ZERO=m @@ -390,8 +391,6 @@ CONFIG_PPS_CLIENT_LDISC=m CONFIG_PTP_1588_CLOCK=m # CONFIG_HWMON is not set CONFIG_FB=y -# CONFIG_LCD_CLASS_DEVICE is not set -# CONFIG_BACKLIGHT_CLASS_DEVICE is not set CONFIG_FRAMEBUFFER_CONSOLE=y CONFIG_LOGO=y CONFIG_HID=m @@ -445,6 +444,7 @@ CONFIG_QNX4FS_FS=m CONFIG_QNX6FS_FS=m CONFIG_SYSV_FS=m CONFIG_UFS_FS=m +CONFIG_EROFS_FS=m CONFIG_NFS_FS=y CONFIG_NFS_V4=m CONFIG_NFS_SWAP=y @@ -515,10 +515,6 @@ CONFIG_CRYPTO_ECDH=m CONFIG_CRYPTO_ECRDSA=m CONFIG_CRYPTO_CHACHA20POLY1305=m CONFIG_CRYPTO_AEGIS128=m -CONFIG_CRYPTO_AEGIS128L=m -CONFIG_CRYPTO_AEGIS256=m -CONFIG_CRYPTO_MORUS640=m -CONFIG_CRYPTO_MORUS1280=m CONFIG_CRYPTO_CFB=m CONFIG_CRYPTO_CTS=m CONFIG_CRYPTO_LRW=m diff --git a/arch/m68k/configs/sun3x_defconfig b/arch/m68k/configs/sun3x_defconfig index 1dcb0ee1fe98..58354d2018d5 100644 --- a/arch/m68k/configs/sun3x_defconfig +++ b/arch/m68k/configs/sun3x_defconfig @@ -329,6 +329,7 @@ CONFIG_DM_SNAPSHOT=m CONFIG_DM_THIN_PROVISIONING=m CONFIG_DM_WRITECACHE=m CONFIG_DM_ERA=m +CONFIG_DM_CLONE=m CONFIG_DM_MIRROR=m CONFIG_DM_RAID=m CONFIG_DM_ZERO=m @@ -389,8 +390,6 @@ 
CONFIG_PPS_CLIENT_LDISC=m CONFIG_PTP_1588_CLOCK=m # CONFIG_HWMON is not set CONFIG_FB=y -# CONFIG_LCD_CLASS_DEVICE is not set -# CONFIG_BACKLIGHT_CLASS_DEVICE is not set CONFIG_FRAMEBUFFER_CONSOLE=y CONFIG_LOGO=y CONFIG_HID=m @@ -444,6 +443,7 @@ CONFIG_QNX4FS_FS=m CONFIG_QNX6FS_FS=m CONFIG_SYSV_FS=m CONFIG_UFS_FS=m +CONFIG_EROFS_FS=m CONFIG_NFS_FS=y CONFIG_NFS_V4=m CONFIG_NFS_SWAP=y @@ -514,10 +514,6 @@ CONFIG_CRYPTO_ECDH=m CONFIG_CRYPTO_ECRDSA=m CONFIG_CRYPTO_CHACHA20POLY1305=m CONFIG_CRYPTO_AEGIS128=m -CONFIG_CRYPTO_AEGIS128L=m -CONFIG_CRYPTO_AEGIS256=m -CONFIG_CRYPTO_MORUS640=m -CONFIG_CRYPTO_MORUS1280=m CONFIG_CRYPTO_CFB=m CONFIG_CRYPTO_CTS=m CONFIG_CRYPTO_LRW=m diff --git a/arch/m68k/q40/config.c b/arch/m68k/q40/config.c index e63eb5f06999..f31890078197 100644 --- a/arch/m68k/q40/config.c +++ b/arch/m68k/q40/config.c @@ -264,6 +264,7 @@ static int q40_get_rtc_pll(struct rtc_pll_info *pll) { int tmp = Q40_RTC_CTRL; + pll->pll_ctrl = 0; pll->pll_value = tmp & Q40_RTC_PLL_MASK; if (tmp & Q40_RTC_PLL_SIGN) pll->pll_value = -pll->pll_value; diff --git a/arch/mips/Kbuild.platforms b/arch/mips/Kbuild.platforms index 0de839882106..a69b272a3ab0 100644 --- a/arch/mips/Kbuild.platforms +++ b/arch/mips/Kbuild.platforms @@ -17,6 +17,7 @@ platforms += jazz platforms += jz4740 platforms += lantiq platforms += lasat +platforms += loongson2ef platforms += loongson32 platforms += loongson64 platforms += mti-malta @@ -30,6 +31,7 @@ platforms += ralink platforms += rb532 platforms += sgi-ip22 platforms += sgi-ip27 +platforms += sgi-ip30 platforms += sgi-ip32 platforms += sibyte platforms += sni diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig index a0bd9bdb5f83..c86be02b6d89 100644 --- a/arch/mips/Kconfig +++ b/arch/mips/Kconfig @@ -7,6 +7,7 @@ config MIPS select ARCH_CLOCKSOURCE_DATA select ARCH_HAS_TICK_BROADCAST if GENERIC_CLOCKEVENTS_BROADCAST select ARCH_HAS_UBSAN_SANITIZE_ALL + select ARCH_HAS_FORTIFY_SOURCE select ARCH_SUPPORTS_UPROBES select ARCH_USE_BUILTIN_BSWAP select ARCH_USE_CMPXCHG_LOCKREF if 64BIT @@ -86,6 +87,8 @@ config MIPS select SYSCTL_EXCEPTION_TRACE select VIRT_TO_BUS select ARCH_HAS_PTE_SPECIAL if !(32BIT && CPU_HAS_RIXI) + select ARCH_HAS_KCOV + select HAVE_GCC_PLUGINS menu "Machine selection" @@ -359,6 +362,8 @@ config MACH_DECSTATION config MACH_JAZZ bool "Jazz family of machines" + select ARC_MEMORY + select ARC_PROMLIB select ARCH_MIGHT_HAVE_PC_PARPORT select ARCH_MIGHT_HAVE_PC_SERIO select FW_ARC @@ -441,7 +446,7 @@ config LASAT select SYS_SUPPORTS_LITTLE_ENDIAN config MACH_LOONGSON32 - bool "Loongson-1 family of machines" + bool "Loongson 32-bit family of machines" select SYS_SUPPORTS_ZBOOT help This enables support for the Loongson-1 family of machines. @@ -450,18 +455,48 @@ config MACH_LOONGSON32 the Institute of Computing Technology (ICT), Chinese Academy of Sciences (CAS). +config MACH_LOONGSON2EF + bool "Loongson-2E/F family of machines" + select SYS_SUPPORTS_ZBOOT + help + This enables the support of early Loongson-2E/F family of machines. 
+ config MACH_LOONGSON64 - bool "Loongson-2/3 family of machines" + bool "Loongson 64-bit family of machines" + select ARCH_SPARSEMEM_ENABLE + select ARCH_MIGHT_HAVE_PC_PARPORT + select ARCH_MIGHT_HAVE_PC_SERIO + select GENERIC_ISA_DMA_SUPPORT_BROKEN + select BOOT_ELF32 + select BOARD_SCACHE + select CSRC_R4K + select CEVT_R4K + select CPU_HAS_WB + select FORCE_PCI + select ISA + select I8259 + select IRQ_MIPS_CPU + select NR_CPUS_DEFAULT_4 + select USE_GENERIC_EARLY_PRINTK_8250 + select SYS_HAS_CPU_LOONGSON64 + select SYS_HAS_EARLY_PRINTK + select SYS_SUPPORTS_SMP + select SYS_SUPPORTS_HOTPLUG_CPU + select SYS_SUPPORTS_NUMA + select SYS_SUPPORTS_64BIT_KERNEL + select SYS_SUPPORTS_HIGHMEM + select SYS_SUPPORTS_LITTLE_ENDIAN select SYS_SUPPORTS_ZBOOT + select LOONGSON_MC146818 + select ZONE_DMA32 + select NUMA help This enables the support of Loongson-2/3 family of machines. - Loongson-2 is a family of single-core CPUs and Loongson-3 is a - family of multi-core CPUs. They are both 64-bit general-purpose - MIPS-compatible CPUs. Loongson-2/3 are developed by the Institute - of Computing Technology (ICT), Chinese Academy of Sciences (CAS) - in the People's Republic of China. The chief architect is Professor - Weiwu Hu. + Loongson-2 and Loongson-3 are 64-bit general-purpose processors with + GS264/GS464/GS464E/GS464V microarchitecture (except old Loongson-2E + and Loongson-2F which will be removed), developed by the Institute + of Computing Technology (ICT), Chinese Academy of Sciences (CAS). config MACH_PISTACHIO bool "IMG Pistachio SoC based boards" @@ -631,6 +666,8 @@ config RALINK config SGI_IP22 bool "SGI IP22 (Indy/Indigo2)" + select ARC_MEMORY + select ARC_PROMLIB select FW_ARC select FW_ARC32 select ARCH_MIGHT_HAVE_PC_SERIO @@ -654,14 +691,7 @@ config SGI_IP22 select SWAP_IO_SPACE select SYS_HAS_CPU_R4X00 select SYS_HAS_CPU_R5000 - # - # Disable EARLY_PRINTK for now since it leads to overwritten prom - # memory during early boot on some machines. - # - # See http://www.linux-mips.org/cgi-bin/mesg.cgi?a=linux-mips&i=20091119164009.GA15038%40deprecation.cyrius.com - # for a more details discussion - # - # select SYS_HAS_EARLY_PRINTK + select SYS_HAS_EARLY_PRINTK select SYS_SUPPORTS_32BIT_KERNEL select SYS_SUPPORTS_64BIT_KERNEL select SYS_SUPPORTS_BIG_ENDIAN @@ -674,8 +704,10 @@ config SGI_IP22 config SGI_IP27 bool "SGI IP27 (Origin200/2000)" select ARCH_HAS_PHYS_TO_DMA + select ARCH_SPARSEMEM_ENABLE select FW_ARC select FW_ARC64 + select ARC_CMDLINE_ONLY select BOOT_ELF64 select DEFAULT_SGI_PARTITION select SYS_HAS_EARLY_PRINTK @@ -698,6 +730,8 @@ config SGI_IP27 config SGI_IP28 bool "SGI IP28 (Indigo2 R10k)" + select ARC_MEMORY + select ARC_PROMLIB select FW_ARC select FW_ARC64 select ARCH_MIGHT_HAVE_PC_SERIO @@ -719,14 +753,7 @@ config SGI_IP28 select SGI_HAS_ZILOG select SWAP_IO_SPACE select SYS_HAS_CPU_R10000 - # - # Disable EARLY_PRINTK for now since it leads to overwritten prom - # memory during early boot on some machines. - # - # See http://www.linux-mips.org/cgi-bin/mesg.cgi?a=linux-mips&i=20091119164009.GA15038%40deprecation.cyrius.com - # for a more details discussion - # - # select SYS_HAS_EARLY_PRINTK + select SYS_HAS_EARLY_PRINTK select SYS_SUPPORTS_64BIT_KERNEL select SYS_SUPPORTS_BIG_ENDIAN select MIPS_L1_CACHE_SHIFT_7 @@ -734,8 +761,37 @@ config SGI_IP28 This is the SGI Indigo2 with R10000 processor. To compile a Linux kernel that runs on these, say Y here. 
+config SGI_IP30 + bool "SGI IP30 (Octane/Octane2)" + select ARCH_HAS_PHYS_TO_DMA + select FW_ARC + select FW_ARC64 + select BOOT_ELF64 + select CEVT_R4K + select CSRC_R4K + select SYNC_R4K if SMP + select ZONE_DMA32 + select HAVE_PCI + select IRQ_MIPS_CPU + select IRQ_DOMAIN_HIERARCHY + select NR_CPUS_DEFAULT_2 + select PCI_DRIVERS_GENERIC + select PCI_XTALK_BRIDGE + select SYS_HAS_EARLY_PRINTK + select SYS_HAS_CPU_R10000 + select SYS_SUPPORTS_64BIT_KERNEL + select SYS_SUPPORTS_BIG_ENDIAN + select SYS_SUPPORTS_SMP + select MIPS_L1_CACHE_SHIFT_7 + select ARC_MEMORY + help + These are the SGI Octane and Octane2 graphics workstations. To + compile a Linux kernel that runs on these, say Y here. + config SGI_IP32 bool "SGI IP32 (O2)" + select ARC_MEMORY + select ARC_PROMLIB select ARCH_HAS_PHYS_TO_DMA select FW_ARC select FW_ARC32 @@ -843,6 +899,8 @@ config SIBYTE_BIGSUR config SNI_RM bool "SNI RM200/300/400" + select ARC_MEMORY + select ARC_PROMLIB select FW_ARC if CPU_LITTLE_ENDIAN select FW_ARC32 if CPU_LITTLE_ENDIAN select FW_SNIPROM if CPU_BIG_ENDIAN @@ -1038,6 +1096,7 @@ source "arch/mips/sibyte/Kconfig" source "arch/mips/txx9/Kconfig" source "arch/mips/vr41xx/Kconfig" source "arch/mips/cavium-octeon/Kconfig" +source "arch/mips/loongson2ef/Kconfig" source "arch/mips/loongson32/Kconfig" source "arch/mips/loongson64/Kconfig" source "arch/mips/netlogic/Kconfig" @@ -1353,19 +1412,18 @@ config MIPS_L1_CACHE_SHIFT config HAVE_STD_PC_SERIAL_PORT bool +config ARC_CMDLINE_ONLY + bool + config ARC_CONSOLE bool "ARC console support" depends on SGI_IP22 || SGI_IP28 || (SNI_RM && CPU_LITTLE_ENDIAN) config ARC_MEMORY bool - depends on MACH_JAZZ || SNI_RM || SGI_IP32 - default y config ARC_PROMLIB bool - depends on MACH_JAZZ || SNI_RM || SGI_IP22 || SGI_IP28 || SGI_IP32 - default y config FW_ARC64 bool @@ -1379,51 +1437,56 @@ choice prompt "CPU type" default CPU_R4X00 -config CPU_LOONGSON3 - bool "Loongson 3 CPU" - depends on SYS_HAS_CPU_LOONGSON3 +config CPU_LOONGSON64 + bool "Loongson 64-bit CPU" + depends on SYS_HAS_CPU_LOONGSON64 select ARCH_HAS_PHYS_TO_DMA select CPU_SUPPORTS_64BIT_KERNEL select CPU_SUPPORTS_HIGHMEM select CPU_SUPPORTS_HUGEPAGES + select CPU_SUPPORTS_MSA select CPU_HAS_LOAD_STORE_LR select WEAK_ORDERING select WEAK_REORDERING_BEYOND_LLSC + select MIPS_ASID_BITS_VARIABLE select MIPS_PGD_C0_CONTEXT select MIPS_L1_CACHE_SHIFT_6 select GPIOLIB select SWIOTLB help - The Loongson 3 processor implements the MIPS64R2 instruction - set with many extensions. + The Loongson GSx64(GS264/GS464/GS464E/GS464V) series of processor + cores implements the MIPS64R2 instruction set with many extensions, + including most 64-bit Loongson-2 (2H, 2K) and Loongson-3 (3A1000, + 3B1000, 3B1500, 3A2000, 3A3000 and 3A4000) processors. However, old + Loongson-2E/2F is not covered here and will be removed in future. config LOONGSON3_ENHANCEMENT - bool "New Loongson 3 CPU Enhancements" + bool "New Loongson-3 CPU Enhancements" default n select CPU_MIPSR2 select CPU_HAS_PREFETCH - depends on CPU_LOONGSON3 + depends on CPU_LOONGSON64 help - New Loongson 3 CPU (since Loongson-3A R2, as opposed to Loongson-3A + New Loongson-3 cores (since Loongson-3A R2, as opposed to Loongson-3A R1, Loongson-3B R1 and Loongson-3B R2) has many enhancements, such as - FTLB, L1-VCache, EI/DI/Wait/Prefetch instruction, DSP/DSPv2 ASE, User + FTLB, L1-VCache, EI/DI/Wait/Prefetch instruction, DSP/DSPr2 ASE, User Local register, Read-Inhibit/Execute-Inhibit, SFB (Store Fill Buffer), Fast TLB refill support, etc. 
This option enable those enhancements which are not probed at run time. If you want a generic kernel to run on all Loongson 3 machines, please say 'N' here. If you want a high-performance kernel to run on - new Loongson 3 machines only, please say 'Y' here. + new Loongson-3 machines only, please say 'Y' here. config CPU_LOONGSON3_WORKAROUNDS - bool "Old Loongson 3 LLSC Workarounds" + bool "Old Loongson-3 LLSC Workarounds" default y if SMP - depends on CPU_LOONGSON3 + depends on CPU_LOONGSON64 help - Loongson 3 processors have the llsc issues which require workarounds. + Loongson-3 processors have the llsc issues which require workarounds. Without workarounds the system may hang unexpectedly. - Newer Loongson 3 will fix these issues and no workarounds are needed. + Newer Loongson-3 will fix these issues and no workarounds are needed. The workarounds have no significant side effect on them but may decrease the performance of the system so this option should be disabled unless the kernel is intended to be run on old systems. @@ -1433,7 +1496,7 @@ config CPU_LOONGSON3_WORKAROUNDS config CPU_LOONGSON2E bool "Loongson 2E" depends on SYS_HAS_CPU_LOONGSON2E - select CPU_LOONGSON2 + select CPU_LOONGSON2EF help The Loongson 2E processor implements the MIPS III instruction set with many extensions. @@ -1444,7 +1507,7 @@ config CPU_LOONGSON2E config CPU_LOONGSON2F bool "Loongson 2F" depends on SYS_HAS_CPU_LOONGSON2F - select CPU_LOONGSON2 + select CPU_LOONGSON2EF select GPIOLIB help The Loongson 2F processor implements the MIPS III instruction set @@ -1457,7 +1520,7 @@ config CPU_LOONGSON2F config CPU_LOONGSON1B bool "Loongson 1B" depends on SYS_HAS_CPU_LOONGSON1B - select CPU_LOONGSON1 + select CPU_LOONGSON32 select LEDS_GPIO_REGISTER help The Loongson 1B is a 32-bit SoC, which implements the MIPS32 @@ -1467,7 +1530,7 @@ config CPU_LOONGSON1B config CPU_LOONGSON1C bool "Loongson 1C" depends on SYS_HAS_CPU_LOONGSON1C - select CPU_LOONGSON1 + select CPU_LOONGSON32 select LEDS_GPIO_REGISTER help The Loongson 1C is a 32-bit SoC, which implements the MIPS32 @@ -1857,7 +1920,7 @@ config SYS_SUPPORTS_ZBOOT_UART_PROM bool select SYS_SUPPORTS_ZBOOT -config CPU_LOONGSON2 +config CPU_LOONGSON2EF bool select CPU_SUPPORTS_32BIT_KERNEL select CPU_SUPPORTS_64BIT_KERNEL @@ -1866,7 +1929,7 @@ config CPU_LOONGSON2 select ARCH_HAS_PHYS_TO_DMA select CPU_HAS_LOAD_STORE_LR -config CPU_LOONGSON1 +config CPU_LOONGSON32 bool select CPU_MIPS32 select CPU_MIPSR2 @@ -1900,7 +1963,7 @@ config CPU_BMIPS5000 select SYS_SUPPORTS_HOTPLUG_CPU select CPU_HAS_RIXI -config SYS_HAS_CPU_LOONGSON3 +config SYS_HAS_CPU_LOONGSON64 bool select CPU_SUPPORTS_CPUFREQ select CPU_HAS_RIXI @@ -1912,7 +1975,6 @@ config SYS_HAS_CPU_LOONGSON2F bool select CPU_SUPPORTS_CPUFREQ select CPU_SUPPORTS_ADDRWINCFG if 64BIT - select CPU_SUPPORTS_UNCACHED_ACCELERATED config SYS_HAS_CPU_LOONGSON1B bool @@ -2089,8 +2151,6 @@ config CPU_SUPPORTS_ADDRWINCFG config CPU_SUPPORTS_HUGEPAGES bool depends on !(32BIT && (ARCH_PHYS_ADDR_T_64BIT || EVA)) -config CPU_SUPPORTS_UNCACHED_ACCELERATED - bool config MIPS_PGD_C0_CONTEXT bool default y if 64BIT && (CPU_MIPSR2 || CPU_MIPSR6) && !CPU_XLP @@ -2162,7 +2222,7 @@ choice config PAGE_SIZE_4KB bool "4kB" - depends on !CPU_LOONGSON2 && !CPU_LOONGSON3 + depends on !CPU_LOONGSON2EF && !CPU_LOONGSON64 help This option select the standard 4kB Linux page size. On some R3000-family processors this is the only available page size. 
Using @@ -2554,6 +2614,10 @@ config CPU_R4000_WORKAROUNDS config CPU_R4400_WORKAROUNDS bool +config CPU_R4X00_BUGS64 + bool + default y if SYS_HAS_CPU_R4X00 && 64BIT && (TARGET_ISA_REV < 1) + config MIPS_ASID_SHIFT int default 6 if CPU_R3000 || CPU_TX39XX @@ -2612,20 +2676,11 @@ config CPU_SUPPORTS_MSA config ARCH_FLATMEM_ENABLE def_bool y - depends on !NUMA && !CPU_LOONGSON2 - -config ARCH_DISCONTIGMEM_ENABLE - bool - default y if SGI_IP27 - help - Say Y to support efficient handling of discontiguous physical memory, - for architectures which are either NUMA (Non-Uniform Memory Access) - or have huge holes in the physical address space for other reasons. - See <file:Documentation/vm/numa.rst> for more. + depends on !NUMA && !CPU_LOONGSON2EF config ARCH_SPARSEMEM_ENABLE bool - select SPARSEMEM_STATIC + select SPARSEMEM_STATIC if !SGI_IP27 config NUMA bool "NUMA Support" @@ -2702,7 +2757,7 @@ config NODES_SHIFT config HW_PERF_EVENTS bool "Enable hardware performance counter support for perf events" - depends on PERF_EVENTS && !OPROFILE && (CPU_MIPS32 || CPU_MIPS64 || CPU_R10000 || CPU_SB1 || CPU_CAVIUM_OCTEON || CPU_XLP || CPU_LOONGSON3) + depends on PERF_EVENTS && !OPROFILE && (CPU_MIPS32 || CPU_MIPS64 || CPU_R10000 || CPU_SB1 || CPU_CAVIUM_OCTEON || CPU_XLP || CPU_LOONGSON64) default y help Enable hardware performance counter support for perf events. If diff --git a/arch/mips/Kconfig.debug b/arch/mips/Kconfig.debug index 0c86b2a2adfc..93a2974d2ab7 100644 --- a/arch/mips/Kconfig.debug +++ b/arch/mips/Kconfig.debug @@ -32,7 +32,6 @@ config USE_GENERIC_EARLY_PRINTK_8250 config CMDLINE_BOOL bool "Built-in kernel command line" - default n help For most systems, it is firmware or second stage bootloader that by default specifies the kernel command line options. However, @@ -53,7 +52,6 @@ config CMDLINE_BOOL config CMDLINE string "Default kernel command string" depends on CMDLINE_BOOL - default "" help On some platforms, there is currently no way for the boot loader to pass arguments to the kernel. For these platforms, and for the cases @@ -68,7 +66,6 @@ config CMDLINE config CMDLINE_OVERRIDE bool "Built-in command line overrides firmware arguments" - default n depends on CMDLINE_BOOL help By setting this option to 'Y' you will have your kernel ignore diff --git a/arch/mips/Makefile b/arch/mips/Makefile index cdc09b71febe..e1c44aed8156 100644 --- a/arch/mips/Makefile +++ b/arch/mips/Makefile @@ -14,6 +14,9 @@ archscripts: scripts_basic $(Q)$(MAKE) $(build)=arch/mips/tools elf-entry +ifeq ($(CONFIG_CPU_LOONGSON3_WORKAROUNDS),y) + $(Q)$(MAKE) $(build)=arch/mips/tools loongson3-llsc-check +endif $(Q)$(MAKE) $(build)=arch/mips/boot/tools relocs KBUILD_DEFCONFIG := 32r2el_defconfig @@ -323,7 +326,7 @@ libs-$(CONFIG_MIPS_FP_SUPPORT) += arch/mips/math-emu/ # See arch/mips/Kbuild for content of core part of the kernel core-y += arch/mips/ -drivers-$(CONFIG_MIPS_CRC_SUPPORT) += arch/mips/crypto/ +drivers-y += arch/mips/crypto/ drivers-$(CONFIG_OPROFILE) += arch/mips/oprofile/ # suspend and hibernation support diff --git a/arch/mips/Makefile.postlink b/arch/mips/Makefile.postlink index 4eea4188cb20..f03fdc95143e 100644 --- a/arch/mips/Makefile.postlink +++ b/arch/mips/Makefile.postlink @@ -3,7 +3,8 @@ # Post-link MIPS pass # =========================================================================== # -# 1. Insert relocations into vmlinux +# 1. Check that Loongson3 LL/SC workarounds are applied correctly +# 2. 
Insert relocations into vmlinux PHONY := __archpost __archpost: @@ -11,6 +12,10 @@ __archpost: -include include/config/auto.conf include scripts/Kbuild.include +CMD_LS3_LLSC = arch/mips/tools/loongson3-llsc-check +quiet_cmd_ls3_llsc = LLSCCHK $@ + cmd_ls3_llsc = $(CMD_LS3_LLSC) $@ + CMD_RELOCS = arch/mips/boot/tools/relocs quiet_cmd_relocs = RELOCS $@ cmd_relocs = $(CMD_RELOCS) $@ @@ -19,6 +24,9 @@ quiet_cmd_relocs = RELOCS $@ vmlinux: FORCE @true +ifeq ($(CONFIG_CPU_LOONGSON3_WORKAROUNDS),y) + $(call if_changed,ls3_llsc) +endif ifeq ($(CONFIG_RELOCATABLE),y) $(call if_changed,relocs) endif diff --git a/arch/mips/bcm63xx/prom.c b/arch/mips/bcm63xx/prom.c index 77a836e661c9..df69eaa453a1 100644 --- a/arch/mips/bcm63xx/prom.c +++ b/arch/mips/bcm63xx/prom.c @@ -84,7 +84,7 @@ void __init prom_init(void) * Here we will start up CPU1 in the background and ask it to * reconfigure itself then go back to sleep. */ - memcpy((void *)0xa0000200, &bmips_smp_movevec, 0x20); + memcpy((void *)0xa0000200, bmips_smp_movevec, 0x20); __sync(); set_c0_cause(C_SW0); cpumask_set_cpu(1, &bmips_booted_mask); diff --git a/arch/mips/boot/dts/ingenic/ci20.dts b/arch/mips/boot/dts/ingenic/ci20.dts index 2e9952311ecd..37b93166bf22 100644 --- a/arch/mips/boot/dts/ingenic/ci20.dts +++ b/arch/mips/boot/dts/ingenic/ci20.dts @@ -25,12 +25,47 @@ 0x30000000 0x30000000>; }; + leds { + compatible = "gpio-leds"; + + led0 { + label = "ci20:red:led0"; + gpios = <&gpc 3 GPIO_ACTIVE_HIGH>; + linux,default-trigger = "none"; + }; + + led1 { + label = "ci20:red:led1"; + gpios = <&gpc 2 GPIO_ACTIVE_HIGH>; + linux,default-trigger = "nand-disk"; + }; + + led2 { + label = "ci20:red:led2"; + gpios = <&gpc 1 GPIO_ACTIVE_HIGH>; + linux,default-trigger = "cpu1"; + }; + + led3 { + label = "ci20:red:led3"; + gpios = <&gpc 0 GPIO_ACTIVE_HIGH>; + linux,default-trigger = "cpu0"; + }; + }; + eth0_power: fixedregulator@0 { compatible = "regulator-fixed"; regulator-name = "eth0_power"; gpio = <&gpb 25 GPIO_ACTIVE_LOW>; enable-active-high; }; + + wlan0_power: fixedregulator@1 { + compatible = "regulator-fixed"; + regulator-name = "wlan0_power"; + gpio = <&gpb 19 GPIO_ACTIVE_LOW>; + enable-active-high; + }; }; &ext { @@ -54,9 +89,18 @@ bus-width = <4>; max-frequency = <50000000>; + non-removable; pinctrl-names = "default"; pinctrl-0 = <&pins_mmc1>; + + brcmf: wifi@1 { +/* reg = <4>;*/ + compatible = "brcm,bcm4330-fmac"; + vcc-supply = <&wlan0_power>; + device-wakeup-gpios = <&gpd 9 GPIO_ACTIVE_HIGH>; + shutdown-gpios = <&gpf 7 GPIO_ACTIVE_LOW>; + }; }; &uart0 { @@ -73,6 +117,23 @@ pinctrl-0 = <&pins_uart1>; }; +&uart2 { + status = "okay"; + + pinctrl-names = "default"; + pinctrl-0 = <&pins_uart2>; + uart-has-rtscts; + + bluetooth { + compatible = "brcm,bcm4330-bt"; + reset-gpios = <&gpf 8 GPIO_ACTIVE_HIGH>; + vcc-supply = <&wlan0_power>; + device-wakeup-gpios = <&gpf 5 GPIO_ACTIVE_HIGH>; + host-wakeup-gpios = <&gpf 6 GPIO_ACTIVE_HIGH>; + shutdown-gpios = <&gpf 4 GPIO_ACTIVE_LOW>; + }; +}; + &uart3 { status = "okay"; @@ -87,6 +148,123 @@ pinctrl-0 = <&pins_uart4>; }; +&i2c0 { + status = "okay"; + + pinctrl-names = "default"; + pinctrl-0 = <&pins_i2c0>; + + clock-frequency = <400000>; + + act8600: act8600@5a { + compatible = "active-semi,act8600"; + reg = <0x5a>; + status = "okay"; + + regulators { + vddcore: SUDCDC1 { + regulator-name = "VDDCORE"; + regulator-min-microvolt = <1100000>; + regulator-max-microvolt = <1100000>; + regulator-always-on; + }; + vddmem: SUDCDC2 { + regulator-name = "VDDMEM"; + regulator-min-microvolt = <1500000>; + 
regulator-max-microvolt = <1500000>; + regulator-always-on; + }; + vcc_33: SUDCDC3 { + regulator-name = "VCC33"; + regulator-min-microvolt = <3300000>; + regulator-max-microvolt = <3300000>; + regulator-always-on; + }; + vcc_50: SUDCDC4 { + regulator-name = "VCC50"; + regulator-min-microvolt = <5000000>; + regulator-max-microvolt = <5000000>; + regulator-always-on; + }; + vcc_25: LDO_REG5 { + regulator-name = "VCC25"; + regulator-min-microvolt = <2500000>; + regulator-max-microvolt = <2500000>; + regulator-always-on; + }; + wifi_io: LDO_REG6 { + regulator-name = "WIFIIO"; + regulator-min-microvolt = <2500000>; + regulator-max-microvolt = <2500000>; + regulator-always-on; + }; + vcc_28: LDO_REG7 { + regulator-name = "VCC28"; + regulator-min-microvolt = <2800000>; + regulator-max-microvolt = <2800000>; + regulator-always-on; + }; + vcc_15: LDO_REG8 { + regulator-name = "VCC15"; + regulator-min-microvolt = <1500000>; + regulator-max-microvolt = <1500000>; + regulator-always-on; + }; + vcc_18: LDO_REG9 { + regulator-name = "VCC18"; + regulator-min-microvolt = <1800000>; + regulator-max-microvolt = <1800000>; + regulator-always-on; + }; + vcc_11: LDO_REG10 { + regulator-name = "VCC11"; + regulator-min-microvolt = <1100000>; + regulator-max-microvolt = <1100000>; + regulator-always-on; + }; + }; + }; +}; + +&i2c1 { + status = "okay"; + + pinctrl-names = "default"; + pinctrl-0 = <&pins_i2c1>; + +}; + +&i2c2 { + status = "okay"; + + pinctrl-names = "default"; + pinctrl-0 = <&pins_i2c2>; + +}; + +&i2c3 { + status = "okay"; + + pinctrl-names = "default"; + pinctrl-0 = <&pins_i2c3>; + +}; + +&i2c4 { + status = "okay"; + + pinctrl-names = "default"; + pinctrl-0 = <&pins_i2c4>; + + clock-frequency = <400000>; + + rtc@51 { + compatible = "nxp,pcf8563"; + reg = <0x51>; + interrupts = <110>; + }; +}; + &nemc { status = "okay"; @@ -197,6 +375,12 @@ bias-disable; }; + pins_uart2: uart2 { + function = "uart2"; + groups = "uart2-data", "uart2-hwflow"; + bias-disable; + }; + pins_uart3: uart3 { function = "uart3"; groups = "uart3-data", "uart3-hwflow"; @@ -209,6 +393,36 @@ bias-disable; }; + pins_i2c0: i2c0 { + function = "i2c0"; + groups = "i2c0-data"; + bias-disable; + }; + + pins_i2c1: i2c1 { + function = "i2c1"; + groups = "i2c1-data"; + bias-disable; + }; + + pins_i2c2: i2c2 { + function = "i2c2"; + groups = "i2c2-data"; + bias-disable; + }; + + pins_i2c3: i2c3 { + function = "i2c3"; + groups = "i2c3-data"; + bias-disable; + }; + + pins_i2c4: i2c4 { + function = "i2c4"; + groups = "i2c4-data-e"; + bias-disable; + }; + pins_nemc: nemc { function = "nemc"; groups = "nemc-data", "nemc-cle-ale", "nemc-rd-we", "nemc-frd-fwe"; diff --git a/arch/mips/boot/dts/ingenic/jz4780.dtsi b/arch/mips/boot/dts/ingenic/jz4780.dtsi index c54bd7cfec55..f928329b034b 100644 --- a/arch/mips/boot/dts/ingenic/jz4780.dtsi +++ b/arch/mips/boot/dts/ingenic/jz4780.dtsi @@ -262,6 +262,92 @@ status = "disabled"; }; + i2c0: i2c@10050000 { + compatible = "ingenic,jz4780-i2c"; + #address-cells = <1>; + #size-cells = <0>; + + reg = <0x10050000 0x1000>; + + interrupt-parent = <&intc>; + interrupts = <60>; + + clocks = <&cgu JZ4780_CLK_SMB0>; + clock-frequency = <100000>; + pinctrl-names = "default"; + pinctrl-0 = <&pins_i2c0_data>; + + status = "disabled"; + }; + + i2c1: i2c@10051000 { + compatible = "ingenic,jz4780-i2c"; + #address-cells = <1>; + #size-cells = <0>; + reg = <0x10051000 0x1000>; + + interrupt-parent = <&intc>; + interrupts = <59>; + + clocks = <&cgu JZ4780_CLK_SMB1>; + clock-frequency = <100000>; + pinctrl-names = "default"; 
+ pinctrl-0 = <&pins_i2c1_data>; + + status = "disabled"; + }; + + i2c2: i2c@10052000 { + compatible = "ingenic,jz4780-i2c"; + #address-cells = <1>; + #size-cells = <0>; + reg = <0x10052000 0x1000>; + + interrupt-parent = <&intc>; + interrupts = <58>; + + clocks = <&cgu JZ4780_CLK_SMB2>; + clock-frequency = <100000>; + pinctrl-names = "default"; + pinctrl-0 = <&pins_i2c2_data>; + + status = "disabled"; + }; + + i2c3: i2c@10053000 { + compatible = "ingenic,jz4780-i2c"; + #address-cells = <1>; + #size-cells = <0>; + reg = <0x10053000 0x1000>; + + interrupt-parent = <&intc>; + interrupts = <57>; + + clocks = <&cgu JZ4780_CLK_SMB3>; + clock-frequency = <100000>; + pinctrl-names = "default"; + pinctrl-0 = <&pins_i2c3_data>; + + status = "disabled"; + }; + + i2c4: i2c@10054000 { + compatible = "ingenic,jz4780-i2c"; + #address-cells = <1>; + #size-cells = <0>; + reg = <0x10054000 0x1000>; + + interrupt-parent = <&intc>; + interrupts = <56>; + + clocks = <&cgu JZ4780_CLK_SMB4>; + clock-frequency = <100000>; + pinctrl-names = "default"; + pinctrl-0 = <&pins_i2c4_data>; + + status = "disabled"; + }; + watchdog: watchdog@10002000 { compatible = "ingenic,jz4780-watchdog"; reg = <0x10002000 0x10>; diff --git a/arch/mips/boot/dts/qca/ar9331.dtsi b/arch/mips/boot/dts/qca/ar9331.dtsi index 63a9f33aa43e..5cfc9d347826 100644 --- a/arch/mips/boot/dts/qca/ar9331.dtsi +++ b/arch/mips/boot/dts/qca/ar9331.dtsi @@ -99,7 +99,7 @@ miscintc: interrupt-controller@18060010 { compatible = "qca,ar7240-misc-intc"; - reg = <0x18060010 0x4>; + reg = <0x18060010 0x8>; interrupt-parent = <&cpuintc>; interrupts = <6>; diff --git a/arch/mips/boot/dts/ralink/gardena_smart_gateway_mt7688.dts b/arch/mips/boot/dts/ralink/gardena_smart_gateway_mt7688.dts new file mode 100644 index 000000000000..aa5caaa31104 --- /dev/null +++ b/arch/mips/boot/dts/ralink/gardena_smart_gateway_mt7688.dts @@ -0,0 +1,197 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (c) 2019 Stefan Roese <sr@denx.de> + */ + +/dts-v1/; + +/include/ "mt7628a.dtsi" + +#include <dt-bindings/gpio/gpio.h> +#include <dt-bindings/input/input.h> + +/ { + compatible = "gardena,smart-gateway-mt7688", "ralink,mt7688a-soc", + "ralink,mt7628a-soc"; + model = "GARDENA smart Gateway (MT7688)"; + + memory@0 { + device_type = "memory"; + reg = <0x0 0x8000000>; + }; + + gpio-keys { + compatible = "gpio-keys"; + + pinctrl-names = "default"; + pinctrl-0 = <&pinmux_gpio_gpio>; /* GPIO11 */ + + user_btn1 { + label = "USER_BTN1"; + gpios = <&gpio 11 GPIO_ACTIVE_LOW>; + linux,code =<KEY_PROG1> ; + }; + }; + + leds { + compatible = "gpio-leds"; + + pinctrl-names = "default"; + pinctrl-0 = <&pinmux_pwm0_gpio>, /* GPIO18 */ + <&pinmux_pwm1_gpio>, /* GPIO19 */ + <&pinmux_sdmode_gpio>, /* GPIO22..29 */ + <&pinmux_p0led_an_gpio>; /* GPIO43 */ + /* + * <&pinmux_i2s_gpio> (covers GPIO0..3) is needed here as + * well for GPIO3. But this is already claimed for uart1 + * (see below). So we can't include it in this LED node. 
+ */ + + power_blue { + label = "smartgw:power:blue"; + gpios = <&gpio 18 GPIO_ACTIVE_HIGH>; + default-state = "off"; + }; + + power_green { + label = "smartgw:power:green"; + gpios = <&gpio 19 GPIO_ACTIVE_HIGH>; + default-state = "off"; + }; + + power_red { + label = "smartgw:power:red"; + gpios = <&gpio 22 GPIO_ACTIVE_HIGH>; + default-state = "off"; + }; + + radio_blue { + label = "smartgw:radio:blue"; + gpios = <&gpio 23 GPIO_ACTIVE_HIGH>; + default-state = "off"; + }; + + radio_green { + label = "smartgw:radio:green"; + gpios = <&gpio 24 GPIO_ACTIVE_HIGH>; + default-state = "off"; + }; + + radio_red { + label = "smartgw:radio:red"; + gpios = <&gpio 25 GPIO_ACTIVE_HIGH>; + default-state = "off"; + }; + + internet_blue { + label = "smartgw:internet:blue"; + gpios = <&gpio 26 GPIO_ACTIVE_HIGH>; + default-state = "off"; + }; + + internet_green { + label = "smartgw:internet:green"; + gpios = <&gpio 27 GPIO_ACTIVE_HIGH>; + default-state = "off"; + }; + + internet_red { + label = "smartgw:internet:red"; + gpios = <&gpio 28 GPIO_ACTIVE_HIGH>; + default-state = "off"; + }; + + ethernet_link { + label = "smartgw:eth:link"; + gpios = <&gpio 3 GPIO_ACTIVE_LOW>; + linux,default-trigger = "netdev"; + }; + + ethernet_activity { + label = "smartgw:eth:act"; + gpios = <&gpio 43 GPIO_ACTIVE_LOW>; + linux,default-trigger = "netdev"; + }; + }; + + aliases { + serial0 = &uart0; + }; +}; + +&i2c { + status = "okay"; +}; + +&spi { + status = "okay"; + + pinctrl-names = "default"; + pinctrl-0 = <&pinmux_spi_spi>, <&pinmux_spi_cs1_cs>; + + m25p80@0 { + compatible = "jedec,spi-nor"; + reg = <0>; + spi-max-frequency = <40000000>; + + partitions { + compatible = "fixed-partitions"; + #address-cells = <1>; + #size-cells = <1>; + + partition@0 { + label = "uboot"; + reg = <0x0 0xa0000>; + read-only; + }; + + partition@a0000 { + label = "uboot_env0"; + reg = <0xa0000 0x10000>; + }; + + partition@b0000 { + label = "uboot_env1"; + reg = <0xb0000 0x10000>; + }; + + factory: partition@c0000 { + label = "factory"; + reg = <0xc0000 0x10000>; + read-only; + }; + }; + }; + + nand_flash@1 { + compatible = "spi-nand"; + linux,mtd-name = "gd5f"; + reg = <1>; + spi-max-frequency = <40000000>; + }; +}; + +&uart1 { + status = "okay"; + + pinctrl-names = "default"; + pinctrl-0 = <&pinmux_i2s_gpio>; /* GPIO0..3 */ + + rts-gpios = <&gpio 1 GPIO_ACTIVE_LOW>; + cts-gpios = <&gpio 2 GPIO_ACTIVE_LOW>; +}; + +&uart2 { + status = "okay"; + + pinctrl-names = "default"; + pinctrl-0 = <&pinmux_p2led_an_gpio>, /* GPIO41 */ + <&pinmux_p3led_an_gpio>; /* GPIO40 */ + + rts-gpios = <&gpio 40 GPIO_ACTIVE_LOW>; + cts-gpios = <&gpio 41 GPIO_ACTIVE_LOW>; +}; + +&watchdog { + status = "okay"; +}; diff --git a/arch/mips/boot/dts/ralink/mt7628a.dtsi b/arch/mips/boot/dts/ralink/mt7628a.dtsi index 61f8621e88b3..742bcc1dc2e0 100644 --- a/arch/mips/boot/dts/ralink/mt7628a.dtsi +++ b/arch/mips/boot/dts/ralink/mt7628a.dtsi @@ -199,6 +199,22 @@ status = "disabled"; }; + i2c: i2c@900 { + compatible = "mediatek,mt7621-i2c"; + reg = <0x900 0x100>; + + pinctrl-names = "default"; + pinctrl-0 = <&pinmux_i2c_i2c>; + + resets = <&resetc 16>; + reset-names = "i2c"; + + #address-cells = <1>; + #size-cells = <0>; + + status = "disabled"; + }; + uart0: uartlite@c00 { compatible = "ns16550a"; reg = <0xc00 0x100>; diff --git a/arch/mips/cavium-octeon/setup.c b/arch/mips/cavium-octeon/setup.c index 95034bf5ca83..1f742c32a883 100644 --- a/arch/mips/cavium-octeon/setup.c +++ b/arch/mips/cavium-octeon/setup.c @@ -844,7 +844,7 @@ void __init prom_init(void) * BIST should always 
be enabled when doing a soft reset. L2 * Cache locking for instance is not cleared unless BIST is * enabled. Unfortunately due to a chip errata G-200 for - * Cn38XX and CN31XX, BIST msut be disabled on these parts. + * Cn38XX and CN31XX, BIST must be disabled on these parts. */ if (OCTEON_IS_MODEL(OCTEON_CN38XX_PASS2) || OCTEON_IS_MODEL(OCTEON_CN31XX)) diff --git a/arch/mips/configs/fuloong2e_defconfig b/arch/mips/configs/fuloong2e_defconfig index 7a7af706e898..1788ae23bff9 100644 --- a/arch/mips/configs/fuloong2e_defconfig +++ b/arch/mips/configs/fuloong2e_defconfig @@ -15,7 +15,7 @@ CONFIG_EXPERT=y # CONFIG_COMPAT_BRK is not set CONFIG_SLAB=y CONFIG_PROFILING=y -CONFIG_MACH_LOONGSON64=y +CONFIG_MACH_LOONGSON2EF=y CONFIG_PCI=y CONFIG_MIPS32_O32=y CONFIG_MIPS32_N32=y diff --git a/arch/mips/configs/lemote2f_defconfig b/arch/mips/configs/lemote2f_defconfig index d44f1469cf64..f9f93427c9bd 100644 --- a/arch/mips/configs/lemote2f_defconfig +++ b/arch/mips/configs/lemote2f_defconfig @@ -12,7 +12,7 @@ CONFIG_LOG_BUF_SHIFT=15 CONFIG_BLK_DEV_INITRD=y CONFIG_EXPERT=y CONFIG_PROFILING=y -CONFIG_MACH_LOONGSON64=y +CONFIG_MACH_LOONGSON2EF=y CONFIG_LEMOTE_MACH2F=y CONFIG_KEXEC=y # CONFIG_SECCOMP is not set diff --git a/arch/mips/configs/loongson3_defconfig b/arch/mips/configs/loongson3_defconfig index 90ee0084d786..c16a2330e84d 100644 --- a/arch/mips/configs/loongson3_defconfig +++ b/arch/mips/configs/loongson3_defconfig @@ -12,7 +12,6 @@ CONFIG_TASKSTATS=y CONFIG_TASK_DELAY_ACCT=y CONFIG_TASK_XACCT=y CONFIG_TASK_IO_ACCOUNTING=y -CONFIG_LOG_BUF_SHIFT=14 CONFIG_MEMCG=y CONFIG_MEMCG_SWAP=y CONFIG_BLK_CGROUP=y @@ -24,7 +23,6 @@ CONFIG_BLK_DEV_INITRD=y CONFIG_SYSCTL_SYSCALL=y CONFIG_EMBEDDED=y CONFIG_MACH_LOONGSON64=y -CONFIG_LOONGSON_MACH3X=y CONFIG_SMP=y CONFIG_HZ_256=y CONFIG_KEXEC=y diff --git a/arch/mips/configs/mtx1_defconfig b/arch/mips/configs/mtx1_defconfig index 16bef819fe98..914af125a7fa 100644 --- a/arch/mips/configs/mtx1_defconfig +++ b/arch/mips/configs/mtx1_defconfig @@ -571,7 +571,6 @@ CONFIG_USB_SERIAL_OMNINET=m CONFIG_USB_EMI62=m CONFIG_USB_EMI26=m CONFIG_USB_ADUTUX=m -CONFIG_USB_RIO500=m CONFIG_USB_LEGOTOWER=m CONFIG_USB_LCD=m CONFIG_USB_CYPRESS_CY7C63=m diff --git a/arch/mips/configs/rm200_defconfig b/arch/mips/configs/rm200_defconfig index 8762e75f5d5f..2c7adea7638f 100644 --- a/arch/mips/configs/rm200_defconfig +++ b/arch/mips/configs/rm200_defconfig @@ -314,7 +314,6 @@ CONFIG_USB_SERIAL_SAFE_PADDED=y CONFIG_USB_SERIAL_CYBERJACK=m CONFIG_USB_SERIAL_XIRCOM=m CONFIG_USB_SERIAL_OMNINET=m -CONFIG_USB_RIO500=m CONFIG_USB_LEGOTOWER=m CONFIG_USB_LCD=m CONFIG_USB_CYTHERM=m diff --git a/arch/mips/crypto/Makefile b/arch/mips/crypto/Makefile index e07aca572c2e..8e1deaf00e0c 100644 --- a/arch/mips/crypto/Makefile +++ b/arch/mips/crypto/Makefile @@ -4,3 +4,21 @@ # obj-$(CONFIG_CRYPTO_CRC32_MIPS) += crc32-mips.o + +obj-$(CONFIG_CRYPTO_CHACHA_MIPS) += chacha-mips.o +chacha-mips-y := chacha-core.o chacha-glue.o +AFLAGS_chacha-core.o += -O2 # needed to fill branch delay slots + +obj-$(CONFIG_CRYPTO_POLY1305_MIPS) += poly1305-mips.o +poly1305-mips-y := poly1305-core.o poly1305-glue.o + +perlasm-flavour-$(CONFIG_CPU_MIPS32) := o32 +perlasm-flavour-$(CONFIG_CPU_MIPS64) := 64 + +quiet_cmd_perlasm = PERLASM $@ + cmd_perlasm = $(PERL) $(<) $(perlasm-flavour-y) $(@) + +$(obj)/poly1305-core.S: $(src)/poly1305-mips.pl FORCE + $(call if_changed,perlasm) + +targets += poly1305-core.S diff --git a/arch/mips/crypto/chacha-core.S b/arch/mips/crypto/chacha-core.S new file mode 100644 index 000000000000..5755f69cfe00 
--- /dev/null +++ b/arch/mips/crypto/chacha-core.S @@ -0,0 +1,497 @@ +/* SPDX-License-Identifier: GPL-2.0 OR MIT */ +/* + * Copyright (C) 2016-2018 René van Dorst <opensource@vdorst.com>. All Rights Reserved. + * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved. + */ + +#define MASK_U32 0x3c +#define CHACHA20_BLOCK_SIZE 64 +#define STACK_SIZE 32 + +#define X0 $t0 +#define X1 $t1 +#define X2 $t2 +#define X3 $t3 +#define X4 $t4 +#define X5 $t5 +#define X6 $t6 +#define X7 $t7 +#define X8 $t8 +#define X9 $t9 +#define X10 $v1 +#define X11 $s6 +#define X12 $s5 +#define X13 $s4 +#define X14 $s3 +#define X15 $s2 +/* Use regs which are overwritten on exit for Tx so we don't leak clear data. */ +#define T0 $s1 +#define T1 $s0 +#define T(n) T ## n +#define X(n) X ## n + +/* Input arguments */ +#define STATE $a0 +#define OUT $a1 +#define IN $a2 +#define BYTES $a3 + +/* Output argument */ +/* NONCE[0] is kept in a register and not in memory. + * We don't want to touch original value in memory. + * Must be incremented every loop iteration. + */ +#define NONCE_0 $v0 + +/* SAVED_X and SAVED_CA are set in the jump table. + * Use regs which are overwritten on exit else we don't leak clear data. + * They are used to handling the last bytes which are not multiple of 4. + */ +#define SAVED_X X15 +#define SAVED_CA $s7 + +#define IS_UNALIGNED $s7 + +#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ +#define MSB 0 +#define LSB 3 +#define ROTx rotl +#define ROTR(n) rotr n, 24 +#define CPU_TO_LE32(n) \ + wsbh n; \ + rotr n, 16; +#else +#define MSB 3 +#define LSB 0 +#define ROTx rotr +#define CPU_TO_LE32(n) +#define ROTR(n) +#endif + +#define FOR_EACH_WORD(x) \ + x( 0); \ + x( 1); \ + x( 2); \ + x( 3); \ + x( 4); \ + x( 5); \ + x( 6); \ + x( 7); \ + x( 8); \ + x( 9); \ + x(10); \ + x(11); \ + x(12); \ + x(13); \ + x(14); \ + x(15); + +#define FOR_EACH_WORD_REV(x) \ + x(15); \ + x(14); \ + x(13); \ + x(12); \ + x(11); \ + x(10); \ + x( 9); \ + x( 8); \ + x( 7); \ + x( 6); \ + x( 5); \ + x( 4); \ + x( 3); \ + x( 2); \ + x( 1); \ + x( 0); + +#define PLUS_ONE_0 1 +#define PLUS_ONE_1 2 +#define PLUS_ONE_2 3 +#define PLUS_ONE_3 4 +#define PLUS_ONE_4 5 +#define PLUS_ONE_5 6 +#define PLUS_ONE_6 7 +#define PLUS_ONE_7 8 +#define PLUS_ONE_8 9 +#define PLUS_ONE_9 10 +#define PLUS_ONE_10 11 +#define PLUS_ONE_11 12 +#define PLUS_ONE_12 13 +#define PLUS_ONE_13 14 +#define PLUS_ONE_14 15 +#define PLUS_ONE_15 16 +#define PLUS_ONE(x) PLUS_ONE_ ## x +#define _CONCAT3(a,b,c) a ## b ## c +#define CONCAT3(a,b,c) _CONCAT3(a,b,c) + +#define STORE_UNALIGNED(x) \ +CONCAT3(.Lchacha_mips_xor_unaligned_, PLUS_ONE(x), _b: ;) \ + .if (x != 12); \ + lw T0, (x*4)(STATE); \ + .endif; \ + lwl T1, (x*4)+MSB ## (IN); \ + lwr T1, (x*4)+LSB ## (IN); \ + .if (x == 12); \ + addu X ## x, NONCE_0; \ + .else; \ + addu X ## x, T0; \ + .endif; \ + CPU_TO_LE32(X ## x); \ + xor X ## x, T1; \ + swl X ## x, (x*4)+MSB ## (OUT); \ + swr X ## x, (x*4)+LSB ## (OUT); + +#define STORE_ALIGNED(x) \ +CONCAT3(.Lchacha_mips_xor_aligned_, PLUS_ONE(x), _b: ;) \ + .if (x != 12); \ + lw T0, (x*4)(STATE); \ + .endif; \ + lw T1, (x*4) ## (IN); \ + .if (x == 12); \ + addu X ## x, NONCE_0; \ + .else; \ + addu X ## x, T0; \ + .endif; \ + CPU_TO_LE32(X ## x); \ + xor X ## x, T1; \ + sw X ## x, (x*4) ## (OUT); + +/* Jump table macro. + * Used for setup and handling the last bytes, which are not multiple of 4. + * X15 is free to store Xn + * Every jumptable entry must be equal in size. 
+ */ +#define JMPTBL_ALIGNED(x) \ +.Lchacha_mips_jmptbl_aligned_ ## x: ; \ + .set noreorder; \ + b .Lchacha_mips_xor_aligned_ ## x ## _b; \ + .if (x == 12); \ + addu SAVED_X, X ## x, NONCE_0; \ + .else; \ + addu SAVED_X, X ## x, SAVED_CA; \ + .endif; \ + .set reorder + +#define JMPTBL_UNALIGNED(x) \ +.Lchacha_mips_jmptbl_unaligned_ ## x: ; \ + .set noreorder; \ + b .Lchacha_mips_xor_unaligned_ ## x ## _b; \ + .if (x == 12); \ + addu SAVED_X, X ## x, NONCE_0; \ + .else; \ + addu SAVED_X, X ## x, SAVED_CA; \ + .endif; \ + .set reorder + +#define AXR(A, B, C, D, K, L, M, N, V, W, Y, Z, S) \ + addu X(A), X(K); \ + addu X(B), X(L); \ + addu X(C), X(M); \ + addu X(D), X(N); \ + xor X(V), X(A); \ + xor X(W), X(B); \ + xor X(Y), X(C); \ + xor X(Z), X(D); \ + rotl X(V), S; \ + rotl X(W), S; \ + rotl X(Y), S; \ + rotl X(Z), S; + +.text +.set reorder +.set noat +.globl chacha_crypt_arch +.ent chacha_crypt_arch +chacha_crypt_arch: + .frame $sp, STACK_SIZE, $ra + + /* Load number of rounds */ + lw $at, 16($sp) + + addiu $sp, -STACK_SIZE + + /* Return bytes = 0. */ + beqz BYTES, .Lchacha_mips_end + + lw NONCE_0, 48(STATE) + + /* Save s0-s7 */ + sw $s0, 0($sp) + sw $s1, 4($sp) + sw $s2, 8($sp) + sw $s3, 12($sp) + sw $s4, 16($sp) + sw $s5, 20($sp) + sw $s6, 24($sp) + sw $s7, 28($sp) + + /* Test IN or OUT is unaligned. + * IS_UNALIGNED = ( IN | OUT ) & 0x00000003 + */ + or IS_UNALIGNED, IN, OUT + andi IS_UNALIGNED, 0x3 + + b .Lchacha_rounds_start + +.align 4 +.Loop_chacha_rounds: + addiu IN, CHACHA20_BLOCK_SIZE + addiu OUT, CHACHA20_BLOCK_SIZE + addiu NONCE_0, 1 + +.Lchacha_rounds_start: + lw X0, 0(STATE) + lw X1, 4(STATE) + lw X2, 8(STATE) + lw X3, 12(STATE) + + lw X4, 16(STATE) + lw X5, 20(STATE) + lw X6, 24(STATE) + lw X7, 28(STATE) + lw X8, 32(STATE) + lw X9, 36(STATE) + lw X10, 40(STATE) + lw X11, 44(STATE) + + move X12, NONCE_0 + lw X13, 52(STATE) + lw X14, 56(STATE) + lw X15, 60(STATE) + +.Loop_chacha_xor_rounds: + addiu $at, -2 + AXR( 0, 1, 2, 3, 4, 5, 6, 7, 12,13,14,15, 16); + AXR( 8, 9,10,11, 12,13,14,15, 4, 5, 6, 7, 12); + AXR( 0, 1, 2, 3, 4, 5, 6, 7, 12,13,14,15, 8); + AXR( 8, 9,10,11, 12,13,14,15, 4, 5, 6, 7, 7); + AXR( 0, 1, 2, 3, 5, 6, 7, 4, 15,12,13,14, 16); + AXR(10,11, 8, 9, 15,12,13,14, 5, 6, 7, 4, 12); + AXR( 0, 1, 2, 3, 5, 6, 7, 4, 15,12,13,14, 8); + AXR(10,11, 8, 9, 15,12,13,14, 5, 6, 7, 4, 7); + bnez $at, .Loop_chacha_xor_rounds + + addiu BYTES, -(CHACHA20_BLOCK_SIZE) + + /* Is data src/dst unaligned? Jump */ + bnez IS_UNALIGNED, .Loop_chacha_unaligned + + /* Set number rounds here to fill delayslot. */ + lw $at, (STACK_SIZE+16)($sp) + + /* BYTES < 0, it has no full block. */ + bltz BYTES, .Lchacha_mips_no_full_block_aligned + + FOR_EACH_WORD_REV(STORE_ALIGNED) + + /* BYTES > 0? Loop again. */ + bgtz BYTES, .Loop_chacha_rounds + + /* Place this here to fill delay slot */ + addiu NONCE_0, 1 + + /* BYTES < 0? 
Handle last bytes */ + bltz BYTES, .Lchacha_mips_xor_bytes + +.Lchacha_mips_xor_done: + /* Restore used registers */ + lw $s0, 0($sp) + lw $s1, 4($sp) + lw $s2, 8($sp) + lw $s3, 12($sp) + lw $s4, 16($sp) + lw $s5, 20($sp) + lw $s6, 24($sp) + lw $s7, 28($sp) + + /* Write NONCE_0 back to right location in state */ + sw NONCE_0, 48(STATE) + +.Lchacha_mips_end: + addiu $sp, STACK_SIZE + jr $ra + +.Lchacha_mips_no_full_block_aligned: + /* Restore the offset on BYTES */ + addiu BYTES, CHACHA20_BLOCK_SIZE + + /* Get number of full WORDS */ + andi $at, BYTES, MASK_U32 + + /* Load upper half of jump table addr */ + lui T0, %hi(.Lchacha_mips_jmptbl_aligned_0) + + /* Calculate lower half jump table offset */ + ins T0, $at, 1, 6 + + /* Add offset to STATE */ + addu T1, STATE, $at + + /* Add lower half jump table addr */ + addiu T0, %lo(.Lchacha_mips_jmptbl_aligned_0) + + /* Read value from STATE */ + lw SAVED_CA, 0(T1) + + /* Store remaining bytecounter as negative value */ + subu BYTES, $at, BYTES + + jr T0 + + /* Jump table */ + FOR_EACH_WORD(JMPTBL_ALIGNED) + + +.Loop_chacha_unaligned: + /* Set number rounds here to fill delayslot. */ + lw $at, (STACK_SIZE+16)($sp) + + /* BYTES > 0, it has no full block. */ + bltz BYTES, .Lchacha_mips_no_full_block_unaligned + + FOR_EACH_WORD_REV(STORE_UNALIGNED) + + /* BYTES > 0? Loop again. */ + bgtz BYTES, .Loop_chacha_rounds + + /* Write NONCE_0 back to right location in state */ + sw NONCE_0, 48(STATE) + + .set noreorder + /* Fall through to byte handling */ + bgez BYTES, .Lchacha_mips_xor_done +.Lchacha_mips_xor_unaligned_0_b: +.Lchacha_mips_xor_aligned_0_b: + /* Place this here to fill delay slot */ + addiu NONCE_0, 1 + .set reorder + +.Lchacha_mips_xor_bytes: + addu IN, $at + addu OUT, $at + /* First byte */ + lbu T1, 0(IN) + addiu $at, BYTES, 1 + CPU_TO_LE32(SAVED_X) + ROTR(SAVED_X) + xor T1, SAVED_X + sb T1, 0(OUT) + beqz $at, .Lchacha_mips_xor_done + /* Second byte */ + lbu T1, 1(IN) + addiu $at, BYTES, 2 + ROTx SAVED_X, 8 + xor T1, SAVED_X + sb T1, 1(OUT) + beqz $at, .Lchacha_mips_xor_done + /* Third byte */ + lbu T1, 2(IN) + ROTx SAVED_X, 8 + xor T1, SAVED_X + sb T1, 2(OUT) + b .Lchacha_mips_xor_done + +.Lchacha_mips_no_full_block_unaligned: + /* Restore the offset on BYTES */ + addiu BYTES, CHACHA20_BLOCK_SIZE + + /* Get number of full WORDS */ + andi $at, BYTES, MASK_U32 + + /* Load upper half of jump table addr */ + lui T0, %hi(.Lchacha_mips_jmptbl_unaligned_0) + + /* Calculate lower half jump table offset */ + ins T0, $at, 1, 6 + + /* Add offset to STATE */ + addu T1, STATE, $at + + /* Add lower half jump table addr */ + addiu T0, %lo(.Lchacha_mips_jmptbl_unaligned_0) + + /* Read value from STATE */ + lw SAVED_CA, 0(T1) + + /* Store remaining bytecounter as negative value */ + subu BYTES, $at, BYTES + + jr T0 + + /* Jump table */ + FOR_EACH_WORD(JMPTBL_UNALIGNED) +.end chacha_crypt_arch +.set at + +/* Input arguments + * STATE $a0 + * OUT $a1 + * NROUND $a2 + */ + +#undef X12 +#undef X13 +#undef X14 +#undef X15 + +#define X12 $a3 +#define X13 $at +#define X14 $v0 +#define X15 STATE + +.set noat +.globl hchacha_block_arch +.ent hchacha_block_arch +hchacha_block_arch: + .frame $sp, STACK_SIZE, $ra + + addiu $sp, -STACK_SIZE + + /* Save X11(s6) */ + sw X11, 0($sp) + + lw X0, 0(STATE) + lw X1, 4(STATE) + lw X2, 8(STATE) + lw X3, 12(STATE) + lw X4, 16(STATE) + lw X5, 20(STATE) + lw X6, 24(STATE) + lw X7, 28(STATE) + lw X8, 32(STATE) + lw X9, 36(STATE) + lw X10, 40(STATE) + lw X11, 44(STATE) + lw X12, 48(STATE) + lw X13, 52(STATE) + lw X14, 56(STATE) + 
lw X15, 60(STATE) + +.Loop_hchacha_xor_rounds: + addiu $a2, -2 + AXR( 0, 1, 2, 3, 4, 5, 6, 7, 12,13,14,15, 16); + AXR( 8, 9,10,11, 12,13,14,15, 4, 5, 6, 7, 12); + AXR( 0, 1, 2, 3, 4, 5, 6, 7, 12,13,14,15, 8); + AXR( 8, 9,10,11, 12,13,14,15, 4, 5, 6, 7, 7); + AXR( 0, 1, 2, 3, 5, 6, 7, 4, 15,12,13,14, 16); + AXR(10,11, 8, 9, 15,12,13,14, 5, 6, 7, 4, 12); + AXR( 0, 1, 2, 3, 5, 6, 7, 4, 15,12,13,14, 8); + AXR(10,11, 8, 9, 15,12,13,14, 5, 6, 7, 4, 7); + bnez $a2, .Loop_hchacha_xor_rounds + + /* Restore used register */ + lw X11, 0($sp) + + sw X0, 0(OUT) + sw X1, 4(OUT) + sw X2, 8(OUT) + sw X3, 12(OUT) + sw X12, 16(OUT) + sw X13, 20(OUT) + sw X14, 24(OUT) + sw X15, 28(OUT) + + addiu $sp, STACK_SIZE + jr $ra +.end hchacha_block_arch +.set at diff --git a/arch/mips/crypto/chacha-glue.c b/arch/mips/crypto/chacha-glue.c new file mode 100644 index 000000000000..779e399c9bef --- /dev/null +++ b/arch/mips/crypto/chacha-glue.c @@ -0,0 +1,150 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * MIPS accelerated ChaCha and XChaCha stream ciphers, + * including ChaCha20 (RFC7539) + * + * Copyright (C) 2019 Linaro, Ltd. <ard.biesheuvel@linaro.org> + */ + +#include <asm/byteorder.h> +#include <crypto/algapi.h> +#include <crypto/internal/chacha.h> +#include <crypto/internal/skcipher.h> +#include <linux/kernel.h> +#include <linux/module.h> + +asmlinkage void chacha_crypt_arch(u32 *state, u8 *dst, const u8 *src, + unsigned int bytes, int nrounds); +EXPORT_SYMBOL(chacha_crypt_arch); + +asmlinkage void hchacha_block_arch(const u32 *state, u32 *stream, int nrounds); +EXPORT_SYMBOL(hchacha_block_arch); + +void chacha_init_arch(u32 *state, const u32 *key, const u8 *iv) +{ + chacha_init_generic(state, key, iv); +} +EXPORT_SYMBOL(chacha_init_arch); + +static int chacha_mips_stream_xor(struct skcipher_request *req, + const struct chacha_ctx *ctx, const u8 *iv) +{ + struct skcipher_walk walk; + u32 state[16]; + int err; + + err = skcipher_walk_virt(&walk, req, false); + + chacha_init_generic(state, ctx->key, iv); + + while (walk.nbytes > 0) { + unsigned int nbytes = walk.nbytes; + + if (nbytes < walk.total) + nbytes = round_down(nbytes, walk.stride); + + chacha_crypt(state, walk.dst.virt.addr, walk.src.virt.addr, + nbytes, ctx->nrounds); + err = skcipher_walk_done(&walk, walk.nbytes - nbytes); + } + + return err; +} + +static int chacha_mips(struct skcipher_request *req) +{ + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct chacha_ctx *ctx = crypto_skcipher_ctx(tfm); + + return chacha_mips_stream_xor(req, ctx, req->iv); +} + +static int xchacha_mips(struct skcipher_request *req) +{ + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct chacha_ctx *ctx = crypto_skcipher_ctx(tfm); + struct chacha_ctx subctx; + u32 state[16]; + u8 real_iv[16]; + + chacha_init_generic(state, ctx->key, req->iv); + + hchacha_block(state, subctx.key, ctx->nrounds); + subctx.nrounds = ctx->nrounds; + + memcpy(&real_iv[0], req->iv + 24, 8); + memcpy(&real_iv[8], req->iv + 16, 8); + return chacha_mips_stream_xor(req, &subctx, real_iv); +} + +static struct skcipher_alg algs[] = { + { + .base.cra_name = "chacha20", + .base.cra_driver_name = "chacha20-mips", + .base.cra_priority = 200, + .base.cra_blocksize = 1, + .base.cra_ctxsize = sizeof(struct chacha_ctx), + .base.cra_module = THIS_MODULE, + + .min_keysize = CHACHA_KEY_SIZE, + .max_keysize = CHACHA_KEY_SIZE, + .ivsize = CHACHA_IV_SIZE, + .chunksize = CHACHA_BLOCK_SIZE, + .setkey = chacha20_setkey, + .encrypt = chacha_mips, + .decrypt = chacha_mips, + }, { + 
.base.cra_name = "xchacha20", + .base.cra_driver_name = "xchacha20-mips", + .base.cra_priority = 200, + .base.cra_blocksize = 1, + .base.cra_ctxsize = sizeof(struct chacha_ctx), + .base.cra_module = THIS_MODULE, + + .min_keysize = CHACHA_KEY_SIZE, + .max_keysize = CHACHA_KEY_SIZE, + .ivsize = XCHACHA_IV_SIZE, + .chunksize = CHACHA_BLOCK_SIZE, + .setkey = chacha20_setkey, + .encrypt = xchacha_mips, + .decrypt = xchacha_mips, + }, { + .base.cra_name = "xchacha12", + .base.cra_driver_name = "xchacha12-mips", + .base.cra_priority = 200, + .base.cra_blocksize = 1, + .base.cra_ctxsize = sizeof(struct chacha_ctx), + .base.cra_module = THIS_MODULE, + + .min_keysize = CHACHA_KEY_SIZE, + .max_keysize = CHACHA_KEY_SIZE, + .ivsize = XCHACHA_IV_SIZE, + .chunksize = CHACHA_BLOCK_SIZE, + .setkey = chacha12_setkey, + .encrypt = xchacha_mips, + .decrypt = xchacha_mips, + } +}; + +static int __init chacha_simd_mod_init(void) +{ + return crypto_register_skciphers(algs, ARRAY_SIZE(algs)); +} + +static void __exit chacha_simd_mod_fini(void) +{ + crypto_unregister_skciphers(algs, ARRAY_SIZE(algs)); +} + +module_init(chacha_simd_mod_init); +module_exit(chacha_simd_mod_fini); + +MODULE_DESCRIPTION("ChaCha and XChaCha stream ciphers (MIPS accelerated)"); +MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>"); +MODULE_LICENSE("GPL v2"); +MODULE_ALIAS_CRYPTO("chacha20"); +MODULE_ALIAS_CRYPTO("chacha20-mips"); +MODULE_ALIAS_CRYPTO("xchacha20"); +MODULE_ALIAS_CRYPTO("xchacha20-mips"); +MODULE_ALIAS_CRYPTO("xchacha12"); +MODULE_ALIAS_CRYPTO("xchacha12-mips"); diff --git a/arch/mips/crypto/poly1305-glue.c b/arch/mips/crypto/poly1305-glue.c new file mode 100644 index 000000000000..b759b6ccc361 --- /dev/null +++ b/arch/mips/crypto/poly1305-glue.c @@ -0,0 +1,203 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * OpenSSL/Cryptogams accelerated Poly1305 transform for MIPS + * + * Copyright (C) 2019 Linaro Ltd. 
<ard.biesheuvel@linaro.org> + */ + +#include <asm/unaligned.h> +#include <crypto/algapi.h> +#include <crypto/internal/hash.h> +#include <crypto/internal/poly1305.h> +#include <linux/cpufeature.h> +#include <linux/crypto.h> +#include <linux/module.h> + +asmlinkage void poly1305_init_mips(void *state, const u8 *key); +asmlinkage void poly1305_blocks_mips(void *state, const u8 *src, u32 len, u32 hibit); +asmlinkage void poly1305_emit_mips(void *state, __le32 *digest, const u32 *nonce); + +void poly1305_init_arch(struct poly1305_desc_ctx *dctx, const u8 *key) +{ + poly1305_init_mips(&dctx->h, key); + dctx->s[0] = get_unaligned_le32(key + 16); + dctx->s[1] = get_unaligned_le32(key + 20); + dctx->s[2] = get_unaligned_le32(key + 24); + dctx->s[3] = get_unaligned_le32(key + 28); + dctx->buflen = 0; +} +EXPORT_SYMBOL(poly1305_init_arch); + +static int mips_poly1305_init(struct shash_desc *desc) +{ + struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc); + + dctx->buflen = 0; + dctx->rset = 0; + dctx->sset = false; + + return 0; +} + +static void mips_poly1305_blocks(struct poly1305_desc_ctx *dctx, const u8 *src, + u32 len, u32 hibit) +{ + if (unlikely(!dctx->sset)) { + if (!dctx->rset) { + poly1305_init_mips(&dctx->h, src); + src += POLY1305_BLOCK_SIZE; + len -= POLY1305_BLOCK_SIZE; + dctx->rset = 1; + } + if (len >= POLY1305_BLOCK_SIZE) { + dctx->s[0] = get_unaligned_le32(src + 0); + dctx->s[1] = get_unaligned_le32(src + 4); + dctx->s[2] = get_unaligned_le32(src + 8); + dctx->s[3] = get_unaligned_le32(src + 12); + src += POLY1305_BLOCK_SIZE; + len -= POLY1305_BLOCK_SIZE; + dctx->sset = true; + } + if (len < POLY1305_BLOCK_SIZE) + return; + } + + len &= ~(POLY1305_BLOCK_SIZE - 1); + + poly1305_blocks_mips(&dctx->h, src, len, hibit); +} + +static int mips_poly1305_update(struct shash_desc *desc, const u8 *src, + unsigned int len) +{ + struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc); + + if (unlikely(dctx->buflen)) { + u32 bytes = min(len, POLY1305_BLOCK_SIZE - dctx->buflen); + + memcpy(dctx->buf + dctx->buflen, src, bytes); + src += bytes; + len -= bytes; + dctx->buflen += bytes; + + if (dctx->buflen == POLY1305_BLOCK_SIZE) { + mips_poly1305_blocks(dctx, dctx->buf, POLY1305_BLOCK_SIZE, 1); + dctx->buflen = 0; + } + } + + if (likely(len >= POLY1305_BLOCK_SIZE)) { + mips_poly1305_blocks(dctx, src, len, 1); + src += round_down(len, POLY1305_BLOCK_SIZE); + len %= POLY1305_BLOCK_SIZE; + } + + if (unlikely(len)) { + dctx->buflen = len; + memcpy(dctx->buf, src, len); + } + return 0; +} + +void poly1305_update_arch(struct poly1305_desc_ctx *dctx, const u8 *src, + unsigned int nbytes) +{ + if (unlikely(dctx->buflen)) { + u32 bytes = min(nbytes, POLY1305_BLOCK_SIZE - dctx->buflen); + + memcpy(dctx->buf + dctx->buflen, src, bytes); + src += bytes; + nbytes -= bytes; + dctx->buflen += bytes; + + if (dctx->buflen == POLY1305_BLOCK_SIZE) { + poly1305_blocks_mips(&dctx->h, dctx->buf, + POLY1305_BLOCK_SIZE, 1); + dctx->buflen = 0; + } + } + + if (likely(nbytes >= POLY1305_BLOCK_SIZE)) { + unsigned int len = round_down(nbytes, POLY1305_BLOCK_SIZE); + + poly1305_blocks_mips(&dctx->h, src, len, 1); + src += len; + nbytes %= POLY1305_BLOCK_SIZE; + } + + if (unlikely(nbytes)) { + dctx->buflen = nbytes; + memcpy(dctx->buf, src, nbytes); + } +} +EXPORT_SYMBOL(poly1305_update_arch); + +void poly1305_final_arch(struct poly1305_desc_ctx *dctx, u8 *dst) +{ + __le32 digest[4]; + u64 f = 0; + + if (unlikely(dctx->buflen)) { + dctx->buf[dctx->buflen++] = 1; + memset(dctx->buf + dctx->buflen, 0, + POLY1305_BLOCK_SIZE - 
dctx->buflen); + poly1305_blocks_mips(&dctx->h, dctx->buf, POLY1305_BLOCK_SIZE, 0); + } + + poly1305_emit_mips(&dctx->h, digest, dctx->s); + + /* mac = (h + s) % (2^128) */ + f = (f >> 32) + le32_to_cpu(digest[0]); + put_unaligned_le32(f, dst); + f = (f >> 32) + le32_to_cpu(digest[1]); + put_unaligned_le32(f, dst + 4); + f = (f >> 32) + le32_to_cpu(digest[2]); + put_unaligned_le32(f, dst + 8); + f = (f >> 32) + le32_to_cpu(digest[3]); + put_unaligned_le32(f, dst + 12); + + *dctx = (struct poly1305_desc_ctx){}; +} +EXPORT_SYMBOL(poly1305_final_arch); + +static int mips_poly1305_final(struct shash_desc *desc, u8 *dst) +{ + struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc); + + if (unlikely(!dctx->sset)) + return -ENOKEY; + + poly1305_final_arch(dctx, dst); + return 0; +} + +static struct shash_alg mips_poly1305_alg = { + .init = mips_poly1305_init, + .update = mips_poly1305_update, + .final = mips_poly1305_final, + .digestsize = POLY1305_DIGEST_SIZE, + .descsize = sizeof(struct poly1305_desc_ctx), + + .base.cra_name = "poly1305", + .base.cra_driver_name = "poly1305-mips", + .base.cra_priority = 200, + .base.cra_blocksize = POLY1305_BLOCK_SIZE, + .base.cra_module = THIS_MODULE, +}; + +static int __init mips_poly1305_mod_init(void) +{ + return crypto_register_shash(&mips_poly1305_alg); +} + +static void __exit mips_poly1305_mod_exit(void) +{ + crypto_unregister_shash(&mips_poly1305_alg); +} + +module_init(mips_poly1305_mod_init); +module_exit(mips_poly1305_mod_exit); + +MODULE_LICENSE("GPL v2"); +MODULE_ALIAS_CRYPTO("poly1305"); +MODULE_ALIAS_CRYPTO("poly1305-mips"); diff --git a/arch/mips/crypto/poly1305-mips.pl b/arch/mips/crypto/poly1305-mips.pl new file mode 100644 index 000000000000..b05bab884ed2 --- /dev/null +++ b/arch/mips/crypto/poly1305-mips.pl @@ -0,0 +1,1273 @@ +#!/usr/bin/env perl +# SPDX-License-Identifier: GPL-1.0+ OR BSD-3-Clause +# +# ==================================================================== +# Written by Andy Polyakov, @dot-asm, originally for the OpenSSL +# project. +# ==================================================================== + +# Poly1305 hash for MIPS. +# +# May 2016 +# +# Numbers are cycles per processed byte with poly1305_blocks alone. +# +# IALU/gcc +# R1x000 ~5.5/+130% (big-endian) +# Octeon II 2.50/+70% (little-endian) +# +# March 2019 +# +# Add 32-bit code path. +# +# October 2019 +# +# Modulo-scheduling reduction allows to omit dependency chain at the +# end of inner loop and improve performance. Also optimize MIPS32R2 +# code path for MIPS 1004K core. Per René von Dorst's suggestions. +# +# IALU/gcc +# R1x000 ~9.8/? (big-endian) +# Octeon II 3.65/+140% (little-endian) +# MT7621/1004K 4.75/? (little-endian) +# +###################################################################### +# There is a number of MIPS ABI in use, O32 and N32/64 are most +# widely used. Then there is a new contender: NUBI. It appears that if +# one picks the latter, it's possible to arrange code in ABI neutral +# manner. Therefore let's stick to NUBI register layout: +# +($zero,$at,$t0,$t1,$t2)=map("\$$_",(0..2,24,25)); +($a0,$a1,$a2,$a3,$a4,$a5,$a6,$a7)=map("\$$_",(4..11)); +($s0,$s1,$s2,$s3,$s4,$s5,$s6,$s7,$s8,$s9,$s10,$s11)=map("\$$_",(12..23)); +($gp,$tp,$sp,$fp,$ra)=map("\$$_",(3,28..31)); +# +# The return value is placed in $a0. 
Following coding rules facilitate +# interoperability: +# +# - never ever touch $tp, "thread pointer", former $gp [o32 can be +# excluded from the rule, because it's specified volatile]; +# - copy return value to $t0, former $v0 [or to $a0 if you're adapting +# old code]; +# - on O32 populate $a4-$a7 with 'lw $aN,4*N($sp)' if necessary; +# +# For reference here is register layout for N32/64 MIPS ABIs: +# +# ($zero,$at,$v0,$v1)=map("\$$_",(0..3)); +# ($a0,$a1,$a2,$a3,$a4,$a5,$a6,$a7)=map("\$$_",(4..11)); +# ($t0,$t1,$t2,$t3,$t8,$t9)=map("\$$_",(12..15,24,25)); +# ($s0,$s1,$s2,$s3,$s4,$s5,$s6,$s7)=map("\$$_",(16..23)); +# ($gp,$sp,$fp,$ra)=map("\$$_",(28..31)); +# +# <appro@openssl.org> +# +###################################################################### + +$flavour = shift || "64"; # supported flavours are o32,n32,64,nubi32,nubi64 + +$v0 = ($flavour =~ /nubi/i) ? $a0 : $t0; + +if ($flavour =~ /64|n32/i) {{{ +###################################################################### +# 64-bit code path +# + +my ($ctx,$inp,$len,$padbit) = ($a0,$a1,$a2,$a3); +my ($in0,$in1,$tmp0,$tmp1,$tmp2,$tmp3,$tmp4) = ($a4,$a5,$a6,$a7,$at,$t0,$t1); + +$code.=<<___; +#if (defined(_MIPS_ARCH_MIPS64R3) || defined(_MIPS_ARCH_MIPS64R5) || \\ + defined(_MIPS_ARCH_MIPS64R6)) \\ + && !defined(_MIPS_ARCH_MIPS64R2) +# define _MIPS_ARCH_MIPS64R2 +#endif + +#if defined(_MIPS_ARCH_MIPS64R6) +# define dmultu(rs,rt) +# define mflo(rd,rs,rt) dmulu rd,rs,rt +# define mfhi(rd,rs,rt) dmuhu rd,rs,rt +#else +# define dmultu(rs,rt) dmultu rs,rt +# define mflo(rd,rs,rt) mflo rd +# define mfhi(rd,rs,rt) mfhi rd +#endif + +#ifdef __KERNEL__ +# define poly1305_init poly1305_init_mips +# define poly1305_blocks poly1305_blocks_mips +# define poly1305_emit poly1305_emit_mips +#endif + +#if defined(__MIPSEB__) && !defined(MIPSEB) +# define MIPSEB +#endif + +#ifdef MIPSEB +# define MSB 0 +# define LSB 7 +#else +# define MSB 7 +# define LSB 0 +#endif + +.text +.set noat +.set noreorder + +.align 5 +.globl poly1305_init +.ent poly1305_init +poly1305_init: + .frame $sp,0,$ra + .set reorder + + sd $zero,0($ctx) + sd $zero,8($ctx) + sd $zero,16($ctx) + + beqz $inp,.Lno_key + +#if defined(_MIPS_ARCH_MIPS64R6) + andi $tmp0,$inp,7 # $inp % 8 + dsubu $inp,$inp,$tmp0 # align $inp + sll $tmp0,$tmp0,3 # byte to bit offset + ld $in0,0($inp) + ld $in1,8($inp) + beqz $tmp0,.Laligned_key + ld $tmp2,16($inp) + + subu $tmp1,$zero,$tmp0 +# ifdef MIPSEB + dsllv $in0,$in0,$tmp0 + dsrlv $tmp3,$in1,$tmp1 + dsllv $in1,$in1,$tmp0 + dsrlv $tmp2,$tmp2,$tmp1 +# else + dsrlv $in0,$in0,$tmp0 + dsllv $tmp3,$in1,$tmp1 + dsrlv $in1,$in1,$tmp0 + dsllv $tmp2,$tmp2,$tmp1 +# endif + or $in0,$in0,$tmp3 + or $in1,$in1,$tmp2 +.Laligned_key: +#else + ldl $in0,0+MSB($inp) + ldl $in1,8+MSB($inp) + ldr $in0,0+LSB($inp) + ldr $in1,8+LSB($inp) +#endif +#ifdef MIPSEB +# if defined(_MIPS_ARCH_MIPS64R2) + dsbh $in0,$in0 # byte swap + dsbh $in1,$in1 + dshd $in0,$in0 + dshd $in1,$in1 +# else + ori $tmp0,$zero,0xFF + dsll $tmp2,$tmp0,32 + or $tmp0,$tmp2 # 0x000000FF000000FF + + and $tmp1,$in0,$tmp0 # byte swap + and $tmp3,$in1,$tmp0 + dsrl $tmp2,$in0,24 + dsrl $tmp4,$in1,24 + dsll $tmp1,24 + dsll $tmp3,24 + and $tmp2,$tmp0 + and $tmp4,$tmp0 + dsll $tmp0,8 # 0x0000FF000000FF00 + or $tmp1,$tmp2 + or $tmp3,$tmp4 + and $tmp2,$in0,$tmp0 + and $tmp4,$in1,$tmp0 + dsrl $in0,8 + dsrl $in1,8 + dsll $tmp2,8 + dsll $tmp4,8 + and $in0,$tmp0 + and $in1,$tmp0 + or $tmp1,$tmp2 + or $tmp3,$tmp4 + or $in0,$tmp1 + or $in1,$tmp3 + dsrl $tmp1,$in0,32 + dsrl $tmp3,$in1,32 + dsll $in0,32 + dsll $in1,32 + or 
$in0,$tmp1 + or $in1,$tmp3 +# endif +#endif + li $tmp0,1 + dsll $tmp0,32 # 0x0000000100000000 + daddiu $tmp0,-63 # 0x00000000ffffffc1 + dsll $tmp0,28 # 0x0ffffffc10000000 + daddiu $tmp0,-1 # 0x0ffffffc0fffffff + + and $in0,$tmp0 + daddiu $tmp0,-3 # 0x0ffffffc0ffffffc + and $in1,$tmp0 + + sd $in0,24($ctx) + dsrl $tmp0,$in1,2 + sd $in1,32($ctx) + daddu $tmp0,$in1 # s1 = r1 + (r1 >> 2) + sd $tmp0,40($ctx) + +.Lno_key: + li $v0,0 # return 0 + jr $ra +.end poly1305_init +___ +{ +my $SAVED_REGS_MASK = ($flavour =~ /nubi/i) ? "0x0003f000" : "0x00030000"; + +my ($h0,$h1,$h2,$r0,$r1,$rs1,$d0,$d1,$d2) = + ($s0,$s1,$s2,$s3,$s4,$s5,$in0,$in1,$t2); +my ($shr,$shl) = ($s6,$s7); # used on R6 + +$code.=<<___; +.align 5 +.globl poly1305_blocks +.ent poly1305_blocks +poly1305_blocks: + .set noreorder + dsrl $len,4 # number of complete blocks + bnez $len,poly1305_blocks_internal + nop + jr $ra + nop +.end poly1305_blocks + +.align 5 +.ent poly1305_blocks_internal +poly1305_blocks_internal: + .set noreorder +#if defined(_MIPS_ARCH_MIPS64R6) + .frame $sp,8*8,$ra + .mask $SAVED_REGS_MASK|0x000c0000,-8 + dsubu $sp,8*8 + sd $s7,56($sp) + sd $s6,48($sp) +#else + .frame $sp,6*8,$ra + .mask $SAVED_REGS_MASK,-8 + dsubu $sp,6*8 +#endif + sd $s5,40($sp) + sd $s4,32($sp) +___ +$code.=<<___ if ($flavour =~ /nubi/i); # optimize non-nubi prologue + sd $s3,24($sp) + sd $s2,16($sp) + sd $s1,8($sp) + sd $s0,0($sp) +___ +$code.=<<___; + .set reorder + +#if defined(_MIPS_ARCH_MIPS64R6) + andi $shr,$inp,7 + dsubu $inp,$inp,$shr # align $inp + sll $shr,$shr,3 # byte to bit offset + subu $shl,$zero,$shr +#endif + + ld $h0,0($ctx) # load hash value + ld $h1,8($ctx) + ld $h2,16($ctx) + + ld $r0,24($ctx) # load key + ld $r1,32($ctx) + ld $rs1,40($ctx) + + dsll $len,4 + daddu $len,$inp # end of buffer + b .Loop + +.align 4 +.Loop: +#if defined(_MIPS_ARCH_MIPS64R6) + ld $in0,0($inp) # load input + ld $in1,8($inp) + beqz $shr,.Laligned_inp + + ld $tmp2,16($inp) +# ifdef MIPSEB + dsllv $in0,$in0,$shr + dsrlv $tmp3,$in1,$shl + dsllv $in1,$in1,$shr + dsrlv $tmp2,$tmp2,$shl +# else + dsrlv $in0,$in0,$shr + dsllv $tmp3,$in1,$shl + dsrlv $in1,$in1,$shr + dsllv $tmp2,$tmp2,$shl +# endif + or $in0,$in0,$tmp3 + or $in1,$in1,$tmp2 +.Laligned_inp: +#else + ldl $in0,0+MSB($inp) # load input + ldl $in1,8+MSB($inp) + ldr $in0,0+LSB($inp) + ldr $in1,8+LSB($inp) +#endif + daddiu $inp,16 +#ifdef MIPSEB +# if defined(_MIPS_ARCH_MIPS64R2) + dsbh $in0,$in0 # byte swap + dsbh $in1,$in1 + dshd $in0,$in0 + dshd $in1,$in1 +# else + ori $tmp0,$zero,0xFF + dsll $tmp2,$tmp0,32 + or $tmp0,$tmp2 # 0x000000FF000000FF + + and $tmp1,$in0,$tmp0 # byte swap + and $tmp3,$in1,$tmp0 + dsrl $tmp2,$in0,24 + dsrl $tmp4,$in1,24 + dsll $tmp1,24 + dsll $tmp3,24 + and $tmp2,$tmp0 + and $tmp4,$tmp0 + dsll $tmp0,8 # 0x0000FF000000FF00 + or $tmp1,$tmp2 + or $tmp3,$tmp4 + and $tmp2,$in0,$tmp0 + and $tmp4,$in1,$tmp0 + dsrl $in0,8 + dsrl $in1,8 + dsll $tmp2,8 + dsll $tmp4,8 + and $in0,$tmp0 + and $in1,$tmp0 + or $tmp1,$tmp2 + or $tmp3,$tmp4 + or $in0,$tmp1 + or $in1,$tmp3 + dsrl $tmp1,$in0,32 + dsrl $tmp3,$in1,32 + dsll $in0,32 + dsll $in1,32 + or $in0,$tmp1 + or $in1,$tmp3 +# endif +#endif + dsrl $tmp1,$h2,2 # modulo-scheduled reduction + andi $h2,$h2,3 + dsll $tmp0,$tmp1,2 + + daddu $d0,$h0,$in0 # accumulate input + daddu $tmp1,$tmp0 + sltu $tmp0,$d0,$h0 + daddu $d0,$d0,$tmp1 # ... 
and residue + sltu $tmp1,$d0,$tmp1 + daddu $d1,$h1,$in1 + daddu $tmp0,$tmp1 + sltu $tmp1,$d1,$h1 + daddu $d1,$tmp0 + + dmultu ($r0,$d0) # h0*r0 + daddu $d2,$h2,$padbit + sltu $tmp0,$d1,$tmp0 + mflo ($h0,$r0,$d0) + mfhi ($h1,$r0,$d0) + + dmultu ($rs1,$d1) # h1*5*r1 + daddu $d2,$tmp1 + daddu $d2,$tmp0 + mflo ($tmp0,$rs1,$d1) + mfhi ($tmp1,$rs1,$d1) + + dmultu ($r1,$d0) # h0*r1 + mflo ($tmp2,$r1,$d0) + mfhi ($h2,$r1,$d0) + daddu $h0,$tmp0 + daddu $h1,$tmp1 + sltu $tmp0,$h0,$tmp0 + + dmultu ($r0,$d1) # h1*r0 + daddu $h1,$tmp0 + daddu $h1,$tmp2 + mflo ($tmp0,$r0,$d1) + mfhi ($tmp1,$r0,$d1) + + dmultu ($rs1,$d2) # h2*5*r1 + sltu $tmp2,$h1,$tmp2 + daddu $h2,$tmp2 + mflo ($tmp2,$rs1,$d2) + + dmultu ($r0,$d2) # h2*r0 + daddu $h1,$tmp0 + daddu $h2,$tmp1 + mflo ($tmp3,$r0,$d2) + sltu $tmp0,$h1,$tmp0 + daddu $h2,$tmp0 + + daddu $h1,$tmp2 + sltu $tmp2,$h1,$tmp2 + daddu $h2,$tmp2 + daddu $h2,$tmp3 + + bne $inp,$len,.Loop + + sd $h0,0($ctx) # store hash value + sd $h1,8($ctx) + sd $h2,16($ctx) + + .set noreorder +#if defined(_MIPS_ARCH_MIPS64R6) + ld $s7,56($sp) + ld $s6,48($sp) +#endif + ld $s5,40($sp) # epilogue + ld $s4,32($sp) +___ +$code.=<<___ if ($flavour =~ /nubi/i); # optimize non-nubi epilogue + ld $s3,24($sp) + ld $s2,16($sp) + ld $s1,8($sp) + ld $s0,0($sp) +___ +$code.=<<___; + jr $ra +#if defined(_MIPS_ARCH_MIPS64R6) + daddu $sp,8*8 +#else + daddu $sp,6*8 +#endif +.end poly1305_blocks_internal +___ +} +{ +my ($ctx,$mac,$nonce) = ($a0,$a1,$a2); + +$code.=<<___; +.align 5 +.globl poly1305_emit +.ent poly1305_emit +poly1305_emit: + .frame $sp,0,$ra + .set reorder + + ld $tmp2,16($ctx) + ld $tmp0,0($ctx) + ld $tmp1,8($ctx) + + li $in0,-4 # final reduction + dsrl $in1,$tmp2,2 + and $in0,$tmp2 + andi $tmp2,$tmp2,3 + daddu $in0,$in1 + + daddu $tmp0,$tmp0,$in0 + sltu $in1,$tmp0,$in0 + daddiu $in0,$tmp0,5 # compare to modulus + daddu $tmp1,$tmp1,$in1 + sltiu $tmp3,$in0,5 + sltu $tmp4,$tmp1,$in1 + daddu $in1,$tmp1,$tmp3 + daddu $tmp2,$tmp2,$tmp4 + sltu $tmp3,$in1,$tmp3 + daddu $tmp2,$tmp2,$tmp3 + + dsrl $tmp2,2 # see if it carried/borrowed + dsubu $tmp2,$zero,$tmp2 + + xor $in0,$tmp0 + xor $in1,$tmp1 + and $in0,$tmp2 + and $in1,$tmp2 + xor $in0,$tmp0 + xor $in1,$tmp1 + + lwu $tmp0,0($nonce) # load nonce + lwu $tmp1,4($nonce) + lwu $tmp2,8($nonce) + lwu $tmp3,12($nonce) + dsll $tmp1,32 + dsll $tmp3,32 + or $tmp0,$tmp1 + or $tmp2,$tmp3 + + daddu $in0,$tmp0 # accumulate nonce + daddu $in1,$tmp2 + sltu $tmp0,$in0,$tmp0 + daddu $in1,$tmp0 + + dsrl $tmp0,$in0,8 # write mac value + dsrl $tmp1,$in0,16 + dsrl $tmp2,$in0,24 + sb $in0,0($mac) + dsrl $tmp3,$in0,32 + sb $tmp0,1($mac) + dsrl $tmp0,$in0,40 + sb $tmp1,2($mac) + dsrl $tmp1,$in0,48 + sb $tmp2,3($mac) + dsrl $tmp2,$in0,56 + sb $tmp3,4($mac) + dsrl $tmp3,$in1,8 + sb $tmp0,5($mac) + dsrl $tmp0,$in1,16 + sb $tmp1,6($mac) + dsrl $tmp1,$in1,24 + sb $tmp2,7($mac) + + sb $in1,8($mac) + dsrl $tmp2,$in1,32 + sb $tmp3,9($mac) + dsrl $tmp3,$in1,40 + sb $tmp0,10($mac) + dsrl $tmp0,$in1,48 + sb $tmp1,11($mac) + dsrl $tmp1,$in1,56 + sb $tmp2,12($mac) + sb $tmp3,13($mac) + sb $tmp0,14($mac) + sb $tmp1,15($mac) + + jr $ra +.end poly1305_emit +.rdata +.asciiz "Poly1305 for MIPS64, CRYPTOGAMS by \@dot-asm" +.align 2 +___ +} +}}} else {{{ +###################################################################### +# 32-bit code path +# + +my ($ctx,$inp,$len,$padbit) = ($a0,$a1,$a2,$a3); +my ($in0,$in1,$in2,$in3,$tmp0,$tmp1,$tmp2,$tmp3) = + ($a4,$a5,$a6,$a7,$at,$t0,$t1,$t2); + +$code.=<<___; +#if (defined(_MIPS_ARCH_MIPS32R3) || defined(_MIPS_ARCH_MIPS32R5) || \\ + 
defined(_MIPS_ARCH_MIPS32R6)) \\ + && !defined(_MIPS_ARCH_MIPS32R2) +# define _MIPS_ARCH_MIPS32R2 +#endif + +#if defined(_MIPS_ARCH_MIPS32R6) +# define multu(rs,rt) +# define mflo(rd,rs,rt) mulu rd,rs,rt +# define mfhi(rd,rs,rt) muhu rd,rs,rt +#else +# define multu(rs,rt) multu rs,rt +# define mflo(rd,rs,rt) mflo rd +# define mfhi(rd,rs,rt) mfhi rd +#endif + +#ifdef __KERNEL__ +# define poly1305_init poly1305_init_mips +# define poly1305_blocks poly1305_blocks_mips +# define poly1305_emit poly1305_emit_mips +#endif + +#if defined(__MIPSEB__) && !defined(MIPSEB) +# define MIPSEB +#endif + +#ifdef MIPSEB +# define MSB 0 +# define LSB 3 +#else +# define MSB 3 +# define LSB 0 +#endif + +.text +.set noat +.set noreorder + +.align 5 +.globl poly1305_init +.ent poly1305_init +poly1305_init: + .frame $sp,0,$ra + .set reorder + + sw $zero,0($ctx) + sw $zero,4($ctx) + sw $zero,8($ctx) + sw $zero,12($ctx) + sw $zero,16($ctx) + + beqz $inp,.Lno_key + +#if defined(_MIPS_ARCH_MIPS32R6) + andi $tmp0,$inp,3 # $inp % 4 + subu $inp,$inp,$tmp0 # align $inp + sll $tmp0,$tmp0,3 # byte to bit offset + lw $in0,0($inp) + lw $in1,4($inp) + lw $in2,8($inp) + lw $in3,12($inp) + beqz $tmp0,.Laligned_key + + lw $tmp2,16($inp) + subu $tmp1,$zero,$tmp0 +# ifdef MIPSEB + sllv $in0,$in0,$tmp0 + srlv $tmp3,$in1,$tmp1 + sllv $in1,$in1,$tmp0 + or $in0,$in0,$tmp3 + srlv $tmp3,$in2,$tmp1 + sllv $in2,$in2,$tmp0 + or $in1,$in1,$tmp3 + srlv $tmp3,$in3,$tmp1 + sllv $in3,$in3,$tmp0 + or $in2,$in2,$tmp3 + srlv $tmp2,$tmp2,$tmp1 + or $in3,$in3,$tmp2 +# else + srlv $in0,$in0,$tmp0 + sllv $tmp3,$in1,$tmp1 + srlv $in1,$in1,$tmp0 + or $in0,$in0,$tmp3 + sllv $tmp3,$in2,$tmp1 + srlv $in2,$in2,$tmp0 + or $in1,$in1,$tmp3 + sllv $tmp3,$in3,$tmp1 + srlv $in3,$in3,$tmp0 + or $in2,$in2,$tmp3 + sllv $tmp2,$tmp2,$tmp1 + or $in3,$in3,$tmp2 +# endif +.Laligned_key: +#else + lwl $in0,0+MSB($inp) + lwl $in1,4+MSB($inp) + lwl $in2,8+MSB($inp) + lwl $in3,12+MSB($inp) + lwr $in0,0+LSB($inp) + lwr $in1,4+LSB($inp) + lwr $in2,8+LSB($inp) + lwr $in3,12+LSB($inp) +#endif +#ifdef MIPSEB +# if defined(_MIPS_ARCH_MIPS32R2) + wsbh $in0,$in0 # byte swap + wsbh $in1,$in1 + wsbh $in2,$in2 + wsbh $in3,$in3 + rotr $in0,$in0,16 + rotr $in1,$in1,16 + rotr $in2,$in2,16 + rotr $in3,$in3,16 +# else + srl $tmp0,$in0,24 # byte swap + srl $tmp1,$in0,8 + andi $tmp2,$in0,0xFF00 + sll $in0,$in0,24 + andi $tmp1,0xFF00 + sll $tmp2,$tmp2,8 + or $in0,$tmp0 + srl $tmp0,$in1,24 + or $tmp1,$tmp2 + srl $tmp2,$in1,8 + or $in0,$tmp1 + andi $tmp1,$in1,0xFF00 + sll $in1,$in1,24 + andi $tmp2,0xFF00 + sll $tmp1,$tmp1,8 + or $in1,$tmp0 + srl $tmp0,$in2,24 + or $tmp2,$tmp1 + srl $tmp1,$in2,8 + or $in1,$tmp2 + andi $tmp2,$in2,0xFF00 + sll $in2,$in2,24 + andi $tmp1,0xFF00 + sll $tmp2,$tmp2,8 + or $in2,$tmp0 + srl $tmp0,$in3,24 + or $tmp1,$tmp2 + srl $tmp2,$in3,8 + or $in2,$tmp1 + andi $tmp1,$in3,0xFF00 + sll $in3,$in3,24 + andi $tmp2,0xFF00 + sll $tmp1,$tmp1,8 + or $in3,$tmp0 + or $tmp2,$tmp1 + or $in3,$tmp2 +# endif +#endif + lui $tmp0,0x0fff + ori $tmp0,0xffff # 0x0fffffff + and $in0,$in0,$tmp0 + subu $tmp0,3 # 0x0ffffffc + and $in1,$in1,$tmp0 + and $in2,$in2,$tmp0 + and $in3,$in3,$tmp0 + + sw $in0,20($ctx) + sw $in1,24($ctx) + sw $in2,28($ctx) + sw $in3,32($ctx) + + srl $tmp1,$in1,2 + srl $tmp2,$in2,2 + srl $tmp3,$in3,2 + addu $in1,$in1,$tmp1 # s1 = r1 + (r1 >> 2) + addu $in2,$in2,$tmp2 + addu $in3,$in3,$tmp3 + sw $in1,36($ctx) + sw $in2,40($ctx) + sw $in3,44($ctx) +.Lno_key: + li $v0,0 + jr $ra +.end poly1305_init +___ +{ +my $SAVED_REGS_MASK = ($flavour =~ /nubi/i) ? 
"0x00fff000" : "0x00ff0000"; + +my ($h0,$h1,$h2,$h3,$h4, $r0,$r1,$r2,$r3, $rs1,$rs2,$rs3) = + ($s0,$s1,$s2,$s3,$s4, $s5,$s6,$s7,$s8, $s9,$s10,$s11); +my ($d0,$d1,$d2,$d3) = + ($a4,$a5,$a6,$a7); +my $shr = $t2; # used on R6 +my $one = $t2; # used on R2 + +$code.=<<___; +.globl poly1305_blocks +.align 5 +.ent poly1305_blocks +poly1305_blocks: + .frame $sp,16*4,$ra + .mask $SAVED_REGS_MASK,-4 + .set noreorder + subu $sp, $sp,4*12 + sw $s11,4*11($sp) + sw $s10,4*10($sp) + sw $s9, 4*9($sp) + sw $s8, 4*8($sp) + sw $s7, 4*7($sp) + sw $s6, 4*6($sp) + sw $s5, 4*5($sp) + sw $s4, 4*4($sp) +___ +$code.=<<___ if ($flavour =~ /nubi/i); # optimize non-nubi prologue + sw $s3, 4*3($sp) + sw $s2, 4*2($sp) + sw $s1, 4*1($sp) + sw $s0, 4*0($sp) +___ +$code.=<<___; + .set reorder + + srl $len,4 # number of complete blocks + li $one,1 + beqz $len,.Labort + +#if defined(_MIPS_ARCH_MIPS32R6) + andi $shr,$inp,3 + subu $inp,$inp,$shr # align $inp + sll $shr,$shr,3 # byte to bit offset +#endif + + lw $h0,0($ctx) # load hash value + lw $h1,4($ctx) + lw $h2,8($ctx) + lw $h3,12($ctx) + lw $h4,16($ctx) + + lw $r0,20($ctx) # load key + lw $r1,24($ctx) + lw $r2,28($ctx) + lw $r3,32($ctx) + lw $rs1,36($ctx) + lw $rs2,40($ctx) + lw $rs3,44($ctx) + + sll $len,4 + addu $len,$len,$inp # end of buffer + b .Loop + +.align 4 +.Loop: +#if defined(_MIPS_ARCH_MIPS32R6) + lw $d0,0($inp) # load input + lw $d1,4($inp) + lw $d2,8($inp) + lw $d3,12($inp) + beqz $shr,.Laligned_inp + + lw $t0,16($inp) + subu $t1,$zero,$shr +# ifdef MIPSEB + sllv $d0,$d0,$shr + srlv $at,$d1,$t1 + sllv $d1,$d1,$shr + or $d0,$d0,$at + srlv $at,$d2,$t1 + sllv $d2,$d2,$shr + or $d1,$d1,$at + srlv $at,$d3,$t1 + sllv $d3,$d3,$shr + or $d2,$d2,$at + srlv $t0,$t0,$t1 + or $d3,$d3,$t0 +# else + srlv $d0,$d0,$shr + sllv $at,$d1,$t1 + srlv $d1,$d1,$shr + or $d0,$d0,$at + sllv $at,$d2,$t1 + srlv $d2,$d2,$shr + or $d1,$d1,$at + sllv $at,$d3,$t1 + srlv $d3,$d3,$shr + or $d2,$d2,$at + sllv $t0,$t0,$t1 + or $d3,$d3,$t0 +# endif +.Laligned_inp: +#else + lwl $d0,0+MSB($inp) # load input + lwl $d1,4+MSB($inp) + lwl $d2,8+MSB($inp) + lwl $d3,12+MSB($inp) + lwr $d0,0+LSB($inp) + lwr $d1,4+LSB($inp) + lwr $d2,8+LSB($inp) + lwr $d3,12+LSB($inp) +#endif +#ifdef MIPSEB +# if defined(_MIPS_ARCH_MIPS32R2) + wsbh $d0,$d0 # byte swap + wsbh $d1,$d1 + wsbh $d2,$d2 + wsbh $d3,$d3 + rotr $d0,$d0,16 + rotr $d1,$d1,16 + rotr $d2,$d2,16 + rotr $d3,$d3,16 +# else + srl $at,$d0,24 # byte swap + srl $t0,$d0,8 + andi $t1,$d0,0xFF00 + sll $d0,$d0,24 + andi $t0,0xFF00 + sll $t1,$t1,8 + or $d0,$at + srl $at,$d1,24 + or $t0,$t1 + srl $t1,$d1,8 + or $d0,$t0 + andi $t0,$d1,0xFF00 + sll $d1,$d1,24 + andi $t1,0xFF00 + sll $t0,$t0,8 + or $d1,$at + srl $at,$d2,24 + or $t1,$t0 + srl $t0,$d2,8 + or $d1,$t1 + andi $t1,$d2,0xFF00 + sll $d2,$d2,24 + andi $t0,0xFF00 + sll $t1,$t1,8 + or $d2,$at + srl $at,$d3,24 + or $t0,$t1 + srl $t1,$d3,8 + or $d2,$t0 + andi $t0,$d3,0xFF00 + sll $d3,$d3,24 + andi $t1,0xFF00 + sll $t0,$t0,8 + or $d3,$at + or $t1,$t0 + or $d3,$t1 +# endif +#endif + srl $t0,$h4,2 # modulo-scheduled reduction + andi $h4,$h4,3 + sll $at,$t0,2 + + addu $d0,$d0,$h0 # accumulate input + addu $t0,$t0,$at + sltu $h0,$d0,$h0 + addu $d0,$d0,$t0 # ... 
and residue + sltu $at,$d0,$t0 + + addu $d1,$d1,$h1 + addu $h0,$h0,$at # carry + sltu $h1,$d1,$h1 + addu $d1,$d1,$h0 + sltu $h0,$d1,$h0 + + addu $d2,$d2,$h2 + addu $h1,$h1,$h0 # carry + sltu $h2,$d2,$h2 + addu $d2,$d2,$h1 + sltu $h1,$d2,$h1 + + addu $d3,$d3,$h3 + addu $h2,$h2,$h1 # carry + sltu $h3,$d3,$h3 + addu $d3,$d3,$h2 + +#if defined(_MIPS_ARCH_MIPS32R2) && !defined(_MIPS_ARCH_MIPS32R6) + multu $r0,$d0 # d0*r0 + sltu $h2,$d3,$h2 + maddu $rs3,$d1 # d1*s3 + addu $h3,$h3,$h2 # carry + maddu $rs2,$d2 # d2*s2 + addu $h4,$h4,$padbit + maddu $rs1,$d3 # d3*s1 + addu $h4,$h4,$h3 + mfhi $at + mflo $h0 + + multu $r1,$d0 # d0*r1 + maddu $r0,$d1 # d1*r0 + maddu $rs3,$d2 # d2*s3 + maddu $rs2,$d3 # d3*s2 + maddu $rs1,$h4 # h4*s1 + maddu $at,$one # hi*1 + mfhi $at + mflo $h1 + + multu $r2,$d0 # d0*r2 + maddu $r1,$d1 # d1*r1 + maddu $r0,$d2 # d2*r0 + maddu $rs3,$d3 # d3*s3 + maddu $rs2,$h4 # h4*s2 + maddu $at,$one # hi*1 + mfhi $at + mflo $h2 + + mul $t0,$r0,$h4 # h4*r0 + + multu $r3,$d0 # d0*r3 + maddu $r2,$d1 # d1*r2 + maddu $r1,$d2 # d2*r1 + maddu $r0,$d3 # d3*r0 + maddu $rs3,$h4 # h4*s3 + maddu $at,$one # hi*1 + mfhi $at + mflo $h3 + + addiu $inp,$inp,16 + + addu $h4,$t0,$at +#else + multu ($r0,$d0) # d0*r0 + mflo ($h0,$r0,$d0) + mfhi ($h1,$r0,$d0) + + sltu $h2,$d3,$h2 + addu $h3,$h3,$h2 # carry + + multu ($rs3,$d1) # d1*s3 + mflo ($at,$rs3,$d1) + mfhi ($t0,$rs3,$d1) + + addu $h4,$h4,$padbit + addiu $inp,$inp,16 + addu $h4,$h4,$h3 + + multu ($rs2,$d2) # d2*s2 + mflo ($a3,$rs2,$d2) + mfhi ($t1,$rs2,$d2) + addu $h0,$h0,$at + addu $h1,$h1,$t0 + multu ($rs1,$d3) # d3*s1 + sltu $at,$h0,$at + addu $h1,$h1,$at + + mflo ($at,$rs1,$d3) + mfhi ($t0,$rs1,$d3) + addu $h0,$h0,$a3 + addu $h1,$h1,$t1 + multu ($r1,$d0) # d0*r1 + sltu $a3,$h0,$a3 + addu $h1,$h1,$a3 + + + mflo ($a3,$r1,$d0) + mfhi ($h2,$r1,$d0) + addu $h0,$h0,$at + addu $h1,$h1,$t0 + multu ($r0,$d1) # d1*r0 + sltu $at,$h0,$at + addu $h1,$h1,$at + + mflo ($at,$r0,$d1) + mfhi ($t0,$r0,$d1) + addu $h1,$h1,$a3 + sltu $a3,$h1,$a3 + multu ($rs3,$d2) # d2*s3 + addu $h2,$h2,$a3 + + mflo ($a3,$rs3,$d2) + mfhi ($t1,$rs3,$d2) + addu $h1,$h1,$at + addu $h2,$h2,$t0 + multu ($rs2,$d3) # d3*s2 + sltu $at,$h1,$at + addu $h2,$h2,$at + + mflo ($at,$rs2,$d3) + mfhi ($t0,$rs2,$d3) + addu $h1,$h1,$a3 + addu $h2,$h2,$t1 + multu ($rs1,$h4) # h4*s1 + sltu $a3,$h1,$a3 + addu $h2,$h2,$a3 + + mflo ($a3,$rs1,$h4) + addu $h1,$h1,$at + addu $h2,$h2,$t0 + multu ($r2,$d0) # d0*r2 + sltu $at,$h1,$at + addu $h2,$h2,$at + + + mflo ($at,$r2,$d0) + mfhi ($h3,$r2,$d0) + addu $h1,$h1,$a3 + sltu $a3,$h1,$a3 + multu ($r1,$d1) # d1*r1 + addu $h2,$h2,$a3 + + mflo ($a3,$r1,$d1) + mfhi ($t1,$r1,$d1) + addu $h2,$h2,$at + sltu $at,$h2,$at + multu ($r0,$d2) # d2*r0 + addu $h3,$h3,$at + + mflo ($at,$r0,$d2) + mfhi ($t0,$r0,$d2) + addu $h2,$h2,$a3 + addu $h3,$h3,$t1 + multu ($rs3,$d3) # d3*s3 + sltu $a3,$h2,$a3 + addu $h3,$h3,$a3 + + mflo ($a3,$rs3,$d3) + mfhi ($t1,$rs3,$d3) + addu $h2,$h2,$at + addu $h3,$h3,$t0 + multu ($rs2,$h4) # h4*s2 + sltu $at,$h2,$at + addu $h3,$h3,$at + + mflo ($at,$rs2,$h4) + addu $h2,$h2,$a3 + addu $h3,$h3,$t1 + multu ($r3,$d0) # d0*r3 + sltu $a3,$h2,$a3 + addu $h3,$h3,$a3 + + + mflo ($a3,$r3,$d0) + mfhi ($t1,$r3,$d0) + addu $h2,$h2,$at + sltu $at,$h2,$at + multu ($r2,$d1) # d1*r2 + addu $h3,$h3,$at + + mflo ($at,$r2,$d1) + mfhi ($t0,$r2,$d1) + addu $h3,$h3,$a3 + sltu $a3,$h3,$a3 + multu ($r0,$d3) # d3*r0 + addu $t1,$t1,$a3 + + mflo ($a3,$r0,$d3) + mfhi ($d3,$r0,$d3) + addu $h3,$h3,$at + addu $t1,$t1,$t0 + multu ($r1,$d2) # d2*r1 + sltu $at,$h3,$at + addu $t1,$t1,$at + 
+ mflo ($at,$r1,$d2) + mfhi ($t0,$r1,$d2) + addu $h3,$h3,$a3 + addu $t1,$t1,$d3 + multu ($rs3,$h4) # h4*s3 + sltu $a3,$h3,$a3 + addu $t1,$t1,$a3 + + mflo ($a3,$rs3,$h4) + addu $h3,$h3,$at + addu $t1,$t1,$t0 + multu ($r0,$h4) # h4*r0 + sltu $at,$h3,$at + addu $t1,$t1,$at + + + mflo ($h4,$r0,$h4) + addu $h3,$h3,$a3 + sltu $a3,$h3,$a3 + addu $t1,$t1,$a3 + addu $h4,$h4,$t1 + + li $padbit,1 # if we loop, padbit is 1 +#endif + bne $inp,$len,.Loop + + sw $h0,0($ctx) # store hash value + sw $h1,4($ctx) + sw $h2,8($ctx) + sw $h3,12($ctx) + sw $h4,16($ctx) + + .set noreorder +.Labort: + lw $s11,4*11($sp) + lw $s10,4*10($sp) + lw $s9, 4*9($sp) + lw $s8, 4*8($sp) + lw $s7, 4*7($sp) + lw $s6, 4*6($sp) + lw $s5, 4*5($sp) + lw $s4, 4*4($sp) +___ +$code.=<<___ if ($flavour =~ /nubi/i); # optimize non-nubi prologue + lw $s3, 4*3($sp) + lw $s2, 4*2($sp) + lw $s1, 4*1($sp) + lw $s0, 4*0($sp) +___ +$code.=<<___; + jr $ra + addu $sp,$sp,4*12 +.end poly1305_blocks +___ +} +{ +my ($ctx,$mac,$nonce,$tmp4) = ($a0,$a1,$a2,$a3); + +$code.=<<___; +.align 5 +.globl poly1305_emit +.ent poly1305_emit +poly1305_emit: + .frame $sp,0,$ra + .set reorder + + lw $tmp4,16($ctx) + lw $tmp0,0($ctx) + lw $tmp1,4($ctx) + lw $tmp2,8($ctx) + lw $tmp3,12($ctx) + + li $in0,-4 # final reduction + srl $ctx,$tmp4,2 + and $in0,$in0,$tmp4 + andi $tmp4,$tmp4,3 + addu $ctx,$ctx,$in0 + + addu $tmp0,$tmp0,$ctx + sltu $ctx,$tmp0,$ctx + addiu $in0,$tmp0,5 # compare to modulus + addu $tmp1,$tmp1,$ctx + sltiu $in1,$in0,5 + sltu $ctx,$tmp1,$ctx + addu $in1,$in1,$tmp1 + addu $tmp2,$tmp2,$ctx + sltu $in2,$in1,$tmp1 + sltu $ctx,$tmp2,$ctx + addu $in2,$in2,$tmp2 + addu $tmp3,$tmp3,$ctx + sltu $in3,$in2,$tmp2 + sltu $ctx,$tmp3,$ctx + addu $in3,$in3,$tmp3 + addu $tmp4,$tmp4,$ctx + sltu $ctx,$in3,$tmp3 + addu $ctx,$tmp4 + + srl $ctx,2 # see if it carried/borrowed + subu $ctx,$zero,$ctx + + xor $in0,$tmp0 + xor $in1,$tmp1 + xor $in2,$tmp2 + xor $in3,$tmp3 + and $in0,$ctx + and $in1,$ctx + and $in2,$ctx + and $in3,$ctx + xor $in0,$tmp0 + xor $in1,$tmp1 + xor $in2,$tmp2 + xor $in3,$tmp3 + + lw $tmp0,0($nonce) # load nonce + lw $tmp1,4($nonce) + lw $tmp2,8($nonce) + lw $tmp3,12($nonce) + + addu $in0,$tmp0 # accumulate nonce + sltu $ctx,$in0,$tmp0 + + addu $in1,$tmp1 + sltu $tmp1,$in1,$tmp1 + addu $in1,$ctx + sltu $ctx,$in1,$ctx + addu $ctx,$tmp1 + + addu $in2,$tmp2 + sltu $tmp2,$in2,$tmp2 + addu $in2,$ctx + sltu $ctx,$in2,$ctx + addu $ctx,$tmp2 + + addu $in3,$tmp3 + addu $in3,$ctx + + srl $tmp0,$in0,8 # write mac value + srl $tmp1,$in0,16 + srl $tmp2,$in0,24 + sb $in0, 0($mac) + sb $tmp0,1($mac) + srl $tmp0,$in1,8 + sb $tmp1,2($mac) + srl $tmp1,$in1,16 + sb $tmp2,3($mac) + srl $tmp2,$in1,24 + sb $in1, 4($mac) + sb $tmp0,5($mac) + srl $tmp0,$in2,8 + sb $tmp1,6($mac) + srl $tmp1,$in2,16 + sb $tmp2,7($mac) + srl $tmp2,$in2,24 + sb $in2, 8($mac) + sb $tmp0,9($mac) + srl $tmp0,$in3,8 + sb $tmp1,10($mac) + srl $tmp1,$in3,16 + sb $tmp2,11($mac) + srl $tmp2,$in3,24 + sb $in3, 12($mac) + sb $tmp0,13($mac) + sb $tmp1,14($mac) + sb $tmp2,15($mac) + + jr $ra +.end poly1305_emit +.rdata +.asciiz "Poly1305 for MIPS32, CRYPTOGAMS by \@dot-asm" +.align 2 +___ +} +}}} + +$output=pop and open STDOUT,">$output"; +print $code; +close STDOUT; diff --git a/arch/mips/fw/arc/Makefile b/arch/mips/fw/arc/Makefile index 31dd7305d643..64d685efcc77 100644 --- a/arch/mips/fw/arc/Makefile +++ b/arch/mips/fw/arc/Makefile @@ -3,8 +3,12 @@ # Makefile for the ARC prom monitor library routines under Linux. 
# +ifdef CONFIG_ARC_CMDLINE_ONLY +lib-y += cmdline.o +else lib-y += cmdline.o env.o file.o identify.o init.o \ - misc.o salone.o time.o tree.o + misc.o +endif lib-$(CONFIG_ARC_MEMORY) += memory.o lib-$(CONFIG_ARC_CONSOLE) += arc_con.o diff --git a/arch/mips/fw/arc/cmdline.c b/arch/mips/fw/arc/cmdline.c index c0122a1dc587..155c5e911723 100644 --- a/arch/mips/fw/arc/cmdline.c +++ b/arch/mips/fw/arc/cmdline.c @@ -17,6 +17,12 @@ #undef DEBUG_CMDLINE +/* + * A 32-bit ARC PROM pass arguments and environment as 32-bit pointer. + * These macro take care of sign extension. + */ +#define prom_argv(index) ((char *) (long)argv[(index)]) + static char *ignored[] = { "ConsoleIn=", "ConsoleOut=", @@ -32,14 +38,14 @@ static char *used_arc[][2] = { { "OSLoadOptions=", "" } }; -static char * __init move_firmware_args(char* cp) +static char __init *move_firmware_args(int argc, LONG *argv, char *cp) { char *s; int actr, i; actr = 1; /* Always ignore argv[0] */ - while (actr < prom_argc) { + while (actr < argc) { for(i = 0; i < ARRAY_SIZE(used_arc); i++) { int len = strlen(used_arc[i][0]); @@ -64,7 +70,7 @@ static char * __init move_firmware_args(char* cp) return cp; } -void __init prom_init_cmdline(void) +void __init prom_init_cmdline(int argc, LONG *argv) { char *cp; int actr, i; @@ -76,9 +82,9 @@ void __init prom_init_cmdline(void) * Move ARC variables to the beginning to make sure they can be * overridden by later arguments. */ - cp = move_firmware_args(cp); + cp = move_firmware_args(argc, argv, cp); - while (actr < prom_argc) { + while (actr < argc) { for (i = 0; i < ARRAY_SIZE(ignored); i++) { int len = strlen(ignored[i]); diff --git a/arch/mips/fw/arc/env.c b/arch/mips/fw/arc/env.c index 1118a26b32ee..02407a7bb38e 100644 --- a/arch/mips/fw/arc/env.c +++ b/arch/mips/fw/arc/env.c @@ -19,9 +19,3 @@ ArcGetEnvironmentVariable(CHAR *name) { return (CHAR *) ARC_CALL1(get_evar, name); } - -LONG __init -ArcSetEnvironmentVariable(PCHAR name, PCHAR value) -{ - return ARC_CALL2(set_evar, name, value); -} diff --git a/arch/mips/fw/arc/file.c b/arch/mips/fw/arc/file.c index 49fd3ff13fe5..b0d8535c80cc 100644 --- a/arch/mips/fw/arc/file.c +++ b/arch/mips/fw/arc/file.c @@ -13,62 +13,13 @@ #include <asm/sgialib.h> LONG -ArcGetDirectoryEntry(ULONG FileID, struct linux_vdirent *Buffer, - ULONG N, ULONG *Count) -{ - return ARC_CALL4(get_vdirent, FileID, Buffer, N, Count); -} - -LONG -ArcOpen(CHAR *Path, enum linux_omode OpenMode, ULONG *FileID) -{ - return ARC_CALL3(open, Path, OpenMode, FileID); -} - -LONG -ArcClose(ULONG FileID) -{ - return ARC_CALL1(close, FileID); -} - -LONG ArcRead(ULONG FileID, VOID *Buffer, ULONG N, ULONG *Count) { return ARC_CALL4(read, FileID, Buffer, N, Count); } LONG -ArcGetReadStatus(ULONG FileID) -{ - return ARC_CALL1(get_rstatus, FileID); -} - -LONG ArcWrite(ULONG FileID, PVOID Buffer, ULONG N, PULONG Count) { return ARC_CALL4(write, FileID, Buffer, N, Count); } - -LONG -ArcSeek(ULONG FileID, struct linux_bigint *Position, enum linux_seekmode SeekMode) -{ - return ARC_CALL3(seek, FileID, Position, SeekMode); -} - -LONG -ArcMount(char *name, enum linux_mountops op) -{ - return ARC_CALL2(mount, name, op); -} - -LONG -ArcGetFileInformation(ULONG FileID, struct linux_finfo *Information) -{ - return ARC_CALL2(get_finfo, FileID, Information); -} - -LONG ArcSetFileInformation(ULONG FileID, ULONG AttributeFlags, - ULONG AttributeMask) -{ - return ARC_CALL3(set_finfo, FileID, AttributeFlags, AttributeMask); -} diff --git a/arch/mips/fw/arc/identify.c b/arch/mips/fw/arc/identify.c index 
f90266c02c9d..5527e0f54079 100644 --- a/arch/mips/fw/arc/identify.c +++ b/arch/mips/fw/arc/identify.c @@ -32,10 +32,6 @@ static struct smatch mach_table[] = { .liname = "SGI Indy", .flags = PROM_FLAG_ARCS, }, { - .arcname = "SGI-IP27", - .liname = "SGI Origin", - .flags = PROM_FLAG_ARCS, - }, { .arcname = "SGI-IP28", .liname = "SGI IP28", .flags = PROM_FLAG_ARCS, @@ -87,6 +83,11 @@ const char *get_system_type(void) return system_type; } +static pcomponent * __init ArcGetChild(pcomponent *Current) +{ + return (pcomponent *) ARC_CALL1(child_component, Current); +} + void __init prom_identify_arch(void) { pcomponent *p; @@ -98,13 +99,7 @@ void __init prom_identify_arch(void) */ p = ArcGetChild(PROM_NULL_COMPONENT); if (p == NULL) { -#ifdef CONFIG_SGI_IP27 - /* IP27 PROM misbehaves, seems to not implement ARC - GetChild(). So we just assume it's an IP27. */ - iname = "SGI-IP27"; -#else iname = "Unknown"; -#endif } else iname = (char *) (long) p->iname; diff --git a/arch/mips/fw/arc/init.c b/arch/mips/fw/arc/init.c index 008555969534..f9d1dea9b2ca 100644 --- a/arch/mips/fw/arc/init.c +++ b/arch/mips/fw/arc/init.c @@ -18,8 +18,11 @@ /* Master romvec interface. */ struct linux_romvec *romvec; -int prom_argc; -LONG *_prom_argv, *_prom_envp; + +#if defined(CONFIG_64BIT) && defined(CONFIG_FW_ARC32) +/* stack for calling 32bit ARC prom */ +u64 o32_stk[4096]; +#endif void __init prom_init(void) { @@ -27,10 +30,6 @@ void __init prom_init(void) romvec = ROMVECTOR; - prom_argc = fw_arg0; - _prom_argv = (LONG *) fw_arg1; - _prom_envp = (LONG *) fw_arg2; - if (pb->magic != 0x53435241) { printk(KERN_CRIT "Aieee, bad prom vector magic %08lx\n", (unsigned long) pb->magic); @@ -38,7 +37,7 @@ void __init prom_init(void) ; } - prom_init_cmdline(); + prom_init_cmdline(fw_arg0, (LONG *)fw_arg1); prom_identify_arch(); printk(KERN_INFO "PROMLIB: ARC firmware Version %d Revision %d\n", pb->ver, pb->rev); @@ -49,11 +48,4 @@ void __init prom_init(void) ArcRead(0, &c, 1, &cnt); ArcEnterInteractiveMode(); #endif -#ifdef CONFIG_SGI_IP27 - { - extern const struct plat_smp_ops ip27_smp_ops; - - register_smp_ops(&ip27_smp_ops); - } -#endif } diff --git a/arch/mips/fw/arc/memory.c b/arch/mips/fw/arc/memory.c index af44b35d79a1..dbbcddc82823 100644 --- a/arch/mips/fw/arc/memory.c +++ b/arch/mips/fw/arc/memory.c @@ -158,9 +158,12 @@ void __init prom_meminit(void) } } +void __weak __init prom_cleanup(void) +{ +} + void __init prom_free_prom_memory(void) { - unsigned long addr; int i; if (prom_flags & PROM_FLAG_DONT_FREE_TEMP) @@ -170,4 +173,9 @@ void __init prom_free_prom_memory(void) free_init_pages("prom memory", prom_mem_base[i], prom_mem_base[i] + prom_mem_size[i]); } + /* + * at this point it isn't safe to call PROM functions + * give platforms a way to do PROM cleanups + */ + prom_cleanup(); } diff --git a/arch/mips/fw/arc/misc.c b/arch/mips/fw/arc/misc.c index 19f710117d97..d5b2d5901324 100644 --- a/arch/mips/fw/arc/misc.c +++ b/arch/mips/fw/arc/misc.c @@ -21,47 +21,6 @@ #include <asm/bootinfo.h> VOID __noreturn -ArcHalt(VOID) -{ - bc_disable(); - local_irq_disable(); - ARC_CALL0(halt); - - unreachable(); -} - -VOID __noreturn -ArcPowerDown(VOID) -{ - bc_disable(); - local_irq_disable(); - ARC_CALL0(pdown); - - unreachable(); -} - -/* XXX is this a soft reset basically? 
XXX */ -VOID __noreturn -ArcRestart(VOID) -{ - bc_disable(); - local_irq_disable(); - ARC_CALL0(restart); - - unreachable(); -} - -VOID __noreturn -ArcReboot(VOID) -{ - bc_disable(); - local_irq_disable(); - ARC_CALL0(reboot); - - unreachable(); -} - -VOID __noreturn ArcEnterInteractiveMode(VOID) { bc_disable(); @@ -71,24 +30,6 @@ ArcEnterInteractiveMode(VOID) unreachable(); } -LONG -ArcSaveConfiguration(VOID) -{ - return ARC_CALL0(cfg_save); -} - -struct linux_sysid * -ArcGetSystemId(VOID) -{ - return (struct linux_sysid *) ARC_CALL0(get_sysid); -} - -VOID __init -ArcFlushAllCaches(VOID) -{ - ARC_CALL0(cache_flush); -} - DISPLAY_STATUS * __init ArcGetDisplayStatus(ULONG FileID) { return (DISPLAY_STATUS *) ARC_CALL1(GetDisplayStatus, FileID); diff --git a/arch/mips/fw/arc/promlib.c b/arch/mips/fw/arc/promlib.c index be381307fbb0..5e9e840a9314 100644 --- a/arch/mips/fw/arc/promlib.c +++ b/arch/mips/fw/arc/promlib.c @@ -11,6 +11,21 @@ #include <asm/bcache.h> #include <asm/setup.h> +#if defined(CONFIG_64BIT) && defined(CONFIG_FW_ARC32) +/* + * For 64bit kernels working with a 32bit ARC PROM pointer arguments + * for ARC calls need to reside in CKEG0/1. But as soon as the kernel + * switches to it's first kernel thread stack is set to an address in + * XKPHYS, so anything on stack can't be used anymore. This is solved + * by using a * static declartion variables are put into BSS, which is + * linked to a CKSEG0 address. Since this is only used on UP platforms + * there is not spinlock needed + */ +#define O32_STATIC static +#else +#define O32_STATIC +#endif + /* * IP22 boardcache is not compatible with board caches. Thus we disable it * during romvec action. Since r4xx0.c is always compiled and linked with your @@ -23,8 +38,10 @@ void prom_putchar(char c) { - ULONG cnt; - CHAR it = c; + O32_STATIC ULONG cnt; + O32_STATIC CHAR it; + + it = c; bc_disable(); ArcWrite(1, &it, 1, &cnt); @@ -33,8 +50,8 @@ void prom_putchar(char c) char prom_getchar(void) { - ULONG cnt; - CHAR c; + O32_STATIC ULONG cnt; + O32_STATIC CHAR c; bc_disable(); ArcRead(0, &c, 1, &cnt); diff --git a/arch/mips/fw/arc/salone.c b/arch/mips/fw/arc/salone.c deleted file mode 100644 index 2d99f44d5576..000000000000 --- a/arch/mips/fw/arc/salone.c +++ /dev/null @@ -1,25 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * Routines to load into memory and execute stand-along program images using - * ARCS PROM firmware. - * - * Copyright (C) 1996 David S. Miller (davem@davemloft.net) - */ -#include <linux/init.h> -#include <asm/sgialib.h> - -LONG __init ArcLoad(CHAR *Path, ULONG TopAddr, ULONG *ExecAddr, ULONG *LowAddr) -{ - return ARC_CALL4(load, Path, TopAddr, ExecAddr, LowAddr); -} - -LONG __init ArcInvoke(ULONG ExecAddr, ULONG StackAddr, ULONG Argc, CHAR *Argv[], - CHAR *Envp[]) -{ - return ARC_CALL5(invoke, ExecAddr, StackAddr, Argc, Argv, Envp); -} - -LONG __init ArcExecute(CHAR *Path, LONG Argc, CHAR *Argv[], CHAR *Envp[]) -{ - return ARC_CALL4(exec, Path, Argc, Argv, Envp); -} diff --git a/arch/mips/fw/arc/time.c b/arch/mips/fw/arc/time.c deleted file mode 100644 index 190cdb50b895..000000000000 --- a/arch/mips/fw/arc/time.c +++ /dev/null @@ -1,25 +0,0 @@ -/* - * This file is subject to the terms and conditions of the GNU General Public - * License. See the file "COPYING" in the main directory of this archive - * for more details. - * - * Extracting time information from ARCS prom. - * - * Copyright (C) 1996 David S. 
Miller (davem@davemloft.net) - */ -#include <linux/init.h> - -#include <asm/fw/arc/types.h> -#include <asm/sgialib.h> - -struct linux_tinfo * __init -ArcGetTime(VOID) -{ - return (struct linux_tinfo *) ARC_CALL0(get_tinfo); -} - -ULONG __init -ArcGetRelativeTime(VOID) -{ - return ARC_CALL0(get_rtime); -} diff --git a/arch/mips/fw/arc/tree.c b/arch/mips/fw/arc/tree.c deleted file mode 100644 index 924a37dc2569..000000000000 --- a/arch/mips/fw/arc/tree.c +++ /dev/null @@ -1,127 +0,0 @@ -/* - * This file is subject to the terms and conditions of the GNU General Public - * License. See the file "COPYING" in the main directory of this archive - * for more details. - * - * PROM component device tree code. - * - * Copyright (C) 1996 David S. Miller (davem@davemloft.net) - * Copyright (C) 1999 Ralf Baechle (ralf@gnu.org) - * Copyright (C) 1999 Silicon Graphics, Inc. - */ -#include <linux/init.h> -#include <asm/fw/arc/types.h> -#include <asm/sgialib.h> - -#undef DEBUG_PROM_TREE - -pcomponent * __init -ArcGetPeer(pcomponent *Current) -{ - if (Current == PROM_NULL_COMPONENT) - return PROM_NULL_COMPONENT; - - return (pcomponent *) ARC_CALL1(next_component, Current); -} - -pcomponent * __init -ArcGetChild(pcomponent *Current) -{ - return (pcomponent *) ARC_CALL1(child_component, Current); -} - -pcomponent * __init -ArcGetParent(pcomponent *Current) -{ - if (Current == PROM_NULL_COMPONENT) - return PROM_NULL_COMPONENT; - - return (pcomponent *) ARC_CALL1(parent_component, Current); -} - -LONG __init -ArcGetConfigurationData(VOID *Buffer, pcomponent *Current) -{ - return ARC_CALL2(component_data, Buffer, Current); -} - -pcomponent * __init -ArcAddChild(pcomponent *Current, pcomponent *Template, VOID *ConfigurationData) -{ - return (pcomponent *) - ARC_CALL3(child_add, Current, Template, ConfigurationData); -} - -LONG __init -ArcDeleteComponent(pcomponent *ComponentToDelete) -{ - return ARC_CALL1(comp_del, ComponentToDelete); -} - -pcomponent * __init -ArcGetComponent(CHAR *Path) -{ - return (pcomponent *)ARC_CALL1(component_by_path, Path); -} - -#ifdef DEBUG_PROM_TREE - -static char *classes[] = { - "system", "processor", "cache", "adapter", "controller", "peripheral", - "memory" -}; - -static char *types[] = { - "arc", "cpu", "fpu", "picache", "pdcache", "sicache", "sdcache", - "sccache", "memdev", "eisa adapter", "tc adapter", "scsi adapter", - "dti adapter", "multi-func adapter", "disk controller", - "tp controller", "cdrom controller", "worm controller", - "serial controller", "net controller", "display controller", - "parallel controller", "pointer controller", "keyboard controller", - "audio controller", "misc controller", "disk peripheral", - "floppy peripheral", "tp peripheral", "modem peripheral", - "monitor peripheral", "printer peripheral", "pointer peripheral", - "keyboard peripheral", "terminal peripheral", "line peripheral", - "net peripheral", "misc peripheral", "anonymous" -}; - -static char *iflags[] = { - "bogus", "read only", "removable", "console in", "console out", - "input", "output" -}; - -static void __init -dump_component(pcomponent *p) -{ - printk("[%p]:class<%s>type<%s>flags<%s>ver<%d>rev<%d>", - p, classes[p->class], types[p->type], - iflags[p->iflags], p->vers, p->rev); - printk("key<%08lx>\n\tamask<%08lx>cdsize<%d>ilen<%d>iname<%s>\n", - p->key, p->amask, (int)p->cdsize, (int)p->ilen, p->iname); -} - -static void __init -traverse(pcomponent *p, int op) -{ - dump_component(p); - if(ArcGetChild(p)) - traverse(ArcGetChild(p), 1); - if(ArcGetPeer(p) && op) - 
traverse(ArcGetPeer(p), 1); -} - -void __init -prom_testtree(void) -{ - pcomponent *p; - - p = ArcGetChild(PROM_NULL_COMPONENT); - dump_component(p); - p = ArcGetChild(p); - while(p) { - dump_component(p); - p = ArcGetPeer(p); - } -} - -#endif /* DEBUG_PROM_TREE */ diff --git a/arch/mips/fw/sni/sniprom.c b/arch/mips/fw/sni/sniprom.c index 8772617b64ce..80112f2298b6 100644 --- a/arch/mips/fw/sni/sniprom.c +++ b/arch/mips/fw/sni/sniprom.c @@ -43,7 +43,7 @@ /* O32 stack has to be 8-byte aligned. */ static u64 o32_stk[4096]; -#define O32_STK &o32_stk[sizeof(o32_stk)] +#define O32_STK (&o32_stk[ARRAY_SIZE(o32_stk)]) #define __PROM_O32(fun, arg) fun arg __asm__(#fun); \ __asm__(#fun " = call_o32") diff --git a/arch/mips/generic/init.c b/arch/mips/generic/init.c index d5b8c4717ded..1de215b283d6 100644 --- a/arch/mips/generic/init.c +++ b/arch/mips/generic/init.c @@ -20,9 +20,9 @@ #include <asm/smp-ops.h> #include <asm/time.h> -static __initdata const void *fdt; -static __initdata const struct mips_machine *mach; -static __initdata const void *mach_match_data; +static __initconst const void *fdt; +static __initconst const struct mips_machine *mach; +static __initconst const void *mach_match_data; void __init prom_init(void) { diff --git a/arch/mips/include/asm/atomic.h b/arch/mips/include/asm/atomic.h index bb8658cc7f12..e5ac88392d1f 100644 --- a/arch/mips/include/asm/atomic.h +++ b/arch/mips/include/asm/atomic.h @@ -20,157 +20,176 @@ #include <asm/compiler.h> #include <asm/cpu-features.h> #include <asm/cmpxchg.h> +#include <asm/llsc.h> +#include <asm/sync.h> #include <asm/war.h> -/* - * Using a branch-likely instruction to check the result of an sc instruction - * works around a bug present in R10000 CPUs prior to revision 3.0 that could - * cause ll-sc sequences to execute non-atomically. - */ -#if R10000_LLSC_WAR -# define __scbeqz "beqzl" -#else -# define __scbeqz "beqz" -#endif - -#define ATOMIC_INIT(i) { (i) } +#define ATOMIC_OPS(pfx, type) \ +static __always_inline type pfx##_read(const pfx##_t *v) \ +{ \ + return READ_ONCE(v->counter); \ +} \ + \ +static __always_inline void pfx##_set(pfx##_t *v, type i) \ +{ \ + WRITE_ONCE(v->counter, i); \ +} \ + \ +static __always_inline type pfx##_cmpxchg(pfx##_t *v, type o, type n) \ +{ \ + return cmpxchg(&v->counter, o, n); \ +} \ + \ +static __always_inline type pfx##_xchg(pfx##_t *v, type n) \ +{ \ + return xchg(&v->counter, n); \ +} -/* - * atomic_read - read atomic variable - * @v: pointer of type atomic_t - * - * Atomically reads the value of @v. - */ -#define atomic_read(v) READ_ONCE((v)->counter) +#define ATOMIC_INIT(i) { (i) } +ATOMIC_OPS(atomic, int) -/* - * atomic_set - set atomic variable - * @v: pointer of type atomic_t - * @i: required value - * - * Atomically sets the value of @v to @i. 
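Reviewer note: the new ATOMIC_OPS(pfx, type) generator above stamps out the trivial read/set/xchg/cmpxchg accessors once for atomic_t and once for atomic64_t instead of keeping two hand-written copies. A minimal user-space analogue of that idea, assuming C11 atomics; the GEN_ATOMIC_OPS/my_atomic* names are invented here and are not part of this patch:

#include <stdatomic.h>
#include <stdint.h>
#include <stdio.h>

/* One macro generates the trivial accessors for every width we care about. */
#define GEN_ATOMIC_OPS(pfx, type)					\
static inline type pfx##_read(_Atomic type *v)				\
{									\
	return atomic_load_explicit(v, memory_order_relaxed);		\
}									\
static inline void pfx##_set(_Atomic type *v, type i)			\
{									\
	atomic_store_explicit(v, i, memory_order_relaxed);		\
}									\
static inline type pfx##_xchg(_Atomic type *v, type n)			\
{									\
	return atomic_exchange(v, n);					\
}

GEN_ATOMIC_OPS(my_atomic, int)		/* analogue of ATOMIC_OPS(atomic, int)   */
GEN_ATOMIC_OPS(my_atomic64, int64_t)	/* analogue of ATOMIC_OPS(atomic64, s64) */

int main(void)
{
	_Atomic int v;

	my_atomic_set(&v, 5);
	printf("%d\n", my_atomic_xchg(&v, 7));	/* prints 5, leaves v == 7 */
	return 0;
}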
- */ -#define atomic_set(v, i) WRITE_ONCE((v)->counter, (i)) +#ifdef CONFIG_64BIT +# define ATOMIC64_INIT(i) { (i) } +ATOMIC_OPS(atomic64, s64) +#endif -#define ATOMIC_OP(op, c_op, asm_op) \ -static __inline__ void atomic_##op(int i, atomic_t * v) \ -{ \ - if (kernel_uses_llsc) { \ - int temp; \ - \ - loongson_llsc_mb(); \ - __asm__ __volatile__( \ - " .set push \n" \ - " .set "MIPS_ISA_LEVEL" \n" \ - "1: ll %0, %1 # atomic_" #op " \n" \ - " " #asm_op " %0, %2 \n" \ - " sc %0, %1 \n" \ - "\t" __scbeqz " %0, 1b \n" \ - " .set pop \n" \ - : "=&r" (temp), "+" GCC_OFF_SMALL_ASM() (v->counter) \ - : "Ir" (i) : __LLSC_CLOBBER); \ - } else { \ - unsigned long flags; \ - \ - raw_local_irq_save(flags); \ - v->counter c_op i; \ - raw_local_irq_restore(flags); \ - } \ +#define ATOMIC_OP(pfx, op, type, c_op, asm_op, ll, sc) \ +static __inline__ void pfx##_##op(type i, pfx##_t * v) \ +{ \ + type temp; \ + \ + if (!kernel_uses_llsc) { \ + unsigned long flags; \ + \ + raw_local_irq_save(flags); \ + v->counter c_op i; \ + raw_local_irq_restore(flags); \ + return; \ + } \ + \ + __asm__ __volatile__( \ + " .set push \n" \ + " .set " MIPS_ISA_LEVEL " \n" \ + " " __SYNC(full, loongson3_war) " \n" \ + "1: " #ll " %0, %1 # " #pfx "_" #op " \n" \ + " " #asm_op " %0, %2 \n" \ + " " #sc " %0, %1 \n" \ + "\t" __SC_BEQZ "%0, 1b \n" \ + " .set pop \n" \ + : "=&r" (temp), "+" GCC_OFF_SMALL_ASM() (v->counter) \ + : "Ir" (i) : __LLSC_CLOBBER); \ } -#define ATOMIC_OP_RETURN(op, c_op, asm_op) \ -static __inline__ int atomic_##op##_return_relaxed(int i, atomic_t * v) \ -{ \ - int result; \ - \ - if (kernel_uses_llsc) { \ - int temp; \ - \ - loongson_llsc_mb(); \ - __asm__ __volatile__( \ - " .set push \n" \ - " .set "MIPS_ISA_LEVEL" \n" \ - "1: ll %1, %2 # atomic_" #op "_return \n" \ - " " #asm_op " %0, %1, %3 \n" \ - " sc %0, %2 \n" \ - "\t" __scbeqz " %0, 1b \n" \ - " " #asm_op " %0, %1, %3 \n" \ - " .set pop \n" \ - : "=&r" (result), "=&r" (temp), \ - "+" GCC_OFF_SMALL_ASM() (v->counter) \ - : "Ir" (i) : __LLSC_CLOBBER); \ - } else { \ - unsigned long flags; \ - \ - raw_local_irq_save(flags); \ - result = v->counter; \ - result c_op i; \ - v->counter = result; \ - raw_local_irq_restore(flags); \ - } \ - \ - return result; \ +#define ATOMIC_OP_RETURN(pfx, op, type, c_op, asm_op, ll, sc) \ +static __inline__ type pfx##_##op##_return_relaxed(type i, pfx##_t * v) \ +{ \ + type temp, result; \ + \ + if (!kernel_uses_llsc) { \ + unsigned long flags; \ + \ + raw_local_irq_save(flags); \ + result = v->counter; \ + result c_op i; \ + v->counter = result; \ + raw_local_irq_restore(flags); \ + return result; \ + } \ + \ + __asm__ __volatile__( \ + " .set push \n" \ + " .set " MIPS_ISA_LEVEL " \n" \ + " " __SYNC(full, loongson3_war) " \n" \ + "1: " #ll " %1, %2 # " #pfx "_" #op "_return\n" \ + " " #asm_op " %0, %1, %3 \n" \ + " " #sc " %0, %2 \n" \ + "\t" __SC_BEQZ "%0, 1b \n" \ + " " #asm_op " %0, %1, %3 \n" \ + " .set pop \n" \ + : "=&r" (result), "=&r" (temp), \ + "+" GCC_OFF_SMALL_ASM() (v->counter) \ + : "Ir" (i) : __LLSC_CLOBBER); \ + \ + return result; \ } -#define ATOMIC_FETCH_OP(op, c_op, asm_op) \ -static __inline__ int atomic_fetch_##op##_relaxed(int i, atomic_t * v) \ -{ \ - int result; \ - \ - if (kernel_uses_llsc) { \ - int temp; \ - \ - loongson_llsc_mb(); \ - __asm__ __volatile__( \ - " .set push \n" \ - " .set "MIPS_ISA_LEVEL" \n" \ - "1: ll %1, %2 # atomic_fetch_" #op " \n" \ - " " #asm_op " %0, %1, %3 \n" \ - " sc %0, %2 \n" \ - "\t" __scbeqz " %0, 1b \n" \ - " .set pop \n" \ - " move %0, %1 \n" \ - : "=&r" 
(result), "=&r" (temp), \ - "+" GCC_OFF_SMALL_ASM() (v->counter) \ - : "Ir" (i) : __LLSC_CLOBBER); \ - } else { \ - unsigned long flags; \ - \ - raw_local_irq_save(flags); \ - result = v->counter; \ - v->counter c_op i; \ - raw_local_irq_restore(flags); \ - } \ - \ - return result; \ +#define ATOMIC_FETCH_OP(pfx, op, type, c_op, asm_op, ll, sc) \ +static __inline__ type pfx##_fetch_##op##_relaxed(type i, pfx##_t * v) \ +{ \ + int temp, result; \ + \ + if (!kernel_uses_llsc) { \ + unsigned long flags; \ + \ + raw_local_irq_save(flags); \ + result = v->counter; \ + v->counter c_op i; \ + raw_local_irq_restore(flags); \ + return result; \ + } \ + \ + __asm__ __volatile__( \ + " .set push \n" \ + " .set " MIPS_ISA_LEVEL " \n" \ + " " __SYNC(full, loongson3_war) " \n" \ + "1: " #ll " %1, %2 # " #pfx "_fetch_" #op "\n" \ + " " #asm_op " %0, %1, %3 \n" \ + " " #sc " %0, %2 \n" \ + "\t" __SC_BEQZ "%0, 1b \n" \ + " .set pop \n" \ + " move %0, %1 \n" \ + : "=&r" (result), "=&r" (temp), \ + "+" GCC_OFF_SMALL_ASM() (v->counter) \ + : "Ir" (i) : __LLSC_CLOBBER); \ + \ + return result; \ } -#define ATOMIC_OPS(op, c_op, asm_op) \ - ATOMIC_OP(op, c_op, asm_op) \ - ATOMIC_OP_RETURN(op, c_op, asm_op) \ - ATOMIC_FETCH_OP(op, c_op, asm_op) +#undef ATOMIC_OPS +#define ATOMIC_OPS(pfx, op, type, c_op, asm_op, ll, sc) \ + ATOMIC_OP(pfx, op, type, c_op, asm_op, ll, sc) \ + ATOMIC_OP_RETURN(pfx, op, type, c_op, asm_op, ll, sc) \ + ATOMIC_FETCH_OP(pfx, op, type, c_op, asm_op, ll, sc) -ATOMIC_OPS(add, +=, addu) -ATOMIC_OPS(sub, -=, subu) +ATOMIC_OPS(atomic, add, int, +=, addu, ll, sc) +ATOMIC_OPS(atomic, sub, int, -=, subu, ll, sc) #define atomic_add_return_relaxed atomic_add_return_relaxed #define atomic_sub_return_relaxed atomic_sub_return_relaxed #define atomic_fetch_add_relaxed atomic_fetch_add_relaxed #define atomic_fetch_sub_relaxed atomic_fetch_sub_relaxed +#ifdef CONFIG_64BIT +ATOMIC_OPS(atomic64, add, s64, +=, daddu, lld, scd) +ATOMIC_OPS(atomic64, sub, s64, -=, dsubu, lld, scd) +# define atomic64_add_return_relaxed atomic64_add_return_relaxed +# define atomic64_sub_return_relaxed atomic64_sub_return_relaxed +# define atomic64_fetch_add_relaxed atomic64_fetch_add_relaxed +# define atomic64_fetch_sub_relaxed atomic64_fetch_sub_relaxed +#endif /* CONFIG_64BIT */ + #undef ATOMIC_OPS -#define ATOMIC_OPS(op, c_op, asm_op) \ - ATOMIC_OP(op, c_op, asm_op) \ - ATOMIC_FETCH_OP(op, c_op, asm_op) +#define ATOMIC_OPS(pfx, op, type, c_op, asm_op, ll, sc) \ + ATOMIC_OP(pfx, op, type, c_op, asm_op, ll, sc) \ + ATOMIC_FETCH_OP(pfx, op, type, c_op, asm_op, ll, sc) -ATOMIC_OPS(and, &=, and) -ATOMIC_OPS(or, |=, or) -ATOMIC_OPS(xor, ^=, xor) +ATOMIC_OPS(atomic, and, int, &=, and, ll, sc) +ATOMIC_OPS(atomic, or, int, |=, or, ll, sc) +ATOMIC_OPS(atomic, xor, int, ^=, xor, ll, sc) #define atomic_fetch_and_relaxed atomic_fetch_and_relaxed #define atomic_fetch_or_relaxed atomic_fetch_or_relaxed #define atomic_fetch_xor_relaxed atomic_fetch_xor_relaxed +#ifdef CONFIG_64BIT +ATOMIC_OPS(atomic64, and, s64, &=, and, lld, scd) +ATOMIC_OPS(atomic64, or, s64, |=, or, lld, scd) +ATOMIC_OPS(atomic64, xor, s64, ^=, xor, lld, scd) +# define atomic64_fetch_and_relaxed atomic64_fetch_and_relaxed +# define atomic64_fetch_or_relaxed atomic64_fetch_or_relaxed +# define atomic64_fetch_xor_relaxed atomic64_fetch_xor_relaxed +#endif + #undef ATOMIC_OPS #undef ATOMIC_FETCH_OP #undef ATOMIC_OP_RETURN @@ -184,258 +203,66 @@ ATOMIC_OPS(xor, ^=, xor) * Atomically test @v and subtract @i if @v is greater or equal than @i. 
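For reference, the semantics being converted here (subtract only when the result stays non-negative, and hand back the would-be result either way) can be written portably as a compare-and-swap retry loop. This is an illustrative C11 sketch, not the LL/SC implementation the macro below generates:

#include <stdatomic.h>

static int sub_if_positive(_Atomic int *v, int i)
{
	int old = atomic_load_explicit(v, memory_order_relaxed);
	int new;

	do {
		new = old - i;
		if (new < 0)
			return new;	/* would go negative: leave *v untouched */
		/* on failure, old is refreshed with the current value of *v */
	} while (!atomic_compare_exchange_weak(v, &old, new));

	return new;
}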
* The function returns the old value of @v minus @i. */ -static __inline__ int atomic_sub_if_positive(int i, atomic_t * v) -{ - int result; - - smp_mb__before_llsc(); - - if (kernel_uses_llsc) { - int temp; - - loongson_llsc_mb(); - __asm__ __volatile__( - " .set push \n" - " .set "MIPS_ISA_LEVEL" \n" - "1: ll %1, %2 # atomic_sub_if_positive\n" - " .set pop \n" - " subu %0, %1, %3 \n" - " move %1, %0 \n" - " bltz %0, 2f \n" - " .set push \n" - " .set "MIPS_ISA_LEVEL" \n" - " sc %1, %2 \n" - "\t" __scbeqz " %1, 1b \n" - "2: \n" - " .set pop \n" - : "=&r" (result), "=&r" (temp), - "+" GCC_OFF_SMALL_ASM() (v->counter) - : "Ir" (i) : __LLSC_CLOBBER); - } else { - unsigned long flags; - - raw_local_irq_save(flags); - result = v->counter; - result -= i; - if (result >= 0) - v->counter = result; - raw_local_irq_restore(flags); - } - - smp_llsc_mb(); - - return result; +#define ATOMIC_SIP_OP(pfx, type, op, ll, sc) \ +static __inline__ int pfx##_sub_if_positive(type i, pfx##_t * v) \ +{ \ + type temp, result; \ + \ + smp_mb__before_atomic(); \ + \ + if (!kernel_uses_llsc) { \ + unsigned long flags; \ + \ + raw_local_irq_save(flags); \ + result = v->counter; \ + result -= i; \ + if (result >= 0) \ + v->counter = result; \ + raw_local_irq_restore(flags); \ + smp_mb__after_atomic(); \ + return result; \ + } \ + \ + __asm__ __volatile__( \ + " .set push \n" \ + " .set " MIPS_ISA_LEVEL " \n" \ + " " __SYNC(full, loongson3_war) " \n" \ + "1: " #ll " %1, %2 # atomic_sub_if_positive\n" \ + " .set pop \n" \ + " " #op " %0, %1, %3 \n" \ + " move %1, %0 \n" \ + " bltz %0, 2f \n" \ + " .set push \n" \ + " .set " MIPS_ISA_LEVEL " \n" \ + " " #sc " %1, %2 \n" \ + " " __SC_BEQZ "%1, 1b \n" \ + "2: " __SYNC(full, loongson3_war) " \n" \ + " .set pop \n" \ + : "=&r" (result), "=&r" (temp), \ + "+" GCC_OFF_SMALL_ASM() (v->counter) \ + : "Ir" (i) \ + : __LLSC_CLOBBER); \ + \ + /* \ + * In the Loongson3 workaround case we already have a \ + * completion barrier at 2: above, which is needed due to the \ + * bltz that can branch to code outside of the LL/SC loop. As \ + * such, we don't need to emit another barrier here. 
\ + */ \ + if (!__SYNC_loongson3_war) \ + smp_mb__after_atomic(); \ + \ + return result; \ } -#define atomic_cmpxchg(v, o, n) (cmpxchg(&((v)->counter), (o), (n))) -#define atomic_xchg(v, new) (xchg(&((v)->counter), (new))) - -/* - * atomic_dec_if_positive - decrement by 1 if old value positive - * @v: pointer of type atomic_t - */ +ATOMIC_SIP_OP(atomic, int, subu, ll, sc) #define atomic_dec_if_positive(v) atomic_sub_if_positive(1, v) #ifdef CONFIG_64BIT - -#define ATOMIC64_INIT(i) { (i) } - -/* - * atomic64_read - read atomic variable - * @v: pointer of type atomic64_t - * - */ -#define atomic64_read(v) READ_ONCE((v)->counter) - -/* - * atomic64_set - set atomic variable - * @v: pointer of type atomic64_t - * @i: required value - */ -#define atomic64_set(v, i) WRITE_ONCE((v)->counter, (i)) - -#define ATOMIC64_OP(op, c_op, asm_op) \ -static __inline__ void atomic64_##op(s64 i, atomic64_t * v) \ -{ \ - if (kernel_uses_llsc) { \ - s64 temp; \ - \ - loongson_llsc_mb(); \ - __asm__ __volatile__( \ - " .set push \n" \ - " .set "MIPS_ISA_LEVEL" \n" \ - "1: lld %0, %1 # atomic64_" #op " \n" \ - " " #asm_op " %0, %2 \n" \ - " scd %0, %1 \n" \ - "\t" __scbeqz " %0, 1b \n" \ - " .set pop \n" \ - : "=&r" (temp), "+" GCC_OFF_SMALL_ASM() (v->counter) \ - : "Ir" (i) : __LLSC_CLOBBER); \ - } else { \ - unsigned long flags; \ - \ - raw_local_irq_save(flags); \ - v->counter c_op i; \ - raw_local_irq_restore(flags); \ - } \ -} - -#define ATOMIC64_OP_RETURN(op, c_op, asm_op) \ -static __inline__ s64 atomic64_##op##_return_relaxed(s64 i, atomic64_t * v) \ -{ \ - s64 result; \ - \ - if (kernel_uses_llsc) { \ - s64 temp; \ - \ - loongson_llsc_mb(); \ - __asm__ __volatile__( \ - " .set push \n" \ - " .set "MIPS_ISA_LEVEL" \n" \ - "1: lld %1, %2 # atomic64_" #op "_return\n" \ - " " #asm_op " %0, %1, %3 \n" \ - " scd %0, %2 \n" \ - "\t" __scbeqz " %0, 1b \n" \ - " " #asm_op " %0, %1, %3 \n" \ - " .set pop \n" \ - : "=&r" (result), "=&r" (temp), \ - "+" GCC_OFF_SMALL_ASM() (v->counter) \ - : "Ir" (i) : __LLSC_CLOBBER); \ - } else { \ - unsigned long flags; \ - \ - raw_local_irq_save(flags); \ - result = v->counter; \ - result c_op i; \ - v->counter = result; \ - raw_local_irq_restore(flags); \ - } \ - \ - return result; \ -} - -#define ATOMIC64_FETCH_OP(op, c_op, asm_op) \ -static __inline__ s64 atomic64_fetch_##op##_relaxed(s64 i, atomic64_t * v) \ -{ \ - s64 result; \ - \ - if (kernel_uses_llsc) { \ - s64 temp; \ - \ - loongson_llsc_mb(); \ - __asm__ __volatile__( \ - " .set push \n" \ - " .set "MIPS_ISA_LEVEL" \n" \ - "1: lld %1, %2 # atomic64_fetch_" #op "\n" \ - " " #asm_op " %0, %1, %3 \n" \ - " scd %0, %2 \n" \ - "\t" __scbeqz " %0, 1b \n" \ - " move %0, %1 \n" \ - " .set pop \n" \ - : "=&r" (result), "=&r" (temp), \ - "+" GCC_OFF_SMALL_ASM() (v->counter) \ - : "Ir" (i) : __LLSC_CLOBBER); \ - } else { \ - unsigned long flags; \ - \ - raw_local_irq_save(flags); \ - result = v->counter; \ - v->counter c_op i; \ - raw_local_irq_restore(flags); \ - } \ - \ - return result; \ -} - -#define ATOMIC64_OPS(op, c_op, asm_op) \ - ATOMIC64_OP(op, c_op, asm_op) \ - ATOMIC64_OP_RETURN(op, c_op, asm_op) \ - ATOMIC64_FETCH_OP(op, c_op, asm_op) - -ATOMIC64_OPS(add, +=, daddu) -ATOMIC64_OPS(sub, -=, dsubu) - -#define atomic64_add_return_relaxed atomic64_add_return_relaxed -#define atomic64_sub_return_relaxed atomic64_sub_return_relaxed -#define atomic64_fetch_add_relaxed atomic64_fetch_add_relaxed -#define atomic64_fetch_sub_relaxed atomic64_fetch_sub_relaxed - -#undef ATOMIC64_OPS -#define ATOMIC64_OPS(op, c_op, asm_op) \ - 
ATOMIC64_OP(op, c_op, asm_op) \ - ATOMIC64_FETCH_OP(op, c_op, asm_op) - -ATOMIC64_OPS(and, &=, and) -ATOMIC64_OPS(or, |=, or) -ATOMIC64_OPS(xor, ^=, xor) - -#define atomic64_fetch_and_relaxed atomic64_fetch_and_relaxed -#define atomic64_fetch_or_relaxed atomic64_fetch_or_relaxed -#define atomic64_fetch_xor_relaxed atomic64_fetch_xor_relaxed - -#undef ATOMIC64_OPS -#undef ATOMIC64_FETCH_OP -#undef ATOMIC64_OP_RETURN -#undef ATOMIC64_OP - -/* - * atomic64_sub_if_positive - conditionally subtract integer from atomic - * variable - * @i: integer value to subtract - * @v: pointer of type atomic64_t - * - * Atomically test @v and subtract @i if @v is greater or equal than @i. - * The function returns the old value of @v minus @i. - */ -static __inline__ s64 atomic64_sub_if_positive(s64 i, atomic64_t * v) -{ - s64 result; - - smp_mb__before_llsc(); - - if (kernel_uses_llsc) { - s64 temp; - - __asm__ __volatile__( - " .set push \n" - " .set "MIPS_ISA_LEVEL" \n" - "1: lld %1, %2 # atomic64_sub_if_positive\n" - " dsubu %0, %1, %3 \n" - " move %1, %0 \n" - " bltz %0, 1f \n" - " scd %1, %2 \n" - "\t" __scbeqz " %1, 1b \n" - "1: \n" - " .set pop \n" - : "=&r" (result), "=&r" (temp), - "+" GCC_OFF_SMALL_ASM() (v->counter) - : "Ir" (i)); - } else { - unsigned long flags; - - raw_local_irq_save(flags); - result = v->counter; - result -= i; - if (result >= 0) - v->counter = result; - raw_local_irq_restore(flags); - } - - smp_llsc_mb(); - - return result; -} - -#define atomic64_cmpxchg(v, o, n) \ - ((__typeof__((v)->counter))cmpxchg(&((v)->counter), (o), (n))) -#define atomic64_xchg(v, new) (xchg(&((v)->counter), (new))) - -/* - * atomic64_dec_if_positive - decrement by 1 if old value positive - * @v: pointer of type atomic64_t - */ +ATOMIC_SIP_OP(atomic64, s64, dsubu, lld, scd) #define atomic64_dec_if_positive(v) atomic64_sub_if_positive(1, v) +#endif -#endif /* CONFIG_64BIT */ +#undef ATOMIC_SIP_OP #endif /* _ASM_ATOMIC_H */ diff --git a/arch/mips/include/asm/barrier.h b/arch/mips/include/asm/barrier.h index 9228f7386220..49ff172a72b9 100644 --- a/arch/mips/include/asm/barrier.h +++ b/arch/mips/include/asm/barrier.h @@ -9,131 +9,26 @@ #define __ASM_BARRIER_H #include <asm/addrspace.h> +#include <asm/sync.h> -/* - * Sync types defined by the MIPS architecture (document MD00087 table 6.5) - * These values are used with the sync instruction to perform memory barriers. - * Types of ordering guarantees available through the SYNC instruction: - * - Completion Barriers - * - Ordering Barriers - * As compared to the completion barrier, the ordering barrier is a - * lighter-weight operation as it does not require the specified instructions - * before the SYNC to be already completed. Instead it only requires that those - * specified instructions which are subsequent to the SYNC in the instruction - * stream are never re-ordered for processing ahead of the specified - * instructions which are before the SYNC in the instruction stream. - * This potentially reduces how many cycles the barrier instruction must stall - * before it completes. - * Implementations that do not use any of the non-zero values of stype to define - * different barriers, such as ordering barriers, must make those stype values - * act the same as stype zero. 
- */ - -/* - * Completion barriers: - * - Every synchronizable specified memory instruction (loads or stores or both) - * that occurs in the instruction stream before the SYNC instruction must be - * already globally performed before any synchronizable specified memory - * instructions that occur after the SYNC are allowed to be performed, with - * respect to any other processor or coherent I/O module. - * - * - The barrier does not guarantee the order in which instruction fetches are - * performed. - * - * - A stype value of zero will always be defined such that it performs the most - * complete set of synchronization operations that are defined.This means - * stype zero always does a completion barrier that affects both loads and - * stores preceding the SYNC instruction and both loads and stores that are - * subsequent to the SYNC instruction. Non-zero values of stype may be defined - * by the architecture or specific implementations to perform synchronization - * behaviors that are less complete than that of stype zero. If an - * implementation does not use one of these non-zero values to define a - * different synchronization behavior, then that non-zero value of stype must - * act the same as stype zero completion barrier. This allows software written - * for an implementation with a lighter-weight barrier to work on another - * implementation which only implements the stype zero completion barrier. - * - * - A completion barrier is required, potentially in conjunction with SSNOP (in - * Release 1 of the Architecture) or EHB (in Release 2 of the Architecture), - * to guarantee that memory reference results are visible across operating - * mode changes. For example, a completion barrier is required on some - * implementations on entry to and exit from Debug Mode to guarantee that - * memory effects are handled correctly. - */ - -/* - * stype 0 - A completion barrier that affects preceding loads and stores and - * subsequent loads and stores. - * Older instructions which must reach the load/store ordering point before the - * SYNC instruction completes: Loads, Stores - * Younger instructions which must reach the load/store ordering point only - * after the SYNC instruction completes: Loads, Stores - * Older instructions which must be globally performed when the SYNC instruction - * completes: Loads, Stores - */ -#define STYPE_SYNC 0x0 - -/* - * Ordering barriers: - * - Every synchronizable specified memory instruction (loads or stores or both) - * that occurs in the instruction stream before the SYNC instruction must - * reach a stage in the load/store datapath after which no instruction - * re-ordering is possible before any synchronizable specified memory - * instruction which occurs after the SYNC instruction in the instruction - * stream reaches the same stage in the load/store datapath. - * - * - If any memory instruction before the SYNC instruction in program order, - * generates a memory request to the external memory and any memory - * instruction after the SYNC instruction in program order also generates a - * memory request to external memory, the memory request belonging to the - * older instruction must be globally performed before the time the memory - * request belonging to the younger instruction is globally performed. - * - * - The barrier does not guarantee the order in which instruction fetches are - * performed. 
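The practical consequence of the ordering-barrier text being removed above is the usual producer/consumer pairing: a write-side barrier before publishing a flag, and a read-side barrier after observing it. A self-contained C11 sketch of that pattern, with invented names, standing in for the MIPS SYNC encodings:

#include <stdatomic.h>

static int payload;
static _Atomic int ready;

static void producer(void)
{
	payload = 42;
	atomic_thread_fence(memory_order_release);	/* plays the role of wmb() */
	atomic_store_explicit(&ready, 1, memory_order_relaxed);
}

static int consumer(void)
{
	while (!atomic_load_explicit(&ready, memory_order_relaxed))
		;					/* wait for the flag */
	atomic_thread_fence(memory_order_acquire);	/* plays the role of rmb() */
	return payload;					/* guaranteed to see 42 */
}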
- */ +static inline void __sync(void) +{ + asm volatile(__SYNC(full, always) ::: "memory"); +} -/* - * stype 0x10 - An ordering barrier that affects preceding loads and stores and - * subsequent loads and stores. - * Older instructions which must reach the load/store ordering point before the - * SYNC instruction completes: Loads, Stores - * Younger instructions which must reach the load/store ordering point only - * after the SYNC instruction completes: Loads, Stores - * Older instructions which must be globally performed when the SYNC instruction - * completes: N/A - */ -#define STYPE_SYNC_MB 0x10 +static inline void rmb(void) +{ + asm volatile(__SYNC(rmb, always) ::: "memory"); +} +#define rmb rmb -/* - * stype 0x14 - A completion barrier specific to global invalidations - * - * When a sync instruction of this type completes any preceding GINVI or GINVT - * operation has been globalized & completed on all coherent CPUs. Anything - * that the GINV* instruction should invalidate will have been invalidated on - * all coherent CPUs when this instruction completes. It is implementation - * specific whether the GINV* instructions themselves will ensure completion, - * or this sync type will. - * - * In systems implementing global invalidates (ie. with Config5.GI == 2 or 3) - * this sync type also requires that previous SYNCI operations have completed. - */ -#define STYPE_GINV 0x14 +static inline void wmb(void) +{ + asm volatile(__SYNC(wmb, always) ::: "memory"); +} +#define wmb wmb -#ifdef CONFIG_CPU_HAS_SYNC -#define __sync() \ - __asm__ __volatile__( \ - ".set push\n\t" \ - ".set noreorder\n\t" \ - ".set mips2\n\t" \ - "sync\n\t" \ - ".set pop" \ - : /* no output */ \ - : /* no input */ \ - : "memory") -#else -#define __sync() do { } while(0) -#endif +#define fast_mb() __sync() #define __fast_iob() \ __asm__ __volatile__( \ @@ -146,17 +41,8 @@ : "m" (*(int *)CKSEG1) \ : "memory") #ifdef CONFIG_CPU_CAVIUM_OCTEON -# define OCTEON_SYNCW_STR ".set push\n.set arch=octeon\nsyncw\nsyncw\n.set pop\n" -# define __syncw() __asm__ __volatile__(OCTEON_SYNCW_STR : : : "memory") - -# define fast_wmb() __syncw() -# define fast_rmb() barrier() -# define fast_mb() __sync() # define fast_iob() do { } while (0) #else /* ! CONFIG_CPU_CAVIUM_OCTEON */ -# define fast_wmb() __sync() -# define fast_rmb() __sync() -# define fast_mb() __sync() # ifdef CONFIG_SGI_IP28 # define fast_iob() \ __asm__ __volatile__( \ @@ -192,23 +78,14 @@ #endif /* !CONFIG_CPU_HAS_WB */ -#define wmb() fast_wmb() -#define rmb() fast_rmb() - #if defined(CONFIG_WEAK_ORDERING) -# ifdef CONFIG_CPU_CAVIUM_OCTEON -# define __smp_mb() __sync() -# define __smp_rmb() barrier() -# define __smp_wmb() __syncw() -# else -# define __smp_mb() __asm__ __volatile__("sync" : : :"memory") -# define __smp_rmb() __asm__ __volatile__("sync" : : :"memory") -# define __smp_wmb() __asm__ __volatile__("sync" : : :"memory") -# endif +# define __smp_mb() __sync() +# define __smp_rmb() rmb() +# define __smp_wmb() wmb() #else -#define __smp_mb() barrier() -#define __smp_rmb() barrier() -#define __smp_wmb() barrier() +# define __smp_mb() barrier() +# define __smp_rmb() barrier() +# define __smp_wmb() barrier() #endif /* @@ -218,13 +95,14 @@ * ordering will be done by smp_llsc_mb() and friends. 
*/ #if defined(CONFIG_WEAK_REORDERING_BEYOND_LLSC) && defined(CONFIG_SMP) -#define __WEAK_LLSC_MB " sync \n" -#define smp_llsc_mb() __asm__ __volatile__(__WEAK_LLSC_MB : : :"memory") -#define __LLSC_CLOBBER +# define __WEAK_LLSC_MB sync +# define smp_llsc_mb() \ + __asm__ __volatile__(__stringify(__WEAK_LLSC_MB) : : :"memory") +# define __LLSC_CLOBBER #else -#define __WEAK_LLSC_MB " \n" -#define smp_llsc_mb() do { } while (0) -#define __LLSC_CLOBBER "memory" +# define __WEAK_LLSC_MB +# define smp_llsc_mb() do { } while (0) +# define __LLSC_CLOBBER "memory" #endif #ifdef CONFIG_CPU_CAVIUM_OCTEON @@ -241,52 +119,22 @@ #define nudge_writes() mb() #endif -#define __smp_mb__before_atomic() __smp_mb__before_llsc() -#define __smp_mb__after_atomic() smp_llsc_mb() - /* - * Some Loongson 3 CPUs have a bug wherein execution of a memory access (load, - * store or prefetch) in between an LL & SC can cause the SC instruction to - * erroneously succeed, breaking atomicity. Whilst it's unusual to write code - * containing such sequences, this bug bites harder than we might otherwise - * expect due to reordering & speculation: - * - * 1) A memory access appearing prior to the LL in program order may actually - * be executed after the LL - this is the reordering case. - * - * In order to avoid this we need to place a memory barrier (ie. a SYNC - * instruction) prior to every LL instruction, in between it and any earlier - * memory access instructions. - * - * This reordering case is fixed by 3A R2 CPUs, ie. 3A2000 models and later. - * - * 2) If a conditional branch exists between an LL & SC with a target outside - * of the LL-SC loop, for example an exit upon value mismatch in cmpxchg() - * or similar, then misprediction of the branch may allow speculative - * execution of memory accesses from outside of the LL-SC loop. - * - * In order to avoid this we need a memory barrier (ie. a SYNC instruction) - * at each affected branch target, for which we also use loongson_llsc_mb() - * defined below. - * - * This case affects all current Loongson 3 CPUs. - * - * The above described cases cause an error in the cache coherence protocol; - * such that the Invalidate of a competing LL-SC goes 'missing' and SC - * erroneously observes its core still has Exclusive state and lets the SC - * proceed. - * - * Therefore the error only occurs on SMP systems. + * In the Loongson3 LL/SC workaround case, all of our LL/SC loops already have + * a completion barrier immediately preceding the LL instruction. Therefore we + * can skip emitting a barrier from __smp_mb__before_atomic(). 
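To see why skipping the hook is safe, recall where the before/after hooks sit in a fully ordered read-modify-write: a barrier, the relaxed operation, then another barrier. A simplified C11 stand-in, not the kernel's actual macro expansion; the seq_cst fences here are a stronger substitute for the MIPS SYNCs:

#include <stdatomic.h>

static int add_return(_Atomic int *v, int i)
{
	int ret;

	atomic_thread_fence(memory_order_seq_cst);	/* smp_mb__before_atomic() slot */
	ret = atomic_fetch_add_explicit(v, i, memory_order_relaxed) + i;
	atomic_thread_fence(memory_order_seq_cst);	/* smp_mb__after_atomic() slot  */

	return ret;
}

When the Loongson3 workaround already forces a full SYNC inside the LL/SC loop, the leading fence in this shape is redundant, which is what the #ifdef above encodes.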
*/ -#ifdef CONFIG_CPU_LOONGSON3_WORKAROUNDS /* Loongson-3's LLSC workaround */ -#define loongson_llsc_mb() __asm__ __volatile__("sync" : : :"memory") +#ifdef CONFIG_CPU_LOONGSON3_WORKAROUNDS +# define __smp_mb__before_atomic() #else -#define loongson_llsc_mb() do { } while (0) +# define __smp_mb__before_atomic() __smp_mb__before_llsc() #endif +#define __smp_mb__after_atomic() smp_llsc_mb() + static inline void sync_ginv(void) { - asm volatile("sync\t%0" :: "i"(STYPE_GINV)); + asm volatile(__SYNC(ginv, always)); } #include <asm-generic/barrier.h> diff --git a/arch/mips/include/asm/bitops.h b/arch/mips/include/asm/bitops.h index 985d6a02f9ea..a74769940fbd 100644 --- a/arch/mips/include/asm/bitops.h +++ b/arch/mips/include/asm/bitops.h @@ -13,16 +13,55 @@ #error only <linux/bitops.h> can be included directly #endif +#include <linux/bits.h> #include <linux/compiler.h> #include <linux/types.h> #include <asm/barrier.h> #include <asm/byteorder.h> /* sigh ... */ #include <asm/compiler.h> #include <asm/cpu-features.h> +#include <asm/isa-rev.h> #include <asm/llsc.h> #include <asm/sgidefs.h> #include <asm/war.h> +#define __bit_op(mem, insn, inputs...) do { \ + unsigned long temp; \ + \ + asm volatile( \ + " .set push \n" \ + " .set " MIPS_ISA_LEVEL " \n" \ + " " __SYNC(full, loongson3_war) " \n" \ + "1: " __LL "%0, %1 \n" \ + " " insn " \n" \ + " " __SC "%0, %1 \n" \ + " " __SC_BEQZ "%0, 1b \n" \ + " .set pop \n" \ + : "=&r"(temp), "+" GCC_OFF_SMALL_ASM()(mem) \ + : inputs \ + : __LLSC_CLOBBER); \ +} while (0) + +#define __test_bit_op(mem, ll_dst, insn, inputs...) ({ \ + unsigned long orig, temp; \ + \ + asm volatile( \ + " .set push \n" \ + " .set " MIPS_ISA_LEVEL " \n" \ + " " __SYNC(full, loongson3_war) " \n" \ + "1: " __LL ll_dst ", %2 \n" \ + " " insn " \n" \ + " " __SC "%1, %2 \n" \ + " " __SC_BEQZ "%1, 1b \n" \ + " .set pop \n" \ + : "=&r"(orig), "=&r"(temp), \ + "+" GCC_OFF_SMALL_ASM()(mem) \ + : inputs \ + : __LLSC_CLOBBER); \ + \ + orig; \ +}) + /* * These are the "slower" versions of the functions and are in bitops.c. * These functions call raw_local_irq_{save,restore}(). 
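The slow-path helpers above exist for CPUs without LL/SC; on everything else the new __bit_op()/__test_bit_op() macros boil down to an atomic OR/AND/XOR of one word. A portable user-space analogue of the word/bit arithmetic, using C11 fetch-or instead of the LL/SC loop (names invented for the example):

#include <stdatomic.h>
#include <limits.h>

#define MY_BITS_PER_LONG	(CHAR_BIT * sizeof(unsigned long))

static void my_set_bit(unsigned long nr, _Atomic unsigned long *addr)
{
	_Atomic unsigned long *word = &addr[nr / MY_BITS_PER_LONG];

	atomic_fetch_or(word, 1UL << (nr % MY_BITS_PER_LONG));
}

static int my_test_and_set_bit(unsigned long nr, _Atomic unsigned long *addr)
{
	_Atomic unsigned long *word = &addr[nr / MY_BITS_PER_LONG];
	unsigned long mask = 1UL << (nr % MY_BITS_PER_LONG);

	return (atomic_fetch_or(word, mask) & mask) != 0;
}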
@@ -30,8 +69,6 @@ void __mips_set_bit(unsigned long nr, volatile unsigned long *addr); void __mips_clear_bit(unsigned long nr, volatile unsigned long *addr); void __mips_change_bit(unsigned long nr, volatile unsigned long *addr); -int __mips_test_and_set_bit(unsigned long nr, - volatile unsigned long *addr); int __mips_test_and_set_bit_lock(unsigned long nr, volatile unsigned long *addr); int __mips_test_and_clear_bit(unsigned long nr, @@ -52,51 +89,20 @@ int __mips_test_and_change_bit(unsigned long nr, */ static inline void set_bit(unsigned long nr, volatile unsigned long *addr) { - unsigned long *m = ((unsigned long *) addr) + (nr >> SZLONG_LOG); - int bit = nr & SZLONG_MASK; - unsigned long temp; + volatile unsigned long *m = &addr[BIT_WORD(nr)]; + int bit = nr % BITS_PER_LONG; - if (kernel_uses_llsc && R10000_LLSC_WAR) { - __asm__ __volatile__( - " .set push \n" - " .set arch=r4000 \n" - "1: " __LL "%0, %1 # set_bit \n" - " or %0, %2 \n" - " " __SC "%0, %1 \n" - " beqzl %0, 1b \n" - " .set pop \n" - : "=&r" (temp), "=" GCC_OFF_SMALL_ASM() (*m) - : "ir" (1UL << bit), GCC_OFF_SMALL_ASM() (*m) - : __LLSC_CLOBBER); -#if defined(CONFIG_CPU_MIPSR2) || defined(CONFIG_CPU_MIPSR6) - } else if (kernel_uses_llsc && __builtin_constant_p(bit)) { - loongson_llsc_mb(); - do { - __asm__ __volatile__( - " " __LL "%0, %1 # set_bit \n" - " " __INS "%0, %3, %2, 1 \n" - " " __SC "%0, %1 \n" - : "=&r" (temp), "+" GCC_OFF_SMALL_ASM() (*m) - : "ir" (bit), "r" (~0) - : __LLSC_CLOBBER); - } while (unlikely(!temp)); -#endif /* CONFIG_CPU_MIPSR2 || CONFIG_CPU_MIPSR6 */ - } else if (kernel_uses_llsc) { - loongson_llsc_mb(); - do { - __asm__ __volatile__( - " .set push \n" - " .set "MIPS_ISA_ARCH_LEVEL" \n" - " " __LL "%0, %1 # set_bit \n" - " or %0, %2 \n" - " " __SC "%0, %1 \n" - " .set pop \n" - : "=&r" (temp), "+" GCC_OFF_SMALL_ASM() (*m) - : "ir" (1UL << bit) - : __LLSC_CLOBBER); - } while (unlikely(!temp)); - } else + if (!kernel_uses_llsc) { __mips_set_bit(nr, addr); + return; + } + + if ((MIPS_ISA_REV >= 2) && __builtin_constant_p(bit) && (bit >= 16)) { + __bit_op(*m, __INS "%0, %3, %2, 1", "i"(bit), "r"(~0)); + return; + } + + __bit_op(*m, "or\t%0, %2", "ir"(BIT(bit))); } /* @@ -111,51 +117,20 @@ static inline void set_bit(unsigned long nr, volatile unsigned long *addr) */ static inline void clear_bit(unsigned long nr, volatile unsigned long *addr) { - unsigned long *m = ((unsigned long *) addr) + (nr >> SZLONG_LOG); - int bit = nr & SZLONG_MASK; - unsigned long temp; + volatile unsigned long *m = &addr[BIT_WORD(nr)]; + int bit = nr % BITS_PER_LONG; - if (kernel_uses_llsc && R10000_LLSC_WAR) { - __asm__ __volatile__( - " .set push \n" - " .set arch=r4000 \n" - "1: " __LL "%0, %1 # clear_bit \n" - " and %0, %2 \n" - " " __SC "%0, %1 \n" - " beqzl %0, 1b \n" - " .set pop \n" - : "=&r" (temp), "+" GCC_OFF_SMALL_ASM() (*m) - : "ir" (~(1UL << bit)) - : __LLSC_CLOBBER); -#if defined(CONFIG_CPU_MIPSR2) || defined(CONFIG_CPU_MIPSR6) - } else if (kernel_uses_llsc && __builtin_constant_p(bit)) { - loongson_llsc_mb(); - do { - __asm__ __volatile__( - " " __LL "%0, %1 # clear_bit \n" - " " __INS "%0, $0, %2, 1 \n" - " " __SC "%0, %1 \n" - : "=&r" (temp), "+" GCC_OFF_SMALL_ASM() (*m) - : "ir" (bit) - : __LLSC_CLOBBER); - } while (unlikely(!temp)); -#endif /* CONFIG_CPU_MIPSR2 || CONFIG_CPU_MIPSR6 */ - } else if (kernel_uses_llsc) { - loongson_llsc_mb(); - do { - __asm__ __volatile__( - " .set push \n" - " .set "MIPS_ISA_ARCH_LEVEL" \n" - " " __LL "%0, %1 # clear_bit \n" - " and %0, %2 \n" - " " __SC "%0, %1 \n" - " 
.set pop \n" - : "=&r" (temp), "+" GCC_OFF_SMALL_ASM() (*m) - : "ir" (~(1UL << bit)) - : __LLSC_CLOBBER); - } while (unlikely(!temp)); - } else + if (!kernel_uses_llsc) { __mips_clear_bit(nr, addr); + return; + } + + if ((MIPS_ISA_REV >= 2) && __builtin_constant_p(bit)) { + __bit_op(*m, __INS "%0, $0, %2, 1", "i"(bit)); + return; + } + + __bit_op(*m, "and\t%0, %2", "ir"(~BIT(bit))); } /* @@ -183,159 +158,61 @@ static inline void clear_bit_unlock(unsigned long nr, volatile unsigned long *ad */ static inline void change_bit(unsigned long nr, volatile unsigned long *addr) { - int bit = nr & SZLONG_MASK; - - if (kernel_uses_llsc && R10000_LLSC_WAR) { - unsigned long *m = ((unsigned long *) addr) + (nr >> SZLONG_LOG); - unsigned long temp; - - __asm__ __volatile__( - " .set push \n" - " .set arch=r4000 \n" - "1: " __LL "%0, %1 # change_bit \n" - " xor %0, %2 \n" - " " __SC "%0, %1 \n" - " beqzl %0, 1b \n" - " .set pop \n" - : "=&r" (temp), "+" GCC_OFF_SMALL_ASM() (*m) - : "ir" (1UL << bit) - : __LLSC_CLOBBER); - } else if (kernel_uses_llsc) { - unsigned long *m = ((unsigned long *) addr) + (nr >> SZLONG_LOG); - unsigned long temp; - - loongson_llsc_mb(); - do { - __asm__ __volatile__( - " .set push \n" - " .set "MIPS_ISA_ARCH_LEVEL" \n" - " " __LL "%0, %1 # change_bit \n" - " xor %0, %2 \n" - " " __SC "%0, %1 \n" - " .set pop \n" - : "=&r" (temp), "+" GCC_OFF_SMALL_ASM() (*m) - : "ir" (1UL << bit) - : __LLSC_CLOBBER); - } while (unlikely(!temp)); - } else + volatile unsigned long *m = &addr[BIT_WORD(nr)]; + int bit = nr % BITS_PER_LONG; + + if (!kernel_uses_llsc) { __mips_change_bit(nr, addr); + return; + } + + __bit_op(*m, "xor\t%0, %2", "ir"(BIT(bit))); } /* - * test_and_set_bit - Set a bit and return its old value + * test_and_set_bit_lock - Set a bit and return its old value * @nr: Bit to set * @addr: Address to count from * - * This operation is atomic and cannot be reordered. - * It also implies a memory barrier. + * This operation is atomic and implies acquire ordering semantics + * after the memory operation. 
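Since test_and_set_bit_lock()/clear_bit_unlock() form an acquire/release pair, they are enough to build a minimal bit spinlock. A sketch along the lines of lib's bit_spin_lock(), with the preemption handling left out and the wrapper names invented here:

static inline void my_bit_lock(unsigned long nr, volatile unsigned long *word)
{
	while (test_and_set_bit_lock(nr, word))
		cpu_relax();		/* spin until the holder releases the bit */
}

static inline void my_bit_unlock(unsigned long nr, volatile unsigned long *word)
{
	clear_bit_unlock(nr, word);	/* release: publishes the critical section */
}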
*/ -static inline int test_and_set_bit(unsigned long nr, +static inline int test_and_set_bit_lock(unsigned long nr, volatile unsigned long *addr) { - int bit = nr & SZLONG_MASK; - unsigned long res; + volatile unsigned long *m = &addr[BIT_WORD(nr)]; + int bit = nr % BITS_PER_LONG; + unsigned long res, orig; - smp_mb__before_llsc(); - - if (kernel_uses_llsc && R10000_LLSC_WAR) { - unsigned long *m = ((unsigned long *) addr) + (nr >> SZLONG_LOG); - unsigned long temp; - - __asm__ __volatile__( - " .set push \n" - " .set arch=r4000 \n" - "1: " __LL "%0, %1 # test_and_set_bit \n" - " or %2, %0, %3 \n" - " " __SC "%2, %1 \n" - " beqzl %2, 1b \n" - " and %2, %0, %3 \n" - " .set pop \n" - : "=&r" (temp), "+" GCC_OFF_SMALL_ASM() (*m), "=&r" (res) - : "r" (1UL << bit) - : __LLSC_CLOBBER); - } else if (kernel_uses_llsc) { - unsigned long *m = ((unsigned long *) addr) + (nr >> SZLONG_LOG); - unsigned long temp; - - loongson_llsc_mb(); - do { - __asm__ __volatile__( - " .set push \n" - " .set "MIPS_ISA_ARCH_LEVEL" \n" - " " __LL "%0, %1 # test_and_set_bit \n" - " or %2, %0, %3 \n" - " " __SC "%2, %1 \n" - " .set pop \n" - : "=&r" (temp), "+" GCC_OFF_SMALL_ASM() (*m), "=&r" (res) - : "r" (1UL << bit) - : __LLSC_CLOBBER); - } while (unlikely(!res)); - - res = temp & (1UL << bit); - } else - res = __mips_test_and_set_bit(nr, addr); + if (!kernel_uses_llsc) { + res = __mips_test_and_set_bit_lock(nr, addr); + } else { + orig = __test_bit_op(*m, "%0", + "or\t%1, %0, %3", + "ir"(BIT(bit))); + res = (orig & BIT(bit)) != 0; + } smp_llsc_mb(); - return res != 0; + return res; } /* - * test_and_set_bit_lock - Set a bit and return its old value + * test_and_set_bit - Set a bit and return its old value * @nr: Bit to set * @addr: Address to count from * - * This operation is atomic and implies acquire ordering semantics - * after the memory operation. + * This operation is atomic and cannot be reordered. + * It also implies a memory barrier. 
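A common caller-side use of the fully ordered test_and_set_bit() is claiming a one-shot piece of work exactly once across CPUs; a hedged sketch, where the flag word and do_one_time_setup() are hypothetical:

static unsigned long init_done;

static void maybe_do_init(void)
{
	if (test_and_set_bit(0, &init_done))
		return;			/* another CPU already claimed the work */

	do_one_time_setup();		/* hypothetical; ordered after the bit test */
}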
*/ -static inline int test_and_set_bit_lock(unsigned long nr, +static inline int test_and_set_bit(unsigned long nr, volatile unsigned long *addr) { - int bit = nr & SZLONG_MASK; - unsigned long res; - - if (kernel_uses_llsc && R10000_LLSC_WAR) { - unsigned long *m = ((unsigned long *) addr) + (nr >> SZLONG_LOG); - unsigned long temp; - - __asm__ __volatile__( - " .set push \n" - " .set arch=r4000 \n" - "1: " __LL "%0, %1 # test_and_set_bit \n" - " or %2, %0, %3 \n" - " " __SC "%2, %1 \n" - " beqzl %2, 1b \n" - " and %2, %0, %3 \n" - " .set pop \n" - : "=&r" (temp), "+m" (*m), "=&r" (res) - : "r" (1UL << bit) - : __LLSC_CLOBBER); - } else if (kernel_uses_llsc) { - unsigned long *m = ((unsigned long *) addr) + (nr >> SZLONG_LOG); - unsigned long temp; - - loongson_llsc_mb(); - do { - __asm__ __volatile__( - " .set push \n" - " .set "MIPS_ISA_ARCH_LEVEL" \n" - " " __LL "%0, %1 # test_and_set_bit \n" - " or %2, %0, %3 \n" - " " __SC "%2, %1 \n" - " .set pop \n" - : "=&r" (temp), "+" GCC_OFF_SMALL_ASM() (*m), "=&r" (res) - : "r" (1UL << bit) - : __LLSC_CLOBBER); - } while (unlikely(!res)); - - res = temp & (1UL << bit); - } else - res = __mips_test_and_set_bit_lock(nr, addr); - - smp_llsc_mb(); - - return res != 0; + smp_mb__before_atomic(); + return test_and_set_bit_lock(nr, addr); } + /* * test_and_clear_bit - Clear a bit and return its old value * @nr: Bit to clear @@ -347,71 +224,30 @@ static inline int test_and_set_bit_lock(unsigned long nr, static inline int test_and_clear_bit(unsigned long nr, volatile unsigned long *addr) { - int bit = nr & SZLONG_MASK; - unsigned long res; - - smp_mb__before_llsc(); + volatile unsigned long *m = &addr[BIT_WORD(nr)]; + int bit = nr % BITS_PER_LONG; + unsigned long res, orig; - if (kernel_uses_llsc && R10000_LLSC_WAR) { - unsigned long *m = ((unsigned long *) addr) + (nr >> SZLONG_LOG); - unsigned long temp; + smp_mb__before_atomic(); - __asm__ __volatile__( - " .set push \n" - " .set arch=r4000 \n" - "1: " __LL "%0, %1 # test_and_clear_bit \n" - " or %2, %0, %3 \n" - " xor %2, %3 \n" - " " __SC "%2, %1 \n" - " beqzl %2, 1b \n" - " and %2, %0, %3 \n" - " .set pop \n" - : "=&r" (temp), "+" GCC_OFF_SMALL_ASM() (*m), "=&r" (res) - : "r" (1UL << bit) - : __LLSC_CLOBBER); -#if defined(CONFIG_CPU_MIPSR2) || defined(CONFIG_CPU_MIPSR6) - } else if (kernel_uses_llsc && __builtin_constant_p(nr)) { - unsigned long *m = ((unsigned long *) addr) + (nr >> SZLONG_LOG); - unsigned long temp; - - loongson_llsc_mb(); - do { - __asm__ __volatile__( - " " __LL "%0, %1 # test_and_clear_bit \n" - " " __EXT "%2, %0, %3, 1 \n" - " " __INS "%0, $0, %3, 1 \n" - " " __SC "%0, %1 \n" - : "=&r" (temp), "+" GCC_OFF_SMALL_ASM() (*m), "=&r" (res) - : "ir" (bit) - : __LLSC_CLOBBER); - } while (unlikely(!temp)); -#endif - } else if (kernel_uses_llsc) { - unsigned long *m = ((unsigned long *) addr) + (nr >> SZLONG_LOG); - unsigned long temp; - - loongson_llsc_mb(); - do { - __asm__ __volatile__( - " .set push \n" - " .set "MIPS_ISA_ARCH_LEVEL" \n" - " " __LL "%0, %1 # test_and_clear_bit \n" - " or %2, %0, %3 \n" - " xor %2, %3 \n" - " " __SC "%2, %1 \n" - " .set pop \n" - : "=&r" (temp), "+" GCC_OFF_SMALL_ASM() (*m), "=&r" (res) - : "r" (1UL << bit) - : __LLSC_CLOBBER); - } while (unlikely(!res)); - - res = temp & (1UL << bit); - } else + if (!kernel_uses_llsc) { res = __mips_test_and_clear_bit(nr, addr); + } else if ((MIPS_ISA_REV >= 2) && __builtin_constant_p(nr)) { + res = __test_bit_op(*m, "%1", + __EXT "%0, %1, %3, 1;" + __INS "%1, $0, %3, 1", + "i"(bit)); + } else { + orig = 
__test_bit_op(*m, "%0", + "or\t%1, %0, %3;" + "xor\t%1, %1, %3", + "ir"(BIT(bit))); + res = (orig & BIT(bit)) != 0; + } smp_llsc_mb(); - return res != 0; + return res; } /* @@ -425,54 +261,29 @@ static inline int test_and_clear_bit(unsigned long nr, static inline int test_and_change_bit(unsigned long nr, volatile unsigned long *addr) { - int bit = nr & SZLONG_MASK; - unsigned long res; + volatile unsigned long *m = &addr[BIT_WORD(nr)]; + int bit = nr % BITS_PER_LONG; + unsigned long res, orig; - smp_mb__before_llsc(); - - if (kernel_uses_llsc && R10000_LLSC_WAR) { - unsigned long *m = ((unsigned long *) addr) + (nr >> SZLONG_LOG); - unsigned long temp; + smp_mb__before_atomic(); - __asm__ __volatile__( - " .set push \n" - " .set arch=r4000 \n" - "1: " __LL "%0, %1 # test_and_change_bit \n" - " xor %2, %0, %3 \n" - " " __SC "%2, %1 \n" - " beqzl %2, 1b \n" - " and %2, %0, %3 \n" - " .set pop \n" - : "=&r" (temp), "+" GCC_OFF_SMALL_ASM() (*m), "=&r" (res) - : "r" (1UL << bit) - : __LLSC_CLOBBER); - } else if (kernel_uses_llsc) { - unsigned long *m = ((unsigned long *) addr) + (nr >> SZLONG_LOG); - unsigned long temp; - - loongson_llsc_mb(); - do { - __asm__ __volatile__( - " .set push \n" - " .set "MIPS_ISA_ARCH_LEVEL" \n" - " " __LL "%0, %1 # test_and_change_bit \n" - " xor %2, %0, %3 \n" - " " __SC "\t%2, %1 \n" - " .set pop \n" - : "=&r" (temp), "+" GCC_OFF_SMALL_ASM() (*m), "=&r" (res) - : "r" (1UL << bit) - : __LLSC_CLOBBER); - } while (unlikely(!res)); - - res = temp & (1UL << bit); - } else + if (!kernel_uses_llsc) { res = __mips_test_and_change_bit(nr, addr); + } else { + orig = __test_bit_op(*m, "%0", + "xor\t%1, %0, %3", + "ir"(BIT(bit))); + res = (orig & BIT(bit)) != 0; + } smp_llsc_mb(); - return res != 0; + return res; } +#undef __bit_op +#undef __test_bit_op + #include <asm-generic/bitops/non-atomic.h> /* diff --git a/arch/mips/include/asm/bmips.h b/arch/mips/include/asm/bmips.h index bf6a8afd7ad2..581a6a3c66e4 100644 --- a/arch/mips/include/asm/bmips.h +++ b/arch/mips/include/asm/bmips.h @@ -75,11 +75,11 @@ static inline int register_bmips_smp_ops(void) #endif } -extern char bmips_reset_nmi_vec; -extern char bmips_reset_nmi_vec_end; -extern char bmips_smp_movevec; -extern char bmips_smp_int_vec; -extern char bmips_smp_int_vec_end; +extern char bmips_reset_nmi_vec[]; +extern char bmips_reset_nmi_vec_end[]; +extern char bmips_smp_movevec[]; +extern char bmips_smp_int_vec[]; +extern char bmips_smp_int_vec_end[]; extern int bmips_smp_enabled; extern int bmips_cpu_offset; diff --git a/arch/mips/include/asm/bootinfo.h b/arch/mips/include/asm/bootinfo.h index 34d62229dea5..d41a5057bc69 100644 --- a/arch/mips/include/asm/bootinfo.h +++ b/arch/mips/include/asm/bootinfo.h @@ -61,7 +61,7 @@ /* * Valid machtype for Loongson family */ -enum loongson_machine_type { +enum loongson2ef_machine_type { MACH_LOONGSON_UNKNOWN, MACH_LEMOTE_FL2E, MACH_LEMOTE_FL2F, @@ -70,7 +70,6 @@ enum loongson_machine_type { MACH_DEXXON_GDIUM2F10, MACH_LEMOTE_NAS, MACH_LEMOTE_LL2F, - MACH_LOONGSON_GENERIC, MACH_LOONGSON_END }; @@ -99,6 +98,7 @@ extern void detect_memory_region(phys_addr_t start, phys_addr_t sz_min, phys_ad extern void prom_init(void); extern void prom_free_prom_memory(void); +extern void prom_cleanup(void); extern void free_init_pages(const char *what, unsigned long begin, unsigned long end); diff --git a/arch/mips/include/asm/bugs.h b/arch/mips/include/asm/bugs.h index d8ab8b7129b5..d72dc6e1cf3c 100644 --- a/arch/mips/include/asm/bugs.h +++ b/arch/mips/include/asm/bugs.h @@ -26,9 +26,8 @@ extern 
void check_bugs64(void); static inline void check_bugs_early(void) { -#ifdef CONFIG_64BIT - check_bugs64_early(); -#endif + if (IS_ENABLED(CONFIG_CPU_R4X00_BUGS64)) + check_bugs64_early(); } static inline void check_bugs(void) @@ -37,19 +36,18 @@ static inline void check_bugs(void) cpu_data[cpu].udelay_val = loops_per_jiffy; check_bugs32(); -#ifdef CONFIG_64BIT - check_bugs64(); -#endif + + if (IS_ENABLED(CONFIG_CPU_R4X00_BUGS64)) + check_bugs64(); } static inline int r4k_daddiu_bug(void) { -#ifdef CONFIG_64BIT + if (!IS_ENABLED(CONFIG_CPU_R4X00_BUGS64)) + return 0; + WARN_ON(daddiu_bug < 0); return daddiu_bug != 0; -#else - return 0; -#endif } #endif /* _ASM_BUGS_H */ diff --git a/arch/mips/include/asm/cmpxchg.h b/arch/mips/include/asm/cmpxchg.h index 79bf34efbc04..5b0b3a6777ea 100644 --- a/arch/mips/include/asm/cmpxchg.h +++ b/arch/mips/include/asm/cmpxchg.h @@ -11,20 +11,11 @@ #include <linux/bug.h> #include <linux/irqflags.h> #include <asm/compiler.h> +#include <asm/llsc.h> +#include <asm/sync.h> #include <asm/war.h> /* - * Using a branch-likely instruction to check the result of an sc instruction - * works around a bug present in R10000 CPUs prior to revision 3.0 that could - * cause ll-sc sequences to execute non-atomically. - */ -#if R10000_LLSC_WAR -# define __scbeqz "beqzl" -#else -# define __scbeqz "beqz" -#endif - -/* * These functions doesn't exist, so if they are called you'll either: * * - Get an error at compile-time due to __compiletime_error, if supported by @@ -46,18 +37,18 @@ extern unsigned long __xchg_called_with_bad_pointer(void) __typeof(*(m)) __ret; \ \ if (kernel_uses_llsc) { \ - loongson_llsc_mb(); \ __asm__ __volatile__( \ " .set push \n" \ " .set noat \n" \ " .set push \n" \ " .set " MIPS_ISA_ARCH_LEVEL " \n" \ + " " __SYNC(full, loongson3_war) " \n" \ "1: " ld " %0, %2 # __xchg_asm \n" \ " .set pop \n" \ " move $1, %z3 \n" \ " .set " MIPS_ISA_ARCH_LEVEL " \n" \ " " st " $1, %1 \n" \ - "\t" __scbeqz " $1, 1b \n" \ + "\t" __SC_BEQZ "$1, 1b \n" \ " .set pop \n" \ : "=&r" (__ret), "=" GCC_OFF_SMALL_ASM() (*m) \ : GCC_OFF_SMALL_ASM() (*m), "Jr" (val) \ @@ -77,8 +68,8 @@ extern unsigned long __xchg_called_with_bad_pointer(void) extern unsigned long __xchg_small(volatile void *ptr, unsigned long val, unsigned int size); -static inline unsigned long __xchg(volatile void *ptr, unsigned long x, - int size) +static __always_inline +unsigned long __xchg(volatile void *ptr, unsigned long x, int size) { switch (size) { case 1: @@ -103,7 +94,13 @@ static inline unsigned long __xchg(volatile void *ptr, unsigned long x, ({ \ __typeof__(*(ptr)) __res; \ \ - smp_mb__before_llsc(); \ + /* \ + * In the Loongson3 workaround case __xchg_asm() already \ + * contains a completion barrier prior to the LL, so we don't \ + * need to emit an extra one here. 
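For context, the barriers being tuned here sit underneath the usual cmpxchg() retry loop; a small sketch of such a caller, where the capped-counter use case and names are invented for illustration:

static unsigned int bounded_inc(unsigned int *ctr, unsigned int max)
{
	unsigned int old, new;

	do {
		old = READ_ONCE(*ctr);
		if (old >= max)
			return old;		/* already at the cap, leave it alone */
		new = old + 1;
	} while (cmpxchg(ctr, old, new) != old);	/* retry if someone raced us */

	return new;
}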
\ + */ \ + if (!__SYNC_loongson3_war) \ + smp_mb__before_llsc(); \ \ __res = (__typeof__(*(ptr))) \ __xchg((ptr), (unsigned long)(x), sizeof(*(ptr))); \ @@ -118,25 +115,24 @@ static inline unsigned long __xchg(volatile void *ptr, unsigned long x, __typeof(*(m)) __ret; \ \ if (kernel_uses_llsc) { \ - loongson_llsc_mb(); \ __asm__ __volatile__( \ " .set push \n" \ " .set noat \n" \ " .set push \n" \ " .set "MIPS_ISA_ARCH_LEVEL" \n" \ + " " __SYNC(full, loongson3_war) " \n" \ "1: " ld " %0, %2 # __cmpxchg_asm \n" \ " bne %0, %z3, 2f \n" \ " .set pop \n" \ " move $1, %z4 \n" \ " .set "MIPS_ISA_ARCH_LEVEL" \n" \ " " st " $1, %1 \n" \ - "\t" __scbeqz " $1, 1b \n" \ + "\t" __SC_BEQZ "$1, 1b \n" \ " .set pop \n" \ - "2: \n" \ + "2: " __SYNC(full, loongson3_war) " \n" \ : "=&r" (__ret), "=" GCC_OFF_SMALL_ASM() (*m) \ : GCC_OFF_SMALL_ASM() (*m), "Jr" (old), "Jr" (new) \ : __LLSC_CLOBBER); \ - loongson_llsc_mb(); \ } else { \ unsigned long __flags; \ \ @@ -153,8 +149,9 @@ static inline unsigned long __xchg(volatile void *ptr, unsigned long x, extern unsigned long __cmpxchg_small(volatile void *ptr, unsigned long old, unsigned long new, unsigned int size); -static inline unsigned long __cmpxchg(volatile void *ptr, unsigned long old, - unsigned long new, unsigned int size) +static __always_inline +unsigned long __cmpxchg(volatile void *ptr, unsigned long old, + unsigned long new, unsigned int size) { switch (size) { case 1: @@ -189,9 +186,23 @@ static inline unsigned long __cmpxchg(volatile void *ptr, unsigned long old, ({ \ __typeof__(*(ptr)) __res; \ \ - smp_mb__before_llsc(); \ + /* \ + * In the Loongson3 workaround case __cmpxchg_asm() already \ + * contains a completion barrier prior to the LL, so we don't \ + * need to emit an extra one here. \ + */ \ + if (!__SYNC_loongson3_war) \ + smp_mb__before_llsc(); \ + \ __res = cmpxchg_local((ptr), (old), (new)); \ - smp_llsc_mb(); \ + \ + /* \ + * In the Loongson3 workaround case __cmpxchg_asm() already \ + * contains a completion barrier after the SC, so we don't \ + * need to emit an extra one here. \ + */ \ + if (!__SYNC_loongson3_war) \ + smp_llsc_mb(); \ \ __res; \ }) @@ -232,11 +243,11 @@ static inline unsigned long __cmpxchg64(volatile void *ptr, */ local_irq_save(flags); - loongson_llsc_mb(); asm volatile( " .set push \n" " .set " MIPS_ISA_ARCH_LEVEL " \n" /* Load 64 bits from ptr */ + " " __SYNC(full, loongson3_war) " \n" "1: lld %L0, %3 # __cmpxchg64 \n" /* * Split the 64 bit value we loaded into the 2 registers that hold the @@ -268,9 +279,9 @@ static inline unsigned long __cmpxchg64(volatile void *ptr, /* Attempt to store new at ptr */ " scd %L1, %2 \n" /* If we failed, loop! 
*/ - "\t" __scbeqz " %L1, 1b \n" + "\t" __SC_BEQZ "%L1, 1b \n" " .set pop \n" - "2: \n" + "2: " __SYNC(full, loongson3_war) " \n" : "=&r"(ret), "=&r"(tmp), "=" GCC_OFF_SMALL_ASM() (*(unsigned long long *)ptr) @@ -278,7 +289,6 @@ static inline unsigned long __cmpxchg64(volatile void *ptr, "r" (old), "r" (new) : "memory"); - loongson_llsc_mb(); local_irq_restore(flags); return ret; @@ -311,6 +321,4 @@ static inline unsigned long __cmpxchg64(volatile void *ptr, # endif /* !CONFIG_SMP */ #endif /* !CONFIG_64BIT */ -#undef __scbeqz - #endif /* __ASM_CMPXCHG_H */ diff --git a/arch/mips/include/asm/cop2.h b/arch/mips/include/asm/cop2.h index 63b3468ede4c..6b7396a6a115 100644 --- a/arch/mips/include/asm/cop2.h +++ b/arch/mips/include/asm/cop2.h @@ -33,7 +33,7 @@ extern void nlm_cop2_restore(struct nlm_cop2_state *); #define cop2_present 1 #define cop2_lazy_restore 0 -#elif defined(CONFIG_CPU_LOONGSON3) +#elif defined(CONFIG_CPU_LOONGSON64) #define cop2_present 1 #define cop2_lazy_restore 1 diff --git a/arch/mips/include/asm/cpu-type.h b/arch/mips/include/asm/cpu-type.h index 7bbb66760a07..c46c59b0f1b4 100644 --- a/arch/mips/include/asm/cpu-type.h +++ b/arch/mips/include/asm/cpu-type.h @@ -15,18 +15,17 @@ static inline int __pure __get_cpu_type(const int cpu_type) { switch (cpu_type) { -#if defined(CONFIG_SYS_HAS_CPU_LOONGSON2E) || \ - defined(CONFIG_SYS_HAS_CPU_LOONGSON2F) - case CPU_LOONGSON2: +#if defined(CONFIG_SYS_HAS_CPU_LOONGSON2EF) + case CPU_LOONGSON2EF: #endif -#ifdef CONFIG_SYS_HAS_CPU_LOONGSON3 - case CPU_LOONGSON3: +#ifdef CONFIG_SYS_HAS_CPU_LOONGSON64 + case CPU_LOONGSON64: #endif #if defined(CONFIG_SYS_HAS_CPU_LOONGSON1B) || \ defined(CONFIG_SYS_HAS_CPU_LOONGSON1C) - case CPU_LOONGSON1: + case CPU_LOONGSON32: #endif #ifdef CONFIG_SYS_HAS_CPU_MIPS32_R1 diff --git a/arch/mips/include/asm/cpu.h b/arch/mips/include/asm/cpu.h index 7fddcb8350c6..ea830783d663 100644 --- a/arch/mips/include/asm/cpu.h +++ b/arch/mips/include/asm/cpu.h @@ -91,7 +91,9 @@ #define PRID_IMP_LOONGSON_32 0x4200 /* Loongson-1 */ #define PRID_IMP_R5432 0x5400 #define PRID_IMP_R5500 0x5500 -#define PRID_IMP_LOONGSON_64 0x6300 /* Loongson-2/3 */ +#define PRID_IMP_LOONGSON_64R 0x6100 /* Reduced Loongson-2 */ +#define PRID_IMP_LOONGSON_64C 0x6300 /* Classic Loongson-2 and Loongson-3 */ +#define PRID_IMP_LOONGSON_64G 0xc000 /* Generic Loongson-2 and Loongson-3 */ #define PRID_IMP_UNKNOWN 0xff00 @@ -310,15 +312,15 @@ enum cpu_type_enum { */ CPU_4KC, CPU_4KEC, CPU_4KSC, CPU_24K, CPU_34K, CPU_1004K, CPU_74K, CPU_ALCHEMY, CPU_PR4450, CPU_BMIPS32, CPU_BMIPS3300, CPU_BMIPS4350, - CPU_BMIPS4380, CPU_BMIPS5000, CPU_XBURST, CPU_LOONGSON1, CPU_M14KC, + CPU_BMIPS4380, CPU_BMIPS5000, CPU_XBURST, CPU_LOONGSON32, CPU_M14KC, CPU_M14KEC, CPU_INTERAPTIV, CPU_P5600, CPU_PROAPTIV, CPU_1074K, CPU_M5150, CPU_I6400, CPU_P6600, CPU_M6250, /* * MIPS64 class processors */ - CPU_5KC, CPU_5KE, CPU_20KC, CPU_25KF, CPU_SB1, CPU_SB1A, CPU_LOONGSON2, - CPU_LOONGSON3, CPU_CAVIUM_OCTEON, CPU_CAVIUM_OCTEON_PLUS, + CPU_5KC, CPU_5KE, CPU_20KC, CPU_25KF, CPU_SB1, CPU_SB1A, CPU_LOONGSON2EF, + CPU_LOONGSON64, CPU_CAVIUM_OCTEON, CPU_CAVIUM_OCTEON_PLUS, CPU_CAVIUM_OCTEON2, CPU_CAVIUM_OCTEON3, CPU_XLR, CPU_XLP, CPU_I6500, CPU_QEMU_GENERIC, diff --git a/arch/mips/include/asm/fixmap.h b/arch/mips/include/asm/fixmap.h index 6842ffafd1e7..1784d4348c36 100644 --- a/arch/mips/include/asm/fixmap.h +++ b/arch/mips/include/asm/fixmap.h @@ -70,7 +70,7 @@ enum fixed_addresses { #include <asm-generic/fixmap.h> #define kmap_get_fixmap_pte(vaddr) \ - 
pte_offset_kernel(pmd_offset(pud_offset(pgd_offset_k(vaddr), (vaddr)), (vaddr)), (vaddr)) + pte_offset_kernel(pmd_offset(pud_offset(p4d_offset(pgd_offset_k(vaddr), (vaddr)), (vaddr)), (vaddr)), (vaddr)) /* * Called from pgtable_init() diff --git a/arch/mips/include/asm/futex.h b/arch/mips/include/asm/futex.h index b83b0397462d..110220705e97 100644 --- a/arch/mips/include/asm/futex.h +++ b/arch/mips/include/asm/futex.h @@ -16,6 +16,7 @@ #include <asm/barrier.h> #include <asm/compiler.h> #include <asm/errno.h> +#include <asm/sync.h> #include <asm/war.h> #define __futex_atomic_op(insn, ret, oldval, uaddr, oparg) \ @@ -32,7 +33,7 @@ " .set arch=r4000 \n" \ "2: sc $1, %2 \n" \ " beqzl $1, 1b \n" \ - __WEAK_LLSC_MB \ + __stringify(__WEAK_LLSC_MB) " \n" \ "3: \n" \ " .insn \n" \ " .set pop \n" \ @@ -50,19 +51,19 @@ "i" (-EFAULT) \ : "memory"); \ } else if (cpu_has_llsc) { \ - loongson_llsc_mb(); \ __asm__ __volatile__( \ " .set push \n" \ " .set noat \n" \ " .set push \n" \ " .set "MIPS_ISA_ARCH_LEVEL" \n" \ + " " __SYNC(full, loongson3_war) " \n" \ "1: "user_ll("%1", "%4")" # __futex_atomic_op\n" \ " .set pop \n" \ " " insn " \n" \ " .set "MIPS_ISA_ARCH_LEVEL" \n" \ "2: "user_sc("$1", "%2")" \n" \ " beqz $1, 1b \n" \ - __WEAK_LLSC_MB \ + __stringify(__WEAK_LLSC_MB) " \n" \ "3: \n" \ " .insn \n" \ " .set pop \n" \ @@ -147,7 +148,7 @@ futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr, " .set arch=r4000 \n" "2: sc $1, %2 \n" " beqzl $1, 1b \n" - __WEAK_LLSC_MB + __stringify(__WEAK_LLSC_MB) " \n" "3: \n" " .insn \n" " .set pop \n" @@ -164,13 +165,13 @@ futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr, "i" (-EFAULT) : "memory"); } else if (cpu_has_llsc) { - loongson_llsc_mb(); __asm__ __volatile__( "# futex_atomic_cmpxchg_inatomic \n" " .set push \n" " .set noat \n" " .set push \n" " .set "MIPS_ISA_ARCH_LEVEL" \n" + " " __SYNC(full, loongson3_war) " \n" "1: "user_ll("%1", "%3")" \n" " bne %1, %z4, 3f \n" " .set pop \n" @@ -178,8 +179,7 @@ futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr, " .set "MIPS_ISA_ARCH_LEVEL" \n" "2: "user_sc("$1", "%2")" \n" " beqz $1, 1b \n" - __WEAK_LLSC_MB - "3: \n" + "3: " __SYNC_ELSE(full, loongson3_war, __WEAK_LLSC_MB) "\n" " .insn \n" " .set pop \n" " .section .fixup,\"ax\" \n" @@ -194,7 +194,6 @@ futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr, : GCC_OFF_SMALL_ASM() (*uaddr), "Jr" (oldval), "Jr" (newval), "i" (-EFAULT) : "memory"); - loongson_llsc_mb(); } else return -ENOSYS; diff --git a/arch/mips/include/asm/hazards.h b/arch/mips/include/asm/hazards.h index 0fa27446869a..a4f48b0f5541 100644 --- a/arch/mips/include/asm/hazards.h +++ b/arch/mips/include/asm/hazards.h @@ -158,7 +158,7 @@ do { \ } while (0) #elif defined(CONFIG_MIPS_ALCHEMY) || defined(CONFIG_CPU_CAVIUM_OCTEON) || \ - defined(CONFIG_CPU_LOONGSON2) || defined(CONFIG_LOONGSON3_ENHANCEMENT) || \ + defined(CONFIG_CPU_LOONGSON2EF) || defined(CONFIG_LOONGSON3_ENHANCEMENT) || \ defined(CONFIG_CPU_R10000) || defined(CONFIG_CPU_R5500) || defined(CONFIG_CPU_XLR) /* diff --git a/arch/mips/include/asm/io.h b/arch/mips/include/asm/io.h index 2b7b56736372..3f6ce74335b4 100644 --- a/arch/mips/include/asm/io.h +++ b/arch/mips/include/asm/io.h @@ -306,7 +306,7 @@ static inline void iounmap(const volatile void __iomem *addr) #undef __IS_KSEG1 } -#if defined(CONFIG_CPU_CAVIUM_OCTEON) || defined(CONFIG_CPU_LOONGSON3) +#if defined(CONFIG_CPU_CAVIUM_OCTEON) || defined(CONFIG_CPU_LOONGSON64) #define war_io_reorder_wmb() wmb() #else #define war_io_reorder_wmb() barrier() diff --git 
a/arch/mips/include/asm/irqflags.h b/arch/mips/include/asm/irqflags.h index f0b862a83816..c4728bbdf15b 100644 --- a/arch/mips/include/asm/irqflags.h +++ b/arch/mips/include/asm/irqflags.h @@ -41,7 +41,7 @@ static inline unsigned long arch_local_irq_save(void) " .set push \n" " .set reorder \n" " .set noat \n" -#if defined(CONFIG_CPU_LOONGSON3) || defined (CONFIG_CPU_LOONGSON1) +#if defined(CONFIG_CPU_LOONGSON64) || defined(CONFIG_CPU_LOONGSON32) " mfc0 %[flags], $12 \n" " di \n" #else diff --git a/arch/mips/include/asm/llsc.h b/arch/mips/include/asm/llsc.h index c6d17d171147..c49738bc3bda 100644 --- a/arch/mips/include/asm/llsc.h +++ b/arch/mips/include/asm/llsc.h @@ -9,20 +9,31 @@ #ifndef __ASM_LLSC_H #define __ASM_LLSC_H +#include <asm/isa-rev.h> + #if _MIPS_SZLONG == 32 -#define SZLONG_LOG 5 -#define SZLONG_MASK 31UL #define __LL "ll " #define __SC "sc " #define __INS "ins " #define __EXT "ext " #elif _MIPS_SZLONG == 64 -#define SZLONG_LOG 6 -#define SZLONG_MASK 63UL #define __LL "lld " #define __SC "scd " #define __INS "dins " #define __EXT "dext " #endif +/* + * Using a branch-likely instruction to check the result of an sc instruction + * works around a bug present in R10000 CPUs prior to revision 3.0 that could + * cause ll-sc sequences to execute non-atomically. + */ +#if R10000_LLSC_WAR +# define __SC_BEQZ "beqzl " +#elif MIPS_ISA_REV >= 6 +# define __SC_BEQZ "beqzc " +#else +# define __SC_BEQZ "beqz " +#endif + #endif /* __ASM_LLSC_H */ diff --git a/arch/mips/include/asm/mach-ip22/spaces.h b/arch/mips/include/asm/mach-ip22/spaces.h index 7f9fa6f66059..24fe92cb5313 100644 --- a/arch/mips/include/asm/mach-ip22/spaces.h +++ b/arch/mips/include/asm/mach-ip22/spaces.h @@ -10,17 +10,7 @@ #ifndef _ASM_MACH_IP22_SPACES_H #define _ASM_MACH_IP22_SPACES_H - -#ifdef CONFIG_64BIT - -#define PAGE_OFFSET 0xffffffff80000000UL - -#define CAC_BASE 0xffffffff80000000 -#define IO_BASE 0xffffffffa0000000 -#define UNCAC_BASE 0xffffffffa0000000 -#define MAP_BASE 0xc000000000000000 - -#endif /* CONFIG_64BIT */ +#define PHYS_OFFSET _AC(0x08000000, UL) #include <asm/mach-generic/spaces.h> diff --git a/arch/mips/include/asm/mach-ip27/mmzone.h b/arch/mips/include/asm/mach-ip27/mmzone.h index 1cd6a23a84f2..f463826515df 100644 --- a/arch/mips/include/asm/mach-ip27/mmzone.h +++ b/arch/mips/include/asm/mach-ip27/mmzone.h @@ -6,7 +6,7 @@ #include <asm/sn/arch.h> #include <asm/sn/hub.h> -#define pa_to_nid(addr) NASID_TO_COMPACT_NODEID(NASID_GET(addr)) +#define pa_to_nid(addr) NASID_GET(addr) struct hub_data { kern_vars_t kern_vars; diff --git a/arch/mips/include/asm/mach-ip27/topology.h b/arch/mips/include/asm/mach-ip27/topology.h index 965f0793a5f9..be61ddcdacab 100644 --- a/arch/mips/include/asm/mach-ip27/topology.h +++ b/arch/mips/include/asm/mach-ip27/topology.h @@ -7,14 +7,13 @@ #include <asm/mmzone.h> struct cpuinfo_ip27 { - cnodeid_t p_nodeid; /* my node ID in compact-id-space */ nasid_t p_nasid; /* my node ID in numa-as-id-space */ unsigned char p_slice; /* Physical position on node board */ }; extern struct cpuinfo_ip27 sn_cpu_info[NR_CPUS]; -#define cpu_to_node(cpu) (sn_cpu_info[(cpu)].p_nodeid) +#define cpu_to_node(cpu) (cputonasid(cpu)) #define cpumask_of_node(node) ((node) == -1 ? 
\ cpu_all_mask : \ &hub_data(node)->h_cpus) @@ -23,7 +22,7 @@ extern int pcibus_to_node(struct pci_bus *); #define cpumask_of_pcibus(bus) (cpumask_of_node(pcibus_to_node(bus))) -extern unsigned char __node_distances[MAX_COMPACT_NODES][MAX_COMPACT_NODES]; +extern unsigned char __node_distances[MAX_NUMNODES][MAX_NUMNODES]; #define node_distance(from, to) (__node_distances[(from)][(to)]) diff --git a/arch/mips/include/asm/mach-ip30/cpu-feature-overrides.h b/arch/mips/include/asm/mach-ip30/cpu-feature-overrides.h new file mode 100644 index 000000000000..cfa02f3d25df --- /dev/null +++ b/arch/mips/include/asm/mach-ip30/cpu-feature-overrides.h @@ -0,0 +1,83 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * IP30/Octane cpu-features overrides. + * + * Copyright (C) 2003 Ralf Baechle <ralf@linux-mips.org> + * 2004-2007 Stanislaw Skowronek <skylark@unaligned.org> + * 2009 Johannes Dickgreber <tanzy@gmx.de> + * 2015 Joshua Kinard <kumba@gentoo.org> + * + */ +#ifndef __ASM_MACH_IP30_CPU_FEATURE_OVERRIDES_H +#define __ASM_MACH_IP30_CPU_FEATURE_OVERRIDES_H + +#include <asm/cpu.h> + +/* + * IP30 only supports R1[024]000 processors, all using the same config + */ +#define cpu_has_tlb 1 +#define cpu_has_tlbinv 0 +#define cpu_has_segments 0 +#define cpu_has_eva 0 +#define cpu_has_htw 0 +#define cpu_has_rixiex 0 +#define cpu_has_maar 0 +#define cpu_has_rw_llb 0 +#define cpu_has_3kex 0 +#define cpu_has_4kex 1 +#define cpu_has_3k_cache 0 +#define cpu_has_4k_cache 1 +#define cpu_has_6k_cache 0 +#define cpu_has_8k_cache 0 +#define cpu_has_tx39_cache 0 +#define cpu_has_fpu 1 +#define cpu_has_nofpuex 0 +#define cpu_has_32fpr 1 +#define cpu_has_counter 1 +#define cpu_has_watch 1 +#define cpu_has_64bits 1 +#define cpu_has_divec 0 +#define cpu_has_vce 0 +#define cpu_has_cache_cdex_p 0 +#define cpu_has_cache_cdex_s 0 +#define cpu_has_prefetch 1 +#define cpu_has_mcheck 0 +#define cpu_has_ejtag 0 +#define cpu_has_llsc 1 +#define cpu_has_mips16 0 +#define cpu_has_mdmx 0 +#define cpu_has_mips3d 0 +#define cpu_has_smartmips 0 +#define cpu_has_rixi 0 +#define cpu_has_xpa 0 +#define cpu_has_vtag_icache 0 +#define cpu_has_dc_aliases 0 +#define cpu_has_ic_fills_f_dc 0 + +#define cpu_icache_snoops_remote_store 1 + +#define cpu_has_mips32r1 0 +#define cpu_has_mips32r2 0 +#define cpu_has_mips64r1 0 +#define cpu_has_mips64r2 0 +#define cpu_has_mips32r6 0 +#define cpu_has_mips64r6 0 + +#define cpu_has_dsp 0 +#define cpu_has_dsp2 0 +#define cpu_has_mipsmt 0 +#define cpu_has_userlocal 0 +#define cpu_has_inclusive_pcaches 1 +#define cpu_hwrena_impl_bits 0 +#define cpu_has_perf_cntr_intr_bit 0 +#define cpu_has_vz 0 +#define cpu_has_fre 0 +#define cpu_has_cdmm 0 + +#define cpu_dcache_line_size() 32 +#define cpu_icache_line_size() 64 +#define cpu_scache_line_size() 128 + +#endif /* __ASM_MACH_IP30_CPU_FEATURE_OVERRIDES_H */ + diff --git a/arch/mips/include/asm/mach-ip30/irq.h b/arch/mips/include/asm/mach-ip30/irq.h new file mode 100644 index 000000000000..e5c3dd965266 --- /dev/null +++ b/arch/mips/include/asm/mach-ip30/irq.h @@ -0,0 +1,87 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * HEART IRQ defines + * + * Copyright (C) 2009 Johannes Dickgreber <tanzy@gmx.de> + * 2014-2016 Joshua Kinard <kumba@gentoo.org> + * + */ + +#ifndef __ASM_MACH_IP30_IRQ_H +#define __ASM_MACH_IP30_IRQ_H + +/* + * HEART has 64 hardware interrupts, but use 128 to leave room for a few + * software interrupts as well (such as the CPU timer interrupt. 
+ */ +#define NR_IRQS 128 + +extern void __init ip30_install_ipi(void); + +/* + * HEART has 64 interrupt vectors available to it, subdivided into five + * priority levels. They are numbered 0 to 63. + */ +#define HEART_NUM_IRQS 64 + +/* + * These are the five interrupt priority levels and their corresponding + * CPU IPx interrupt pins. + * + * Level 4 - Error Interrupts. + * Level 3 - HEART timer interrupt. + * Level 2 - CPU IPI, CPU debug, power button, general device interrupts. + * Level 1 - General device interrupts. + * Level 0 - General device GFX flow control interrupts. + */ +#define HEART_L4_INT_MASK 0xfff8000000000000ULL /* IP6 */ +#define HEART_L3_INT_MASK 0x0004000000000000ULL /* IP5 */ +#define HEART_L2_INT_MASK 0x0003ffff00000000ULL /* IP4 */ +#define HEART_L1_INT_MASK 0x00000000ffff0000ULL /* IP3 */ +#define HEART_L0_INT_MASK 0x000000000000ffffULL /* IP2 */ + +/* HEART L0 Interrupts (Low Priority) */ +#define HEART_L0_INT_GENERIC 0 +#define HEART_L0_INT_FLOW_CTRL_HWTR_0 1 +#define HEART_L0_INT_FLOW_CTRL_HWTR_1 2 + +/* HEART L2 Interrupts (High Priority) */ +#define HEART_L2_INT_RESCHED_CPU_0 46 +#define HEART_L2_INT_RESCHED_CPU_1 47 +#define HEART_L2_INT_CALL_CPU_0 48 +#define HEART_L2_INT_CALL_CPU_1 49 + +/* HEART L3 Interrupts (Compare/Counter Timer) */ +#define HEART_L3_INT_TIMER 50 + +/* HEART L4 Interrupts (Errors) */ +#define HEART_L4_INT_XWID_ERR_9 51 +#define HEART_L4_INT_XWID_ERR_A 52 +#define HEART_L4_INT_XWID_ERR_B 53 +#define HEART_L4_INT_XWID_ERR_C 54 +#define HEART_L4_INT_XWID_ERR_D 55 +#define HEART_L4_INT_XWID_ERR_E 56 +#define HEART_L4_INT_XWID_ERR_F 57 +#define HEART_L4_INT_XWID_ERR_XBOW 58 +#define HEART_L4_INT_CPU_BUS_ERR_0 59 +#define HEART_L4_INT_CPU_BUS_ERR_1 60 +#define HEART_L4_INT_CPU_BUS_ERR_2 61 +#define HEART_L4_INT_CPU_BUS_ERR_3 62 +#define HEART_L4_INT_HEART_EXCP 63 + +/* + * Power Switch is wired via BaseIO BRIDGE slot #6. + * + * ACFail is wired via BaseIO BRIDGE slot #7.
+ */ +#define IP30_POWER_IRQ HEART_L2_INT_POWER_BTN + +#include_next <irq.h> + +#define IP30_HEART_L0_IRQ (MIPS_CPU_IRQ_BASE + 2) +#define IP30_HEART_L1_IRQ (MIPS_CPU_IRQ_BASE + 3) +#define IP30_HEART_L2_IRQ (MIPS_CPU_IRQ_BASE + 4) +#define IP30_HEART_TIMER_IRQ (MIPS_CPU_IRQ_BASE + 5) +#define IP30_HEART_ERR_IRQ (MIPS_CPU_IRQ_BASE + 6) + +#endif /* __ASM_MACH_IP30_IRQ_H */ diff --git a/arch/mips/include/asm/mach-ip30/kernel-entry-init.h b/arch/mips/include/asm/mach-ip30/kernel-entry-init.h new file mode 100644 index 000000000000..be0472c977d8 --- /dev/null +++ b/arch/mips/include/asm/mach-ip30/kernel-entry-init.h @@ -0,0 +1,13 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#ifndef __ASM_MACH_IP30_KERNEL_ENTRY_H +#define __ASM_MACH_IP30_KERNEL_ENTRY_H + + .macro kernel_entry_setup + .endm + + .macro smp_slave_setup + move gp, a0 + .endm + +#endif /* __ASM_MACH_IP30_KERNEL_ENTRY_H */ diff --git a/arch/mips/include/asm/mach-ip30/mangle-port.h b/arch/mips/include/asm/mach-ip30/mangle-port.h new file mode 100644 index 000000000000..f3e1262a2d5e --- /dev/null +++ b/arch/mips/include/asm/mach-ip30/mangle-port.h @@ -0,0 +1,22 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2003, 2004 Ralf Baechle + */ +#ifndef __ASM_MACH_IP30_MANGLE_PORT_H +#define __ASM_MACH_IP30_MANGLE_PORT_H + +#define __swizzle_addr_b(port) ((port)^3) +#define __swizzle_addr_w(port) ((port)^2) +#define __swizzle_addr_l(port) (port) +#define __swizzle_addr_q(port) (port) + +#define ioswabb(a, x) (x) +#define __mem_ioswabb(a, x) (x) +#define ioswabw(a, x) (x) +#define __mem_ioswabw(a, x) cpu_to_le16(x) +#define ioswabl(a, x) (x) +#define __mem_ioswabl(a, x) cpu_to_le32(x) +#define ioswabq(a, x) (x) +#define __mem_ioswabq(a, x) cpu_to_le64(x) + +#endif /* __ASM_MACH_IP30_MANGLE_PORT_H */ diff --git a/arch/mips/include/asm/mach-ip30/spaces.h b/arch/mips/include/asm/mach-ip30/spaces.h new file mode 100644 index 000000000000..c8a302dfbe05 --- /dev/null +++ b/arch/mips/include/asm/mach-ip30/spaces.h @@ -0,0 +1,20 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2016 Joshua Kinard <kumba@gentoo.org> + * + */ +#ifndef _ASM_MACH_IP30_SPACES_H +#define _ASM_MACH_IP30_SPACES_H + +/* + * Memory in IP30/Octane is offset 512MB in the physical address space. 
+ */ +#define PHYS_OFFSET _AC(0x20000000, UL) + +#ifdef CONFIG_64BIT +#define CAC_BASE _AC(0xA800000000000000, UL) +#endif + +#include <asm/mach-generic/spaces.h> + +#endif /* _ASM_MACH_IP30_SPACES_H */ diff --git a/arch/mips/include/asm/mach-ip30/war.h b/arch/mips/include/asm/mach-ip30/war.h new file mode 100644 index 000000000000..a98ba204f183 --- /dev/null +++ b/arch/mips/include/asm/mach-ip30/war.h @@ -0,0 +1,26 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2002, 2004, 2007 by Ralf Baechle <ralf@linux-mips.org> + */ +#ifndef __ASM_MIPS_MACH_IP30_WAR_H +#define __ASM_MIPS_MACH_IP30_WAR_H + +#define R4600_V1_INDEX_ICACHEOP_WAR 0 +#define R4600_V1_HIT_CACHEOP_WAR 0 +#define R4600_V2_HIT_CACHEOP_WAR 0 +#define MIPS_CACHE_SYNC_WAR 0 +#define BCM1250_M3_WAR 0 +#define SIBYTE_1956_WAR 0 +#define MIPS4K_ICACHE_REFILL_WAR 0 +#define MIPS34K_MISSED_ITLB_WAR 0 +#define R5432_CP0_INTERRUPT_WAR 0 +#define TX49XX_ICACHE_INDEX_INV_WAR 0 +#define ICACHE_REFILLS_WORKAROUND_WAR 0 + +#ifdef CONFIG_CPU_R10000 +#define R10000_LLSC_WAR 1 +#else +#define R10000_LLSC_WAR 0 +#endif + +#endif /* __ASM_MIPS_MACH_IP30_WAR_H */ diff --git a/arch/mips/include/asm/mach-loongson2ef/cpu-feature-overrides.h b/arch/mips/include/asm/mach-loongson2ef/cpu-feature-overrides.h new file mode 100644 index 000000000000..b2ee859ca0b7 --- /dev/null +++ b/arch/mips/include/asm/mach-loongson2ef/cpu-feature-overrides.h @@ -0,0 +1,44 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * Copyright (C) 2009 Wu Zhangjin <wuzhangjin@gmail.com> + * Copyright (C) 2009 Philippe Vachon <philippe@cowpig.ca> + * Copyright (C) 2009 Zhang Le <r0bertz@gentoo.org> + * + * reference: /proc/cpuinfo, + * arch/mips/kernel/cpu-probe.c(cpu_probe_legacy), + * arch/mips/kernel/proc.c(show_cpuinfo), + * loongson2f user manual. 
+ */ + +#ifndef __ASM_MACH_LOONGSON2EF_CPU_FEATURE_OVERRIDES_H +#define __ASM_MACH_LOONGSON2EF_CPU_FEATURE_OVERRIDES_H + +#define cpu_has_32fpr 1 +#define cpu_has_3k_cache 0 +#define cpu_has_4k_cache 1 +#define cpu_has_4kex 1 +#define cpu_has_64bits 1 +#define cpu_has_cache_cdex_p 0 +#define cpu_has_cache_cdex_s 0 +#define cpu_has_counter 1 +#define cpu_has_dc_aliases (PAGE_SIZE < 0x4000) +#define cpu_has_divec 0 +#define cpu_has_ejtag 0 +#define cpu_has_inclusive_pcaches 1 +#define cpu_has_llsc 1 +#define cpu_has_mcheck 0 +#define cpu_has_mdmx 0 +#define cpu_has_mips16 0 +#define cpu_has_mips16e2 0 +#define cpu_has_mips3d 0 +#define cpu_has_mipsmt 0 +#define cpu_has_smartmips 0 +#define cpu_has_tlb 1 +#define cpu_has_tx39_cache 0 +#define cpu_has_vce 0 +#define cpu_has_veic 0 +#define cpu_has_vint 0 +#define cpu_has_vtag_icache 0 +#define cpu_has_watch 1 + +#endif /* __ASM_MACH_LOONGSON64_CPU_FEATURE_OVERRIDES_H */ diff --git a/arch/mips/include/asm/mach-loongson64/cs5536/cs5536.h b/arch/mips/include/asm/mach-loongson2ef/cs5536/cs5536.h index 9795b3361532..9795b3361532 100644 --- a/arch/mips/include/asm/mach-loongson64/cs5536/cs5536.h +++ b/arch/mips/include/asm/mach-loongson2ef/cs5536/cs5536.h diff --git a/arch/mips/include/asm/mach-loongson64/cs5536/cs5536_mfgpt.h b/arch/mips/include/asm/mach-loongson2ef/cs5536/cs5536_mfgpt.h index 52e8bb0fc04d..52e8bb0fc04d 100644 --- a/arch/mips/include/asm/mach-loongson64/cs5536/cs5536_mfgpt.h +++ b/arch/mips/include/asm/mach-loongson2ef/cs5536/cs5536_mfgpt.h diff --git a/arch/mips/include/asm/mach-loongson64/cs5536/cs5536_pci.h b/arch/mips/include/asm/mach-loongson2ef/cs5536/cs5536_pci.h index a0d4b752899e..a0d4b752899e 100644 --- a/arch/mips/include/asm/mach-loongson64/cs5536/cs5536_pci.h +++ b/arch/mips/include/asm/mach-loongson2ef/cs5536/cs5536_pci.h diff --git a/arch/mips/include/asm/mach-loongson64/cs5536/cs5536_vsm.h b/arch/mips/include/asm/mach-loongson2ef/cs5536/cs5536_vsm.h index 70d0153cccc3..70d0153cccc3 100644 --- a/arch/mips/include/asm/mach-loongson64/cs5536/cs5536_vsm.h +++ b/arch/mips/include/asm/mach-loongson2ef/cs5536/cs5536_vsm.h diff --git a/arch/mips/include/asm/mach-loongson2ef/loongson.h b/arch/mips/include/asm/mach-loongson2ef/loongson.h new file mode 100644 index 000000000000..5008af0a1a19 --- /dev/null +++ b/arch/mips/include/asm/mach-loongson2ef/loongson.h @@ -0,0 +1,326 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * Copyright (C) 2009 Lemote, Inc. 
+ * Author: Wu Zhangjin <wuzhangjin@gmail.com> + */ + +#ifndef __ASM_MACH_LOONGSON2EF_LOONGSON_H +#define __ASM_MACH_LOONGSON2EF_LOONGSON_H + +#include <linux/io.h> +#include <linux/init.h> +#include <linux/irq.h> + +/* loongson internal northbridge initialization */ +extern void bonito_irq_init(void); + +/* machine-specific reboot/halt operation */ +extern void mach_prepare_reboot(void); +extern void mach_prepare_shutdown(void); + +/* environment arguments from bootloader */ +extern u32 cpu_clock_freq; +extern u32 memsize, highmemsize; + +/* loongson-specific command line, env and memory initialization */ +extern void __init prom_init_memory(void); +extern void __init prom_init_machtype(void); +extern void __init prom_init_env(void); +#ifdef CONFIG_LOONGSON_UART_BASE +extern unsigned long _loongson_uart_base, loongson_uart_base; +extern void prom_init_loongson_uart_base(void); +#endif + +static inline void prom_init_uart_base(void) +{ +#ifdef CONFIG_LOONGSON_UART_BASE + prom_init_loongson_uart_base(); +#endif +} + +/* irq operation functions */ +extern void bonito_irqdispatch(void); +extern void __init bonito_irq_init(void); +extern void __init mach_init_irq(void); +extern void mach_irq_dispatch(unsigned int pending); +extern int mach_i8259_irq(void); + +/* We need this in some places... */ +#define delay() ({ \ + int x; \ + for (x = 0; x < 100000; x++) \ + __asm__ __volatile__(""); \ +}) + +#define LOONGSON_REG(x) \ + (*(volatile u32 *)((char *)CKSEG1ADDR(LOONGSON_REG_BASE) + (x))) + +#define LOONGSON_IRQ_BASE 32 +#define LOONGSON2_PERFCNT_IRQ (MIPS_CPU_IRQ_BASE + 6) /* cpu perf counter */ + +#include <linux/interrupt.h> +static inline void do_perfcnt_IRQ(void) +{ +#if IS_ENABLED(CONFIG_OPROFILE) + do_IRQ(LOONGSON2_PERFCNT_IRQ); +#endif +} + +#define LOONGSON_FLASH_BASE 0x1c000000 +#define LOONGSON_FLASH_SIZE 0x02000000 /* 32M */ +#define LOONGSON_FLASH_TOP (LOONGSON_FLASH_BASE+LOONGSON_FLASH_SIZE-1) + +#define LOONGSON_LIO0_BASE 0x1e000000 +#define LOONGSON_LIO0_SIZE 0x01C00000 /* 28M */ +#define LOONGSON_LIO0_TOP (LOONGSON_LIO0_BASE+LOONGSON_LIO0_SIZE-1) + +#define LOONGSON_BOOT_BASE 0x1fc00000 +#define LOONGSON_BOOT_SIZE 0x00100000 /* 1M */ +#define LOONGSON_BOOT_TOP (LOONGSON_BOOT_BASE+LOONGSON_BOOT_SIZE-1) +#define LOONGSON_REG_BASE 0x1fe00000 +#define LOONGSON_REG_SIZE 0x00100000 /* 256Bytes + 256Bytes + ??? 
*/ +#define LOONGSON_REG_TOP (LOONGSON_REG_BASE+LOONGSON_REG_SIZE-1) + +#define LOONGSON_LIO1_BASE 0x1ff00000 +#define LOONGSON_LIO1_SIZE 0x00100000 /* 1M */ +#define LOONGSON_LIO1_TOP (LOONGSON_LIO1_BASE+LOONGSON_LIO1_SIZE-1) + +#define LOONGSON_PCILO0_BASE 0x10000000 +#define LOONGSON_PCILO1_BASE 0x14000000 +#define LOONGSON_PCILO2_BASE 0x18000000 +#define LOONGSON_PCILO_BASE LOONGSON_PCILO0_BASE +#define LOONGSON_PCILO_SIZE 0x0c000000 /* 64M * 3 */ +#define LOONGSON_PCILO_TOP (LOONGSON_PCILO0_BASE+LOONGSON_PCILO_SIZE-1) + +#define LOONGSON_PCICFG_BASE 0x1fe80000 +#define LOONGSON_PCICFG_SIZE 0x00000800 /* 2K */ +#define LOONGSON_PCICFG_TOP (LOONGSON_PCICFG_BASE+LOONGSON_PCICFG_SIZE-1) +#define LOONGSON_PCIIO_BASE 0x1fd00000 + +#define LOONGSON_PCIIO_SIZE 0x00100000 /* 1M */ +#define LOONGSON_PCIIO_TOP (LOONGSON_PCIIO_BASE+LOONGSON_PCIIO_SIZE-1) + +/* Loongson Register Bases */ + +#define LOONGSON_PCICONFIGBASE 0x00 +#define LOONGSON_REGBASE 0x100 + +/* PCI Configuration Registers */ + +#define LOONGSON_PCI_REG(x) LOONGSON_REG(LOONGSON_PCICONFIGBASE + (x)) +#define LOONGSON_PCIDID LOONGSON_PCI_REG(0x00) +#define LOONGSON_PCICMD LOONGSON_PCI_REG(0x04) +#define LOONGSON_PCICLASS LOONGSON_PCI_REG(0x08) +#define LOONGSON_PCILTIMER LOONGSON_PCI_REG(0x0c) +#define LOONGSON_PCIBASE0 LOONGSON_PCI_REG(0x10) +#define LOONGSON_PCIBASE1 LOONGSON_PCI_REG(0x14) +#define LOONGSON_PCIBASE2 LOONGSON_PCI_REG(0x18) +#define LOONGSON_PCIBASE3 LOONGSON_PCI_REG(0x1c) +#define LOONGSON_PCIBASE4 LOONGSON_PCI_REG(0x20) +#define LOONGSON_PCIEXPRBASE LOONGSON_PCI_REG(0x30) +#define LOONGSON_PCIINT LOONGSON_PCI_REG(0x3c) + +#define LOONGSON_PCI_ISR4C LOONGSON_PCI_REG(0x4c) + +#define LOONGSON_PCICMD_PERR_CLR 0x80000000 +#define LOONGSON_PCICMD_SERR_CLR 0x40000000 +#define LOONGSON_PCICMD_MABORT_CLR 0x20000000 +#define LOONGSON_PCICMD_MTABORT_CLR 0x10000000 +#define LOONGSON_PCICMD_TABORT_CLR 0x08000000 +#define LOONGSON_PCICMD_MPERR_CLR 0x01000000 +#define LOONGSON_PCICMD_PERRRESPEN 0x00000040 +#define LOONGSON_PCICMD_ASTEPEN 0x00000080 +#define LOONGSON_PCICMD_SERREN 0x00000100 +#define LOONGSON_PCILTIMER_BUSLATENCY 0x0000ff00 +#define LOONGSON_PCILTIMER_BUSLATENCY_SHIFT 8 + +/* Loongson h/w Configuration */ + +#define LOONGSON_GENCFG_OFFSET 0x4 +#define LOONGSON_GENCFG LOONGSON_REG(LOONGSON_REGBASE + LOONGSON_GENCFG_OFFSET) + +#define LOONGSON_GENCFG_DEBUGMODE 0x00000001 +#define LOONGSON_GENCFG_SNOOPEN 0x00000002 +#define LOONGSON_GENCFG_CPUSELFRESET 0x00000004 + +#define LOONGSON_GENCFG_FORCE_IRQA 0x00000008 +#define LOONGSON_GENCFG_IRQA_ISOUT 0x00000010 +#define LOONGSON_GENCFG_IRQA_FROM_INT1 0x00000020 +#define LOONGSON_GENCFG_BYTESWAP 0x00000040 + +#define LOONGSON_GENCFG_UNCACHED 0x00000080 +#define LOONGSON_GENCFG_PREFETCHEN 0x00000100 +#define LOONGSON_GENCFG_WBEHINDEN 0x00000200 +#define LOONGSON_GENCFG_CACHEALG 0x00000c00 +#define LOONGSON_GENCFG_CACHEALG_SHIFT 10 +#define LOONGSON_GENCFG_PCIQUEUE 0x00001000 +#define LOONGSON_GENCFG_CACHESTOP 0x00002000 +#define LOONGSON_GENCFG_MSTRBYTESWAP 0x00004000 +#define LOONGSON_GENCFG_BUSERREN 0x00008000 +#define LOONGSON_GENCFG_NORETRYTIMEOUT 0x00010000 +#define LOONGSON_GENCFG_SHORTCOPYTIMEOUT 0x00020000 + +/* PCI address map control */ + +#define LOONGSON_PCIMAP LOONGSON_REG(LOONGSON_REGBASE + 0x10) +#define LOONGSON_PCIMEMBASECFG LOONGSON_REG(LOONGSON_REGBASE + 0x14) +#define LOONGSON_PCIMAP_CFG LOONGSON_REG(LOONGSON_REGBASE + 0x18) + +/* GPIO Regs - r/w */ + +#define LOONGSON_GPIODATA LOONGSON_REG(LOONGSON_REGBASE + 0x1c) +#define LOONGSON_GPIOIE 
LOONGSON_REG(LOONGSON_REGBASE + 0x20) + +/* ICU Configuration Regs - r/w */ + +#define LOONGSON_INTEDGE LOONGSON_REG(LOONGSON_REGBASE + 0x24) +#define LOONGSON_INTSTEER LOONGSON_REG(LOONGSON_REGBASE + 0x28) +#define LOONGSON_INTPOL LOONGSON_REG(LOONGSON_REGBASE + 0x2c) + +/* ICU Enable Regs - IntEn & IntISR are r/o. */ + +#define LOONGSON_INTENSET LOONGSON_REG(LOONGSON_REGBASE + 0x30) +#define LOONGSON_INTENCLR LOONGSON_REG(LOONGSON_REGBASE + 0x34) +#define LOONGSON_INTEN LOONGSON_REG(LOONGSON_REGBASE + 0x38) +#define LOONGSON_INTISR LOONGSON_REG(LOONGSON_REGBASE + 0x3c) + +/* ICU */ +#define LOONGSON_ICU_MBOXES 0x0000000f +#define LOONGSON_ICU_MBOXES_SHIFT 0 +#define LOONGSON_ICU_DMARDY 0x00000010 +#define LOONGSON_ICU_DMAEMPTY 0x00000020 +#define LOONGSON_ICU_COPYRDY 0x00000040 +#define LOONGSON_ICU_COPYEMPTY 0x00000080 +#define LOONGSON_ICU_COPYERR 0x00000100 +#define LOONGSON_ICU_PCIIRQ 0x00000200 +#define LOONGSON_ICU_MASTERERR 0x00000400 +#define LOONGSON_ICU_SYSTEMERR 0x00000800 +#define LOONGSON_ICU_DRAMPERR 0x00001000 +#define LOONGSON_ICU_RETRYERR 0x00002000 +#define LOONGSON_ICU_GPIOS 0x01ff0000 +#define LOONGSON_ICU_GPIOS_SHIFT 16 +#define LOONGSON_ICU_GPINS 0x7e000000 +#define LOONGSON_ICU_GPINS_SHIFT 25 +#define LOONGSON_ICU_MBOX(N) (1<<(LOONGSON_ICU_MBOXES_SHIFT+(N))) +#define LOONGSON_ICU_GPIO(N) (1<<(LOONGSON_ICU_GPIOS_SHIFT+(N))) +#define LOONGSON_ICU_GPIN(N) (1<<(LOONGSON_ICU_GPINS_SHIFT+(N))) + +/* PCI prefetch window base & mask */ + +#define LOONGSON_MEM_WIN_BASE_L LOONGSON_REG(LOONGSON_REGBASE + 0x40) +#define LOONGSON_MEM_WIN_BASE_H LOONGSON_REG(LOONGSON_REGBASE + 0x44) +#define LOONGSON_MEM_WIN_MASK_L LOONGSON_REG(LOONGSON_REGBASE + 0x48) +#define LOONGSON_MEM_WIN_MASK_H LOONGSON_REG(LOONGSON_REGBASE + 0x4c) + +/* PCI_Hit*_Sel_* */ + +#define LOONGSON_PCI_HIT0_SEL_L LOONGSON_REG(LOONGSON_REGBASE + 0x50) +#define LOONGSON_PCI_HIT0_SEL_H LOONGSON_REG(LOONGSON_REGBASE + 0x54) +#define LOONGSON_PCI_HIT1_SEL_L LOONGSON_REG(LOONGSON_REGBASE + 0x58) +#define LOONGSON_PCI_HIT1_SEL_H LOONGSON_REG(LOONGSON_REGBASE + 0x5c) +#define LOONGSON_PCI_HIT2_SEL_L LOONGSON_REG(LOONGSON_REGBASE + 0x60) +#define LOONGSON_PCI_HIT2_SEL_H LOONGSON_REG(LOONGSON_REGBASE + 0x64) + +/* PXArb Config & Status */ + +#define LOONGSON_PXARB_CFG LOONGSON_REG(LOONGSON_REGBASE + 0x68) +#define LOONGSON_PXARB_STATUS LOONGSON_REG(LOONGSON_REGBASE + 0x6c) + +/* Chip Config registor of each physical cpu package, PRid >= Loongson-2F */ +#define LOONGSON_CHIPCFG (void __iomem *)TO_UNCAC(0x1fc00180) + +/* pcimap */ + +#define LOONGSON_PCIMAP_PCIMAP_LO0 0x0000003f +#define LOONGSON_PCIMAP_PCIMAP_LO0_SHIFT 0 +#define LOONGSON_PCIMAP_PCIMAP_LO1 0x00000fc0 +#define LOONGSON_PCIMAP_PCIMAP_LO1_SHIFT 6 +#define LOONGSON_PCIMAP_PCIMAP_LO2 0x0003f000 +#define LOONGSON_PCIMAP_PCIMAP_LO2_SHIFT 12 +#define LOONGSON_PCIMAP_PCIMAP_2 0x00040000 +#define LOONGSON_PCIMAP_WIN(WIN, ADDR) \ + ((((ADDR)>>26) & LOONGSON_PCIMAP_PCIMAP_LO0) << ((WIN)*6)) + +#ifdef CONFIG_CPU_SUPPORTS_CPUFREQ +#include <linux/cpufreq.h> +extern struct cpufreq_frequency_table loongson2_clockmod_table[]; +#endif + +/* + * address windows configuration module + * + * loongson2e do not have this module + */ +#ifdef CONFIG_CPU_SUPPORTS_ADDRWINCFG + +/* address window config module base address */ +#define LOONGSON_ADDRWINCFG_BASE 0x3ff00000ul +#define LOONGSON_ADDRWINCFG_SIZE 0x180 + +extern unsigned long _loongson_addrwincfg_base; +#define LOONGSON_ADDRWINCFG(offset) \ + (*(volatile u64 *)(_loongson_addrwincfg_base + (offset))) + +#define CPU_WIN0_BASE 
LOONGSON_ADDRWINCFG(0x00) +#define CPU_WIN1_BASE LOONGSON_ADDRWINCFG(0x08) +#define CPU_WIN2_BASE LOONGSON_ADDRWINCFG(0x10) +#define CPU_WIN3_BASE LOONGSON_ADDRWINCFG(0x18) + +#define CPU_WIN0_MASK LOONGSON_ADDRWINCFG(0x20) +#define CPU_WIN1_MASK LOONGSON_ADDRWINCFG(0x28) +#define CPU_WIN2_MASK LOONGSON_ADDRWINCFG(0x30) +#define CPU_WIN3_MASK LOONGSON_ADDRWINCFG(0x38) + +#define CPU_WIN0_MMAP LOONGSON_ADDRWINCFG(0x40) +#define CPU_WIN1_MMAP LOONGSON_ADDRWINCFG(0x48) +#define CPU_WIN2_MMAP LOONGSON_ADDRWINCFG(0x50) +#define CPU_WIN3_MMAP LOONGSON_ADDRWINCFG(0x58) + +#define PCIDMA_WIN0_BASE LOONGSON_ADDRWINCFG(0x60) +#define PCIDMA_WIN1_BASE LOONGSON_ADDRWINCFG(0x68) +#define PCIDMA_WIN2_BASE LOONGSON_ADDRWINCFG(0x70) +#define PCIDMA_WIN3_BASE LOONGSON_ADDRWINCFG(0x78) + +#define PCIDMA_WIN0_MASK LOONGSON_ADDRWINCFG(0x80) +#define PCIDMA_WIN1_MASK LOONGSON_ADDRWINCFG(0x88) +#define PCIDMA_WIN2_MASK LOONGSON_ADDRWINCFG(0x90) +#define PCIDMA_WIN3_MASK LOONGSON_ADDRWINCFG(0x98) + +#define PCIDMA_WIN0_MMAP LOONGSON_ADDRWINCFG(0xa0) +#define PCIDMA_WIN1_MMAP LOONGSON_ADDRWINCFG(0xa8) +#define PCIDMA_WIN2_MMAP LOONGSON_ADDRWINCFG(0xb0) +#define PCIDMA_WIN3_MMAP LOONGSON_ADDRWINCFG(0xb8) + +#define ADDRWIN_WIN0 0 +#define ADDRWIN_WIN1 1 +#define ADDRWIN_WIN2 2 +#define ADDRWIN_WIN3 3 + +#define ADDRWIN_MAP_DST_DDR 0 +#define ADDRWIN_MAP_DST_PCI 1 +#define ADDRWIN_MAP_DST_LIO 1 + +/* + * s: CPU, PCIDMA + * d: DDR, PCI, LIO + * win: 0, 1, 2, 3 + * src: map source + * dst: map destination + * size: ~mask + 1 + */ +#define LOONGSON_ADDRWIN_CFG(s, d, w, src, dst, size) do {\ + s##_WIN##w##_BASE = (src); \ + s##_WIN##w##_MMAP = (dst) | ADDRWIN_MAP_DST_##d; \ + s##_WIN##w##_MASK = ~(size-1); \ +} while (0) + +#define LOONGSON_ADDRWIN_CPUTOPCI(win, src, dst, size) \ + LOONGSON_ADDRWIN_CFG(CPU, PCI, win, src, dst, size) +#define LOONGSON_ADDRWIN_CPUTODDR(win, src, dst, size) \ + LOONGSON_ADDRWIN_CFG(CPU, DDR, win, src, dst, size) +#define LOONGSON_ADDRWIN_PCITODDR(win, src, dst, size) \ + LOONGSON_ADDRWIN_CFG(PCIDMA, DDR, win, src, dst, size) + +#endif /* ! CONFIG_CPU_SUPPORTS_ADDRWINCFG */ + +#endif /* __ASM_MACH_LOONGSON2EF_LOONGSON_H */ diff --git a/arch/mips/include/asm/mach-loongson64/machine.h b/arch/mips/include/asm/mach-loongson2ef/machine.h index 8ef7ea94a26d..4097267ef186 100644 --- a/arch/mips/include/asm/mach-loongson64/machine.h +++ b/arch/mips/include/asm/mach-loongson2ef/machine.h @@ -4,8 +4,8 @@ * Author: Wu Zhangjin <wuzhangjin@gmail.com> */ -#ifndef __ASM_MACH_LOONGSON64_MACHINE_H -#define __ASM_MACH_LOONGSON64_MACHINE_H +#ifndef __ASM_MACH_LOONGSON2EF_MACHINE_H +#define __ASM_MACH_LOONGSON2EF_MACHINE_H #ifdef CONFIG_LEMOTE_FULOONG2E @@ -20,10 +20,4 @@ #endif -#ifdef CONFIG_LOONGSON_MACH3X - -#define LOONGSON_MACHTYPE MACH_LOONGSON_GENERIC - -#endif /* CONFIG_LOONGSON_MACH3X */ - -#endif /* __ASM_MACH_LOONGSON64_MACHINE_H */ +#endif /* __ASM_MACH_LOONGSON2EF_MACHINE_H */ diff --git a/arch/mips/include/asm/mach-loongson2ef/mc146818rtc.h b/arch/mips/include/asm/mach-loongson2ef/mc146818rtc.h new file mode 100644 index 000000000000..00d602629a55 --- /dev/null +++ b/arch/mips/include/asm/mach-loongson2ef/mc146818rtc.h @@ -0,0 +1,36 @@ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (C) 1998, 2001, 03, 07 by Ralf Baechle (ralf@linux-mips.org) + * + * RTC routines for PC style attached Dallas chip. 
+ */ +#ifndef __ASM_MACH_LOONGSON2EF_MC146818RTC_H +#define __ASM_MACH_LOONGSON2EF_MC146818RTC_H + +#include <linux/io.h> + +#define RTC_PORT(x) (0x70 + (x)) +#define RTC_IRQ 8 + +static inline unsigned char CMOS_READ(unsigned long addr) +{ + outb_p(addr, RTC_PORT(0)); + return inb_p(RTC_PORT(1)); +} + +static inline void CMOS_WRITE(unsigned char data, unsigned long addr) +{ + outb_p(addr, RTC_PORT(0)); + outb_p(data, RTC_PORT(1)); +} + +#define RTC_ALWAYS_BCD 0 + +#ifndef mc146818_decode_year +#define mc146818_decode_year(year) ((year) < 70 ? (year) + 2000 : (year) + 1970) +#endif + +#endif /* __ASM_MACH_LOONGSON2EF_MC146818RTC_H */ diff --git a/arch/mips/include/asm/mach-loongson64/mem.h b/arch/mips/include/asm/mach-loongson2ef/mem.h index ce33c174c04d..d1d759b8974e 100644 --- a/arch/mips/include/asm/mach-loongson64/mem.h +++ b/arch/mips/include/asm/mach-loongson2ef/mem.h @@ -4,8 +4,8 @@ * Author: Wu Zhangjin <wuzhangjin@gmail.com> */ -#ifndef __ASM_MACH_LOONGSON64_MEM_H -#define __ASM_MACH_LOONGSON64_MEM_H +#ifndef __ASM_MACH_LOONGSON2EF_MEM_H +#define __ASM_MACH_LOONGSON2EF_MEM_H /* * high memory space @@ -34,4 +34,4 @@ #define LOONGSON_MMIO_MEM_END 0x80000000 #endif -#endif /* __ASM_MACH_LOONGSON64_MEM_H */ +#endif /* __ASM_MACH_LOONGSON2EF_MEM_H */ diff --git a/arch/mips/include/asm/mach-loongson2ef/pci.h b/arch/mips/include/asm/mach-loongson2ef/pci.h new file mode 100644 index 000000000000..5588c5bc5395 --- /dev/null +++ b/arch/mips/include/asm/mach-loongson2ef/pci.h @@ -0,0 +1,46 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * Copyright (c) 2008 Zhang Le <r0bertz@gentoo.org> + * Copyright (c) 2009 Wu Zhangjin <wuzhangjin@gmail.com> + */ + +#ifndef __ASM_MACH_LOONGSON2EF_PCI_H_ +#define __ASM_MACH_LOONGSON2EF_PCI_H_ + +extern struct pci_ops loongson_pci_ops; + +/* this is an offset from mips_io_port_base */ +#define LOONGSON_PCI_IO_START 0x00004000UL + +#ifdef CONFIG_CPU_SUPPORTS_ADDRWINCFG + +/* + * we use address window2 to map cpu address space to pci space + * window2: cpu [1G, 2G] -> pci [1G, 2G] + * why not use window 0 & 1? because they are used by cpu when booting. 
+ * window0: cpu [0, 256M] -> ddr [0, 256M] + * window1: cpu [256M, 512M] -> pci [256M, 512M] + */ + +/* the smallest LOONGSON_CPU_MEM_SRC can be 512M */ +#define LOONGSON_CPU_MEM_SRC 0x40000000ul /* 1G */ +#define LOONGSON_PCI_MEM_DST LOONGSON_CPU_MEM_SRC + +#define LOONGSON_PCI_MEM_START LOONGSON_PCI_MEM_DST +#define LOONGSON_PCI_MEM_END (0x80000000ul-1) /* 2G */ + +#define MMAP_CPUTOPCI_SIZE (LOONGSON_PCI_MEM_END - \ + LOONGSON_PCI_MEM_START + 1) + +#else /* loongson2f/32bit & loongson2e */ + +/* this pci memory space is mapped by pcimap in pci.c */ +#define LOONGSON_PCI_MEM_START LOONGSON_PCILO1_BASE +#define LOONGSON_PCI_MEM_END (LOONGSON_PCILO1_BASE + 0x04000000 * 2) + +/* this is an offset from mips_io_port_base */ +#define LOONGSON_PCI_IO_START 0x00004000UL + +#endif /* !CONFIG_CPU_SUPPORTS_ADDRWINCFG */ + +#endif /* !__ASM_MACH_LOONGSON2EF_PCI_H_ */ diff --git a/arch/mips/include/asm/mach-loongson2ef/spaces.h b/arch/mips/include/asm/mach-loongson2ef/spaces.h new file mode 100644 index 000000000000..ba4e8e9b618e --- /dev/null +++ b/arch/mips/include/asm/mach-loongson2ef/spaces.h @@ -0,0 +1,10 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __ASM_MACH_LOONGSON2EF_SPACES_H_ +#define __ASM_MACH_LOONGSON2EF_SPACES_H_ + +#if defined(CONFIG_64BIT) +#define CAC_BASE _AC(0x9800000000000000, UL) +#endif /* CONFIG_64BIT */ + +#include <asm/mach-generic/spaces.h> +#endif diff --git a/arch/mips/include/asm/mach-loongson32/prom.h b/arch/mips/include/asm/mach-loongson32/prom.h deleted file mode 100644 index cb789f18d790..000000000000 --- a/arch/mips/include/asm/mach-loongson32/prom.h +++ /dev/null @@ -1,20 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-or-later */ -/* - * Copyright (c) 2011 Zhang, Keguang <keguang.zhang@gmail.com> - */ - -#ifndef __ASM_MACH_LOONGSON32_PROM_H -#define __ASM_MACH_LOONGSON32_PROM_H - -#include <linux/io.h> -#include <linux/init.h> -#include <linux/irq.h> - -/* environment arguments from bootloader */ -extern unsigned long memsize, highmemsize; - -/* loongson-specific command line, env and memory initialization */ -extern char *prom_getenv(char *name); -extern void __init prom_init_cmdline(void); - -#endif /* __ASM_MACH_LOONGSON32_PROM_H */ diff --git a/arch/mips/include/asm/mach-loongson64/cpu-feature-overrides.h b/arch/mips/include/asm/mach-loongson64/cpu-feature-overrides.h index 4aca25f2ff06..7dc8d75445a9 100644 --- a/arch/mips/include/asm/mach-loongson64/cpu-feature-overrides.h +++ b/arch/mips/include/asm/mach-loongson64/cpu-feature-overrides.h @@ -43,11 +43,8 @@ #define cpu_has_vint 0 #define cpu_has_vtag_icache 0 #define cpu_has_watch 1 - -#ifdef CONFIG_CPU_LOONGSON3 #define cpu_has_wsbh 1 #define cpu_has_ic_fills_f_dc 1 #define cpu_hwrena_impl_bits 0xc0000000 -#endif #endif /* __ASM_MACH_LOONGSON64_CPU_FEATURE_OVERRIDES_H */ diff --git a/arch/mips/include/asm/mach-loongson64/irq.h b/arch/mips/include/asm/mach-loongson64/irq.h index be9f727a9328..73a89913dc38 100644 --- a/arch/mips/include/asm/mach-loongson64/irq.h +++ b/arch/mips/include/asm/mach-loongson64/irq.h @@ -4,8 +4,6 @@ #include <boot_param.h> -#ifdef CONFIG_CPU_LOONGSON3 - /* cpu core interrupt numbers */ #define MIPS_CPU_IRQ_BASE 56 @@ -35,8 +33,6 @@ #define LOONGSON_INT_COREx_INTy(x, y) (1<<(x) | 1<<(y+4)) /* route to int y of core x */ -#endif - extern void fixup_irqs(void); extern void loongson3_ipi_interrupt(struct pt_regs *regs); diff --git a/arch/mips/include/asm/mach-loongson64/kernel-entry-init.h b/arch/mips/include/asm/mach-loongson64/kernel-entry-init.h index b5e288a12dfe..87a5bfbf8cfe 
100644 --- a/arch/mips/include/asm/mach-loongson64/kernel-entry-init.h +++ b/arch/mips/include/asm/mach-loongson64/kernel-entry-init.h @@ -17,7 +17,6 @@ * Override macros used in arch/mips/kernel/head.S. */ .macro kernel_entry_setup -#ifdef CONFIG_CPU_LOONGSON3 .set push .set mips64 /* Set LPA on LOONGSON3 config3 */ @@ -30,23 +29,29 @@ mtc0 t0, CP0_PAGEGRAIN /* Enable STFill Buffer */ mfc0 t0, CP0_PRID + /* Loongson-3A R4+ */ + andi t1, t0, PRID_IMP_MASK + li t2, PRID_IMP_LOONGSON_64G + beq t1, t2, 1f + nop + /* Loongson-3A R2/R3 */ andi t0, (PRID_IMP_MASK | PRID_REV_MASK) - slti t0, (PRID_IMP_LOONGSON_64 | PRID_REV_LOONGSON3A_R2_0) - bnez t0, 1f + slti t0, (PRID_IMP_LOONGSON_64C | PRID_REV_LOONGSON3A_R2_0) + bnez t0, 2f + nop +1: mfc0 t0, CP0_CONFIG6 or t0, 0x100 mtc0 t0, CP0_CONFIG6 -1: +2: _ehb .set pop -#endif .endm /* * Do SMP slave processor setup. */ .macro smp_slave_setup -#ifdef CONFIG_CPU_LOONGSON3 .set push .set mips64 /* Set LPA on LOONGSON3 config3 */ @@ -59,16 +64,23 @@ mtc0 t0, CP0_PAGEGRAIN /* Enable STFill Buffer */ mfc0 t0, CP0_PRID + /* Loongson-3A R4+ */ + andi t1, t0, PRID_IMP_MASK + li t2, PRID_IMP_LOONGSON_64G + beq t1, t2, 1f + nop + /* Loongson-3A R2/R3 */ andi t0, (PRID_IMP_MASK | PRID_REV_MASK) - slti t0, (PRID_IMP_LOONGSON_64 | PRID_REV_LOONGSON3A_R2_0) - bnez t0, 1f + slti t0, (PRID_IMP_LOONGSON_64C | PRID_REV_LOONGSON3A_R2_0) + bnez t0, 2f + nop +1: mfc0 t0, CP0_CONFIG6 or t0, 0x100 mtc0 t0, CP0_CONFIG6 -1: +2: _ehb .set pop -#endif .endm #endif /* __ASM_MACH_LOONGSON64_KERNEL_ENTRY_H */ diff --git a/arch/mips/include/asm/mach-loongson64/loongson.h b/arch/mips/include/asm/mach-loongson64/loongson.h index 694a58574ec0..a8fce112a9b0 100644 --- a/arch/mips/include/asm/mach-loongson64/loongson.h +++ b/arch/mips/include/asm/mach-loongson64/loongson.h @@ -12,8 +12,6 @@ #include <linux/irq.h> #include <boot_param.h> -/* loongson internal northbridge initialization */ -extern void bonito_irq_init(void); /* machine-specific reboot/halt operation */ extern void mach_prepare_reboot(void); @@ -26,25 +24,9 @@ extern const struct plat_smp_ops loongson3_smp_ops; /* loongson-specific command line, env and memory initialization */ extern void __init prom_init_memory(void); -extern void __init prom_init_cmdline(void); -extern void __init prom_init_machtype(void); extern void __init prom_init_env(void); -#ifdef CONFIG_LOONGSON_UART_BASE -extern unsigned long _loongson_uart_base[], loongson_uart_base[]; -extern void prom_init_loongson_uart_base(void); -#endif - -static inline void prom_init_uart_base(void) -{ -#ifdef CONFIG_LOONGSON_UART_BASE - prom_init_loongson_uart_base(); -#endif -} /* irq operation functions */ -extern void bonito_irqdispatch(void); -extern void __init bonito_irq_init(void); -extern void __init mach_init_irq(void); extern void mach_irq_dispatch(unsigned int pending); extern int mach_i8259_irq(void); @@ -64,17 +46,6 @@ extern int mach_i8259_irq(void); #define LOONGSON3_REG32(base, x) \ (*(volatile u32 *)((char *)TO_UNCAC(base) + (x))) -#define LOONGSON_IRQ_BASE 32 -#define LOONGSON2_PERFCNT_IRQ (MIPS_CPU_IRQ_BASE + 6) /* cpu perf counter */ - -#include <linux/interrupt.h> -static inline void do_perfcnt_IRQ(void) -{ -#if IS_ENABLED(CONFIG_OPROFILE) - do_IRQ(LOONGSON2_PERFCNT_IRQ); -#endif -} - #define LOONGSON_FLASH_BASE 0x1c000000 #define LOONGSON_FLASH_SIZE 0x02000000 /* 32M */ #define LOONGSON_FLASH_TOP (LOONGSON_FLASH_BASE+LOONGSON_FLASH_SIZE-1) @@ -109,11 +80,7 @@ static inline void do_perfcnt_IRQ(void) #define LOONGSON_PCICFG_SIZE 0x00000800 /* 2K */ 
#define LOONGSON_PCICFG_TOP (LOONGSON_PCICFG_BASE+LOONGSON_PCICFG_SIZE-1) -#ifdef CONFIG_CPU_LOONGSON3 #define LOONGSON_PCIIO_BASE loongson_sysconf.pci_io_base -#else -#define LOONGSON_PCIIO_BASE 0x1fd00000 -#endif #define LOONGSON_PCIIO_SIZE 0x00100000 /* 1M */ #define LOONGSON_PCIIO_TOP (LOONGSON_PCIIO_BASE+LOONGSON_PCIIO_SIZE-1) @@ -270,86 +237,4 @@ extern u64 loongson_freqctrl[MAX_PACKAGES]; #define LOONGSON_PCIMAP_WIN(WIN, ADDR) \ ((((ADDR)>>26) & LOONGSON_PCIMAP_PCIMAP_LO0) << ((WIN)*6)) -#ifdef CONFIG_CPU_SUPPORTS_CPUFREQ -#include <linux/cpufreq.h> -extern struct cpufreq_frequency_table loongson2_clockmod_table[]; -#endif - -/* - * address windows configuration module - * - * loongson2e do not have this module - */ -#ifdef CONFIG_CPU_SUPPORTS_ADDRWINCFG - -/* address window config module base address */ -#define LOONGSON_ADDRWINCFG_BASE 0x3ff00000ul -#define LOONGSON_ADDRWINCFG_SIZE 0x180 - -extern unsigned long _loongson_addrwincfg_base; -#define LOONGSON_ADDRWINCFG(offset) \ - (*(volatile u64 *)(_loongson_addrwincfg_base + (offset))) - -#define CPU_WIN0_BASE LOONGSON_ADDRWINCFG(0x00) -#define CPU_WIN1_BASE LOONGSON_ADDRWINCFG(0x08) -#define CPU_WIN2_BASE LOONGSON_ADDRWINCFG(0x10) -#define CPU_WIN3_BASE LOONGSON_ADDRWINCFG(0x18) - -#define CPU_WIN0_MASK LOONGSON_ADDRWINCFG(0x20) -#define CPU_WIN1_MASK LOONGSON_ADDRWINCFG(0x28) -#define CPU_WIN2_MASK LOONGSON_ADDRWINCFG(0x30) -#define CPU_WIN3_MASK LOONGSON_ADDRWINCFG(0x38) - -#define CPU_WIN0_MMAP LOONGSON_ADDRWINCFG(0x40) -#define CPU_WIN1_MMAP LOONGSON_ADDRWINCFG(0x48) -#define CPU_WIN2_MMAP LOONGSON_ADDRWINCFG(0x50) -#define CPU_WIN3_MMAP LOONGSON_ADDRWINCFG(0x58) - -#define PCIDMA_WIN0_BASE LOONGSON_ADDRWINCFG(0x60) -#define PCIDMA_WIN1_BASE LOONGSON_ADDRWINCFG(0x68) -#define PCIDMA_WIN2_BASE LOONGSON_ADDRWINCFG(0x70) -#define PCIDMA_WIN3_BASE LOONGSON_ADDRWINCFG(0x78) - -#define PCIDMA_WIN0_MASK LOONGSON_ADDRWINCFG(0x80) -#define PCIDMA_WIN1_MASK LOONGSON_ADDRWINCFG(0x88) -#define PCIDMA_WIN2_MASK LOONGSON_ADDRWINCFG(0x90) -#define PCIDMA_WIN3_MASK LOONGSON_ADDRWINCFG(0x98) - -#define PCIDMA_WIN0_MMAP LOONGSON_ADDRWINCFG(0xa0) -#define PCIDMA_WIN1_MMAP LOONGSON_ADDRWINCFG(0xa8) -#define PCIDMA_WIN2_MMAP LOONGSON_ADDRWINCFG(0xb0) -#define PCIDMA_WIN3_MMAP LOONGSON_ADDRWINCFG(0xb8) - -#define ADDRWIN_WIN0 0 -#define ADDRWIN_WIN1 1 -#define ADDRWIN_WIN2 2 -#define ADDRWIN_WIN3 3 - -#define ADDRWIN_MAP_DST_DDR 0 -#define ADDRWIN_MAP_DST_PCI 1 -#define ADDRWIN_MAP_DST_LIO 1 - -/* - * s: CPU, PCIDMA - * d: DDR, PCI, LIO - * win: 0, 1, 2, 3 - * src: map source - * dst: map destination - * size: ~mask + 1 - */ -#define LOONGSON_ADDRWIN_CFG(s, d, w, src, dst, size) do {\ - s##_WIN##w##_BASE = (src); \ - s##_WIN##w##_MMAP = (dst) | ADDRWIN_MAP_DST_##d; \ - s##_WIN##w##_MASK = ~(size-1); \ -} while (0) - -#define LOONGSON_ADDRWIN_CPUTOPCI(win, src, dst, size) \ - LOONGSON_ADDRWIN_CFG(CPU, PCI, win, src, dst, size) -#define LOONGSON_ADDRWIN_CPUTODDR(win, src, dst, size) \ - LOONGSON_ADDRWIN_CFG(CPU, DDR, win, src, dst, size) -#define LOONGSON_ADDRWIN_PCITODDR(win, src, dst, size) \ - LOONGSON_ADDRWIN_CFG(PCIDMA, DDR, win, src, dst, size) - -#endif /* ! 
CONFIG_CPU_SUPPORTS_ADDRWINCFG */ - #endif /* __ASM_MACH_LOONGSON64_LOONGSON_H */ diff --git a/arch/mips/include/asm/mach-loongson64/loongson_regs.h b/arch/mips/include/asm/mach-loongson64/loongson_regs.h new file mode 100644 index 000000000000..363a47a5d26e --- /dev/null +++ b/arch/mips/include/asm/mach-loongson64/loongson_regs.h @@ -0,0 +1,227 @@ +/* + * Read/Write Loongson Extension Registers + */ + +#ifndef _LOONGSON_REGS_H_ +#define _LOONGSON_REGS_H_ + +#include <linux/types.h> +#include <linux/bits.h> + +#include <asm/mipsregs.h> +#include <asm/cpu.h> + +static inline bool cpu_has_cfg(void) +{ + return ((read_c0_prid() & PRID_IMP_MASK) == PRID_IMP_LOONGSON_64G); +} + +static inline u32 read_cpucfg(u32 reg) +{ + u32 __res; + + __asm__ __volatile__( + "parse_r __res,%0\n\t" + "parse_r reg,%1\n\t" + ".insn \n\t" + ".word (0xc8080118 | (reg << 21) | (__res << 11))\n\t" + :"=r"(__res) + :"r"(reg) + : + ); + return __res; +} + +/* Bit Domains for CFG registers */ +#define LOONGSON_CFG0 0x0 +#define LOONGSON_CFG0_PRID GENMASK(31, 0) + +#define LOONGSON_CFG1 0x1 +#define LOONGSON_CFG1_FP BIT(0) +#define LOONGSON_CFG1_FPREV GENMASK(3, 1) +#define LOONGSON_CFG1_MMI BIT(4) +#define LOONGSON_CFG1_MSA1 BIT(5) +#define LOONGSON_CFG1_MSA2 BIT(6) +#define LOONGSON_CFG1_CGP BIT(7) +#define LOONGSON_CFG1_WRP BIT(8) +#define LOONGSON_CFG1_LSX1 BIT(9) +#define LOONGSON_CFG1_LSX2 BIT(10) +#define LOONGSON_CFG1_LASX BIT(11) +#define LOONGSON_CFG1_R6FXP BIT(12) +#define LOONGSON_CFG1_R6CRCP BIT(13) +#define LOONGSON_CFG1_R6FPP BIT(14) +#define LOONGSON_CFG1_CNT64 BIT(15) +#define LOONGSON_CFG1_LSLDR0 BIT(16) +#define LOONGSON_CFG1_LSPREF BIT(17) +#define LOONGSON_CFG1_LSPREFX BIT(18) +#define LOONGSON_CFG1_LSSYNCI BIT(19) +#define LOONGSON_CFG1_LSUCA BIT(20) +#define LOONGSON_CFG1_LLSYNC BIT(21) +#define LOONGSON_CFG1_TGTSYNC BIT(22) +#define LOONGSON_CFG1_LLEXC BIT(23) +#define LOONGSON_CFG1_SCRAND BIT(24) +#define LOONGSON_CFG1_MUALP BIT(25) +#define LOONGSON_CFG1_KMUALEN BIT(26) +#define LOONGSON_CFG1_ITLBT BIT(27) +#define LOONGSON_CFG1_LSUPERF BIT(28) +#define LOONGSON_CFG1_SFBP BIT(29) +#define LOONGSON_CFG1_CDMAP BIT(30) + +#define LOONGSON_CFG2 0x2 +#define LOONGSON_CFG2_LEXT1 BIT(0) +#define LOONGSON_CFG2_LEXT2 BIT(1) +#define LOONGSON_CFG2_LEXT3 BIT(2) +#define LOONGSON_CFG2_LSPW BIT(3) +#define LOONGSON_CFG2_LBT1 BIT(4) +#define LOONGSON_CFG2_LBT2 BIT(5) +#define LOONGSON_CFG2_LBT3 BIT(6) +#define LOONGSON_CFG2_LBTMMU BIT(7) +#define LOONGSON_CFG2_LPMP BIT(8) +#define LOONGSON_CFG2_LPMPREV GENMASK(11, 9) +#define LOONGSON_CFG2_LAMO BIT(12) +#define LOONGSON_CFG2_LPIXU BIT(13) +#define LOONGSON_CFG2_LPIXUN BIT(14) +#define LOONGSON_CFG2_LZVP BIT(15) +#define LOONGSON_CFG2_LZVREV GENMASK(18, 16) +#define LOONGSON_CFG2_LGFTP BIT(19) +#define LOONGSON_CFG2_LGFTPREV GENMASK(22, 20) +#define LOONGSON_CFG2_LLFTP BIT(23) +#define LOONGSON_CFG2_LLFTPREV GENMASK(26, 24) +#define LOONGSON_CFG2_LCSRP BIT(27) +#define LOONGSON_CFG2_LDISBLIKELY BIT(28) + +#define LOONGSON_CFG3 0x3 +#define LOONGSON_CFG3_LCAMP BIT(0) +#define LOONGSON_CFG3_LCAMREV GENMASK(3, 1) +#define LOONGSON_CFG3_LCAMNUM GENMASK(11, 4) +#define LOONGSON_CFG3_LCAMKW GENMASK(19, 12) +#define LOONGSON_CFG3_LCAMVW GENMASK(27, 20) + +#define LOONGSON_CFG4 0x4 +#define LOONGSON_CFG4_CCFREQ GENMASK(31, 0) + +#define LOONGSON_CFG5 0x5 +#define LOONGSON_CFG5_CFM GENMASK(15, 0) +#define LOONGSON_CFG5_CFD GENMASK(31, 16) + +#define LOONGSON_CFG6 0x6 + +#define LOONGSON_CFG7 0x7 +#define LOONGSON_CFG7_GCCAEQRP BIT(0) +#define LOONGSON_CFG7_UCAWINP 
BIT(1) + +static inline bool cpu_has_csr(void) +{ + if (cpu_has_cfg()) + return (read_cpucfg(LOONGSON_CFG2) & LOONGSON_CFG2_LCSRP); + + return false; +} + +static inline u32 csr_readl(u32 reg) +{ + u32 __res; + + /* RDCSR reg, val */ + __asm__ __volatile__( + "parse_r __res,%0\n\t" + "parse_r reg,%1\n\t" + ".insn \n\t" + ".word (0xc8000118 | (reg << 21) | (__res << 11))\n\t" + :"=r"(__res) + :"r"(reg) + : + ); + return __res; +} + +static inline u64 csr_readq(u32 reg) +{ + u64 __res; + + /* DRDCSR reg, val */ + __asm__ __volatile__( + "parse_r __res,%0\n\t" + "parse_r reg,%1\n\t" + ".insn \n\t" + ".word (0xc8020118 | (reg << 21) | (__res << 11))\n\t" + :"=r"(__res) + :"r"(reg) + : + ); + return __res; +} + +static inline void csr_writel(u32 val, u32 reg) +{ + /* WRCSR reg, val */ + __asm__ __volatile__( + "parse_r reg,%0\n\t" + "parse_r val,%1\n\t" + ".insn \n\t" + ".word (0xc8010118 | (reg << 21) | (val << 11))\n\t" + : + :"r"(reg),"r"(val) + : + ); +} + +static inline void csr_writeq(u64 val, u32 reg) +{ + /* DWRCSR reg, val */ + __asm__ __volatile__( + "parse_r reg,%0\n\t" + "parse_r val,%1\n\t" + ".insn \n\t" + ".word (0xc8030118 | (reg << 21) | (val << 11))\n\t" + : + :"r"(reg),"r"(val) + : + ); +} + +/* Public CSR Register can also be accessed with regular addresses */ +#define CSR_PUBLIC_MMIO_BASE 0x1fe00000 + +#define MMIO_CSR(x) (void *)TO_UNCAC(CSR_PUBLIC_MMIO_BASE + x) + +#define LOONGSON_CSR_FEATURES 0x8 +#define LOONGSON_CSRF_TEMP BIT(0) +#define LOONGSON_CSRF_NODECNT BIT(1) +#define LOONGSON_CSRF_MSI BIT(2) +#define LOONGSON_CSRF_EXTIOI BIT(3) +#define LOONGSON_CSRF_IPI BIT(4) +#define LOONGSON_CSRF_FREQ BIT(5) + +#define LOONGSON_CSR_VENDOR 0x10 /* Vendor name string, should be "Loongson" */ +#define LOONGSON_CSR_CPUNAME 0x20 /* Processor name string */ +#define LOONGSON_CSR_NODECNT 0x408 +#define LOONGSON_CSR_CPUTEMP 0x428 + +/* PerCore CSR, only accessible by local cores */ +#define LOONGSON_CSR_IPI_STATUS 0x1000 +#define LOONGSON_CSR_IPI_EN 0x1004 +#define LOONGSON_CSR_IPI_SET 0x1008 +#define LOONGSON_CSR_IPI_CLEAR 0x100c +#define LOONGSON_CSR_IPI_SEND 0x1040 +#define CSR_IPI_SEND_IP_SHIFT 0 +#define CSR_IPI_SEND_CPU_SHIFT 16 +#define CSR_IPI_SEND_BLOCK BIT(31) + +static inline u64 drdtime(void) +{ + int rID = 0; + u64 val = 0; + + __asm__ __volatile__( + "parse_r rID,%0\n\t" + "parse_r val,%1\n\t" + ".insn \n\t" + ".word (0xc8090118 | (rID << 21) | (val << 11))\n\t" + :"=r"(rID),"=r"(val) + : + ); + return val; +} + +#endif diff --git a/arch/mips/include/asm/mach-loongson64/mmzone.h b/arch/mips/include/asm/mach-loongson64/mmzone.h index 62073d60739f..3a25dbd3b3e9 100644 --- a/arch/mips/include/asm/mach-loongson64/mmzone.h +++ b/arch/mips/include/asm/mach-loongson64/mmzone.h @@ -6,8 +6,8 @@ * Huacai Chen, chenhc@lemote.com * Xiaofu Meng, Shuangshuang Zhang */ -#ifndef _ASM_MACH_MMZONE_H -#define _ASM_MACH_MMZONE_H +#ifndef _ASM_MACH_LOONGSON64_MMZONE_H +#define _ASM_MACH_LOONGSON64_MMZONE_H #include <boot_param.h> #define NODE_ADDRSPACE_SHIFT 44 @@ -19,30 +19,9 @@ #define pa_to_nid(addr) (((addr) & 0xf00000000000) >> NODE_ADDRSPACE_SHIFT) #define nid_to_addrbase(nid) ((nid) << NODE_ADDRSPACE_SHIFT) -#define LEVELS_PER_SLICE 128 +extern struct pglist_data *__node_data[]; -struct slice_data { - unsigned long irq_enable_mask[2]; - int level_to_irq[LEVELS_PER_SLICE]; -}; - -struct hub_data { - cpumask_t h_cpus; - unsigned long slice_map; - unsigned long irq_alloc_mask[2]; - struct slice_data slice[2]; -}; - -struct node_data { - struct pglist_data pglist; - struct hub_data
hub; - cpumask_t cpumask; -}; - -extern struct node_data *__node_data[]; - -#define NODE_DATA(n) (&__node_data[(n)]->pglist) -#define hub_data(n) (&__node_data[(n)]->hub) +#define NODE_DATA(n) (__node_data[n]) extern void setup_zero_pages(void); extern void __init prom_init_numa_memory(void); diff --git a/arch/mips/include/asm/mach-loongson64/pci.h b/arch/mips/include/asm/mach-loongson64/pci.h index 97f807fb2117..8b59d64a23e8 100644 --- a/arch/mips/include/asm/mach-loongson64/pci.h +++ b/arch/mips/include/asm/mach-loongson64/pci.h @@ -12,39 +12,8 @@ extern struct pci_ops loongson_pci_ops; /* this is an offset from mips_io_port_base */ #define LOONGSON_PCI_IO_START 0x00004000UL -#ifdef CONFIG_CPU_SUPPORTS_ADDRWINCFG - -/* - * we use address window2 to map cpu address space to pci space - * window2: cpu [1G, 2G] -> pci [1G, 2G] - * why not use window 0 & 1? because they are used by cpu when booting. - * window0: cpu [0, 256M] -> ddr [0, 256M] - * window1: cpu [256M, 512M] -> pci [256M, 512M] - */ - -/* the smallest LOONGSON_CPU_MEM_SRC can be 512M */ -#define LOONGSON_CPU_MEM_SRC 0x40000000ul /* 1G */ -#define LOONGSON_PCI_MEM_DST LOONGSON_CPU_MEM_SRC - -#define LOONGSON_PCI_MEM_START LOONGSON_PCI_MEM_DST -#define LOONGSON_PCI_MEM_END (0x80000000ul-1) /* 2G */ - -#define MMAP_CPUTOPCI_SIZE (LOONGSON_PCI_MEM_END - \ - LOONGSON_PCI_MEM_START + 1) - -#else /* loongson2f/32bit & loongson2e */ - -/* this pci memory space is mapped by pcimap in pci.c */ -#ifdef CONFIG_CPU_LOONGSON3 #define LOONGSON_PCI_MEM_START 0x40000000UL #define LOONGSON_PCI_MEM_END 0x7effffffUL -#else -#define LOONGSON_PCI_MEM_START LOONGSON_PCILO1_BASE -#define LOONGSON_PCI_MEM_END (LOONGSON_PCILO1_BASE + 0x04000000 * 2) -#endif -/* this is an offset from mips_io_port_base */ -#define LOONGSON_PCI_IO_START 0x00004000UL -#endif /* !CONFIG_CPU_SUPPORTS_ADDRWINCFG */ #endif /* !__ASM_MACH_LOONGSON64_PCI_H_ */ diff --git a/arch/mips/include/asm/mach-loongson64/topology.h b/arch/mips/include/asm/mach-loongson64/topology.h index 7ff819ab308a..3414a1fd1783 100644 --- a/arch/mips/include/asm/mach-loongson64/topology.h +++ b/arch/mips/include/asm/mach-loongson64/topology.h @@ -5,7 +5,9 @@ #ifdef CONFIG_NUMA #define cpu_to_node(cpu) (cpu_logical_map(cpu) >> 2) -#define cpumask_of_node(node) (&__node_data[(node)]->cpumask) + +extern cpumask_t __node_cpumask[]; +#define cpumask_of_node(node) (&__node_cpumask[node]) struct pci_bus; extern int pcibus_to_node(struct pci_bus *); diff --git a/arch/mips/include/asm/mipsregs.h b/arch/mips/include/asm/mipsregs.h index bdbdc19a2b8f..0d5a30988697 100644 --- a/arch/mips/include/asm/mipsregs.h +++ b/arch/mips/include/asm/mipsregs.h @@ -689,6 +689,9 @@ #define MIPS_CONF7_IAR (_ULCAST_(1) << 10) #define MIPS_CONF7_AR (_ULCAST_(1) << 16) +/* Ingenic HPTLB off bits */ +#define XBURST_PAGECTRL_HPTLB_DIS 0xa9000000 + /* Ingenic Config7 bits */ #define MIPS_CONF7_BTB_LOOP_EN (_ULCAST_(1) << 4) @@ -1971,6 +1974,9 @@ do { \ #define read_c0_brcm_sleepcount() __read_32bit_c0_register($22, 7) #define write_c0_brcm_sleepcount(val) __write_32bit_c0_register($22, 7, val) +/* Ingenic page ctrl register */ +#define write_c0_page_ctrl(val) __write_32bit_c0_register($5, 4, val) + /* * Macros to access the guest system control coprocessor */ diff --git a/arch/mips/include/asm/module.h b/arch/mips/include/asm/module.h index ed70994fbbec..9846047b3d3d 100644 --- a/arch/mips/include/asm/module.h +++ b/arch/mips/include/asm/module.h @@ -119,12 +119,12 @@ search_module_dbetables(unsigned long addr) #define 
MODULE_PROC_FAMILY "RM7000 " #elif defined CONFIG_CPU_SB1 #define MODULE_PROC_FAMILY "SB1 " -#elif defined CONFIG_CPU_LOONGSON1 -#define MODULE_PROC_FAMILY "LOONGSON1 " -#elif defined CONFIG_CPU_LOONGSON2 -#define MODULE_PROC_FAMILY "LOONGSON2 " -#elif defined CONFIG_CPU_LOONGSON3 -#define MODULE_PROC_FAMILY "LOONGSON3 " +#elif defined CONFIG_CPU_LOONGSON32 +#define MODULE_PROC_FAMILY "LOONGSON32 " +#elif defined CONFIG_CPU_LOONGSON2EF +#define MODULE_PROC_FAMILY "LOONGSON2EF " +#elif defined CONFIG_CPU_LOONGSON64 +#define MODULE_PROC_FAMILY "LOONGSON64 " #elif defined CONFIG_CPU_CAVIUM_OCTEON #define MODULE_PROC_FAMILY "OCTEON " #elif defined CONFIG_CPU_XLR diff --git a/arch/mips/include/asm/octeon/cvmx-ipd.h b/arch/mips/include/asm/octeon/cvmx-ipd.h index cbdc14b77435..adab7b54c3b4 100644 --- a/arch/mips/include/asm/octeon/cvmx-ipd.h +++ b/arch/mips/include/asm/octeon/cvmx-ipd.h @@ -36,6 +36,7 @@ #include <asm/octeon/octeon-feature.h> #include <asm/octeon/cvmx-ipd-defs.h> +#include <asm/octeon/cvmx-pip-defs.h> enum cvmx_ipd_mode { CVMX_IPD_OPC_MODE_STT = 0LL, /* All blocks DRAM, not cached in L2 */ diff --git a/arch/mips/include/asm/pci/bridge.h b/arch/mips/include/asm/pci/bridge.h index a92cd30b48c9..3bc630ff9ad4 100644 --- a/arch/mips/include/asm/pci/bridge.h +++ b/arch/mips/include/asm/pci/bridge.h @@ -807,6 +807,7 @@ struct bridge_controller { unsigned long intr_addr; struct irq_domain *domain; unsigned int pci_int[8]; + u32 ioc3_sid[8]; nasid_t nasid; }; diff --git a/arch/mips/include/asm/pgalloc.h b/arch/mips/include/asm/pgalloc.h index 166842337eb2..fa77cb71f303 100644 --- a/arch/mips/include/asm/pgalloc.h +++ b/arch/mips/include/asm/pgalloc.h @@ -96,9 +96,9 @@ static inline void pud_free(struct mm_struct *mm, pud_t *pud) free_pages((unsigned long)pud, PUD_ORDER); } -static inline void pgd_populate(struct mm_struct *mm, pgd_t *pgd, pud_t *pud) +static inline void p4d_populate(struct mm_struct *mm, p4d_t *p4d, pud_t *pud) { - set_pgd(pgd, __pgd((unsigned long)pud)); + set_p4d(p4d, __p4d((unsigned long)pud)); } #define __pud_free_tlb(tlb, x, addr) pud_free((tlb)->mm, x) diff --git a/arch/mips/include/asm/pgtable-32.h b/arch/mips/include/asm/pgtable-32.h index ba967148b016..1945c8970141 100644 --- a/arch/mips/include/asm/pgtable-32.h +++ b/arch/mips/include/asm/pgtable-32.h @@ -16,7 +16,6 @@ #include <asm/cachectl.h> #include <asm/fixmap.h> -#define __ARCH_USE_5LEVEL_HACK #include <asm-generic/pgtable-nopmd.h> #ifdef CONFIG_HIGHMEM @@ -196,14 +195,11 @@ static inline pte_t pfn_pte(unsigned long pfn, pgprot_t prot) #define pte_page(x) pfn_to_page(pte_pfn(x)) -#define __pgd_offset(address) pgd_index(address) -#define __pud_offset(address) (((address) >> PUD_SHIFT) & (PTRS_PER_PUD-1)) -#define __pmd_offset(address) (((address) >> PMD_SHIFT) & (PTRS_PER_PMD-1)) - /* to find an entry in a kernel page-table-directory */ #define pgd_offset_k(address) pgd_offset(&init_mm, address) #define pgd_index(address) (((address) >> PGDIR_SHIFT) & (PTRS_PER_PGD-1)) +#define pud_index(address) (((address) >> PUD_SHIFT) & (PTRS_PER_PUD-1)) #define pmd_index(address) (((address) >> PMD_SHIFT) & (PTRS_PER_PMD-1)) /* to find an entry in a page-table-directory */ diff --git a/arch/mips/include/asm/pgtable-64.h b/arch/mips/include/asm/pgtable-64.h index 93a9dce31f25..f92716cfa4f4 100644 --- a/arch/mips/include/asm/pgtable-64.h +++ b/arch/mips/include/asm/pgtable-64.h @@ -17,11 +17,12 @@ #include <asm/cachectl.h> #include <asm/fixmap.h> -#define __ARCH_USE_5LEVEL_HACK -#if defined(CONFIG_PAGE_SIZE_64KB) && 
!defined(CONFIG_MIPS_VA_BITS_48) +#if CONFIG_PGTABLE_LEVELS == 2 #include <asm-generic/pgtable-nopmd.h> -#elif !(defined(CONFIG_PAGE_SIZE_4KB) && defined(CONFIG_MIPS_VA_BITS_48)) +#elif CONFIG_PGTABLE_LEVELS == 3 #include <asm-generic/pgtable-nopud.h> +#else +#include <asm-generic/pgtable-nop4d.h> #endif /* @@ -186,44 +187,49 @@ extern pud_t invalid_pud_table[PTRS_PER_PUD]; /* * Empty pgd entries point to the invalid_pud_table. */ -static inline int pgd_none(pgd_t pgd) +static inline int p4d_none(p4d_t p4d) { - return pgd_val(pgd) == (unsigned long)invalid_pud_table; + return p4d_val(p4d) == (unsigned long)invalid_pud_table; } -static inline int pgd_bad(pgd_t pgd) +static inline int p4d_bad(p4d_t p4d) { - if (unlikely(pgd_val(pgd) & ~PAGE_MASK)) + if (unlikely(p4d_val(p4d) & ~PAGE_MASK)) return 1; return 0; } -static inline int pgd_present(pgd_t pgd) +static inline int p4d_present(p4d_t p4d) { - return pgd_val(pgd) != (unsigned long)invalid_pud_table; + return p4d_val(p4d) != (unsigned long)invalid_pud_table; } -static inline void pgd_clear(pgd_t *pgdp) +static inline void p4d_clear(p4d_t *p4dp) { - pgd_val(*pgdp) = (unsigned long)invalid_pud_table; + p4d_val(*p4dp) = (unsigned long)invalid_pud_table; } #define pud_index(address) (((address) >> PUD_SHIFT) & (PTRS_PER_PUD - 1)) -static inline unsigned long pgd_page_vaddr(pgd_t pgd) +static inline unsigned long p4d_page_vaddr(p4d_t p4d) { - return pgd_val(pgd); + return p4d_val(p4d); } -static inline pud_t *pud_offset(pgd_t *pgd, unsigned long address) +#define p4d_phys(p4d) virt_to_phys((void *)p4d_val(p4d)) +#define p4d_page(p4d) (pfn_to_page(p4d_phys(p4d) >> PAGE_SHIFT)) + +#define p4d_index(address) (((address) >> P4D_SHIFT) & (PTRS_PER_P4D - 1)) + +static inline pud_t *pud_offset(p4d_t *p4d, unsigned long address) { - return (pud_t *)pgd_page_vaddr(*pgd) + pud_index(address); + return (pud_t *)p4d_page_vaddr(*p4d) + pud_index(address); } -static inline void set_pgd(pgd_t *pgd, pgd_t pgdval) +static inline void set_p4d(p4d_t *p4d, p4d_t p4dval) { - *pgd = pgdval; + *p4d = p4dval; } #endif @@ -314,10 +320,6 @@ static inline void pud_clear(pud_t *pudp) #define pfn_pmd(pfn, prot) __pmd(((pfn) << _PFN_SHIFT) | pgprot_val(prot)) #endif -#define __pgd_offset(address) pgd_index(address) -#define __pud_offset(address) (((address) >> PUD_SHIFT) & (PTRS_PER_PUD-1)) -#define __pmd_offset(address) pmd_index(address) - /* to find an entry in a kernel page-table-directory */ #define pgd_offset_k(address) pgd_offset(&init_mm, address) diff --git a/arch/mips/include/asm/pgtable.h b/arch/mips/include/asm/pgtable.h index f85bd5b15f51..91b89aab1787 100644 --- a/arch/mips/include/asm/pgtable.h +++ b/arch/mips/include/asm/pgtable.h @@ -644,17 +644,6 @@ static inline pmd_t pmdp_huge_get_and_clear(struct mm_struct *mm, #include <asm-generic/pgtable.h> /* - * uncached accelerated TLB map for video memory access - */ -#ifdef CONFIG_CPU_SUPPORTS_UNCACHED_ACCELERATED -#define __HAVE_PHYS_MEM_ACCESS_PROT - -struct file; -pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn, - unsigned long size, pgprot_t vma_prot); -#endif - -/* * We provide our own get_unmapped area to cope with the virtual aliasing * constraints placed on us by the cache architecture. */ diff --git a/arch/mips/include/asm/pmon.h b/arch/mips/include/asm/pmon.h deleted file mode 100644 index 6ad519189ce2..000000000000 --- a/arch/mips/include/asm/pmon.h +++ /dev/null @@ -1,46 +0,0 @@ -/* - * This file is subject to the terms and conditions of the GNU General Public - * License. 
See the file "COPYING" in the main directory of this archive - * for more details. - * - * Copyright (C) 2004 by Ralf Baechle - * - * The cpustart method is a PMC-Sierra's function to start the secondary CPU. - * Stock PMON 2000 has the smpfork, semlock and semunlock methods instead. - */ -#ifndef _ASM_PMON_H -#define _ASM_PMON_H - -struct callvectors { - int (*open) (char*, int, int); - int (*close) (int); - int (*read) (int, void*, int); - int (*write) (int, void*, int); - off_t (*lseek) (int, off_t, int); - int (*printf) (const char*, ...); - void (*cacheflush) (void); - char* (*gets) (char*); - union { - int (*smpfork) (unsigned long cp, char *sp); - int (*cpustart) (long, void (*)(void), void *, long); - } _s; - int (*semlock) (int sem); - void (*semunlock) (int sem); -}; - -extern struct callvectors *debug_vectors; - -#define pmon_open(name, flags, mode) debug_vectors->open(name, flage, mode) -#define pmon_close(fd) debug_vectors->close(fd) -#define pmon_read(fd, buf, count) debug_vectors->read(fd, buf, count) -#define pmon_write(fd, buf, count) debug_vectors->write(fd, buf, count) -#define pmon_lseek(fd, off, whence) debug_vectors->lseek(fd, off, whence) -#define pmon_printf(fmt...) debug_vectors->printf(fmt) -#define pmon_cacheflush() debug_vectors->cacheflush() -#define pmon_gets(s) debug_vectors->gets(s) -#define pmon_cpustart(n, f, sp, gp) debug_vectors->_s.cpustart(n, f, sp, gp) -#define pmon_smpfork(cp, sp) debug_vectors->_s.smpfork(cp, sp) -#define pmon_semlock(sem) debug_vectors->semlock(sem) -#define pmon_semunlock(sem) debug_vectors->semunlock(sem) - -#endif /* _ASM_PMON_H */ diff --git a/arch/mips/include/asm/processor.h b/arch/mips/include/asm/processor.h index fba18d4a9190..7619ad319400 100644 --- a/arch/mips/include/asm/processor.h +++ b/arch/mips/include/asm/processor.h @@ -385,7 +385,7 @@ unsigned long get_wchan(struct task_struct *p); #define KSTK_ESP(tsk) (task_pt_regs(tsk)->regs[29]) #define KSTK_STATUS(tsk) (task_pt_regs(tsk)->cp0_status) -#ifdef CONFIG_CPU_LOONGSON3 +#ifdef CONFIG_CPU_LOONGSON64 /* * Loongson-3's SFB (Store-Fill-Buffer) may buffer writes indefinitely when a * tight read loop is executed, because reads take priority over writes & the diff --git a/arch/mips/include/asm/r4kcache.h b/arch/mips/include/asm/r4kcache.h index 7f4a32d3345a..15ab16f99f28 100644 --- a/arch/mips/include/asm/r4kcache.h +++ b/arch/mips/include/asm/r4kcache.h @@ -15,12 +15,14 @@ #include <linux/stringify.h> #include <asm/asm.h> +#include <asm/asm-eva.h> #include <asm/cacheops.h> #include <asm/compiler.h> #include <asm/cpu-features.h> #include <asm/cpu-type.h> #include <asm/mipsmtregs.h> #include <asm/mmzone.h> +#include <asm/unroll.h> #include <linux/uaccess.h> /* for uaccess_kernel() */ extern void (*r4k_blast_dcache)(void); @@ -39,16 +41,19 @@ extern void (*r4k_blast_icache)(void); */ #define INDEX_BASE CKSEG0 -#define cache_op(op,addr) \ +#define _cache_op(insn, op, addr) \ __asm__ __volatile__( \ " .set push \n" \ " .set noreorder \n" \ " .set "MIPS_ISA_ARCH_LEVEL" \n" \ - " cache %0, %1 \n" \ + " " insn("%0", "%1") " \n" \ " .set pop \n" \ : \ : "i" (op), "R" (*(unsigned char *)(addr))) +#define cache_op(op, addr) \ + _cache_op(kernel_cache, op, addr) + static inline void flush_icache_line_indexed(unsigned long addr) { cache_op(Index_Invalidate_I, addr); @@ -67,7 +72,7 @@ static inline void flush_scache_line_indexed(unsigned long addr) static inline void flush_icache_line(unsigned long addr) { switch (boot_cpu_type()) { - case CPU_LOONGSON2: + case CPU_LOONGSON2EF: 
cache_op(Hit_Invalidate_I_Loongson2, addr); break; @@ -149,7 +154,7 @@ static inline void flush_scache_line(unsigned long addr) static inline int protected_flush_icache_line(unsigned long addr) { switch (boot_cpu_type()) { - case CPU_LOONGSON2: + case CPU_LOONGSON2EF: return protected_cache_op(Hit_Invalidate_I_Loongson2, addr); default: @@ -193,338 +198,10 @@ static inline void invalidate_tcache_page(unsigned long addr) cache_op(Page_Invalidate_T, addr); } -#ifndef CONFIG_CPU_MIPSR6 -#define cache16_unroll32(base,op) \ - __asm__ __volatile__( \ - " .set push \n" \ - " .set noreorder \n" \ - " .set mips3 \n" \ - " cache %1, 0x000(%0); cache %1, 0x010(%0) \n" \ - " cache %1, 0x020(%0); cache %1, 0x030(%0) \n" \ - " cache %1, 0x040(%0); cache %1, 0x050(%0) \n" \ - " cache %1, 0x060(%0); cache %1, 0x070(%0) \n" \ - " cache %1, 0x080(%0); cache %1, 0x090(%0) \n" \ - " cache %1, 0x0a0(%0); cache %1, 0x0b0(%0) \n" \ - " cache %1, 0x0c0(%0); cache %1, 0x0d0(%0) \n" \ - " cache %1, 0x0e0(%0); cache %1, 0x0f0(%0) \n" \ - " cache %1, 0x100(%0); cache %1, 0x110(%0) \n" \ - " cache %1, 0x120(%0); cache %1, 0x130(%0) \n" \ - " cache %1, 0x140(%0); cache %1, 0x150(%0) \n" \ - " cache %1, 0x160(%0); cache %1, 0x170(%0) \n" \ - " cache %1, 0x180(%0); cache %1, 0x190(%0) \n" \ - " cache %1, 0x1a0(%0); cache %1, 0x1b0(%0) \n" \ - " cache %1, 0x1c0(%0); cache %1, 0x1d0(%0) \n" \ - " cache %1, 0x1e0(%0); cache %1, 0x1f0(%0) \n" \ - " .set pop \n" \ - : \ - : "r" (base), \ - "i" (op)); - -#define cache32_unroll32(base,op) \ - __asm__ __volatile__( \ - " .set push \n" \ - " .set noreorder \n" \ - " .set mips3 \n" \ - " cache %1, 0x000(%0); cache %1, 0x020(%0) \n" \ - " cache %1, 0x040(%0); cache %1, 0x060(%0) \n" \ - " cache %1, 0x080(%0); cache %1, 0x0a0(%0) \n" \ - " cache %1, 0x0c0(%0); cache %1, 0x0e0(%0) \n" \ - " cache %1, 0x100(%0); cache %1, 0x120(%0) \n" \ - " cache %1, 0x140(%0); cache %1, 0x160(%0) \n" \ - " cache %1, 0x180(%0); cache %1, 0x1a0(%0) \n" \ - " cache %1, 0x1c0(%0); cache %1, 0x1e0(%0) \n" \ - " cache %1, 0x200(%0); cache %1, 0x220(%0) \n" \ - " cache %1, 0x240(%0); cache %1, 0x260(%0) \n" \ - " cache %1, 0x280(%0); cache %1, 0x2a0(%0) \n" \ - " cache %1, 0x2c0(%0); cache %1, 0x2e0(%0) \n" \ - " cache %1, 0x300(%0); cache %1, 0x320(%0) \n" \ - " cache %1, 0x340(%0); cache %1, 0x360(%0) \n" \ - " cache %1, 0x380(%0); cache %1, 0x3a0(%0) \n" \ - " cache %1, 0x3c0(%0); cache %1, 0x3e0(%0) \n" \ - " .set pop \n" \ - : \ - : "r" (base), \ - "i" (op)); - -#define cache64_unroll32(base,op) \ - __asm__ __volatile__( \ - " .set push \n" \ - " .set noreorder \n" \ - " .set mips3 \n" \ - " cache %1, 0x000(%0); cache %1, 0x040(%0) \n" \ - " cache %1, 0x080(%0); cache %1, 0x0c0(%0) \n" \ - " cache %1, 0x100(%0); cache %1, 0x140(%0) \n" \ - " cache %1, 0x180(%0); cache %1, 0x1c0(%0) \n" \ - " cache %1, 0x200(%0); cache %1, 0x240(%0) \n" \ - " cache %1, 0x280(%0); cache %1, 0x2c0(%0) \n" \ - " cache %1, 0x300(%0); cache %1, 0x340(%0) \n" \ - " cache %1, 0x380(%0); cache %1, 0x3c0(%0) \n" \ - " cache %1, 0x400(%0); cache %1, 0x440(%0) \n" \ - " cache %1, 0x480(%0); cache %1, 0x4c0(%0) \n" \ - " cache %1, 0x500(%0); cache %1, 0x540(%0) \n" \ - " cache %1, 0x580(%0); cache %1, 0x5c0(%0) \n" \ - " cache %1, 0x600(%0); cache %1, 0x640(%0) \n" \ - " cache %1, 0x680(%0); cache %1, 0x6c0(%0) \n" \ - " cache %1, 0x700(%0); cache %1, 0x740(%0) \n" \ - " cache %1, 0x780(%0); cache %1, 0x7c0(%0) \n" \ - " .set pop \n" \ - : \ - : "r" (base), \ - "i" (op)); - -#define cache128_unroll32(base,op) \ - __asm__ 
__volatile__( \ - " .set push \n" \ - " .set noreorder \n" \ - " .set mips3 \n" \ - " cache %1, 0x000(%0); cache %1, 0x080(%0) \n" \ - " cache %1, 0x100(%0); cache %1, 0x180(%0) \n" \ - " cache %1, 0x200(%0); cache %1, 0x280(%0) \n" \ - " cache %1, 0x300(%0); cache %1, 0x380(%0) \n" \ - " cache %1, 0x400(%0); cache %1, 0x480(%0) \n" \ - " cache %1, 0x500(%0); cache %1, 0x580(%0) \n" \ - " cache %1, 0x600(%0); cache %1, 0x680(%0) \n" \ - " cache %1, 0x700(%0); cache %1, 0x780(%0) \n" \ - " cache %1, 0x800(%0); cache %1, 0x880(%0) \n" \ - " cache %1, 0x900(%0); cache %1, 0x980(%0) \n" \ - " cache %1, 0xa00(%0); cache %1, 0xa80(%0) \n" \ - " cache %1, 0xb00(%0); cache %1, 0xb80(%0) \n" \ - " cache %1, 0xc00(%0); cache %1, 0xc80(%0) \n" \ - " cache %1, 0xd00(%0); cache %1, 0xd80(%0) \n" \ - " cache %1, 0xe00(%0); cache %1, 0xe80(%0) \n" \ - " cache %1, 0xf00(%0); cache %1, 0xf80(%0) \n" \ - " .set pop \n" \ - : \ - : "r" (base), \ - "i" (op)); - -#else -/* - * MIPS R6 changed the cache opcode and moved to a 8-bit offset field. - * This means we now need to increment the base register before we flush - * more cache lines - */ -#define cache16_unroll32(base,op) \ - __asm__ __volatile__( \ - " .set push\n" \ - " .set noreorder\n" \ - " .set mips64r6\n" \ - " .set noat\n" \ - " cache %1, 0x000(%0); cache %1, 0x010(%0)\n" \ - " cache %1, 0x020(%0); cache %1, 0x030(%0)\n" \ - " cache %1, 0x040(%0); cache %1, 0x050(%0)\n" \ - " cache %1, 0x060(%0); cache %1, 0x070(%0)\n" \ - " cache %1, 0x080(%0); cache %1, 0x090(%0)\n" \ - " cache %1, 0x0a0(%0); cache %1, 0x0b0(%0)\n" \ - " cache %1, 0x0c0(%0); cache %1, 0x0d0(%0)\n" \ - " cache %1, 0x0e0(%0); cache %1, 0x0f0(%0)\n" \ - " "__stringify(LONG_ADDIU)" $1, %0, 0x100 \n" \ - " cache %1, 0x000($1); cache %1, 0x010($1)\n" \ - " cache %1, 0x020($1); cache %1, 0x030($1)\n" \ - " cache %1, 0x040($1); cache %1, 0x050($1)\n" \ - " cache %1, 0x060($1); cache %1, 0x070($1)\n" \ - " cache %1, 0x080($1); cache %1, 0x090($1)\n" \ - " cache %1, 0x0a0($1); cache %1, 0x0b0($1)\n" \ - " cache %1, 0x0c0($1); cache %1, 0x0d0($1)\n" \ - " cache %1, 0x0e0($1); cache %1, 0x0f0($1)\n" \ - " .set pop\n" \ - : \ - : "r" (base), \ - "i" (op)); - -#define cache32_unroll32(base,op) \ - __asm__ __volatile__( \ - " .set push\n" \ - " .set noreorder\n" \ - " .set mips64r6\n" \ - " .set noat\n" \ - " cache %1, 0x000(%0); cache %1, 0x020(%0)\n" \ - " cache %1, 0x040(%0); cache %1, 0x060(%0)\n" \ - " cache %1, 0x080(%0); cache %1, 0x0a0(%0)\n" \ - " cache %1, 0x0c0(%0); cache %1, 0x0e0(%0)\n" \ - " "__stringify(LONG_ADDIU)" $1, %0, 0x100 \n" \ - " cache %1, 0x000($1); cache %1, 0x020($1)\n" \ - " cache %1, 0x040($1); cache %1, 0x060($1)\n" \ - " cache %1, 0x080($1); cache %1, 0x0a0($1)\n" \ - " cache %1, 0x0c0($1); cache %1, 0x0e0($1)\n" \ - " "__stringify(LONG_ADDIU)" $1, $1, 0x100 \n" \ - " cache %1, 0x000($1); cache %1, 0x020($1)\n" \ - " cache %1, 0x040($1); cache %1, 0x060($1)\n" \ - " cache %1, 0x080($1); cache %1, 0x0a0($1)\n" \ - " cache %1, 0x0c0($1); cache %1, 0x0e0($1)\n" \ - " "__stringify(LONG_ADDIU)" $1, $1, 0x100\n" \ - " cache %1, 0x000($1); cache %1, 0x020($1)\n" \ - " cache %1, 0x040($1); cache %1, 0x060($1)\n" \ - " cache %1, 0x080($1); cache %1, 0x0a0($1)\n" \ - " cache %1, 0x0c0($1); cache %1, 0x0e0($1)\n" \ - " .set pop\n" \ - : \ - : "r" (base), \ - "i" (op)); - -#define cache64_unroll32(base,op) \ - __asm__ __volatile__( \ - " .set push\n" \ - " .set noreorder\n" \ - " .set mips64r6\n" \ - " .set noat\n" \ - " cache %1, 0x000(%0); cache %1, 0x040(%0)\n" \ - " 
cache %1, 0x080(%0); cache %1, 0x0c0(%0)\n" \ - " "__stringify(LONG_ADDIU)" $1, %0, 0x100 \n" \ - " cache %1, 0x000($1); cache %1, 0x040($1)\n" \ - " cache %1, 0x080($1); cache %1, 0x0c0($1)\n" \ - " "__stringify(LONG_ADDIU)" $1, $1, 0x100 \n" \ - " cache %1, 0x000($1); cache %1, 0x040($1)\n" \ - " cache %1, 0x080($1); cache %1, 0x0c0($1)\n" \ - " "__stringify(LONG_ADDIU)" $1, $1, 0x100 \n" \ - " cache %1, 0x000($1); cache %1, 0x040($1)\n" \ - " cache %1, 0x080($1); cache %1, 0x0c0($1)\n" \ - " "__stringify(LONG_ADDIU)" $1, $1, 0x100 \n" \ - " cache %1, 0x000($1); cache %1, 0x040($1)\n" \ - " cache %1, 0x080($1); cache %1, 0x0c0($1)\n" \ - " "__stringify(LONG_ADDIU)" $1, $1, 0x100 \n" \ - " cache %1, 0x000($1); cache %1, 0x040($1)\n" \ - " cache %1, 0x080($1); cache %1, 0x0c0($1)\n" \ - " "__stringify(LONG_ADDIU)" $1, $1, 0x100 \n" \ - " cache %1, 0x000($1); cache %1, 0x040($1)\n" \ - " cache %1, 0x080($1); cache %1, 0x0c0($1)\n" \ - " "__stringify(LONG_ADDIU)" $1, $1, 0x100 \n" \ - " cache %1, 0x000($1); cache %1, 0x040($1)\n" \ - " cache %1, 0x080($1); cache %1, 0x0c0($1)\n" \ - " .set pop\n" \ - : \ - : "r" (base), \ - "i" (op)); - -#define cache128_unroll32(base,op) \ - __asm__ __volatile__( \ - " .set push\n" \ - " .set noreorder\n" \ - " .set mips64r6\n" \ - " .set noat\n" \ - " cache %1, 0x000(%0); cache %1, 0x080(%0)\n" \ - " "__stringify(LONG_ADDIU)" $1, %0, 0x100 \n" \ - " cache %1, 0x000($1); cache %1, 0x080($1)\n" \ - " "__stringify(LONG_ADDIU)" $1, $1, 0x100 \n" \ - " cache %1, 0x000($1); cache %1, 0x080($1)\n" \ - " "__stringify(LONG_ADDIU)" $1, $1, 0x100 \n" \ - " cache %1, 0x000($1); cache %1, 0x080($1)\n" \ - " "__stringify(LONG_ADDIU)" $1, $1, 0x100 \n" \ - " cache %1, 0x000($1); cache %1, 0x080($1)\n" \ - " "__stringify(LONG_ADDIU)" $1, $1, 0x100 \n" \ - " cache %1, 0x000($1); cache %1, 0x080($1)\n" \ - " "__stringify(LONG_ADDIU)" $1, $1, 0x100 \n" \ - " cache %1, 0x000($1); cache %1, 0x080($1)\n" \ - " "__stringify(LONG_ADDIU)" $1, $1, 0x100 \n" \ - " cache %1, 0x000($1); cache %1, 0x080($1)\n" \ - " "__stringify(LONG_ADDIU)" $1, $1, 0x100 \n" \ - " cache %1, 0x000($1); cache %1, 0x080($1)\n" \ - " "__stringify(LONG_ADDIU)" $1, $1, 0x100 \n" \ - " cache %1, 0x000($1); cache %1, 0x080($1)\n" \ - " "__stringify(LONG_ADDIU)" $1, $1, 0x100 \n" \ - " cache %1, 0x000($1); cache %1, 0x080($1)\n" \ - " "__stringify(LONG_ADDIU)" $1, $1, 0x100 \n" \ - " cache %1, 0x000($1); cache %1, 0x080($1)\n" \ - " "__stringify(LONG_ADDIU)" $1, $1, 0x100 \n" \ - " cache %1, 0x000($1); cache %1, 0x080($1)\n" \ - " "__stringify(LONG_ADDIU)" $1, $1, 0x100 \n" \ - " cache %1, 0x000($1); cache %1, 0x080($1)\n" \ - " "__stringify(LONG_ADDIU)" $1, $1, 0x100 \n" \ - " cache %1, 0x000($1); cache %1, 0x080($1)\n" \ - " "__stringify(LONG_ADDIU)" $1, $1, 0x100 \n" \ - " cache %1, 0x000($1); cache %1, 0x080($1)\n" \ - " "__stringify(LONG_ADDIU)" $1, $1, 0x100 \n" \ - " cache %1, 0x000($1); cache %1, 0x080($1)\n" \ - " .set pop\n" \ - : \ - : "r" (base), \ - "i" (op)); -#endif /* CONFIG_CPU_MIPSR6 */ - -/* - * Perform the cache operation specified by op using a user mode virtual - * address while in kernel mode. 
- */ -#define cache16_unroll32_user(base,op) \ - __asm__ __volatile__( \ - " .set push \n" \ - " .set noreorder \n" \ - " .set mips0 \n" \ - " .set eva \n" \ - " cachee %1, 0x000(%0); cachee %1, 0x010(%0) \n" \ - " cachee %1, 0x020(%0); cachee %1, 0x030(%0) \n" \ - " cachee %1, 0x040(%0); cachee %1, 0x050(%0) \n" \ - " cachee %1, 0x060(%0); cachee %1, 0x070(%0) \n" \ - " cachee %1, 0x080(%0); cachee %1, 0x090(%0) \n" \ - " cachee %1, 0x0a0(%0); cachee %1, 0x0b0(%0) \n" \ - " cachee %1, 0x0c0(%0); cachee %1, 0x0d0(%0) \n" \ - " cachee %1, 0x0e0(%0); cachee %1, 0x0f0(%0) \n" \ - " cachee %1, 0x100(%0); cachee %1, 0x110(%0) \n" \ - " cachee %1, 0x120(%0); cachee %1, 0x130(%0) \n" \ - " cachee %1, 0x140(%0); cachee %1, 0x150(%0) \n" \ - " cachee %1, 0x160(%0); cachee %1, 0x170(%0) \n" \ - " cachee %1, 0x180(%0); cachee %1, 0x190(%0) \n" \ - " cachee %1, 0x1a0(%0); cachee %1, 0x1b0(%0) \n" \ - " cachee %1, 0x1c0(%0); cachee %1, 0x1d0(%0) \n" \ - " cachee %1, 0x1e0(%0); cachee %1, 0x1f0(%0) \n" \ - " .set pop \n" \ - : \ - : "r" (base), \ - "i" (op)); - -#define cache32_unroll32_user(base, op) \ - __asm__ __volatile__( \ - " .set push \n" \ - " .set noreorder \n" \ - " .set mips0 \n" \ - " .set eva \n" \ - " cachee %1, 0x000(%0); cachee %1, 0x020(%0) \n" \ - " cachee %1, 0x040(%0); cachee %1, 0x060(%0) \n" \ - " cachee %1, 0x080(%0); cachee %1, 0x0a0(%0) \n" \ - " cachee %1, 0x0c0(%0); cachee %1, 0x0e0(%0) \n" \ - " cachee %1, 0x100(%0); cachee %1, 0x120(%0) \n" \ - " cachee %1, 0x140(%0); cachee %1, 0x160(%0) \n" \ - " cachee %1, 0x180(%0); cachee %1, 0x1a0(%0) \n" \ - " cachee %1, 0x1c0(%0); cachee %1, 0x1e0(%0) \n" \ - " cachee %1, 0x200(%0); cachee %1, 0x220(%0) \n" \ - " cachee %1, 0x240(%0); cachee %1, 0x260(%0) \n" \ - " cachee %1, 0x280(%0); cachee %1, 0x2a0(%0) \n" \ - " cachee %1, 0x2c0(%0); cachee %1, 0x2e0(%0) \n" \ - " cachee %1, 0x300(%0); cachee %1, 0x320(%0) \n" \ - " cachee %1, 0x340(%0); cachee %1, 0x360(%0) \n" \ - " cachee %1, 0x380(%0); cachee %1, 0x3a0(%0) \n" \ - " cachee %1, 0x3c0(%0); cachee %1, 0x3e0(%0) \n" \ - " .set pop \n" \ - : \ - : "r" (base), \ - "i" (op)); - -#define cache64_unroll32_user(base, op) \ - __asm__ __volatile__( \ - " .set push \n" \ - " .set noreorder \n" \ - " .set mips0 \n" \ - " .set eva \n" \ - " cachee %1, 0x000(%0); cachee %1, 0x040(%0) \n" \ - " cachee %1, 0x080(%0); cachee %1, 0x0c0(%0) \n" \ - " cachee %1, 0x100(%0); cachee %1, 0x140(%0) \n" \ - " cachee %1, 0x180(%0); cachee %1, 0x1c0(%0) \n" \ - " cachee %1, 0x200(%0); cachee %1, 0x240(%0) \n" \ - " cachee %1, 0x280(%0); cachee %1, 0x2c0(%0) \n" \ - " cachee %1, 0x300(%0); cachee %1, 0x340(%0) \n" \ - " cachee %1, 0x380(%0); cachee %1, 0x3c0(%0) \n" \ - " cachee %1, 0x400(%0); cachee %1, 0x440(%0) \n" \ - " cachee %1, 0x480(%0); cachee %1, 0x4c0(%0) \n" \ - " cachee %1, 0x500(%0); cachee %1, 0x540(%0) \n" \ - " cachee %1, 0x580(%0); cachee %1, 0x5c0(%0) \n" \ - " cachee %1, 0x600(%0); cachee %1, 0x640(%0) \n" \ - " cachee %1, 0x680(%0); cachee %1, 0x6c0(%0) \n" \ - " cachee %1, 0x700(%0); cachee %1, 0x740(%0) \n" \ - " cachee %1, 0x780(%0); cachee %1, 0x7c0(%0) \n" \ - " .set pop \n" \ - : \ - : "r" (base), \ - "i" (op)); +#define cache_unroll(times, insn, op, addr, lsize) do { \ + int i = 0; \ + unroll(times, _cache_op, insn, op, (addr) + (i++ * (lsize))); \ +} while (0) /* build blast_xxx, blast_xxx_page, blast_xxx_page_indexed */ #define __BUILD_BLAST_CACHE(pfx, desc, indexop, hitop, lsize, extra) \ @@ -539,7 +216,8 @@ static inline void extra##blast_##pfx##cache##lsize(void) \ \ for 
(ws = 0; ws < ws_end; ws += ws_inc) \ for (addr = start; addr < end; addr += lsize * 32) \ - cache##lsize##_unroll32(addr|ws, indexop); \ + cache_unroll(32, kernel_cache, indexop, \ + addr | ws, lsize); \ } \ \ static inline void extra##blast_##pfx##cache##lsize##_page(unsigned long page) \ @@ -548,7 +226,7 @@ static inline void extra##blast_##pfx##cache##lsize##_page(unsigned long page) \ unsigned long end = page + PAGE_SIZE; \ \ do { \ - cache##lsize##_unroll32(start, hitop); \ + cache_unroll(32, kernel_cache, hitop, start, lsize); \ start += lsize * 32; \ } while (start < end); \ } \ @@ -565,7 +243,8 @@ static inline void extra##blast_##pfx##cache##lsize##_page_indexed(unsigned long \ for (ws = 0; ws < ws_end; ws += ws_inc) \ for (addr = start; addr < end; addr += lsize * 32) \ - cache##lsize##_unroll32(addr|ws, indexop); \ + cache_unroll(32, kernel_cache, indexop, \ + addr | ws, lsize); \ } __BUILD_BLAST_CACHE(d, dcache, Index_Writeback_Inv_D, Hit_Writeback_Inv_D, 16, ) @@ -596,7 +275,7 @@ static inline void blast_##pfx##cache##lsize##_user_page(unsigned long page) \ unsigned long end = page + PAGE_SIZE; \ \ do { \ - cache##lsize##_unroll32_user(start, hitop); \ + cache_unroll(32, user_cache, hitop, start, lsize); \ start += lsize * 32; \ } while (start < end); \ } @@ -688,7 +367,8 @@ static inline void blast_##pfx##cache##lsize##_node(long node) \ \ for (ws = 0; ws < ws_end; ws += ws_inc) \ for (addr = start; addr < end; addr += lsize * 32) \ - cache##lsize##_unroll32(addr|ws, indexop); \ + cache_unroll(32, kernel_cache, indexop, \ + addr | ws, lsize); \ } __BUILD_BLAST_CACHE_NODE(s, scache, Index_Writeback_Inv_SD, Hit_Writeback_Inv_SD, 16) diff --git a/arch/mips/include/asm/sgi/heart.h b/arch/mips/include/asm/sgi/heart.h new file mode 100644 index 000000000000..c423221b4792 --- /dev/null +++ b/arch/mips/include/asm/sgi/heart.h @@ -0,0 +1,272 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * HEART chip definitions + * + * Copyright (C) 2004-2007 Stanislaw Skowronek <skylark@unaligned.org> + * 2009 Johannes Dickgreber <tanzy@gmx.de> + * 2007-2015 Joshua Kinard <kumba@gentoo.org> + */ +#ifndef __ASM_SGI_HEART_H +#define __ASM_SGI_HEART_H + +#include <linux/types.h> +#include <linux/time.h> + +/* + * There are 8 DIMM slots on an IP30 system + * board, which are grouped into four banks + */ +#define HEART_MEMORY_BANKS 4 + +/* HEART can support up to four CPUs */ +#define HEART_MAX_CPUS 4 + +#define HEART_XKPHYS_BASE ((void *)(IO_BASE | 0x000000000ff00000ULL)) + +/** + * struct ip30_heart_regs - struct that maps IP30 HEART registers. + * @mode: HEART_MODE - Purpose Unknown, machine reset called from here. + * @sdram_mode: HEART_SDRAM_MODE - purpose unknown. + * @mem_refresh: HEART_MEM_REF - purpose unknown. + * @mem_req_arb: HEART_MEM_REQ_ARB - purpose unknown. + * @mem_cfg.q: union for 64bit access to HEART_MEMCFG - 4x 64bit registers. + * @mem_cfg.l: union for 32bit access to HEART_MEMCFG - 8x 32bit registers. + * @fc_mode: HEART_FC_MODE - Purpose Unknown, possibly for GFX flow control. + * @fc_timer_limit: HEART_FC_TIMER_LIMIT - purpose unknown. + * @fc_addr: HEART_FC0_ADDR, HEART_FC1_ADDR - purpose unknown. + * @fc_credit_cnt: HEART_FC0_CR_CNT, HEART_FC1_CR_CNT - purpose unknown. + * @fc_timer: HEART_FC0_TIMER, HEART_FC1_TIMER - purpose unknown. + * @status: HEART_STATUS - HEART status information. + * @bus_err_addr: HEART_BERR_ADDR - likely contains addr of recent SIGBUS. + * @bus_err_misc: HEART_BERR_MISC - purpose unknown. 
+ * @mem_err_addr: HEART_MEMERR_ADDR - likely contains addr of recent mem err. + * @mem_err_data: HEART_MEMERR_DATA - purpose unknown. + * @piur_acc_err: HEART_PIUR_ACC_ERR - likely for access err to HEART regs. + * @mlan_clock_div: HEART_MLAN_CLK_DIV - MicroLAN clock divider. + * @mlan_ctrl: HEART_MLAN_CTL - MicroLAN control. + * @__pad0: 0x0f40 bytes of padding -> next HEART register 0x01000. + * @undefined: Undefined/diag register, write to it triggers PIUR_ACC_ERR. + * @__pad1: 0xeff8 bytes of padding -> next HEART register 0x10000. + * @imr: HEART_IMR0 to HEART_IMR3 - per-cpu interrupt mask register. + * @set_isr: HEART_SET_ISR - set interrupt status register. + * @clear_isr: HEART_CLR_ISR - clear interrupt status register. + * @isr: HEART_ISR - interrupt status register (read-only). + * @imsr: HEART_IMSR - purpose unknown. + * @cause: HEART_CAUSE - HEART cause information. + * @__pad2: 0xffb8 bytes of padding -> next HEART register 0x20000. + * @count: HEART_COUNT - 52-bit counter. + * @__pad3: 0xfff8 bytes of padding -> next HEART register 0x30000. + * @compare: HEART_COMPARE - 24-bit compare. + * @__pad4: 0xfff8 bytes of padding -> next HEART register 0x40000. + * @trigger: HEART_TRIGGER - purpose unknown. + * @__pad5: 0xfff8 bytes of padding -> next HEART register 0x50000. + * @cpuid: HEART_PRID - contains CPU ID of CPU currently accessing HEART. + * @__pad6: 0xfff8 bytes of padding -> next HEART register 0x60000. + * @sync: HEART_SYNC - purpose unknown. + * + * HEART is the main system controller ASIC for IP30 system. It incorporates + * a memory controller, interrupt status/cause/set/clear management, basic + * timer with count/compare, and other functionality. For Linux, not all of + * HEART's functions are fully understood. + * + * Implementation note: All HEART registers are 64bits-wide, but the mem_cfg + * register only reports correct values if queried in 32bits. Hence the need + * for a union. Even though mem_cfg.l has 8 array slots, we only ever query + * up to 4 of those. IP30 has 8 DIMM slots arranged into 4 banks, w/ 2 DIMMs + * per bank. Each 32bit read accesses one of these banks. Perhaps HEART was + * designed to address up to 8 banks (16 DIMMs)? We may never know. + */ +struct ip30_heart_regs { /* 0x0ff00000 */ + u64 mode; /* + 0x00000 */ + /* Memory */ + u64 sdram_mode; /* + 0x00008 */ + u64 mem_refresh; /* + 0x00010 */ + u64 mem_req_arb; /* + 0x00018 */ + union { + u64 q[HEART_MEMORY_BANKS]; /* readq() */ + u32 l[HEART_MEMORY_BANKS * 2]; /* readl() */ + } mem_cfg; /* + 0x00020 */ + /* Flow control (gfx?) 
*/ + u64 fc_mode; /* + 0x00040 */ + u64 fc_timer_limit; /* + 0x00048 */ + u64 fc_addr[2]; /* + 0x00050 */ + u64 fc_credit_cnt[2]; /* + 0x00060 */ + u64 fc_timer[2]; /* + 0x00070 */ + /* Status */ + u64 status; /* + 0x00080 */ + /* Bus error */ + u64 bus_err_addr; /* + 0x00088 */ + u64 bus_err_misc; /* + 0x00090 */ + /* Memory error */ + u64 mem_err_addr; /* + 0x00098 */ + u64 mem_err_data; /* + 0x000a0 */ + /* Misc */ + u64 piur_acc_err; /* + 0x000a8 */ + u64 mlan_clock_div; /* + 0x000b0 */ + u64 mlan_ctrl; /* + 0x000b8 */ + u64 __pad0[0x01e8]; /* + 0x000c0 + 0x0f40 */ + /* Undefined */ + u64 undefined; /* + 0x01000 */ + u64 __pad1[0x1dff]; /* + 0x01008 + 0xeff8 */ + /* Interrupts */ + u64 imr[HEART_MAX_CPUS]; /* + 0x10000 */ + u64 set_isr; /* + 0x10020 */ + u64 clear_isr; /* + 0x10028 */ + u64 isr; /* + 0x10030 */ + u64 imsr; /* + 0x10038 */ + u64 cause; /* + 0x10040 */ + u64 __pad2[0x1ff7]; /* + 0x10048 + 0xffb8 */ + /* Timer */ + u64 count; /* + 0x20000 */ + u64 __pad3[0x1fff]; /* + 0x20008 + 0xfff8 */ + u64 compare; /* + 0x30000 */ + u64 __pad4[0x1fff]; /* + 0x30008 + 0xfff8 */ + u64 trigger; /* + 0x40000 */ + u64 __pad5[0x1fff]; /* + 0x40008 + 0xfff8 */ + /* Misc */ + u64 cpuid; /* + 0x50000 */ + u64 __pad6[0x1fff]; /* + 0x50008 + 0xfff8 */ + u64 sync; /* + 0x60000 */ +}; + + +/* For timer-related bits. */ +#define HEART_NS_PER_CYCLE 80 +#define HEART_CYCLES_PER_SEC (NSEC_PER_SEC / HEART_NS_PER_CYCLE) + + +/* + * XXX: Everything below this comment will either go away or be cleaned + * up to fit in better with Linux. A lot of the bit definitions for + * HEART were derived from IRIX's sys/RACER/heart.h header file. + */ + +/* HEART Masks */ +#define HEART_ATK_MASK 0x0007ffffffffffff /* HEART attack mask */ +#define HEART_ACK_ALL_MASK 0xffffffffffffffff /* Ack everything */ +#define HEART_CLR_ALL_MASK 0x0000000000000000 /* Clear all */ +#define HEART_BR_ERR_MASK 0x7ff8000000000000 /* BRIDGE error mask */ +#define HEART_CPU0_ERR_MASK 0x8ff8000000000000 /* CPU0 error mask */ +#define HEART_CPU1_ERR_MASK 0x97f8000000000000 /* CPU1 error mask */ +#define HEART_CPU2_ERR_MASK 0xa7f8000000000000 /* CPU2 error mask */ +#define HEART_CPU3_ERR_MASK 0xc7f8000000000000 /* CPU3 error mask */ +#define HEART_ERR_MASK 0x1ff /* HEART error mask */ +#define HEART_ERR_MASK_START 51 /* HEART error start */ +#define HEART_ERR_MASK_END 63 /* HEART error end */ + +/* Bits in the HEART_MODE register. 
*/ +#define HM_PROC_DISABLE_SHFT 60 +#define HM_PROC_DISABLE_MSK (0xfUL << HM_PROC_DISABLE_SHFT) +#define HM_PROC_DISABLE(x) (0x1UL << (x) + HM_PROC_DISABLE_SHFT) +#define HM_MAX_PSR (0x7UL << 57) +#define HM_MAX_IOSR (0x7UL << 54) +#define HM_MAX_PEND_IOSR (0x7UL << 51) +#define HM_TRIG_SRC_SEL_MSK (0x7UL << 48) +#define HM_TRIG_HEART_EXC (0x0UL << 48) +#define HM_TRIG_REG_BIT (0x1UL << 48) +#define HM_TRIG_SYSCLK (0x2UL << 48) +#define HM_TRIG_MEMCLK_2X (0x3UL << 48) +#define HM_TRIG_MEMCLK (0x4UL << 48) +#define HM_TRIG_IOCLK (0x5UL << 48) +#define HM_PIU_TEST_MODE (0xfUL << 40) +#define HM_GP_FLAG_MSK (0xfUL << 36) +#define HM_GP_FLAG(x) BIT((x) + 36) +#define HM_MAX_PROC_HYST (0xfUL << 32) +#define HM_LLP_WRST_AFTER_RST BIT(28) +#define HM_LLP_LINK_RST BIT(27) +#define HM_LLP_WARM_RST BIT(26) +#define HM_COR_ECC_LCK BIT(25) +#define HM_REDUCED_PWR BIT(24) +#define HM_COLD_RST BIT(23) +#define HM_SW_RST BIT(22) +#define HM_MEM_FORCE_WR BIT(21) +#define HM_DB_ERR_GEN BIT(20) +#define HM_SB_ERR_GEN BIT(19) +#define HM_CACHED_PIO_EN BIT(18) +#define HM_CACHED_PROM_EN BIT(17) +#define HM_PE_SYS_COR_ERE BIT(16) +#define HM_GLOBAL_ECC_EN BIT(15) +#define HM_IO_COH_EN BIT(14) +#define HM_INT_EN BIT(13) +#define HM_DATA_CHK_EN BIT(12) +#define HM_REF_EN BIT(11) +#define HM_BAD_SYSWR_ERE BIT(10) +#define HM_BAD_SYSRD_ERE BIT(9) +#define HM_SYSSTATE_ERE BIT(8) +#define HM_SYSCMD_ERE BIT(7) +#define HM_NCOR_SYS_ERE BIT(6) +#define HM_COR_SYS_ERE BIT(5) +#define HM_DATA_ELMNT_ERE BIT(4) +#define HM_MEM_ADDR_PROC_ERE BIT(3) +#define HM_MEM_ADDR_IO_ERE BIT(2) +#define HM_NCOR_MEM_ERE BIT(1) +#define HM_COR_MEM_ERE BIT(0) + +/* Bits in the HEART_MEM_REF register. */ +#define HEART_MEMREF_REFS(x) ((0xfUL & (x)) << 16) +#define HEART_MEMREF_PERIOD(x) ((0xffffUL & (x))) +#define HEART_MEMREF_REFS_VAL HEART_MEMREF_REFS(8) +#define HEART_MEMREF_PERIOD_VAL HEART_MEMREF_PERIOD(0x4000) +#define HEART_MEMREF_VAL (HEART_MEMREF_REFS_VAL | \ + HEART_MEMREF_PERIOD_VAL) + +/* Bits in the HEART_MEM_REQ_ARB register. */ +#define HEART_MEMARB_IODIS (1 << 20) +#define HEART_MEMARB_MAXPMWRQS (15 << 16) +#define HEART_MEMARB_MAXPMRRQS (15 << 12) +#define HEART_MEMARB_MAXPMRQS (15 << 8) +#define HEART_MEMARB_MAXRRRQS (15 << 4) +#define HEART_MEMARB_MAXGBRRQS (15) + +/* Bits in the HEART_MEMCFG<x> registers. 
*/ +#define HEART_MEMCFG_VALID 0x80000000 /* Bank is valid */ +#define HEART_MEMCFG_DENSITY 0x01c00000 /* Mem density */ +#define HEART_MEMCFG_SIZE_MASK 0x003f0000 /* Mem size mask */ +#define HEART_MEMCFG_ADDR_MASK 0x000001ff /* Base addr mask */ +#define HEART_MEMCFG_SIZE_SHIFT 16 /* Mem size shift */ +#define HEART_MEMCFG_DENSITY_SHIFT 22 /* Density Shift */ +#define HEART_MEMCFG_UNIT_SHIFT 25 /* Unit Shift, 32MB */ + +/* Bits in the HEART_STATUS register */ +#define HEART_STAT_HSTL_SDRV BIT(14) +#define HEART_STAT_FC_CR_OUT(x) BIT((x) + 12) +#define HEART_STAT_DIR_CNNCT BIT(11) +#define HEART_STAT_TRITON BIT(10) +#define HEART_STAT_R4K BIT(9) +#define HEART_STAT_BIG_ENDIAN BIT(8) +#define HEART_STAT_PROC_SHFT 4 +#define HEART_STAT_PROC_MSK (0xfUL << HEART_STAT_PROC_SHFT) +#define HEART_STAT_PROC_ACTIVE(x) (0x1UL << ((x) + HEART_STAT_PROC_SHFT)) +#define HEART_STAT_WIDGET_ID 0xf + +/* Bits in the HEART_CAUSE register */ +#define HC_PE_SYS_COR_ERR_MSK (0xfUL << 60) +#define HC_PE_SYS_COR_ERR(x) BIT((x) + 60) +#define HC_PIOWDB_OFLOW BIT(44) +#define HC_PIORWRB_OFLOW BIT(43) +#define HC_PIUR_ACC_ERR BIT(42) +#define HC_BAD_SYSWR_ERR BIT(41) +#define HC_BAD_SYSRD_ERR BIT(40) +#define HC_SYSSTATE_ERR_MSK (0xfUL << 36) +#define HC_SYSSTATE_ERR(x) BIT((x) + 36) +#define HC_SYSCMD_ERR_MSK (0xfUL << 32) +#define HC_SYSCMD_ERR(x) BIT((x) + 32) +#define HC_NCOR_SYSAD_ERR_MSK (0xfUL << 28) +#define HC_NCOR_SYSAD_ERR(x) BIT((x) + 28) +#define HC_COR_SYSAD_ERR_MSK (0xfUL << 24) +#define HC_COR_SYSAD_ERR(x) BIT((x) + 24) +#define HC_DATA_ELMNT_ERR_MSK (0xfUL << 20) +#define HC_DATA_ELMNT_ERR(x) BIT((x) + 20) +#define HC_WIDGET_ERR BIT(16) +#define HC_MEM_ADDR_ERR_PROC_MSK (0xfUL << 4) +#define HC_MEM_ADDR_ERR_PROC(x) BIT((x) + 4) +#define HC_MEM_ADDR_ERR_IO BIT(2) +#define HC_NCOR_MEM_ERR BIT(1) +#define HC_COR_MEM_ERR BIT(0) + +extern struct ip30_heart_regs __iomem *heart_regs; + +#define heart_read ____raw_readq +#define heart_write ____raw_writeq + +#endif /* __ASM_SGI_HEART_H */ diff --git a/arch/mips/include/asm/sgi/sgi.h b/arch/mips/include/asm/sgi/sgi.h deleted file mode 100644 index b61557151e3f..000000000000 --- a/arch/mips/include/asm/sgi/sgi.h +++ /dev/null @@ -1,48 +0,0 @@ -/* - * This file is subject to the terms and conditions of the GNU General Public - * License. See the file "COPYING" in the main directory of this archive - * for more details. - * - * sgi.h: Definitions specific to SGI machines. - * - * Copyright (C) 1996 David S. Miller (dm@sgi.com) - */ -#ifndef _ASM_SGI_SGI_H -#define _ASM_SGI_SGI_H - -/* UP=UniProcessor MP=MultiProcessor(capable) */ -enum sgi_mach { - ip4, /* R2k UP */ - ip5, /* R2k MP */ - ip6, /* R3k UP */ - ip7, /* R3k MP */ - ip9, /* R3k UP */ - ip12, /* R3kA UP, Indigo */ - ip15, /* R3kA MP */ - ip17, /* R4K UP */ - ip19, /* R4K MP */ - ip20, /* R4K UP, Indigo */ - ip21, /* R8k/TFP MP */ - ip22, /* R4x00 UP, Indy, Indigo2 */ - ip25, /* R10k MP */ - ip26, /* R8k/TFP UP, Indigo2 */ - ip27, /* R10k MP, R12k MP, R14k MP, Origin 200/2k, Onyx2 */ - ip28, /* R10k UP, Indigo2 Impact R10k */ - ip30, /* R10k MP, R12k MP, R14k MP, Octane */ - ip32, /* R5k UP, RM5200 UP, RM7k UP, R10k UP, R12k UP, O2 */ - ip35, /* R14k MP, R16k MP, Origin 300/3k, Onyx3, Fuel, Tezro */ -}; - -extern enum sgi_mach sgimach; -extern void sgi_sysinit(void); - -/* Many I/O space registers are byte sized and are contained within - * one byte per word, specifically the MSB, this macro helps out. 
- */ -#ifdef __MIPSEL__ -#define SGI_MSB(regaddr) (regaddr) -#else -#define SGI_MSB(regaddr) ((regaddr) | 0x3) -#endif - -#endif /* _ASM_SGI_SGI_H */ diff --git a/arch/mips/include/asm/sgialib.h b/arch/mips/include/asm/sgialib.h index 0d9fad5915fe..80f900417f7e 100644 --- a/arch/mips/include/asm/sgialib.h +++ b/arch/mips/include/asm/sgialib.h @@ -15,14 +15,6 @@ #include <asm/sgiarcs.h> extern struct linux_romvec *romvec; -extern int prom_argc; - -extern LONG *_prom_argv, *_prom_envp; - -/* A 32-bit ARC PROM pass arguments and environment as 32-bit pointer. - These macros take care of sign extension. */ -#define prom_argv(index) ((char *) (long) _prom_argv[(index)]) -#define prom_argc(index) ((char *) (long) _prom_argc[(index)]) extern int prom_flags; @@ -47,12 +39,6 @@ extern void prom_meminit(void); /* PROM device tree library routines. */ #define PROM_NULL_COMPONENT ((pcomponent *) 0) -/* Get sibling component of THIS. */ -extern pcomponent *ArcGetPeer(pcomponent *this); - -/* Get child component of THIS. */ -extern pcomponent *ArcGetChild(pcomponent *this); - /* This is called at prom_init time to identify the * ARC architecture we are running on */ @@ -60,22 +46,16 @@ extern void prom_identify_arch(void); /* Environment variable routines. */ extern PCHAR ArcGetEnvironmentVariable(PCHAR name); -extern LONG ArcSetEnvironmentVariable(PCHAR name, PCHAR value); /* ARCS command line parsing. */ -extern void prom_init_cmdline(void); +extern void prom_init_cmdline(int argc, LONG *argv); /* File operations. */ extern LONG ArcRead(ULONG fd, PVOID buf, ULONG num, PULONG cnt); extern LONG ArcWrite(ULONG fd, PVOID buf, ULONG num, PULONG cnt); /* Misc. routines. */ -extern VOID ArcHalt(VOID) __noreturn; -extern VOID ArcPowerDown(VOID) __noreturn; -extern VOID ArcRestart(VOID) __noreturn; -extern VOID ArcReboot(VOID) __noreturn; extern VOID ArcEnterInteractiveMode(VOID) __noreturn; -extern VOID ArcFlushAllCaches(VOID); extern DISPLAY_STATUS *ArcGetDisplayStatus(ULONG FileID); #endif /* _ASM_SGIALIB_H */ diff --git a/arch/mips/include/asm/sgiarcs.h b/arch/mips/include/asm/sgiarcs.h index 105a9479ac5f..e1512cab180b 100644 --- a/arch/mips/include/asm/sgiarcs.h +++ b/arch/mips/include/asm/sgiarcs.h @@ -12,6 +12,8 @@ #ifndef _ASM_SGIARCS_H #define _ASM_SGIARCS_H +#include <linux/kernel.h> + #include <asm/types.h> #include <asm/fw/arc/types.h> @@ -368,110 +370,65 @@ struct linux_smonblock { #if defined(CONFIG_64BIT) && defined(CONFIG_FW_ARC32) -#define __arc_clobbers \ - "$2", "$3" /* ... 
*/, "$8", "$9", "$10", "$11", \ - "$12", "$13", "$14", "$15", "$16", "$24", "$25", "$31" +extern long call_o32(long vec, void *stack, ...); + +extern u64 o32_stk[4096]; +#define O32_STK (&o32_stk[ARRAY_SIZE(o32_stk)]) #define ARC_CALL0(dest) \ ({ long __res; \ long __vec = (long) romvec->dest; \ - __asm__ __volatile__( \ - "dsubu\t$29, 32\n\t" \ - "jalr\t%1\n\t" \ - "daddu\t$29, 32\n\t" \ - "move\t%0, $2" \ - : "=r" (__res), "=r" (__vec) \ - : "1" (__vec) \ - : __arc_clobbers, "$4", "$5", "$6", "$7"); \ - (unsigned long) __res; \ + __res = call_o32(__vec, O32_STK); \ + __res; \ }) #define ARC_CALL1(dest, a1) \ ({ long __res; \ - register signed int __a1 __asm__("$4") = (int) (long) (a1); \ + int __a1 = (int) (long) (a1); \ long __vec = (long) romvec->dest; \ - __asm__ __volatile__( \ - "dsubu\t$29, 32\n\t" \ - "jalr\t%1\n\t" \ - "daddu\t$29, 32\n\t" \ - "move\t%0, $2" \ - : "=r" (__res), "=r" (__vec) \ - : "1" (__vec), "r" (__a1) \ - : __arc_clobbers, "$5", "$6", "$7"); \ - (unsigned long) __res; \ + __res = call_o32(__vec, O32_STK, __a1); \ + __res; \ }) #define ARC_CALL2(dest, a1, a2) \ ({ long __res; \ - register signed int __a1 __asm__("$4") = (int) (long) (a1); \ - register signed int __a2 __asm__("$5") = (int) (long) (a2); \ + int __a1 = (int) (long) (a1); \ + int __a2 = (int) (long) (a2); \ long __vec = (long) romvec->dest; \ - __asm__ __volatile__( \ - "dsubu\t$29, 32\n\t" \ - "jalr\t%1\n\t" \ - "daddu\t$29, 32\n\t" \ - "move\t%0, $2" \ - : "=r" (__res), "=r" (__vec) \ - : "1" (__vec), "r" (__a1), "r" (__a2) \ - : __arc_clobbers, "$6", "$7"); \ + __res = call_o32(__vec, O32_STK, __a1, __a2); \ __res; \ }) #define ARC_CALL3(dest, a1, a2, a3) \ ({ long __res; \ - register signed int __a1 __asm__("$4") = (int) (long) (a1); \ - register signed int __a2 __asm__("$5") = (int) (long) (a2); \ - register signed int __a3 __asm__("$6") = (int) (long) (a3); \ + int __a1 = (int) (long) (a1); \ + int __a2 = (int) (long) (a2); \ + int __a3 = (int) (long) (a3); \ long __vec = (long) romvec->dest; \ - __asm__ __volatile__( \ - "dsubu\t$29, 32\n\t" \ - "jalr\t%1\n\t" \ - "daddu\t$29, 32\n\t" \ - "move\t%0, $2" \ - : "=r" (__res), "=r" (__vec) \ - : "1" (__vec), "r" (__a1), "r" (__a2), "r" (__a3) \ - : __arc_clobbers, "$7"); \ + __res = call_o32(__vec, O32_STK, __a1, __a2, __a3); \ __res; \ }) #define ARC_CALL4(dest, a1, a2, a3, a4) \ ({ long __res; \ - register signed int __a1 __asm__("$4") = (int) (long) (a1); \ - register signed int __a2 __asm__("$5") = (int) (long) (a2); \ - register signed int __a3 __asm__("$6") = (int) (long) (a3); \ - register signed int __a4 __asm__("$7") = (int) (long) (a4); \ + int __a1 = (int) (long) (a1); \ + int __a2 = (int) (long) (a2); \ + int __a3 = (int) (long) (a3); \ + int __a4 = (int) (long) (a4); \ long __vec = (long) romvec->dest; \ - __asm__ __volatile__( \ - "dsubu\t$29, 32\n\t" \ - "jalr\t%1\n\t" \ - "daddu\t$29, 32\n\t" \ - "move\t%0, $2" \ - : "=r" (__res), "=r" (__vec) \ - : "1" (__vec), "r" (__a1), "r" (__a2), "r" (__a3), \ - "r" (__a4) \ - : __arc_clobbers); \ + __res = call_o32(__vec, O32_STK, __a1, __a2, __a3, __a4); \ __res; \ }) -#define ARC_CALL5(dest, a1, a2, a3, a4, a5) \ +#define ARC_CALL5(dest, a1, a2, a3, a4, a5) \ ({ long __res; \ - register signed int __a1 __asm__("$4") = (int) (long) (a1); \ - register signed int __a2 __asm__("$5") = (int) (long) (a2); \ - register signed int __a3 __asm__("$6") = (int) (long) (a3); \ - register signed int __a4 __asm__("$7") = (int) (long) (a4); \ - register signed int __a5 = (int) (long) (a5); \ + int __a1 = 
(int) (long) (a1); \ + int __a2 = (int) (long) (a2); \ + int __a3 = (int) (long) (a3); \ + int __a4 = (int) (long) (a4); \ + int __a5 = (int) (long) (a5); \ long __vec = (long) romvec->dest; \ - __asm__ __volatile__( \ - "dsubu\t$29, 32\n\t" \ - "sw\t%7, 16($29)\n\t" \ - "jalr\t%1\n\t" \ - "daddu\t$29, 32\n\t" \ - "move\t%0, $2" \ - : "=r" (__res), "=r" (__vec) \ - : "1" (__vec), \ - "r" (__a1), "r" (__a2), "r" (__a3), "r" (__a4), \ - "r" (__a5) \ - : __arc_clobbers); \ + __res = call_o32(__vec, O32_STK, __a1, __a2, __a3, __a4, __a5); \ __res; \ }) diff --git a/arch/mips/include/asm/sn/agent.h b/arch/mips/include/asm/sn/agent.h index e33d09293019..7e9b3271737a 100644 --- a/arch/mips/include/asm/sn/agent.h +++ b/arch/mips/include/asm/sn/agent.h @@ -26,7 +26,7 @@ #if defined(CONFIG_SGI_IP27) #define HUB_NIC_ADDR(_cpuid) \ - REMOTE_HUB_ADDR(COMPACT_TO_NASID_NODEID(cpu_to_node(_cpuid)), \ + REMOTE_HUB_ADDR(cpu_to_node(_cpuid), \ MD_MLAN_CTL) #endif diff --git a/arch/mips/include/asm/sn/arch.h b/arch/mips/include/asm/sn/arch.h index 3f1fb1454749..f7d3273d9599 100644 --- a/arch/mips/include/asm/sn/arch.h +++ b/arch/mips/include/asm/sn/arch.h @@ -19,44 +19,13 @@ #define cputonasid(cpu) (sn_cpu_info[(cpu)].p_nasid) #define cputoslice(cpu) (sn_cpu_info[(cpu)].p_slice) -#define makespnum(_nasid, _slice) \ - (((_nasid) << CPUS_PER_NODE_SHFT) | (_slice)) #define INVALID_NASID (nasid_t)-1 -#define INVALID_CNODEID (cnodeid_t)-1 #define INVALID_PNODEID (pnodeid_t)-1 #define INVALID_MODULE (moduleid_t)-1 #define INVALID_PARTID (partid_t)-1 extern nasid_t get_nasid(void); -extern cnodeid_t get_cpu_cnode(cpuid_t); extern int get_cpu_slice(cpuid_t); -/* - * NO ONE should access these arrays directly. The only reason we refer to - * them here is to avoid the procedure call that would be required in the - * macros below. (Really want private data members here :-) - */ -extern cnodeid_t nasid_to_compact_node[MAX_NASIDS]; -extern nasid_t compact_to_nasid_node[MAX_COMPACT_NODES]; - -/* - * These macros are used by various parts of the kernel to convert - * between the three different kinds of node numbering. At least some - * of them may change to procedure calls in the future, but the macros - * will continue to work. Don't use the arrays above directly. - */ - -#define NASID_TO_REGION(nnode) \ - ((nnode) >> \ - (is_fine_dirmode() ? NASID_TO_FINEREG_SHFT : NASID_TO_COARSEREG_SHFT)) - -extern cnodeid_t nasid_to_compact_node[MAX_NASIDS]; -extern nasid_t compact_to_nasid_node[MAX_COMPACT_NODES]; -extern cnodeid_t cpuid_to_compact_node[MAXCPUS]; - -#define NASID_TO_COMPACT_NODEID(nnode) (nasid_to_compact_node[nnode]) -#define COMPACT_TO_NASID_NODEID(cnode) (compact_to_nasid_node[cnode]) -#define CPUID_TO_COMPACT_NODEID(cpu) (cpuid_to_compact_node[(cpu)]) - #endif /* _ASM_SN_ARCH_H */ diff --git a/arch/mips/include/asm/sn/gda.h b/arch/mips/include/asm/sn/gda.h index 85fa1b5f639d..d52f81620661 100644 --- a/arch/mips/include/asm/sn/gda.h +++ b/arch/mips/include/asm/sn/gda.h @@ -60,9 +60,7 @@ typedef struct gda { /* Pointer to a mask of nodes with copies * of the kernel. */ char g_padding[56]; /* pad out to 128 bytes */ - nasid_t g_nasidtable[MAX_COMPACT_NODES]; /* NASID of each node, - * indexed by cnodeid. 
- */ + nasid_t g_nasidtable[MAX_NUMNODES]; /* NASID of each node */ } gda_t; #define GDA ((gda_t*) GDA_ADDR(get_nasid())) diff --git a/arch/mips/include/asm/sn/hub.h b/arch/mips/include/asm/sn/hub.h index 338f7eed74f1..45878fbefbae 100644 --- a/arch/mips/include/asm/sn/hub.h +++ b/arch/mips/include/asm/sn/hub.h @@ -10,8 +10,8 @@ #include <asm/xtalk/xtalk.h> /* ip27-hubio.c */ -extern unsigned long hub_pio_map(cnodeid_t cnode, xwidgetnum_t widget, +extern unsigned long hub_pio_map(nasid_t nasid, xwidgetnum_t widget, unsigned long xtalk_addr, size_t size); -extern void hub_pio_init(cnodeid_t cnode); +extern void hub_pio_init(nasid_t nasid); #endif /* __ASM_SN_HUB_H */ diff --git a/arch/mips/include/asm/sn/ioc3.h b/arch/mips/include/asm/sn/ioc3.h index a947eed48fee..78ef760ddde4 100644 --- a/arch/mips/include/asm/sn/ioc3.h +++ b/arch/mips/include/asm/sn/ioc3.h @@ -590,4 +590,13 @@ struct ioc3_etxd { #define MIDR_DATA_MASK 0x0000ffff +/* subsystem IDs supplied by card detection in pci-xtalk-bridge */ +#define IOC3_SUBSYS_IP27_BASEIO6G 0xc300 +#define IOC3_SUBSYS_IP27_MIO 0xc301 +#define IOC3_SUBSYS_IP27_BASEIO 0xc302 +#define IOC3_SUBSYS_IP29_SYSBOARD 0xc303 +#define IOC3_SUBSYS_IP30_SYSBOARD 0xc304 +#define IOC3_SUBSYS_MENET 0xc305 +#define IOC3_SUBSYS_MENET4 0xc306 + #endif /* MIPS_SN_IOC3_H */ diff --git a/arch/mips/include/asm/sn/mapped_kernel.h b/arch/mips/include/asm/sn/mapped_kernel.h index 2f3efa91c16e..3f1049807018 100644 --- a/arch/mips/include/asm/sn/mapped_kernel.h +++ b/arch/mips/include/asm/sn/mapped_kernel.h @@ -37,10 +37,10 @@ #define MAPPED_KERN_RO_TO_PHYS(x) \ ((unsigned long)MAPPED_ADDR_RO_TO_PHYS(x) | \ - MAPPED_KERN_RO_PHYSBASE(get_compact_nodeid())) + MAPPED_KERN_RO_PHYSBASE(get_nasid())) #define MAPPED_KERN_RW_TO_PHYS(x) \ ((unsigned long)MAPPED_ADDR_RW_TO_PHYS(x) | \ - MAPPED_KERN_RW_PHYSBASE(get_compact_nodeid())) + MAPPED_KERN_RW_PHYSBASE(get_nasid())) #else /* CONFIG_MAPPED_KERNEL */ diff --git a/arch/mips/include/asm/sn/sn0/arch.h b/arch/mips/include/asm/sn/sn0/arch.h index 425a67e6a947..12f4c4649ff0 100644 --- a/arch/mips/include/asm/sn/sn0/arch.h +++ b/arch/mips/include/asm/sn/sn0/arch.h @@ -12,25 +12,11 @@ #define _ASM_SN_SN0_ARCH_H -#ifndef SN0XXL /* 128 cpu SMP max */ -/* - * This is the maximum number of nodes that can be part of a kernel. - * Effectively, it's the maximum number of compact node ids (cnodeid_t). - */ -#define MAX_COMPACT_NODES 64 - /* * MAXCPUS refers to the maximum number of CPUs in a single kernel. * This is not necessarily the same as MAXNODES * CPUS_PER_NODE */ -#define MAXCPUS 128 - -#else /* SN0XXL system */ - -#define MAX_COMPACT_NODES 128 -#define MAXCPUS 256 - -#endif /* SN0XXL */ +#define MAXCPUS (MAX_NUMNODES * CPUS_PER_NODE) /* * This is the maximum number of NASIDS that can be present in a system. 
@@ -66,7 +52,5 @@ #define SLOT_MIN_MEM_SIZE (32*1024*1024) #define CPUS_PER_NODE 2 /* CPUs on a single hub */ -#define CPUS_PER_NODE_SHFT 1 /* Bits to shift in the node number */ -#define CPUS_PER_SUBNODE 2 /* CPUs on a single hub PI */ #endif /* _ASM_SN_SN0_ARCH_H */ diff --git a/arch/mips/include/asm/sn/sn_private.h b/arch/mips/include/asm/sn/sn_private.h index f09ba846c644..63a2c30d81c6 100644 --- a/arch/mips/include/asm/sn/sn_private.h +++ b/arch/mips/include/asm/sn/sn_private.h @@ -7,14 +7,13 @@ extern nasid_t master_nasid; extern void cpu_node_probe(void); -extern cnodeid_t get_compact_nodeid(void); -extern void hub_rtc_init(cnodeid_t); +extern void hub_rtc_init(nasid_t nasid); extern void cpu_time_init(void); extern void per_cpu_init(void); extern void install_cpu_nmi_handler(int slice); extern void install_ipi(void); extern void setup_replication_mask(void); extern void replicate_kernel_text(void); -extern unsigned long node_getfirstfree(cnodeid_t); +extern unsigned long node_getfirstfree(nasid_t nasid); #endif /* __ASM_SN_SN_PRIVATE_H */ diff --git a/arch/mips/include/asm/sn/types.h b/arch/mips/include/asm/sn/types.h index 6d24d4e8b9ed..203c927e84d1 100644 --- a/arch/mips/include/asm/sn/types.h +++ b/arch/mips/include/asm/sn/types.h @@ -12,13 +12,9 @@ #include <linux/types.h> typedef unsigned long cpuid_t; -typedef unsigned long cnodemask_t; typedef signed short nasid_t; /* node id in numa-as-id space */ -typedef signed short cnodeid_t; /* node id in compact-id space */ typedef signed char partid_t; /* partition ID type */ typedef signed short moduleid_t; /* user-visible module number type */ -typedef signed short cmoduleid_t; /* kernel compact module id type */ -typedef unsigned char clusterid_t; /* Clusterid of the cell */ typedef dev_t vertex_hdl_t; /* hardware graph vertex handle */ diff --git a/arch/mips/include/asm/string.h b/arch/mips/include/asm/string.h index 29030cb398ee..1de3bbce8e88 100644 --- a/arch/mips/include/asm/string.h +++ b/arch/mips/include/asm/string.h @@ -10,127 +10,6 @@ #ifndef _ASM_STRING_H #define _ASM_STRING_H - -/* - * Most of the inline functions are rather naive implementations so I just - * didn't bother updating them for 64-bit ... 
- */ -#ifdef CONFIG_32BIT - -#ifndef IN_STRING_C - -#define __HAVE_ARCH_STRCPY -static __inline__ char *strcpy(char *__dest, __const__ char *__src) -{ - char *__xdest = __dest; - - __asm__ __volatile__( - ".set\tnoreorder\n\t" - ".set\tnoat\n" - "1:\tlbu\t$1,(%1)\n\t" - "addiu\t%1,1\n\t" - "sb\t$1,(%0)\n\t" - "bnez\t$1,1b\n\t" - "addiu\t%0,1\n\t" - ".set\tat\n\t" - ".set\treorder" - : "=r" (__dest), "=r" (__src) - : "0" (__dest), "1" (__src) - : "memory"); - - return __xdest; -} - -#define __HAVE_ARCH_STRNCPY -static __inline__ char *strncpy(char *__dest, __const__ char *__src, size_t __n) -{ - char *__xdest = __dest; - - if (__n == 0) - return __xdest; - - __asm__ __volatile__( - ".set\tnoreorder\n\t" - ".set\tnoat\n" - "1:\tlbu\t$1,(%1)\n\t" - "subu\t%2,1\n\t" - "sb\t$1,(%0)\n\t" - "beqz\t$1,2f\n\t" - "addiu\t%0,1\n\t" - "bnez\t%2,1b\n\t" - "addiu\t%1,1\n" - "2:\n\t" - ".set\tat\n\t" - ".set\treorder" - : "=r" (__dest), "=r" (__src), "=r" (__n) - : "0" (__dest), "1" (__src), "2" (__n) - : "memory"); - - return __xdest; -} - -#define __HAVE_ARCH_STRCMP -static __inline__ int strcmp(__const__ char *__cs, __const__ char *__ct) -{ - int __res; - - __asm__ __volatile__( - ".set\tnoreorder\n\t" - ".set\tnoat\n\t" - "lbu\t%2,(%0)\n" - "1:\tlbu\t$1,(%1)\n\t" - "addiu\t%0,1\n\t" - "bne\t$1,%2,2f\n\t" - "addiu\t%1,1\n\t" - "bnez\t%2,1b\n\t" - "lbu\t%2,(%0)\n\t" -#if defined(CONFIG_CPU_R3000) - "nop\n\t" -#endif - "move\t%2,$1\n" - "2:\tsubu\t%2,$1\n" - "3:\t.set\tat\n\t" - ".set\treorder" - : "=r" (__cs), "=r" (__ct), "=r" (__res) - : "0" (__cs), "1" (__ct)); - - return __res; -} - -#endif /* !defined(IN_STRING_C) */ - -#define __HAVE_ARCH_STRNCMP -static __inline__ int -strncmp(__const__ char *__cs, __const__ char *__ct, size_t __count) -{ - int __res; - - __asm__ __volatile__( - ".set\tnoreorder\n\t" - ".set\tnoat\n" - "1:\tlbu\t%3,(%0)\n\t" - "beqz\t%2,2f\n\t" - "lbu\t$1,(%1)\n\t" - "subu\t%2,1\n\t" - "bne\t$1,%3,3f\n\t" - "addiu\t%0,1\n\t" - "bnez\t%3,1b\n\t" - "addiu\t%1,1\n" - "2:\n\t" -#if defined(CONFIG_CPU_R3000) - "nop\n\t" -#endif - "move\t%3,$1\n" - "3:\tsubu\t%3,$1\n\t" - ".set\tat\n\t" - ".set\treorder" - : "=r" (__cs), "=r" (__ct), "=r" (__count), "=r" (__res) - : "0" (__cs), "1" (__ct), "2" (__count)); - - return __res; -} -#endif /* CONFIG_32BIT */ - #define __HAVE_ARCH_MEMSET extern void *memset(void *__s, int __c, size_t __count); diff --git a/arch/mips/include/asm/sync.h b/arch/mips/include/asm/sync.h new file mode 100644 index 000000000000..7c6a1095f556 --- /dev/null +++ b/arch/mips/include/asm/sync.h @@ -0,0 +1,207 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +#ifndef __MIPS_ASM_SYNC_H__ +#define __MIPS_ASM_SYNC_H__ + +/* + * sync types are defined by the MIPS64 Instruction Set documentation in Volume + * II-A of the MIPS Architecture Reference Manual, which can be found here: + * + * https://www.mips.com/?do-download=the-mips64-instruction-set-v6-06 + * + * Two types of barrier are provided: + * + * 1) Completion barriers, which ensure that a memory operation has actually + * completed & often involve stalling the CPU pipeline to do so. + * + * 2) Ordering barriers, which only ensure that affected memory operations + * won't be reordered in the CPU pipeline in a manner that violates the + * restrictions imposed by the barrier. 
+ * + * Ordering barriers can be more efficient than completion barriers, since: + * + * a) Ordering barriers only require memory access instructions which preceed + * them in program order (older instructions) to reach a point in the + * load/store datapath beyond which reordering is not possible before + * allowing memory access instructions which follow them (younger + * instructions) to be performed. That is, older instructions don't + * actually need to complete - they just need to get far enough that all + * other coherent CPUs will observe their completion before they observe + * the effects of younger instructions. + * + * b) Multiple variants of ordering barrier are provided which allow the + * effects to be restricted to different combinations of older or younger + * loads or stores. By way of example, if we only care that stores older + * than a barrier are observed prior to stores that are younger than a + * barrier & don't care about the ordering of loads then the 'wmb' + * ordering barrier can be used. Limiting the barrier's effects to stores + * allows loads to continue unaffected & potentially allows the CPU to + * make progress faster than if younger loads had to wait for older stores + * to complete. + */ + +/* + * No sync instruction at all; used to allow code to nullify the effect of the + * __SYNC() macro without needing lots of #ifdefery. + */ +#define __SYNC_none -1 + +/* + * A full completion barrier; all memory accesses appearing prior to this sync + * instruction in program order must complete before any memory accesses + * appearing after this sync instruction in program order. + */ +#define __SYNC_full 0x00 + +/* + * For now we use a full completion barrier to implement all sync types, until + * we're satisfied that lightweight ordering barriers defined by MIPSr6 are + * sufficient to uphold our desired memory model. + */ +#define __SYNC_aq __SYNC_full +#define __SYNC_rl __SYNC_full +#define __SYNC_mb __SYNC_full + +/* + * ...except on Cavium Octeon CPUs, which have been using the 'wmb' ordering + * barrier since 2010 & omit 'rmb' barriers because the CPUs don't perform + * speculative reads. + */ +#ifdef CONFIG_CPU_CAVIUM_OCTEON +# define __SYNC_rmb __SYNC_none +# define __SYNC_wmb 0x04 +#else +# define __SYNC_rmb __SYNC_full +# define __SYNC_wmb __SYNC_full +#endif + +/* + * A GINV sync is a little different; it doesn't relate directly to loads or + * stores, but instead causes synchronization of an icache or TLB global + * invalidation operation triggered by the ginvi or ginvt instructions + * respectively. In cases where we need to know that a ginvi or ginvt operation + * has been performed by all coherent CPUs, we must issue a sync instruction of + * this type. Once this instruction graduates all coherent CPUs will have + * observed the invalidation. + */ +#define __SYNC_ginv 0x14 + +/* Trivial; indicate that we always need this sync instruction. */ +#define __SYNC_always (1 << 0) + +/* + * Indicate that we need this sync instruction only on systems with weakly + * ordered memory access. In general this is most MIPS systems, but there are + * exceptions which provide strongly ordered memory. + */ +#ifdef CONFIG_WEAK_ORDERING +# define __SYNC_weak_ordering (1 << 1) +#else +# define __SYNC_weak_ordering 0 +#endif + +/* + * Indicate that we need this sync instruction only on systems where LL/SC + * don't implicitly provide a memory barrier. In general this is most MIPS + * systems. 
+ */ +#ifdef CONFIG_WEAK_REORDERING_BEYOND_LLSC +# define __SYNC_weak_llsc (1 << 2) +#else +# define __SYNC_weak_llsc 0 +#endif + +/* + * Some Loongson 3 CPUs have a bug wherein execution of a memory access (load, + * store or prefetch) in between an LL & SC can cause the SC instruction to + * erroneously succeed, breaking atomicity. Whilst it's unusual to write code + * containing such sequences, this bug bites harder than we might otherwise + * expect due to reordering & speculation: + * + * 1) A memory access appearing prior to the LL in program order may actually + * be executed after the LL - this is the reordering case. + * + * In order to avoid this we need to place a memory barrier (ie. a SYNC + * instruction) prior to every LL instruction, in between it and any earlier + * memory access instructions. + * + * This reordering case is fixed by 3A R2 CPUs, ie. 3A2000 models and later. + * + * 2) If a conditional branch exists between an LL & SC with a target outside + * of the LL-SC loop, for example an exit upon value mismatch in cmpxchg() + * or similar, then misprediction of the branch may allow speculative + * execution of memory accesses from outside of the LL-SC loop. + * + * In order to avoid this we need a memory barrier (ie. a SYNC instruction) + * at each affected branch target. + * + * This case affects all current Loongson 3 CPUs. + * + * The above described cases cause an error in the cache coherence protocol; + * such that the Invalidate of a competing LL-SC goes 'missing' and SC + * erroneously observes its core still has Exclusive state and lets the SC + * proceed. + * + * Therefore the error only occurs on SMP systems. + */ +#ifdef CONFIG_CPU_LOONGSON3_WORKAROUNDS +# define __SYNC_loongson3_war (1 << 31) +#else +# define __SYNC_loongson3_war 0 +#endif + +/* + * Some Cavium Octeon CPUs suffer from a bug that causes a single wmb ordering + * barrier to be ineffective, requiring the use of 2 in sequence to provide an + * effective barrier as noted by commit 6b07d38aaa52 ("MIPS: Octeon: Use + * optimized memory barrier primitives."). Here we specify that the affected + * sync instructions should be emitted twice. + */ +#ifdef CONFIG_CPU_CAVIUM_OCTEON +# define __SYNC_rpt(type) (1 + (type == __SYNC_wmb)) +#else +# define __SYNC_rpt(type) 1 +#endif + +/* + * The main event. Here we actually emit a sync instruction of a given type, if + * reason is non-zero. + * + * In future we have the option of emitting entries in a fixups-style table + * here that would allow us to opportunistically remove some sync instructions + * when we detect at runtime that we're running on a CPU that doesn't need + * them. + */ +#ifdef CONFIG_CPU_HAS_SYNC +# define ____SYNC(_type, _reason, _else) \ + .if (( _type ) != -1) && ( _reason ); \ + .set push; \ + .set MIPS_ISA_LEVEL_RAW; \ + .rept __SYNC_rpt(_type); \ + sync _type; \ + .endr; \ + .set pop; \ + .else; \ + _else; \ + .endif +#else +# define ____SYNC(_type, _reason, _else) +#endif + +/* + * Preprocessor magic to expand macros used as arguments before we insert them + * into assembly code. 
+ */ +#ifdef __ASSEMBLY__ +# define ___SYNC(type, reason, else) \ + ____SYNC(type, reason, else) +#else +# define ___SYNC(type, reason, else) \ + __stringify(____SYNC(type, reason, else)) +#endif + +#define __SYNC(type, reason) \ + ___SYNC(__SYNC_##type, __SYNC_##reason, ) +#define __SYNC_ELSE(type, reason, else) \ + ___SYNC(__SYNC_##type, __SYNC_##reason, else) + +#endif /* __MIPS_ASM_SYNC_H__ */ diff --git a/arch/mips/include/asm/unistd.h b/arch/mips/include/asm/unistd.h index 071053ece677..5d70babfc9ee 100644 --- a/arch/mips/include/asm/unistd.h +++ b/arch/mips/include/asm/unistd.h @@ -52,6 +52,7 @@ # endif #define __ARCH_WANT_SYS_FORK #define __ARCH_WANT_SYS_CLONE +#define __ARCH_WANT_SYS_CLONE3 /* whitelists for checksyscalls */ #define __IGNORE_fadvise64_64 diff --git a/arch/mips/include/asm/unroll.h b/arch/mips/include/asm/unroll.h new file mode 100644 index 000000000000..c628747d4ecd --- /dev/null +++ b/arch/mips/include/asm/unroll.h @@ -0,0 +1,77 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +#ifndef __ASM_UNROLL_H__ +#define __ASM_UNROLL_H__ + +/* + * Explicitly unroll a loop, for use in cases where doing so is performance + * critical. + * + * Ideally we'd rely upon the compiler to provide this but there's no commonly + * available means to do so. For example GCC's "#pragma GCC unroll" + * functionality would be ideal but is only available from GCC 8 onwards. Using + * -funroll-loops is an option but GCC tends to make poor choices when + * compiling our string functions. -funroll-all-loops leads to massive code + * bloat, even if only applied to the string functions. + */ +#define unroll(times, fn, ...) do { \ + extern void bad_unroll(void) \ + __compiletime_error("Unsupported unroll"); \ + \ + /* \ + * We can't unroll if the number of iterations isn't \ + * compile-time constant. Unfortunately GCC versions \ + * up until 4.6 tend to miss obvious constants & cause \ + * this check to fail, even though they go on to \ + * generate reasonable code for the switch statement, \ + * so we skip the sanity check for those compilers. 
\ + */ \ + BUILD_BUG_ON((CONFIG_GCC_VERSION >= 40700 || \ + CONFIG_CLANG_VERSION >= 80000) && \ + !__builtin_constant_p(times)); \ + \ + switch (times) { \ + case 32: fn(__VA_ARGS__); /* fall through */ \ + case 31: fn(__VA_ARGS__); /* fall through */ \ + case 30: fn(__VA_ARGS__); /* fall through */ \ + case 29: fn(__VA_ARGS__); /* fall through */ \ + case 28: fn(__VA_ARGS__); /* fall through */ \ + case 27: fn(__VA_ARGS__); /* fall through */ \ + case 26: fn(__VA_ARGS__); /* fall through */ \ + case 25: fn(__VA_ARGS__); /* fall through */ \ + case 24: fn(__VA_ARGS__); /* fall through */ \ + case 23: fn(__VA_ARGS__); /* fall through */ \ + case 22: fn(__VA_ARGS__); /* fall through */ \ + case 21: fn(__VA_ARGS__); /* fall through */ \ + case 20: fn(__VA_ARGS__); /* fall through */ \ + case 19: fn(__VA_ARGS__); /* fall through */ \ + case 18: fn(__VA_ARGS__); /* fall through */ \ + case 17: fn(__VA_ARGS__); /* fall through */ \ + case 16: fn(__VA_ARGS__); /* fall through */ \ + case 15: fn(__VA_ARGS__); /* fall through */ \ + case 14: fn(__VA_ARGS__); /* fall through */ \ + case 13: fn(__VA_ARGS__); /* fall through */ \ + case 12: fn(__VA_ARGS__); /* fall through */ \ + case 11: fn(__VA_ARGS__); /* fall through */ \ + case 10: fn(__VA_ARGS__); /* fall through */ \ + case 9: fn(__VA_ARGS__); /* fall through */ \ + case 8: fn(__VA_ARGS__); /* fall through */ \ + case 7: fn(__VA_ARGS__); /* fall through */ \ + case 6: fn(__VA_ARGS__); /* fall through */ \ + case 5: fn(__VA_ARGS__); /* fall through */ \ + case 4: fn(__VA_ARGS__); /* fall through */ \ + case 3: fn(__VA_ARGS__); /* fall through */ \ + case 2: fn(__VA_ARGS__); /* fall through */ \ + case 1: fn(__VA_ARGS__); /* fall through */ \ + case 0: break; \ + \ + default: \ + /* \ + * Either the iteration count is unreasonable \ + * or we need to add more cases above. 
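Since unroll() simply re-expands fn(__VA_ARGS__) once per case, callers pass a compile-time constant count plus an argument expression that is re-evaluated on every expansion. The snippet below is a hypothetical cache-maintenance caller showing the intended shape; flush_line(), LINE_SIZE and the empty body are illustrative stand-ins, not kernel symbols.

#define LINE_SIZE	32			/* illustrative line size */

static inline void flush_line(unsigned long addr)
{
	/* one cache-maintenance instruction per line would go here */
}

static inline void flush_four_lines(unsigned long addr)
{
	int i = 0;

	/*
	 * Expands to four flush_line() calls; the i++ in the argument is
	 * re-evaluated for each expansion, so consecutive lines are hit.
	 */
	unroll(4, flush_line, addr + (i++ * LINE_SIZE));
}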
\ + */ \ + bad_unroll(); \ + break; \ + } \ +} while (0) + +#endif /* __ASM_UNROLL_H__ */ diff --git a/arch/mips/include/asm/vdso/gettimeofday.h b/arch/mips/include/asm/vdso/gettimeofday.h index e78462e8ca2e..b08825531e9f 100644 --- a/arch/mips/include/asm/vdso/gettimeofday.h +++ b/arch/mips/include/asm/vdso/gettimeofday.h @@ -24,6 +24,8 @@ #define VDSO_HAS_CLOCK_GETRES 1 +#define __VDSO_USE_SYSCALL ULLONG_MAX + #ifdef CONFIG_MIPS_CLOCK_VSYSCALL static __always_inline long gettimeofday_fallback( @@ -205,7 +207,7 @@ static __always_inline u64 __arch_get_hw_counter(s32 clock_mode) break; #endif default: - cycle_now = 0; + cycle_now = __VDSO_USE_SYSCALL; break; } diff --git a/arch/mips/include/asm/vdso/vsyscall.h b/arch/mips/include/asm/vdso/vsyscall.h index 195314732233..00d41b94ba31 100644 --- a/arch/mips/include/asm/vdso/vsyscall.h +++ b/arch/mips/include/asm/vdso/vsyscall.h @@ -28,13 +28,6 @@ int __mips_get_clock_mode(struct timekeeper *tk) } #define __arch_get_clock_mode __mips_get_clock_mode -static __always_inline -int __mips_use_vsyscall(struct vdso_data *vdata) -{ - return (vdata[CS_HRES_COARSE].clock_mode != VDSO_CLOCK_NONE); -} -#define __arch_use_vsyscall __mips_use_vsyscall - /* The asm-generic header needs to be included after the definitions above */ #include <asm-generic/vdso/vsyscall.h> diff --git a/arch/mips/include/uapi/asm/hwcap.h b/arch/mips/include/uapi/asm/hwcap.h index a2aba4b059e6..1ade1daa4921 100644 --- a/arch/mips/include/uapi/asm/hwcap.h +++ b/arch/mips/include/uapi/asm/hwcap.h @@ -6,5 +6,16 @@ #define HWCAP_MIPS_R6 (1 << 0) #define HWCAP_MIPS_MSA (1 << 1) #define HWCAP_MIPS_CRC32 (1 << 2) +#define HWCAP_MIPS_MIPS16 (1 << 3) +#define HWCAP_MIPS_MDMX (1 << 4) +#define HWCAP_MIPS_MIPS3D (1 << 5) +#define HWCAP_MIPS_SMARTMIPS (1 << 6) +#define HWCAP_MIPS_DSP (1 << 7) +#define HWCAP_MIPS_DSP2 (1 << 8) +#define HWCAP_MIPS_DSP3 (1 << 9) +#define HWCAP_MIPS_MIPS16E2 (1 << 10) +#define HWCAP_LOONGSON_MMI (1 << 11) +#define HWCAP_LOONGSON_EXT (1 << 12) +#define HWCAP_LOONGSON_EXT2 (1 << 13) #endif /* _UAPI_ASM_HWCAP_H */ diff --git a/arch/mips/kernel/Makefile b/arch/mips/kernel/Makefile index 89b07ea8d249..d6e97df51cfb 100644 --- a/arch/mips/kernel/Makefile +++ b/arch/mips/kernel/Makefile @@ -80,7 +80,7 @@ obj-$(CONFIG_KGDB) += kgdb.o obj-$(CONFIG_PROC_FS) += proc.o obj-$(CONFIG_MAGIC_SYSRQ) += sysrq.o -obj-$(CONFIG_64BIT) += cpu-bugs64.o +obj-$(CONFIG_CPU_R4X00_BUGS64) += r4k-bugs64.o obj-$(CONFIG_I8253) += i8253.o diff --git a/arch/mips/kernel/cpu-probe.c b/arch/mips/kernel/cpu-probe.c index c2eb392597bf..c54332697673 100644 --- a/arch/mips/kernel/cpu-probe.c +++ b/arch/mips/kernel/cpu-probe.c @@ -608,7 +608,7 @@ static int set_ftlb_enable(struct cpuinfo_mips *c, enum ftlb_flags flags) if (!(flags & FTLB_EN)) return 1; return 0; - case CPU_LOONGSON3: + case CPU_LOONGSON64: /* Flush ITLB, DTLB, VTLB and FTLB */ write_c0_diag(LOONGSON_DIAG_ITLB | LOONGSON_DIAG_DTLB | LOONGSON_DIAG_VTLB | LOONGSON_DIAG_FTLB); @@ -1526,24 +1526,24 @@ static inline void cpu_probe_legacy(struct cpuinfo_mips *c, unsigned int cpu) MIPS_CPU_LLSC | MIPS_CPU_BP_GHIST; c->tlbsize = 64; break; - case PRID_IMP_LOONGSON_64: /* Loongson-2/3 */ + case PRID_IMP_LOONGSON_64C: /* Loongson-2/3 */ switch (c->processor_id & PRID_REV_MASK) { case PRID_REV_LOONGSON2E: - c->cputype = CPU_LOONGSON2; + c->cputype = CPU_LOONGSON2EF; __cpu_name[cpu] = "ICT Loongson-2"; set_elf_platform(cpu, "loongson2e"); set_isa(c, MIPS_CPU_ISA_III); c->fpu_msk31 |= FPU_CSR_CONDX; break; case PRID_REV_LOONGSON2F: - c->cputype = 
CPU_LOONGSON2; + c->cputype = CPU_LOONGSON2EF; __cpu_name[cpu] = "ICT Loongson-2"; set_elf_platform(cpu, "loongson2f"); set_isa(c, MIPS_CPU_ISA_III); c->fpu_msk31 |= FPU_CSR_CONDX; break; case PRID_REV_LOONGSON3A_R1: - c->cputype = CPU_LOONGSON3; + c->cputype = CPU_LOONGSON64; __cpu_name[cpu] = "ICT Loongson-3"; set_elf_platform(cpu, "loongson3a"); set_isa(c, MIPS_CPU_ISA_M64R1); @@ -1552,7 +1552,7 @@ static inline void cpu_probe_legacy(struct cpuinfo_mips *c, unsigned int cpu) break; case PRID_REV_LOONGSON3B_R1: case PRID_REV_LOONGSON3B_R2: - c->cputype = CPU_LOONGSON3; + c->cputype = CPU_LOONGSON64; __cpu_name[cpu] = "ICT Loongson-3"; set_elf_platform(cpu, "loongson3b"); set_isa(c, MIPS_CPU_ISA_M64R1); @@ -1565,12 +1565,13 @@ static inline void cpu_probe_legacy(struct cpuinfo_mips *c, unsigned int cpu) MIPS_CPU_FPU | MIPS_CPU_LLSC | MIPS_CPU_32FPR; c->tlbsize = 64; + set_cpu_asid_mask(c, MIPS_ENTRYHI_ASID); c->writecombine = _CACHE_UNCACHED_ACCELERATED; break; case PRID_IMP_LOONGSON_32: /* Loongson-1 */ decode_configs(c); - c->cputype = CPU_LOONGSON1; + c->cputype = CPU_LOONGSON32; switch (c->processor_id & PRID_REV_MASK) { case PRID_REV_LOONGSON1B: @@ -1903,18 +1904,18 @@ platform: static inline void cpu_probe_loongson(struct cpuinfo_mips *c, unsigned int cpu) { switch (c->processor_id & PRID_IMP_MASK) { - case PRID_IMP_LOONGSON_64: /* Loongson-2/3 */ + case PRID_IMP_LOONGSON_64C: /* Loongson-2/3 */ switch (c->processor_id & PRID_REV_MASK) { case PRID_REV_LOONGSON3A_R2_0: case PRID_REV_LOONGSON3A_R2_1: - c->cputype = CPU_LOONGSON3; + c->cputype = CPU_LOONGSON64; __cpu_name[cpu] = "ICT Loongson-3"; set_elf_platform(cpu, "loongson3a"); set_isa(c, MIPS_CPU_ISA_M64R2); break; case PRID_REV_LOONGSON3A_R3_0: case PRID_REV_LOONGSON3A_R3_1: - c->cputype = CPU_LOONGSON3; + c->cputype = CPU_LOONGSON64; __cpu_name[cpu] = "ICT Loongson-3"; set_elf_platform(cpu, "loongson3a"); set_isa(c, MIPS_CPU_ISA_M64R2); @@ -1927,6 +1928,17 @@ static inline void cpu_probe_loongson(struct cpuinfo_mips *c, unsigned int cpu) c->ases |= (MIPS_ASE_LOONGSON_MMI | MIPS_ASE_LOONGSON_CAM | MIPS_ASE_LOONGSON_EXT | MIPS_ASE_LOONGSON_EXT2); break; + case PRID_IMP_LOONGSON_64G: + c->cputype = CPU_LOONGSON64; + __cpu_name[cpu] = "ICT Loongson-3"; + set_elf_platform(cpu, "loongson3a"); + set_isa(c, MIPS_CPU_ISA_M64R2); + decode_configs(c); + c->options |= MIPS_CPU_FTLB | MIPS_CPU_TLBINV | MIPS_CPU_LDPTE; + c->writecombine = _CACHE_UNCACHED_ACCELERATED; + c->ases |= (MIPS_ASE_LOONGSON_MMI | MIPS_ASE_LOONGSON_CAM | + MIPS_ASE_LOONGSON_EXT | MIPS_ASE_LOONGSON_EXT2); + break; default: panic("Unknown Loongson Processor ID!"); break; @@ -1965,13 +1977,30 @@ static inline void cpu_probe_ingenic(struct cpuinfo_mips *c, unsigned int cpu) break; } + switch (c->processor_id & PRID_COMP_MASK) { /* - * The config0 register in the Xburst CPUs with a processor ID of + * The config0 register in the XBurst CPUs with a processor ID of + * PRID_COMP_INGENIC_D1 has an abandoned huge page tlb mode, this + * mode is not compatible with the MIPS standard, it will cause + * tlbmiss and into an infinite loop (line 21 in the tlb-funcs.S) + * when starting the init process. After chip reset, the default + * is HPTLB mode, Write 0xa9000000 to cp0 register 5 sel 4 to + * switch back to VTLB mode to prevent getting stuck. 
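The HWCAP_* bits added to the uapi header earlier in this section are reported to userspace through elf_hwcap in the cpu_probe() hunk just below; programs can then test for the Loongson extensions with getauxval(3). A minimal, hypothetical probe (constants spelled out in case the toolchain headers predate this series):

#include <stdio.h>
#include <sys/auxv.h>

#ifndef HWCAP_LOONGSON_MMI
#define HWCAP_LOONGSON_MMI	(1 << 11)	/* matches uapi/asm/hwcap.h above */
#endif
#ifndef HWCAP_LOONGSON_EXT
#define HWCAP_LOONGSON_EXT	(1 << 12)
#endif

int main(void)
{
	unsigned long hwcap = getauxval(AT_HWCAP);

	printf("Loongson MMI: %s\n", (hwcap & HWCAP_LOONGSON_MMI) ? "yes" : "no");
	printf("Loongson EXT: %s\n", (hwcap & HWCAP_LOONGSON_EXT) ? "yes" : "no");
	return 0;
}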
+ */ + case PRID_COMP_INGENIC_D1: + write_c0_page_ctrl(XBURST_PAGECTRL_HPTLB_DIS); + break; + /* + * The config0 register in the XBurst CPUs with a processor ID of * PRID_COMP_INGENIC_D0 report themselves as MIPS32r2 compatible, * but they don't actually support this ISA. */ - if ((c->processor_id & PRID_COMP_MASK) == PRID_COMP_INGENIC_D0) + case PRID_COMP_INGENIC_D0: c->isa_level &= ~MIPS_CPU_ISA_M32R2; + break; + default: + break; + } } static inline void cpu_probe_netlogic(struct cpuinfo_mips *c, int cpu) @@ -2180,6 +2209,39 @@ void cpu_probe(void) elf_hwcap |= HWCAP_MIPS_MSA; } + if (cpu_has_mips16) + elf_hwcap |= HWCAP_MIPS_MIPS16; + + if (cpu_has_mdmx) + elf_hwcap |= HWCAP_MIPS_MDMX; + + if (cpu_has_mips3d) + elf_hwcap |= HWCAP_MIPS_MIPS3D; + + if (cpu_has_smartmips) + elf_hwcap |= HWCAP_MIPS_SMARTMIPS; + + if (cpu_has_dsp) + elf_hwcap |= HWCAP_MIPS_DSP; + + if (cpu_has_dsp2) + elf_hwcap |= HWCAP_MIPS_DSP2; + + if (cpu_has_dsp3) + elf_hwcap |= HWCAP_MIPS_DSP3; + + if (cpu_has_mips16e2) + elf_hwcap |= HWCAP_MIPS_MIPS16E2; + + if (cpu_has_loongson_mmi) + elf_hwcap |= HWCAP_LOONGSON_MMI; + + if (cpu_has_loongson_ext) + elf_hwcap |= HWCAP_LOONGSON_EXT; + + if (cpu_has_loongson_ext2) + elf_hwcap |= HWCAP_LOONGSON_EXT2; + if (cpu_has_vz) cpu_probe_vz(c); diff --git a/arch/mips/kernel/genex.S b/arch/mips/kernel/genex.S index efde27c99414..0a43c9125267 100644 --- a/arch/mips/kernel/genex.S +++ b/arch/mips/kernel/genex.S @@ -18,6 +18,7 @@ #include <asm/fpregdef.h> #include <asm/mipsregs.h> #include <asm/stackframe.h> +#include <asm/sync.h> #include <asm/war.h> #include <asm/thread_info.h> @@ -353,8 +354,9 @@ NESTED(ejtag_debug_handler, PT_SIZE, sp) #ifdef CONFIG_SMP 1: PTR_LA k0, ejtag_debug_buffer_spinlock - ll k0, 0(k0) - bnez k0, 1b + __SYNC(full, loongson3_war) +2: ll k0, 0(k0) + bnez k0, 2b PTR_LA k0, ejtag_debug_buffer_spinlock sc k0, 0(k0) beqz k0, 1b @@ -657,7 +659,7 @@ isrdhwr: .set pop END(handle_ri_rdhwr) -#ifdef CONFIG_64BIT +#ifdef CONFIG_CPU_R4X00_BUGS64 /* A temporary overflow handler used by check_daddi(). */ __INIT diff --git a/arch/mips/kernel/idle.c b/arch/mips/kernel/idle.c index eb2afc0b8db1..37f8e78e2869 100644 --- a/arch/mips/kernel/idle.c +++ b/arch/mips/kernel/idle.c @@ -173,13 +173,14 @@ void __init check_wait(void) case CPU_CAVIUM_OCTEON2: case CPU_CAVIUM_OCTEON3: case CPU_XBURST: - case CPU_LOONGSON1: + case CPU_LOONGSON32: case CPU_XLR: case CPU_XLP: cpu_wait = r4k_wait; break; - case CPU_LOONGSON3: - if ((c->processor_id & PRID_REV_MASK) >= PRID_REV_LOONGSON3A_R2_0) + case CPU_LOONGSON64: + if ((c->processor_id & (PRID_IMP_MASK | PRID_REV_MASK)) >= + (PRID_IMP_LOONGSON_64C | PRID_REV_LOONGSON3A_R2_0)) cpu_wait = r4k_wait; break; diff --git a/arch/mips/kernel/perf_event_mipsxx.c b/arch/mips/kernel/perf_event_mipsxx.c index a3e2da8391ea..128fc9999c56 100644 --- a/arch/mips/kernel/perf_event_mipsxx.c +++ b/arch/mips/kernel/perf_event_mipsxx.c @@ -1623,7 +1623,7 @@ static const struct mips_perf_event *mipsxx_pmu_map_raw_event(u64 config) raw_event.cntr_mask = raw_id > 127 ? CNTR_ODD : CNTR_EVEN; break; - case CPU_LOONGSON3: + case CPU_LOONGSON64: raw_event.cntr_mask = raw_id > 127 ? 
CNTR_ODD : CNTR_EVEN; break; } @@ -1764,12 +1764,12 @@ init_hw_perf_events(void) mipspmu.general_event_map = &mipsxxcore_event_map; mipspmu.cache_event_map = &mipsxxcore_cache_map; break; - case CPU_LOONGSON1: + case CPU_LOONGSON32: mipspmu.name = "mips/loongson1"; mipspmu.general_event_map = &mipsxxcore_event_map; mipspmu.cache_event_map = &mipsxxcore_cache_map; break; - case CPU_LOONGSON3: + case CPU_LOONGSON64: mipspmu.name = "mips/loongson3"; mipspmu.general_event_map = &loongson3_event_map; mipspmu.cache_event_map = &loongson3_cache_map; diff --git a/arch/mips/kernel/pm-cps.c b/arch/mips/kernel/pm-cps.c index a26f40db15d0..9bf60d7d44d3 100644 --- a/arch/mips/kernel/pm-cps.c +++ b/arch/mips/kernel/pm-cps.c @@ -307,7 +307,7 @@ static int cps_gen_flush_fsb(u32 **pp, struct uasm_label **pl, } /* Barrier ensuring previous cache invalidates are complete */ - uasm_i_sync(pp, STYPE_SYNC); + uasm_i_sync(pp, __SYNC_full); uasm_i_ehb(pp); /* Check whether the pipeline stalled due to the FSB being full */ @@ -397,7 +397,7 @@ static void *cps_gen_entry_code(unsigned cpu, enum cps_pm_state state) if (coupled_coherence) { /* Increment ready_count */ - uasm_i_sync(&p, STYPE_SYNC_MB); + uasm_i_sync(&p, __SYNC_mb); uasm_build_label(&l, p, lbl_incready); uasm_i_ll(&p, t1, 0, r_nc_count); uasm_i_addiu(&p, t2, t1, 1); @@ -406,7 +406,7 @@ static void *cps_gen_entry_code(unsigned cpu, enum cps_pm_state state) uasm_i_addiu(&p, t1, t1, 1); /* Barrier ensuring all CPUs see the updated r_nc_count value */ - uasm_i_sync(&p, STYPE_SYNC_MB); + uasm_i_sync(&p, __SYNC_mb); /* * If this is the last VPE to become ready for non-coherence @@ -473,7 +473,7 @@ static void *cps_gen_entry_code(unsigned cpu, enum cps_pm_state state) Index_Writeback_Inv_D, lbl_flushdcache); /* Barrier ensuring previous cache invalidates are complete */ - uasm_i_sync(&p, STYPE_SYNC); + uasm_i_sync(&p, __SYNC_full); uasm_i_ehb(&p); if (mips_cm_revision() < CM_REV_CM3) { @@ -487,7 +487,7 @@ static void *cps_gen_entry_code(unsigned cpu, enum cps_pm_state state) uasm_i_lw(&p, t0, 0, r_pcohctl); /* Barrier to ensure write to coherence control is complete */ - uasm_i_sync(&p, STYPE_SYNC); + uasm_i_sync(&p, __SYNC_full); uasm_i_ehb(&p); } @@ -534,7 +534,7 @@ static void *cps_gen_entry_code(unsigned cpu, enum cps_pm_state state) } /* Barrier to ensure write to CPC command is complete */ - uasm_i_sync(&p, STYPE_SYNC); + uasm_i_sync(&p, __SYNC_full); uasm_i_ehb(&p); } @@ -572,13 +572,13 @@ static void *cps_gen_entry_code(unsigned cpu, enum cps_pm_state state) uasm_i_lw(&p, t0, 0, r_pcohctl); /* Barrier to ensure write to coherence control is complete */ - uasm_i_sync(&p, STYPE_SYNC); + uasm_i_sync(&p, __SYNC_full); uasm_i_ehb(&p); if (coupled_coherence && (state == CPS_PM_NC_WAIT)) { /* Decrement ready_count */ uasm_build_label(&l, p, lbl_decready); - uasm_i_sync(&p, STYPE_SYNC_MB); + uasm_i_sync(&p, __SYNC_mb); uasm_i_ll(&p, t1, 0, r_nc_count); uasm_i_addiu(&p, t2, t1, -1); uasm_i_sc(&p, t2, 0, r_nc_count); @@ -586,7 +586,7 @@ static void *cps_gen_entry_code(unsigned cpu, enum cps_pm_state state) uasm_i_andi(&p, v0, t1, (1 << fls(smp_num_siblings)) - 1); /* Barrier ensuring all CPUs see the updated r_nc_count value */ - uasm_i_sync(&p, STYPE_SYNC_MB); + uasm_i_sync(&p, __SYNC_mb); } if (coupled_coherence && (state == CPS_PM_CLOCK_GATED)) { @@ -608,7 +608,7 @@ static void *cps_gen_entry_code(unsigned cpu, enum cps_pm_state state) uasm_build_label(&l, p, lbl_secondary_cont); /* Barrier ensuring all CPUs see the updated r_nc_count value */ - 
uasm_i_sync(&p, STYPE_SYNC_MB); + uasm_i_sync(&p, __SYNC_mb); } /* The core is coherent, time to return to C code */ diff --git a/arch/mips/kernel/cpu-bugs64.c b/arch/mips/kernel/r4k-bugs64.c index fa62cd1dff93..1ff19f1ea5ca 100644 --- a/arch/mips/kernel/cpu-bugs64.c +++ b/arch/mips/kernel/r4k-bugs64.c @@ -24,7 +24,8 @@ static char r4kwar[] __initdata = static char daddiwar[] __initdata = "Enable CPU_DADDI_WORKAROUNDS to rectify."; -static inline void align_mod(const int align, const int mod) +static __always_inline __init +void align_mod(const int align, const int mod) { asm volatile( ".set push\n\t" @@ -38,8 +39,9 @@ static inline void align_mod(const int align, const int mod) : "n"(align), "n"(mod)); } -static __always_inline void mult_sh_align_mod(long *v1, long *v2, long *w, - const int align, const int mod) +static __always_inline __init +void mult_sh_align_mod(long *v1, long *v2, long *w, + const int align, const int mod) { unsigned long flags; int m1, m2; @@ -113,7 +115,7 @@ static __always_inline void mult_sh_align_mod(long *v1, long *v2, long *w, *w = lw; } -static inline void check_mult_sh(void) +static __always_inline __init void check_mult_sh(void) { long v1[8], v2[8], w[8]; int bug, fix, i; @@ -176,7 +178,7 @@ asmlinkage void __init do_daddi_ov(struct pt_regs *regs) exception_exit(prev_state); } -static inline void check_daddi(void) +static __init void check_daddi(void) { extern asmlinkage void handle_daddi_ov(void); unsigned long flags; @@ -240,9 +242,9 @@ static inline void check_daddi(void) panic(bug64hit, !DADDI_WAR ? daddiwar : nowar); } -int daddiu_bug = IS_ENABLED(CONFIG_CPU_MIPSR6) ? 0 : -1; +int daddiu_bug = -1; -static inline void check_daddiu(void) +static __init void check_daddiu(void) { long v, w, tmp; @@ -310,14 +312,11 @@ static inline void check_daddiu(void) void __init check_bugs64_early(void) { - if (!IS_ENABLED(CONFIG_CPU_MIPSR6)) { - check_mult_sh(); - check_daddiu(); - } + check_mult_sh(); + check_daddiu(); } void __init check_bugs64(void) { - if (!IS_ENABLED(CONFIG_CPU_MIPSR6)) - check_daddi(); + check_daddi(); } diff --git a/arch/mips/kernel/setup.c b/arch/mips/kernel/setup.c index b8249c233754..c3d4212b5f1d 100644 --- a/arch/mips/kernel/setup.c +++ b/arch/mips/kernel/setup.c @@ -67,7 +67,9 @@ static char __initdata command_line[COMMAND_LINE_SIZE]; char __initdata arcs_cmdline[COMMAND_LINE_SIZE]; #ifdef CONFIG_CMDLINE_BOOL -static char __initdata builtin_cmdline[COMMAND_LINE_SIZE] = CONFIG_CMDLINE; +static const char builtin_cmdline[] __initconst = CONFIG_CMDLINE; +#else +static const char builtin_cmdline[] __initconst = ""; #endif /* @@ -108,6 +110,9 @@ void __init add_memory_region(phys_addr_t start, phys_addr_t size, long type) return; } + if (start < PHYS_OFFSET) + return; + memblock_add(start, size); /* Reserve any memory except the ordinary RAM ranges. */ switch (type) { @@ -282,7 +287,7 @@ static unsigned long __init init_initrd(void) * Initialize the bootmem allocator. It also setup initrd related data * if needed. 
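One detail worth spelling out in the setup.c hunk that follows: memblock_reserve() takes a size in bytes, but the old expression passed a page-frame number. A worked example (assuming PHYS_OFFSET = 0 and 4 KiB pages, purely for illustration):

/*
 * ramstart = 0x00a00000 (10 MiB of firmware-owned memory below RAM start)
 *
 *   old: PFN_UP(ramstart) - PHYS_OFFSET = 0xa00        ->  2560 bytes reserved
 *   new: ramstart - PHYS_OFFSET         = 0x00a00000   ->  10 MiB reserved
 *
 * i.e. the previous code under-reserved the region below ramstart by mixing
 * a page-frame number into a byte count.
 */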
*/ -#if defined(CONFIG_SGI_IP27) || (defined(CONFIG_CPU_LOONGSON3) && defined(CONFIG_NUMA)) +#if defined(CONFIG_SGI_IP27) || (defined(CONFIG_CPU_LOONGSON64) && defined(CONFIG_NUMA)) static void __init bootmem_init(void) { @@ -321,7 +326,7 @@ static void __init bootmem_init(void) * Reserve any memory between the start of RAM and PHYS_OFFSET */ if (ramstart > PHYS_OFFSET) - memblock_reserve(PHYS_OFFSET, PFN_UP(ramstart) - PHYS_OFFSET); + memblock_reserve(PHYS_OFFSET, ramstart - PHYS_OFFSET); if (PFN_UP(ramstart) > ARCH_PFN_OFFSET) { pr_info("Wasting %lu bytes for tracking %lu unused pages\n", @@ -535,11 +540,94 @@ static void __init check_kernel_sections_mem(void) } } -#define USE_PROM_CMDLINE IS_ENABLED(CONFIG_MIPS_CMDLINE_FROM_BOOTLOADER) -#define USE_DTB_CMDLINE IS_ENABLED(CONFIG_MIPS_CMDLINE_FROM_DTB) -#define EXTEND_WITH_PROM IS_ENABLED(CONFIG_MIPS_CMDLINE_DTB_EXTEND) -#define BUILTIN_EXTEND_WITH_PROM \ - IS_ENABLED(CONFIG_MIPS_CMDLINE_BUILTIN_EXTEND) +static void __init bootcmdline_append(const char *s, size_t max) +{ + if (!s[0] || !max) + return; + + if (boot_command_line[0]) + strlcat(boot_command_line, " ", COMMAND_LINE_SIZE); + + strlcat(boot_command_line, s, max); +} + +#ifdef CONFIG_OF_EARLY_FLATTREE + +static int __init bootcmdline_scan_chosen(unsigned long node, const char *uname, + int depth, void *data) +{ + bool *dt_bootargs = data; + const char *p; + int l; + + if (depth != 1 || !data || + (strcmp(uname, "chosen") != 0 && strcmp(uname, "chosen@0") != 0)) + return 0; + + p = of_get_flat_dt_prop(node, "bootargs", &l); + if (p != NULL && l > 0) { + bootcmdline_append(p, min(l, COMMAND_LINE_SIZE)); + *dt_bootargs = true; + } + + return 1; +} + +#endif /* CONFIG_OF_EARLY_FLATTREE */ + +static void __init bootcmdline_init(char **cmdline_p) +{ + bool dt_bootargs = false; + + /* + * If CMDLINE_OVERRIDE is enabled then initializing the command line is + * trivial - we simply use the built-in command line unconditionally & + * unmodified. + */ + if (IS_ENABLED(CONFIG_CMDLINE_OVERRIDE)) { + strlcpy(boot_command_line, builtin_cmdline, COMMAND_LINE_SIZE); + return; + } + + /* + * If the user specified a built-in command line & + * MIPS_CMDLINE_BUILTIN_EXTEND, then the built-in command line is + * prepended to arguments from the bootloader or DT so we'll copy them + * to the start of boot_command_line here. Otherwise, empty + * boot_command_line to undo anything early_init_dt_scan_chosen() did. + */ + if (IS_ENABLED(CONFIG_MIPS_CMDLINE_BUILTIN_EXTEND)) + strlcpy(boot_command_line, builtin_cmdline, COMMAND_LINE_SIZE); + else + boot_command_line[0] = 0; + +#ifdef CONFIG_OF_EARLY_FLATTREE + /* + * If we're configured to take boot arguments from DT, look for those + * now. + */ + if (IS_ENABLED(CONFIG_MIPS_CMDLINE_FROM_DTB)) + of_scan_flat_dt(bootcmdline_scan_chosen, &dt_bootargs); +#endif + + /* + * If we didn't get any arguments from DT (regardless of whether that's + * because we weren't configured to look for them, or because we looked + * & found none) then we'll take arguments from the bootloader. + * plat_mem_setup() should have filled arcs_cmdline with arguments from + * the bootloader. + */ + if (IS_ENABLED(CONFIG_MIPS_CMDLINE_DTB_EXTEND) || !dt_bootargs) + bootcmdline_append(arcs_cmdline, COMMAND_LINE_SIZE); + + /* + * If the user specified a built-in command line & we didn't already + * prepend it, we append it to boot_command_line here. 
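Taken together, bootcmdline_init() builds boot_command_line from up to three sources - the built-in CONFIG_CMDLINE, DT /chosen/bootargs and the bootloader's arcs_cmdline - in an order chosen by the Kconfig switches. A compressed userspace model of that precedence logic (illustrative only; the boolean parameters stand in for CMDLINE_OVERRIDE, MIPS_CMDLINE_BUILTIN_EXTEND, MIPS_CMDLINE_FROM_DTB and MIPS_CMDLINE_DTB_EXTEND):

#include <stdbool.h>
#include <stdio.h>
#include <string.h>

#define CL_SIZE 4096
static char cl[CL_SIZE];

static void append(const char *s)
{
	if (!s[0])
		return;
	if (cl[0])
		strncat(cl, " ", CL_SIZE - strlen(cl) - 1);
	strncat(cl, s, CL_SIZE - strlen(cl) - 1);
}

static void model_bootcmdline(bool override, bool builtin_extend,
			      bool from_dtb, bool dtb_extend,
			      const char *builtin, const char *dt_bootargs,
			      const char *bootloader)
{
	bool have_dt = false;

	if (override) {				/* built-in wins outright */
		snprintf(cl, CL_SIZE, "%s", builtin);
		return;
	}

	cl[0] = '\0';
	if (builtin_extend)			/* built-in is prepended... */
		snprintf(cl, CL_SIZE, "%s", builtin);

	if (from_dtb && dt_bootargs[0]) {	/* ...then DT bootargs... */
		append(dt_bootargs);
		have_dt = true;
	}

	if (dtb_extend || !have_dt)		/* ...then bootloader args... */
		append(bootloader);

	if (builtin[0] && !builtin_extend)	/* ...or built-in appended last */
		append(builtin);
}

int main(void)
{
	model_bootcmdline(false, false, true, false,
			  "console=ttyS0", "root=/dev/sda1", "mem=256M");
	puts(cl);	/* DT args kept, bootloader dropped, built-in appended */
	return 0;
}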
+ */ + if (IS_ENABLED(CONFIG_CMDLINE_BOOL) && + !IS_ENABLED(CONFIG_MIPS_CMDLINE_BUILTIN_EXTEND)) + bootcmdline_append(builtin_cmdline, COMMAND_LINE_SIZE); +} /* * arch_mem_init - initialize memory management subsystem @@ -567,48 +655,12 @@ static void __init arch_mem_init(char **cmdline_p) { extern void plat_mem_setup(void); - /* - * Initialize boot_command_line to an innocuous but non-empty string in - * order to prevent early_init_dt_scan_chosen() from copying - * CONFIG_CMDLINE into it without our knowledge. We handle - * CONFIG_CMDLINE ourselves below & don't want to duplicate its - * content because repeating arguments can be problematic. - */ - strlcpy(boot_command_line, " ", COMMAND_LINE_SIZE); - /* call board setup routine */ plat_mem_setup(); memblock_set_bottom_up(true); -#if defined(CONFIG_CMDLINE_BOOL) && defined(CONFIG_CMDLINE_OVERRIDE) - strlcpy(boot_command_line, builtin_cmdline, COMMAND_LINE_SIZE); -#else - if ((USE_PROM_CMDLINE && arcs_cmdline[0]) || - (USE_DTB_CMDLINE && !boot_command_line[0])) - strlcpy(boot_command_line, arcs_cmdline, COMMAND_LINE_SIZE); - - if (EXTEND_WITH_PROM && arcs_cmdline[0]) { - if (boot_command_line[0]) - strlcat(boot_command_line, " ", COMMAND_LINE_SIZE); - strlcat(boot_command_line, arcs_cmdline, COMMAND_LINE_SIZE); - } - -#if defined(CONFIG_CMDLINE_BOOL) - if (builtin_cmdline[0]) { - if (boot_command_line[0]) - strlcat(boot_command_line, " ", COMMAND_LINE_SIZE); - strlcat(boot_command_line, builtin_cmdline, COMMAND_LINE_SIZE); - } - - if (BUILTIN_EXTEND_WITH_PROM && arcs_cmdline[0]) { - if (boot_command_line[0]) - strlcat(boot_command_line, " ", COMMAND_LINE_SIZE); - strlcat(boot_command_line, arcs_cmdline, COMMAND_LINE_SIZE); - } -#endif -#endif + bootcmdline_init(cmdline_p); strlcpy(command_line, boot_command_line, COMMAND_LINE_SIZE); - *cmdline_p = command_line; parse_early_param(); diff --git a/arch/mips/kernel/smp-bmips.c b/arch/mips/kernel/smp-bmips.c index 76fae9b79f13..9058e9dcf080 100644 --- a/arch/mips/kernel/smp-bmips.c +++ b/arch/mips/kernel/smp-bmips.c @@ -31,7 +31,6 @@ #include <asm/pgtable.h> #include <asm/processor.h> #include <asm/bootinfo.h> -#include <asm/pmon.h> #include <asm/cacheflush.h> #include <asm/tlbflush.h> #include <asm/mipsregs.h> @@ -464,10 +463,10 @@ static void bmips_wr_vec(unsigned long dst, char *start, char *end) static inline void bmips_nmi_handler_setup(void) { - bmips_wr_vec(BMIPS_NMI_RESET_VEC, &bmips_reset_nmi_vec, - &bmips_reset_nmi_vec_end); - bmips_wr_vec(BMIPS_WARM_RESTART_VEC, &bmips_smp_int_vec, - &bmips_smp_int_vec_end); + bmips_wr_vec(BMIPS_NMI_RESET_VEC, bmips_reset_nmi_vec, + bmips_reset_nmi_vec_end); + bmips_wr_vec(BMIPS_WARM_RESTART_VEC, bmips_smp_int_vec, + bmips_smp_int_vec_end); } struct reset_vec_info { diff --git a/arch/mips/kernel/syscall.c b/arch/mips/kernel/syscall.c index b0e25e913bdb..c333e5788664 100644 --- a/arch/mips/kernel/syscall.c +++ b/arch/mips/kernel/syscall.c @@ -37,6 +37,7 @@ #include <asm/signal.h> #include <asm/sim.h> #include <asm/shmparam.h> +#include <asm/sync.h> #include <asm/sysmips.h> #include <asm/switch_to.h> @@ -80,6 +81,7 @@ SYSCALL_DEFINE6(mips_mmap2, unsigned long, addr, unsigned long, len, save_static_function(sys_fork); save_static_function(sys_clone); +save_static_function(sys_clone3); SYSCALL_DEFINE1(set_thread_area, unsigned long, addr) { @@ -132,12 +134,12 @@ static inline int mips_atomic_set(unsigned long addr, unsigned long new) [efault] "i" (-EFAULT) : "memory"); } else if (cpu_has_llsc) { - loongson_llsc_mb(); __asm__ __volatile__ ( " .set push 
\n" " .set "MIPS_ISA_ARCH_LEVEL" \n" " li %[err], 0 \n" "1: \n" + " " __SYNC(full, loongson3_war) " \n" user_ll("%[old]", "(%[addr])") " move %[tmp], %[new] \n" "2: \n" diff --git a/arch/mips/kernel/syscalls/syscall_n32.tbl b/arch/mips/kernel/syscalls/syscall_n32.tbl index c9c879ec9b6d..e7c5ab38e403 100644 --- a/arch/mips/kernel/syscalls/syscall_n32.tbl +++ b/arch/mips/kernel/syscalls/syscall_n32.tbl @@ -373,4 +373,4 @@ 432 n32 fsmount sys_fsmount 433 n32 fspick sys_fspick 434 n32 pidfd_open sys_pidfd_open -# 435 reserved for clone3 +435 n32 clone3 __sys_clone3 diff --git a/arch/mips/kernel/syscalls/syscall_n64.tbl b/arch/mips/kernel/syscalls/syscall_n64.tbl index bbce9159caa1..13cd66581f3b 100644 --- a/arch/mips/kernel/syscalls/syscall_n64.tbl +++ b/arch/mips/kernel/syscalls/syscall_n64.tbl @@ -349,4 +349,4 @@ 432 n64 fsmount sys_fsmount 433 n64 fspick sys_fspick 434 n64 pidfd_open sys_pidfd_open -# 435 reserved for clone3 +435 n64 clone3 __sys_clone3 diff --git a/arch/mips/kernel/syscalls/syscall_o32.tbl b/arch/mips/kernel/syscalls/syscall_o32.tbl index 9653591428ec..353539ea4140 100644 --- a/arch/mips/kernel/syscalls/syscall_o32.tbl +++ b/arch/mips/kernel/syscalls/syscall_o32.tbl @@ -422,4 +422,4 @@ 432 o32 fsmount sys_fsmount 433 o32 fspick sys_fspick 434 o32 pidfd_open sys_pidfd_open -# 435 reserved for clone3 +435 o32 clone3 __sys_clone3 diff --git a/arch/mips/kernel/traps.c b/arch/mips/kernel/traps.c index 342e41de9d64..83f2a437d9e2 100644 --- a/arch/mips/kernel/traps.c +++ b/arch/mips/kernel/traps.c @@ -1761,7 +1761,7 @@ static inline void parity_protection_init(void) case CPU_5KC: case CPU_5KE: - case CPU_LOONGSON1: + case CPU_LOONGSON32: write_c0_ecc(0x80000000); back_to_back_c0_hazard(); /* Set the PE bit (bit 31) in the c0_errctl register. 
*/ @@ -2394,7 +2394,7 @@ void __init trap_init(void) else { if (cpu_has_vtag_icache) set_except_vector(EXCCODE_RI, handle_ri_rdhwr_tlbp); - else if (current_cpu_type() == CPU_LOONGSON3) + else if (current_cpu_type() == CPU_LOONGSON64) set_except_vector(EXCCODE_RI, handle_ri_rdhwr_tlbp); else set_except_vector(EXCCODE_RI, handle_ri_rdhwr); diff --git a/arch/mips/kvm/mmu.c b/arch/mips/kvm/mmu.c index 97e538a8c1be..7dad7a293eae 100644 --- a/arch/mips/kvm/mmu.c +++ b/arch/mips/kvm/mmu.c @@ -136,6 +136,7 @@ pgd_t *kvm_pgd_alloc(void) static pte_t *kvm_mips_walk_pgd(pgd_t *pgd, struct kvm_mmu_memory_cache *cache, unsigned long addr) { + p4d_t *p4d; pud_t *pud; pmd_t *pmd; @@ -145,7 +146,8 @@ static pte_t *kvm_mips_walk_pgd(pgd_t *pgd, struct kvm_mmu_memory_cache *cache, BUG(); return NULL; } - pud = pud_offset(pgd, addr); + p4d = p4d_offset(pgd, addr); + pud = pud_offset(p4d, addr); if (pud_none(*pud)) { pmd_t *new_pmd; @@ -204,8 +206,8 @@ static bool kvm_mips_flush_gpa_pmd(pmd_t *pmd, unsigned long start_gpa, { pte_t *pte; unsigned long end = ~0ul; - int i_min = __pmd_offset(start_gpa); - int i_max = __pmd_offset(end_gpa); + int i_min = pmd_index(start_gpa); + int i_max = pmd_index(end_gpa); bool safe_to_remove = (i_min == 0 && i_max == PTRS_PER_PMD - 1); int i; @@ -232,8 +234,8 @@ static bool kvm_mips_flush_gpa_pud(pud_t *pud, unsigned long start_gpa, { pmd_t *pmd; unsigned long end = ~0ul; - int i_min = __pud_offset(start_gpa); - int i_max = __pud_offset(end_gpa); + int i_min = pud_index(start_gpa); + int i_max = pud_index(end_gpa); bool safe_to_remove = (i_min == 0 && i_max == PTRS_PER_PUD - 1); int i; @@ -258,6 +260,7 @@ static bool kvm_mips_flush_gpa_pud(pud_t *pud, unsigned long start_gpa, static bool kvm_mips_flush_gpa_pgd(pgd_t *pgd, unsigned long start_gpa, unsigned long end_gpa) { + p4d_t *p4d; pud_t *pud; unsigned long end = ~0ul; int i_min = pgd_index(start_gpa); @@ -269,7 +272,8 @@ static bool kvm_mips_flush_gpa_pgd(pgd_t *pgd, unsigned long start_gpa, if (!pgd_present(pgd[i])) continue; - pud = pud_offset(pgd + i, 0); + p4d = p4d_offset(pgd, 0); + pud = pud_offset(p4d + i, 0); if (i == i_max) end = end_gpa; @@ -334,8 +338,8 @@ static int kvm_mips_##name##_pmd(pmd_t *pmd, unsigned long start, \ int ret = 0; \ pte_t *pte; \ unsigned long cur_end = ~0ul; \ - int i_min = __pmd_offset(start); \ - int i_max = __pmd_offset(end); \ + int i_min = pmd_index(start); \ + int i_max = pmd_index(end); \ int i; \ \ for (i = i_min; i <= i_max; ++i, start = 0) { \ @@ -357,8 +361,8 @@ static int kvm_mips_##name##_pud(pud_t *pud, unsigned long start, \ int ret = 0; \ pmd_t *pmd; \ unsigned long cur_end = ~0ul; \ - int i_min = __pud_offset(start); \ - int i_max = __pud_offset(end); \ + int i_min = pud_index(start); \ + int i_max = pud_index(end); \ int i; \ \ for (i = i_min; i <= i_max; ++i, start = 0) { \ @@ -378,6 +382,7 @@ static int kvm_mips_##name##_pgd(pgd_t *pgd, unsigned long start, \ unsigned long end) \ { \ int ret = 0; \ + p4d_t *p4d; \ pud_t *pud; \ unsigned long cur_end = ~0ul; \ int i_min = pgd_index(start); \ @@ -388,7 +393,8 @@ static int kvm_mips_##name##_pgd(pgd_t *pgd, unsigned long start, \ if (!pgd_present(pgd[i])) \ continue; \ \ - pud = pud_offset(pgd + i, 0); \ + p4d = p4d_offset(pgd, 0); \ + pud = pud_offset(p4d + i, 0); \ if (i == i_max) \ cur_end = end; \ \ @@ -862,8 +868,8 @@ static bool kvm_mips_flush_gva_pmd(pmd_t *pmd, unsigned long start_gva, { pte_t *pte; unsigned long end = ~0ul; - int i_min = __pmd_offset(start_gva); - int i_max = __pmd_offset(end_gva); + int i_min = 
pmd_index(start_gva); + int i_max = pmd_index(end_gva); bool safe_to_remove = (i_min == 0 && i_max == PTRS_PER_PMD - 1); int i; @@ -890,8 +896,8 @@ static bool kvm_mips_flush_gva_pud(pud_t *pud, unsigned long start_gva, { pmd_t *pmd; unsigned long end = ~0ul; - int i_min = __pud_offset(start_gva); - int i_max = __pud_offset(end_gva); + int i_min = pud_index(start_gva); + int i_max = pud_index(end_gva); bool safe_to_remove = (i_min == 0 && i_max == PTRS_PER_PUD - 1); int i; @@ -916,6 +922,7 @@ static bool kvm_mips_flush_gva_pud(pud_t *pud, unsigned long start_gva, static bool kvm_mips_flush_gva_pgd(pgd_t *pgd, unsigned long start_gva, unsigned long end_gva) { + p4d_t *p4d; pud_t *pud; unsigned long end = ~0ul; int i_min = pgd_index(start_gva); @@ -927,7 +934,8 @@ static bool kvm_mips_flush_gva_pgd(pgd_t *pgd, unsigned long start_gva, if (!pgd_present(pgd[i])) continue; - pud = pud_offset(pgd + i, 0); + p4d = p4d_offset(pgd, 0); + pud = pud_offset(p4d + i, 0); if (i == i_max) end = end_gva; diff --git a/arch/mips/kvm/trap_emul.c b/arch/mips/kvm/trap_emul.c index 73daa6ad33af..5a11e83dffe6 100644 --- a/arch/mips/kvm/trap_emul.c +++ b/arch/mips/kvm/trap_emul.c @@ -564,6 +564,7 @@ static void kvm_mips_emul_free_gva_pt(pgd_t *pgd) /* Don't free host kernel page tables copied from init_mm.pgd */ const unsigned long end = 0x80000000; unsigned long pgd_va, pud_va, pmd_va; + p4d_t *p4d; pud_t *pud; pmd_t *pmd; pte_t *pte; @@ -576,7 +577,8 @@ static void kvm_mips_emul_free_gva_pt(pgd_t *pgd) pgd_va = (unsigned long)i << PGDIR_SHIFT; if (pgd_va >= end) break; - pud = pud_offset(pgd + i, 0); + p4d = p4d_offset(pgd, 0); + pud = pud_offset(p4d + i, 0); for (j = 0; j < PTRS_PER_PUD; j++) { if (pud_none(pud[j])) continue; diff --git a/arch/mips/lib/bitops.c b/arch/mips/lib/bitops.c index 3b2a1e78a543..116d0bd8b2ae 100644 --- a/arch/mips/lib/bitops.c +++ b/arch/mips/lib/bitops.c @@ -7,6 +7,7 @@ * Copyright (c) 1999, 2000 Silicon Graphics, Inc. */ #include <linux/bitops.h> +#include <linux/bits.h> #include <linux/irqflags.h> #include <linux/export.h> @@ -19,12 +20,11 @@ */ void __mips_set_bit(unsigned long nr, volatile unsigned long *addr) { - unsigned long *a = (unsigned long *)addr; - unsigned bit = nr & SZLONG_MASK; + volatile unsigned long *a = &addr[BIT_WORD(nr)]; + unsigned int bit = nr % BITS_PER_LONG; unsigned long mask; unsigned long flags; - a += nr >> SZLONG_LOG; mask = 1UL << bit; raw_local_irq_save(flags); *a |= mask; @@ -41,12 +41,11 @@ EXPORT_SYMBOL(__mips_set_bit); */ void __mips_clear_bit(unsigned long nr, volatile unsigned long *addr) { - unsigned long *a = (unsigned long *)addr; - unsigned bit = nr & SZLONG_MASK; + volatile unsigned long *a = &addr[BIT_WORD(nr)]; + unsigned int bit = nr % BITS_PER_LONG; unsigned long mask; unsigned long flags; - a += nr >> SZLONG_LOG; mask = 1UL << bit; raw_local_irq_save(flags); *a &= ~mask; @@ -63,12 +62,11 @@ EXPORT_SYMBOL(__mips_clear_bit); */ void __mips_change_bit(unsigned long nr, volatile unsigned long *addr) { - unsigned long *a = (unsigned long *)addr; - unsigned bit = nr & SZLONG_MASK; + volatile unsigned long *a = &addr[BIT_WORD(nr)]; + unsigned int bit = nr % BITS_PER_LONG; unsigned long mask; unsigned long flags; - a += nr >> SZLONG_LOG; mask = 1UL << bit; raw_local_irq_save(flags); *a ^= mask; @@ -78,32 +76,6 @@ EXPORT_SYMBOL(__mips_change_bit); /** - * __mips_test_and_set_bit - Set a bit and return its old value. This is - * called by test_and_set_bit() if it cannot find a faster solution. 
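The lib/bitops.c conversion in this series replaces the MIPS-private SZLONG_LOG/SZLONG_MASK arithmetic with the generic BIT_WORD() helper and a modulo; both forms compute the same word index and bit offset. A short worked example (fragment only, 64-bit kernel):

/*
 * nr = 71, BITS_PER_LONG = 64:
 *   word index:  BIT_WORD(71) = 71 / 64 = 1   (old: 71 >> SZLONG_LOG = 71 >> 6)
 *   bit offset:  71 % 64      = 7             (old: 71 & SZLONG_MASK = 71 & 63)
 * Both versions therefore touch bit 7 of addr[1].
 */
volatile unsigned long *word = &addr[BIT_WORD(nr)];
unsigned long mask = 1UL << (nr % BITS_PER_LONG);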
- * @nr: Bit to set - * @addr: Address to count from - */ -int __mips_test_and_set_bit(unsigned long nr, - volatile unsigned long *addr) -{ - unsigned long *a = (unsigned long *)addr; - unsigned bit = nr & SZLONG_MASK; - unsigned long mask; - unsigned long flags; - int res; - - a += nr >> SZLONG_LOG; - mask = 1UL << bit; - raw_local_irq_save(flags); - res = (mask & *a) != 0; - *a |= mask; - raw_local_irq_restore(flags); - return res; -} -EXPORT_SYMBOL(__mips_test_and_set_bit); - - -/** * __mips_test_and_set_bit_lock - Set a bit and return its old value. This is * called by test_and_set_bit_lock() if it cannot find a faster solution. * @nr: Bit to set @@ -112,13 +84,12 @@ EXPORT_SYMBOL(__mips_test_and_set_bit); int __mips_test_and_set_bit_lock(unsigned long nr, volatile unsigned long *addr) { - unsigned long *a = (unsigned long *)addr; - unsigned bit = nr & SZLONG_MASK; + volatile unsigned long *a = &addr[BIT_WORD(nr)]; + unsigned int bit = nr % BITS_PER_LONG; unsigned long mask; unsigned long flags; int res; - a += nr >> SZLONG_LOG; mask = 1UL << bit; raw_local_irq_save(flags); res = (mask & *a) != 0; @@ -137,13 +108,12 @@ EXPORT_SYMBOL(__mips_test_and_set_bit_lock); */ int __mips_test_and_clear_bit(unsigned long nr, volatile unsigned long *addr) { - unsigned long *a = (unsigned long *)addr; - unsigned bit = nr & SZLONG_MASK; + volatile unsigned long *a = &addr[BIT_WORD(nr)]; + unsigned int bit = nr % BITS_PER_LONG; unsigned long mask; unsigned long flags; int res; - a += nr >> SZLONG_LOG; mask = 1UL << bit; raw_local_irq_save(flags); res = (mask & *a) != 0; @@ -162,13 +132,12 @@ EXPORT_SYMBOL(__mips_test_and_clear_bit); */ int __mips_test_and_change_bit(unsigned long nr, volatile unsigned long *addr) { - unsigned long *a = (unsigned long *)addr; - unsigned bit = nr & SZLONG_MASK; + volatile unsigned long *a = &addr[BIT_WORD(nr)]; + unsigned int bit = nr % BITS_PER_LONG; unsigned long mask; unsigned long flags; int res; - a += nr >> SZLONG_LOG; mask = 1UL << bit; raw_local_irq_save(flags); res = (mask & *a) != 0; diff --git a/arch/mips/lib/csum_partial.S b/arch/mips/lib/csum_partial.S index 2ff84f4b1717..fda7b57b826e 100644 --- a/arch/mips/lib/csum_partial.S +++ b/arch/mips/lib/csum_partial.S @@ -279,7 +279,7 @@ EXPORT_SYMBOL(csum_partial) #endif /* odd buffer alignment? 
*/ -#if defined(CONFIG_CPU_MIPSR2) || defined(CONFIG_CPU_LOONGSON3) +#if defined(CONFIG_CPU_MIPSR2) || defined(CONFIG_CPU_LOONGSON64) .set push .set arch=mips32r2 wsbh v1, sum @@ -732,7 +732,7 @@ EXPORT_SYMBOL(csum_partial) addu sum, v1 #endif -#if defined(CONFIG_CPU_MIPSR2) || defined(CONFIG_CPU_LOONGSON3) +#if defined(CONFIG_CPU_MIPSR2) || defined(CONFIG_CPU_LOONGSON64) .set push .set arch=mips32r2 wsbh v1, sum diff --git a/arch/mips/loongson2ef/Kconfig b/arch/mips/loongson2ef/Kconfig new file mode 100644 index 000000000000..595dd48e1e4d --- /dev/null +++ b/arch/mips/loongson2ef/Kconfig @@ -0,0 +1,95 @@ +# SPDX-License-Identifier: GPL-2.0 +if MACH_LOONGSON2EF + +choice + prompt "Machine Type" + +config LEMOTE_FULOONG2E + bool "Lemote Fuloong(2e) mini-PC" + select ARCH_SPARSEMEM_ENABLE + select ARCH_MIGHT_HAVE_PC_PARPORT + select ARCH_MIGHT_HAVE_PC_SERIO + select CEVT_R4K + select CSRC_R4K + select SYS_HAS_CPU_LOONGSON2E + select DMA_NONCOHERENT + select BOOT_ELF32 + select BOARD_SCACHE + select FORCE_PCI + select I8259 + select ISA + select IRQ_MIPS_CPU + select SYS_SUPPORTS_64BIT_KERNEL + select SYS_SUPPORTS_LITTLE_ENDIAN + select SYS_SUPPORTS_HIGHMEM + select SYS_HAS_EARLY_PRINTK + select USE_GENERIC_EARLY_PRINTK_8250 + select GENERIC_ISA_DMA_SUPPORT_BROKEN + select CPU_HAS_WB + select LOONGSON_MC146818 + help + Lemote Fuloong(2e) mini-PC board based on the Chinese Loongson-2E CPU and + an FPGA northbridge + + Lemote Fuloong(2e) mini PC have a VIA686B south bridge. + +config LEMOTE_MACH2F + bool "Lemote Loongson 2F family machines" + select ARCH_SPARSEMEM_ENABLE + select ARCH_MIGHT_HAVE_PC_PARPORT + select ARCH_MIGHT_HAVE_PC_SERIO + select BOARD_SCACHE + select BOOT_ELF32 + select CEVT_R4K if ! MIPS_EXTERNAL_TIMER + select CPU_HAS_WB + select CS5536 + select CSRC_R4K if ! MIPS_EXTERNAL_TIMER + select DMA_NONCOHERENT + select GENERIC_ISA_DMA_SUPPORT_BROKEN + select HAVE_CLK + select FORCE_PCI + select I8259 + select IRQ_MIPS_CPU + select ISA + select SYS_HAS_CPU_LOONGSON2F + select SYS_HAS_EARLY_PRINTK + select USE_GENERIC_EARLY_PRINTK_8250 + select SYS_SUPPORTS_64BIT_KERNEL + select SYS_SUPPORTS_HIGHMEM + select SYS_SUPPORTS_LITTLE_ENDIAN + select LOONGSON_MC146818 + help + Lemote Loongson 2F family machines utilize the 2F revision of + Loongson processor and the AMD CS5536 south bridge. + + These family machines include fuloong2f mini PC, yeeloong2f notebook, + LingLoong allinone PC and so forth. + +endchoice + +config CS5536 + bool + +config CS5536_MFGPT + bool "CS5536 MFGPT Timer" + depends on CS5536 && !HIGH_RES_TIMERS + select MIPS_EXTERNAL_TIMER + help + This option enables the mfgpt0 timer of AMD CS5536. With this timer + switched on you can not use high resolution timers. + + If you want to enable the Loongson2 CPUFreq Driver, Please enable + this option at first, otherwise, You will get wrong system time. + + If unsure, say Yes. 
+ +config LOONGSON_UART_BASE + bool + default y + depends on EARLY_PRINTK || SERIAL_8250 + +config LOONGSON_MC146818 + bool + default n + +endif # MACH_LOONGSON2EF diff --git a/arch/mips/loongson2ef/Makefile b/arch/mips/loongson2ef/Makefile new file mode 100644 index 000000000000..d4af1605cc9b --- /dev/null +++ b/arch/mips/loongson2ef/Makefile @@ -0,0 +1,18 @@ +# SPDX-License-Identifier: GPL-2.0-only +# +# Common code for all Loongson based systems +# + +obj-$(CONFIG_MACH_LOONGSON2EF) += common/ + +# +# Lemote Fuloong mini-PC (Loongson 2E-based) +# + +obj-$(CONFIG_LEMOTE_FULOONG2E) += fuloong-2e/ + +# +# Lemote loongson2f family machines +# + +obj-$(CONFIG_LEMOTE_MACH2F) += lemote-2f/ diff --git a/arch/mips/loongson2ef/Platform b/arch/mips/loongson2ef/Platform new file mode 100644 index 000000000000..3aca42963f35 --- /dev/null +++ b/arch/mips/loongson2ef/Platform @@ -0,0 +1,32 @@ +# +# Loongson Processors' Support +# + +# Only gcc >= 4.4 have Loongson specific support +cflags-$(CONFIG_CPU_LOONGSON2EF) += -Wa,--trap +cflags-$(CONFIG_CPU_LOONGSON2E) += \ + $(call cc-option,-march=loongson2e,-march=r4600) +cflags-$(CONFIG_CPU_LOONGSON2F) += \ + $(call cc-option,-march=loongson2f,-march=r4600) +# Enable the workarounds for Loongson2f +ifdef CONFIG_CPU_LOONGSON2F_WORKAROUNDS + ifeq ($(call as-option,-Wa$(comma)-mfix-loongson2f-nop,),) + $(error only binutils >= 2.20.2 have needed option -mfix-loongson2f-nop) + else + cflags-$(CONFIG_CPU_NOP_WORKAROUNDS) += -Wa$(comma)-mfix-loongson2f-nop + endif + ifeq ($(call as-option,-Wa$(comma)-mfix-loongson2f-jump,),) + $(error only binutils >= 2.20.2 have needed option -mfix-loongson2f-jump) + else + cflags-$(CONFIG_CPU_JUMP_WORKAROUNDS) += -Wa$(comma)-mfix-loongson2f-jump + endif +endif + +# +# Loongson Machines' Support +# + +platform-$(CONFIG_MACH_LOONGSON2EF) += loongson2ef/ +cflags-$(CONFIG_MACH_LOONGSON2EF) += -I$(srctree)/arch/mips/include/asm/mach-loongson2ef -mno-branch-likely +load-$(CONFIG_LEMOTE_FULOONG2E) += 0xffffffff80100000 +load-$(CONFIG_LEMOTE_MACH2F) += 0xffffffff80200000 diff --git a/arch/mips/loongson64/common/Makefile b/arch/mips/loongson2ef/common/Makefile index 684624f61f5a..d5ab3e543ea3 100644 --- a/arch/mips/loongson64/common/Makefile +++ b/arch/mips/loongson2ef/common/Makefile @@ -3,14 +3,13 @@ # Makefile for loongson based machines. 
# -obj-y += setup.o init.o cmdline.o env.o time.o reset.o irq.o \ +obj-y += setup.o init.o env.o time.o reset.o irq.o \ bonito-irq.o mem.o machtype.o platform.o serial.o obj-$(CONFIG_PCI) += pci.o # # Serial port support # -obj-$(CONFIG_EARLY_PRINTK) += early_printk.o obj-$(CONFIG_LOONGSON_UART_BASE) += uart_base.o obj-$(CONFIG_LOONGSON_MC146818) += rtc.o diff --git a/arch/mips/loongson64/common/bonito-irq.c b/arch/mips/loongson2ef/common/bonito-irq.c index 82352cc25e4c..82352cc25e4c 100644 --- a/arch/mips/loongson64/common/bonito-irq.c +++ b/arch/mips/loongson2ef/common/bonito-irq.c diff --git a/arch/mips/loongson64/common/cs5536/Makefile b/arch/mips/loongson2ef/common/cs5536/Makefile index b32b29661245..b32b29661245 100644 --- a/arch/mips/loongson64/common/cs5536/Makefile +++ b/arch/mips/loongson2ef/common/cs5536/Makefile diff --git a/arch/mips/loongson64/common/cs5536/cs5536_acc.c b/arch/mips/loongson2ef/common/cs5536/cs5536_acc.c index ff50aae72916..ff50aae72916 100644 --- a/arch/mips/loongson64/common/cs5536/cs5536_acc.c +++ b/arch/mips/loongson2ef/common/cs5536/cs5536_acc.c diff --git a/arch/mips/loongson64/common/cs5536/cs5536_ehci.c b/arch/mips/loongson2ef/common/cs5536/cs5536_ehci.c index bd4c39fe6109..bd4c39fe6109 100644 --- a/arch/mips/loongson64/common/cs5536/cs5536_ehci.c +++ b/arch/mips/loongson2ef/common/cs5536/cs5536_ehci.c diff --git a/arch/mips/loongson64/common/cs5536/cs5536_ide.c b/arch/mips/loongson2ef/common/cs5536/cs5536_ide.c index bb933294b092..bb933294b092 100644 --- a/arch/mips/loongson64/common/cs5536/cs5536_ide.c +++ b/arch/mips/loongson2ef/common/cs5536/cs5536_ide.c diff --git a/arch/mips/loongson64/common/cs5536/cs5536_isa.c b/arch/mips/loongson2ef/common/cs5536/cs5536_isa.c index 5ad38f86ee62..5ad38f86ee62 100644 --- a/arch/mips/loongson64/common/cs5536/cs5536_isa.c +++ b/arch/mips/loongson2ef/common/cs5536/cs5536_isa.c diff --git a/arch/mips/loongson64/common/cs5536/cs5536_mfgpt.c b/arch/mips/loongson2ef/common/cs5536/cs5536_mfgpt.c index 30af1b7c7529..30af1b7c7529 100644 --- a/arch/mips/loongson64/common/cs5536/cs5536_mfgpt.c +++ b/arch/mips/loongson2ef/common/cs5536/cs5536_mfgpt.c diff --git a/arch/mips/loongson64/common/cs5536/cs5536_ohci.c b/arch/mips/loongson2ef/common/cs5536/cs5536_ohci.c index 71a52b120317..71a52b120317 100644 --- a/arch/mips/loongson64/common/cs5536/cs5536_ohci.c +++ b/arch/mips/loongson2ef/common/cs5536/cs5536_ohci.c diff --git a/arch/mips/loongson64/common/cs5536/cs5536_pci.c b/arch/mips/loongson2ef/common/cs5536/cs5536_pci.c index 202c89b568ba..202c89b568ba 100644 --- a/arch/mips/loongson64/common/cs5536/cs5536_pci.c +++ b/arch/mips/loongson2ef/common/cs5536/cs5536_pci.c diff --git a/arch/mips/loongson2ef/common/env.c b/arch/mips/loongson2ef/common/env.c new file mode 100644 index 000000000000..6f20bdf9b242 --- /dev/null +++ b/arch/mips/loongson2ef/common/env.c @@ -0,0 +1,53 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Based on Ocelot Linux port, which is + * Copyright 2001 MontaVista Software Inc. + * Author: jsun@mvista.com or jsun@junsun.net + * + * Copyright 2003 ICT CAS + * Author: Michael Guo <guoyi@ict.ac.cn> + * + * Copyright (C) 2007 Lemote Inc. & Institute of Computing Technology + * Author: Fuxin Zhang, zhangfx@lemote.com + * + * Copyright (C) 2009 Lemote Inc. 
+ * Author: Wu Zhangjin, wuzhangjin@gmail.com + */ +#include <linux/export.h> +#include <asm/bootinfo.h> +#include <asm/fw/fw.h> +#include <loongson.h> + +u32 cpu_clock_freq; +EXPORT_SYMBOL(cpu_clock_freq); + +void __init prom_init_env(void) +{ + /* pmon passes arguments in 32bit pointers */ + unsigned int processor_id; + + cpu_clock_freq = fw_getenvl("cpuclock"); + memsize = fw_getenvl("memsize"); + highmemsize = fw_getenvl("highmemsize"); + + if (memsize == 0) + memsize = 256; + + pr_info("memsize=%u, highmemsize=%u\n", memsize, highmemsize); + + if (cpu_clock_freq == 0) { + processor_id = (¤t_cpu_data)->processor_id; + switch (processor_id & PRID_REV_MASK) { + case PRID_REV_LOONGSON2E: + cpu_clock_freq = 533080000; + break; + case PRID_REV_LOONGSON2F: + cpu_clock_freq = 797000000; + break; + default: + cpu_clock_freq = 100000000; + break; + } + } + pr_info("CpuClock = %u\n", cpu_clock_freq); +} diff --git a/arch/mips/loongson64/common/init.c b/arch/mips/loongson2ef/common/init.c index 912fe61c4fc7..45512178be77 100644 --- a/arch/mips/loongson64/common/init.c +++ b/arch/mips/loongson2ef/common/init.c @@ -9,6 +9,7 @@ #include <asm/traps.h> #include <asm/smp-ops.h> #include <asm/cacheflush.h> +#include <asm/fw/fw.h> #include <loongson.h> @@ -32,22 +33,17 @@ void __init prom_init(void) ioremap(LOONGSON_ADDRWINCFG_BASE, LOONGSON_ADDRWINCFG_SIZE); #endif - prom_init_cmdline(); + fw_init_cmdline(); + prom_init_machtype(); prom_init_env(); /* init base address of io space */ set_io_port_base((unsigned long) ioremap(LOONGSON_PCIIO_BASE, LOONGSON_PCIIO_SIZE)); - -#ifdef CONFIG_NUMA - prom_init_numa_memory(); -#else prom_init_memory(); -#endif /*init the uart base address */ prom_init_uart_base(); - register_smp_ops(&loongson3_smp_ops); board_nmi_handler_setup = mips_nmi_setup; } diff --git a/arch/mips/loongson64/common/irq.c b/arch/mips/loongson2ef/common/irq.c index 0ea93c1c0a97..0ea93c1c0a97 100644 --- a/arch/mips/loongson64/common/irq.c +++ b/arch/mips/loongson2ef/common/irq.c diff --git a/arch/mips/loongson64/common/machtype.c b/arch/mips/loongson2ef/common/machtype.c index 4e42d929f1c7..82f6de49f20f 100644 --- a/arch/mips/loongson64/common/machtype.c +++ b/arch/mips/loongson2ef/common/machtype.c @@ -23,7 +23,6 @@ static const char *system_types[] = { [MACH_DEXXON_GDIUM2F10] = "dexxon-gdium-2f", [MACH_LEMOTE_NAS] = "lemote-nas-2f", [MACH_LEMOTE_LL2F] = "lemote-lynloong-2f", - [MACH_LOONGSON_GENERIC] = "generic-loongson-machine", [MACH_LOONGSON_END] = NULL, }; diff --git a/arch/mips/loongson2ef/common/mem.c b/arch/mips/loongson2ef/common/mem.c new file mode 100644 index 000000000000..ae21f1c62baa --- /dev/null +++ b/arch/mips/loongson2ef/common/mem.c @@ -0,0 +1,62 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + */ +#include <linux/fs.h> +#include <linux/fcntl.h> +#include <linux/memblock.h> +#include <linux/mm.h> + +#include <asm/bootinfo.h> + +#include <loongson.h> +#include <mem.h> +#include <pci.h> + + +u32 memsize, highmemsize; + +void __init prom_init_memory(void) +{ + add_memory_region(0x0, (memsize << 20), BOOT_MEM_RAM); + + add_memory_region(memsize << 20, LOONGSON_PCI_MEM_START - (memsize << + 20), BOOT_MEM_RESERVED); + +#ifdef CONFIG_CPU_SUPPORTS_ADDRWINCFG + { + int bit; + + bit = fls(memsize + highmemsize); + if (bit != ffs(memsize + highmemsize)) + bit += 20; + else + bit = bit + 20 - 1; + + /* set cpu window3 to map CPU to DDR: 2G -> 2G */ + LOONGSON_ADDRWIN_CPUTODDR(ADDRWIN_WIN3, 0x80000000ul, + 0x80000000ul, (1 << bit)); + mmiowb(); + } +#endif /* 
!CONFIG_CPU_SUPPORTS_ADDRWINCFG */ + +#ifdef CONFIG_64BIT + if (highmemsize > 0) + add_memory_region(LOONGSON_HIGHMEM_START, + highmemsize << 20, BOOT_MEM_RAM); + + add_memory_region(LOONGSON_PCI_MEM_END + 1, LOONGSON_HIGHMEM_START - + LOONGSON_PCI_MEM_END - 1, BOOT_MEM_RESERVED); + +#endif /* !CONFIG_64BIT */ +} + +/* override of arch/mips/mm/cache.c: __uncached_access */ +int __uncached_access(struct file *file, unsigned long addr) +{ + if (file->f_flags & O_DSYNC) + return 1; + + return addr >= __pa(high_memory) || + ((addr >= LOONGSON_MMIO_MEM_START) && + (addr < LOONGSON_MMIO_MEM_END)); +} diff --git a/arch/mips/loongson64/common/pci.c b/arch/mips/loongson2ef/common/pci.c index c47bb7bf3aa4..200916925e95 100644 --- a/arch/mips/loongson64/common/pci.c +++ b/arch/mips/loongson2ef/common/pci.c @@ -7,7 +7,6 @@ #include <pci.h> #include <loongson.h> -#include <boot_param.h> static struct resource loongson_pci_mem_resource = { .name = "pci memory space", @@ -81,15 +80,8 @@ static int __init pcibios_init(void) setup_pcimap(); loongson_pci_controller.io_map_base = mips_io_port_base; -#ifdef CONFIG_LEFI_FIRMWARE_INTERFACE - loongson_pci_mem_resource.start = loongson_sysconf.pci_mem_start_addr; - loongson_pci_mem_resource.end = loongson_sysconf.pci_mem_end_addr; -#endif register_pci_controller(&loongson_pci_controller); -#ifdef CONFIG_CPU_LOONGSON3 - sbx00_acpi_init(); -#endif return 0; } diff --git a/arch/mips/loongson64/common/platform.c b/arch/mips/loongson2ef/common/platform.c index 0084820cffaa..0084820cffaa 100644 --- a/arch/mips/loongson64/common/platform.c +++ b/arch/mips/loongson2ef/common/platform.c diff --git a/arch/mips/loongson64/common/pm.c b/arch/mips/loongson2ef/common/pm.c index b8aed878d912..11f4cfd581fb 100644 --- a/arch/mips/loongson64/common/pm.c +++ b/arch/mips/loongson2ef/common/pm.c @@ -75,7 +75,7 @@ int __weak wakeup_loongson(void) static void wait_for_wakeup_events(void) { while (!wakeup_loongson()) - LOONGSON_CHIPCFG(0) &= ~0x7; + writel(readl(LOONGSON_CHIPCFG) & ~0x7, LOONGSON_CHIPCFG); } /* @@ -98,15 +98,16 @@ static void loongson_suspend_enter(void) stop_perf_counters(); - cached_cpu_freq = LOONGSON_CHIPCFG(0); + cached_cpu_freq = readl(LOONGSON_CHIPCFG); /* Put CPU into wait mode */ - LOONGSON_CHIPCFG(0) &= ~0x7; + writel(readl(LOONGSON_CHIPCFG) & ~0x7, LOONGSON_CHIPCFG); /* wait for the given events to wakeup cpu from wait mode */ wait_for_wakeup_events(); - LOONGSON_CHIPCFG(0) = cached_cpu_freq; + writel(cached_cpu_freq, LOONGSON_CHIPCFG); + mmiowb(); } diff --git a/arch/mips/loongson64/common/reset.c b/arch/mips/loongson2ef/common/reset.c index ce39e918e4d5..e7c87161ce00 100644 --- a/arch/mips/loongson64/common/reset.c +++ b/arch/mips/loongson2ef/common/reset.c @@ -13,7 +13,6 @@ #include <asm/reboot.h> #include <loongson.h> -#include <boot_param.h> static inline void loongson_reboot(void) { @@ -35,26 +34,15 @@ static inline void loongson_reboot(void) static void loongson_restart(char *command) { -#ifndef CONFIG_LEFI_FIRMWARE_INTERFACE /* do preparation for reboot */ mach_prepare_reboot(); /* reboot via jumping to boot base address */ loongson_reboot(); -#else - void (*fw_restart)(void) = (void *)loongson_sysconf.restart_addr; - - fw_restart(); - while (1) { - if (cpu_wait) - cpu_wait(); - } -#endif } static void loongson_poweroff(void) { -#ifndef CONFIG_LEFI_FIRMWARE_INTERFACE mach_prepare_shutdown(); /* @@ -62,15 +50,6 @@ static void loongson_poweroff(void) * a generic delay loop, machine_hang(), so simply return. 
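Several of the loongson2ef hunks here (pm.c above, clock.c and reset.c further down) switch from dereferencing the LOONGSON_CHIPCFG macro directly to going through readl()/writel(). The general read-modify-write pattern is sketched below; the function name and mask are placeholders, not a copy of the board code.

#include <linux/io.h>

/*
 * Illustrative only: clear the low three bits of a memory-mapped
 * configuration register via the MMIO accessors instead of a volatile
 * pointer dereference. `chipcfg` stands for a mapped register address
 * such as LOONGSON_CHIPCFG.
 */
static void chipcfg_enter_wait_mode(void __iomem *chipcfg)
{
	u32 val;

	val = readl(chipcfg);	/* read current value */
	val &= ~0x7;		/* clock/wait-mode field */
	writel(val, chipcfg);	/* write back through the accessor */
}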
*/ return; -#else - void (*fw_poweroff)(void) = (void *)loongson_sysconf.poweroff_addr; - - fw_poweroff(); - while (1) { - if (cpu_wait) - cpu_wait(); - } -#endif } static void loongson_halt(void) diff --git a/arch/mips/loongson64/common/rtc.c b/arch/mips/loongson2ef/common/rtc.c index 8d7628c0f513..8d7628c0f513 100644 --- a/arch/mips/loongson64/common/rtc.c +++ b/arch/mips/loongson2ef/common/rtc.c diff --git a/arch/mips/loongson2ef/common/serial.c b/arch/mips/loongson2ef/common/serial.c new file mode 100644 index 000000000000..ac4f6e3ebc3e --- /dev/null +++ b/arch/mips/loongson2ef/common/serial.c @@ -0,0 +1,86 @@ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (C) 2007 Ralf Baechle (ralf@linux-mips.org) + * + * Copyright (C) 2009 Lemote, Inc. + * Author: Yan hua (yanhua@lemote.com) + * Author: Wu Zhangjin (wuzhangjin@gmail.com) + */ + +#include <linux/io.h> +#include <linux/module.h> +#include <linux/serial_8250.h> + +#include <asm/bootinfo.h> + +#include <loongson.h> +#include <machine.h> + +#define PORT(int, clk) \ +{ \ + .irq = int, \ + .uartclk = clk, \ + .iotype = UPIO_PORT, \ + .flags = UPF_BOOT_AUTOCONF | UPF_SKIP_TEST, \ + .regshift = 0, \ +} + +#define PORT_M(int, clk) \ +{ \ + .irq = MIPS_CPU_IRQ_BASE + (int), \ + .uartclk = clk, \ + .iotype = UPIO_MEM, \ + .membase = (void __iomem *)NULL, \ + .flags = UPF_BOOT_AUTOCONF | UPF_SKIP_TEST, \ + .regshift = 0, \ +} + +static struct plat_serial8250_port uart8250_data[MACH_LOONGSON_END + 1] = { + [MACH_LOONGSON_UNKNOWN] = {}, + [MACH_LEMOTE_FL2E] = PORT(4, 1843200), + [MACH_LEMOTE_FL2F] = PORT(3, 1843200), + [MACH_LEMOTE_ML2F7] = PORT_M(3, 3686400), + [MACH_LEMOTE_YL2F89] = PORT_M(3, 3686400), + [MACH_DEXXON_GDIUM2F10] = PORT_M(3, 3686400), + [MACH_LEMOTE_NAS] = PORT_M(3, 3686400), + [MACH_LEMOTE_LL2F] = PORT(3, 1843200), + [MACH_LOONGSON_END] = {}, +}; + +static struct platform_device uart8250_device = { + .name = "serial8250", + .id = PLAT8250_DEV_PLATFORM, +}; + +static int __init serial_init(void) +{ + unsigned char iotype; + + iotype = uart8250_data[mips_machtype].iotype; + + if (UPIO_MEM == iotype) { + uart8250_data[mips_machtype].mapbase = + loongson_uart_base; + uart8250_data[mips_machtype].membase = + (void __iomem *)_loongson_uart_base; + } + else if (UPIO_PORT == iotype) + uart8250_data[mips_machtype].iobase = + loongson_uart_base - LOONGSON_PCIIO_BASE; + + memset(&uart8250_data[mips_machtype + 1], 0, + sizeof(struct plat_serial8250_port)); + uart8250_device.dev.platform_data = &uart8250_data[mips_machtype]; + + return platform_device_register(&uart8250_device); +} +module_init(serial_init); + +static void __exit serial_exit(void) +{ + platform_device_unregister(&uart8250_device); +} +module_exit(serial_exit); diff --git a/arch/mips/loongson64/common/setup.c b/arch/mips/loongson2ef/common/setup.c index bc2da4c140c4..4fd27f4f90ed 100644 --- a/arch/mips/loongson64/common/setup.c +++ b/arch/mips/loongson2ef/common/setup.c @@ -11,11 +11,6 @@ #include <loongson.h> -#ifdef CONFIG_VT -#include <linux/console.h> -#include <linux/screen_info.h> -#endif - static void wbflush_loongson(void) { asm(".set\tpush\n\t" @@ -32,20 +27,4 @@ EXPORT_SYMBOL(__wbflush); void __init plat_mem_setup(void) { -#ifdef CONFIG_VT -#if defined(CONFIG_VGA_CONSOLE) - conswitchp = &vga_con; - - screen_info = (struct screen_info) { - .orig_x = 0, - .orig_y = 25, - .orig_video_cols = 80, - .orig_video_lines = 25, - 
.orig_video_isVGA = VIDEO_TYPE_VGAC, - .orig_video_points = 16, - }; -#elif defined(CONFIG_DUMMY_CONSOLE) - conswitchp = &dummy_con; -#endif -#endif } diff --git a/arch/mips/loongson64/common/time.c b/arch/mips/loongson2ef/common/time.c index e78760ce475b..585741af42a9 100644 --- a/arch/mips/loongson64/common/time.c +++ b/arch/mips/loongson2ef/common/time.c @@ -18,11 +18,7 @@ void __init plat_time_init(void) /* setup mips r4k timer */ mips_hpt_frequency = cpu_clock_freq / 2; -#ifdef CONFIG_RS780_HPET - setup_hpet_timer(); -#else setup_mfgpt0_timer(); -#endif } void read_persistent_clock64(struct timespec64 *ts) diff --git a/arch/mips/loongson64/common/uart_base.c b/arch/mips/loongson2ef/common/uart_base.c index e88d937f10fe..522bea6ad7b0 100644 --- a/arch/mips/loongson64/common/uart_base.c +++ b/arch/mips/loongson2ef/common/uart_base.c @@ -6,13 +6,14 @@ #include <linux/export.h> #include <asm/bootinfo.h> +#include <asm/setup.h> #include <loongson.h> /* raw */ -unsigned long loongson_uart_base[MAX_UARTS] = {}; +unsigned long loongson_uart_base; /* ioremapped */ -unsigned long _loongson_uart_base[MAX_UARTS] = {}; +unsigned long _loongson_uart_base; EXPORT_SYMBOL(loongson_uart_base); EXPORT_SYMBOL(_loongson_uart_base); @@ -20,16 +21,12 @@ EXPORT_SYMBOL(_loongson_uart_base); void prom_init_loongson_uart_base(void) { switch (mips_machtype) { - case MACH_LOONGSON_GENERIC: - /* The CPU provided serial port (CPU) */ - loongson_uart_base[0] = LOONGSON_REG_BASE + 0x1e0; - break; case MACH_LEMOTE_FL2E: - loongson_uart_base[0] = LOONGSON_PCIIO_BASE + 0x3f8; + loongson_uart_base = LOONGSON_PCIIO_BASE + 0x3f8; break; case MACH_LEMOTE_FL2F: case MACH_LEMOTE_LL2F: - loongson_uart_base[0] = LOONGSON_PCIIO_BASE + 0x2f8; + loongson_uart_base = LOONGSON_PCIIO_BASE + 0x2f8; break; case MACH_LEMOTE_ML2F7: case MACH_LEMOTE_YL2F89: @@ -37,10 +34,10 @@ void prom_init_loongson_uart_base(void) case MACH_LEMOTE_NAS: default: /* The CPU provided serial port (LPC) */ - loongson_uart_base[0] = LOONGSON_LIO1_BASE + 0x3f8; + loongson_uart_base = LOONGSON_LIO1_BASE + 0x3f8; break; } - _loongson_uart_base[0] = - (unsigned long)ioremap_nocache(loongson_uart_base[0], 8); + _loongson_uart_base = TO_UNCAC(loongson_uart_base); + setup_8250_early_printk_port(_loongson_uart_base, 0, 1024); } diff --git a/arch/mips/loongson64/fuloong-2e/Makefile b/arch/mips/loongson2ef/fuloong-2e/Makefile index bb58edb3bea7..bb58edb3bea7 100644 --- a/arch/mips/loongson64/fuloong-2e/Makefile +++ b/arch/mips/loongson2ef/fuloong-2e/Makefile diff --git a/arch/mips/loongson64/fuloong-2e/dma.c b/arch/mips/loongson2ef/fuloong-2e/dma.c index e122292bf666..e122292bf666 100644 --- a/arch/mips/loongson64/fuloong-2e/dma.c +++ b/arch/mips/loongson2ef/fuloong-2e/dma.c diff --git a/arch/mips/loongson64/fuloong-2e/irq.c b/arch/mips/loongson2ef/fuloong-2e/irq.c index 32278e7bf85c..32278e7bf85c 100644 --- a/arch/mips/loongson64/fuloong-2e/irq.c +++ b/arch/mips/loongson2ef/fuloong-2e/irq.c diff --git a/arch/mips/loongson64/fuloong-2e/reset.c b/arch/mips/loongson2ef/fuloong-2e/reset.c index 8273de1cf4bb..8273de1cf4bb 100644 --- a/arch/mips/loongson64/fuloong-2e/reset.c +++ b/arch/mips/loongson2ef/fuloong-2e/reset.c diff --git a/arch/mips/loongson64/lemote-2f/Makefile b/arch/mips/loongson2ef/lemote-2f/Makefile index 881a0ec06d1f..881a0ec06d1f 100644 --- a/arch/mips/loongson64/lemote-2f/Makefile +++ b/arch/mips/loongson2ef/lemote-2f/Makefile diff --git a/arch/mips/loongson64/lemote-2f/clock.c b/arch/mips/loongson2ef/lemote-2f/clock.c index 8281334df9c8..414f282c8ab5 
100644 --- a/arch/mips/loongson64/lemote-2f/clock.c +++ b/arch/mips/loongson2ef/lemote-2f/clock.c @@ -15,7 +15,7 @@ #include <linux/spinlock.h> #include <asm/clock.h> -#include <asm/mach-loongson64/loongson.h> +#include <asm/mach-loongson2ef/loongson.h> static LIST_HEAD(clock_list); static DEFINE_SPINLOCK(clock_lock); @@ -118,9 +118,9 @@ int clk_set_rate(struct clk *clk, unsigned long rate) clk->rate = rate; - regval = LOONGSON_CHIPCFG(0); + regval = readl(LOONGSON_CHIPCFG); regval = (regval & ~0x7) | (pos->driver_data - 1); - LOONGSON_CHIPCFG(0) = regval; + writel(regval, LOONGSON_CHIPCFG); return ret; } diff --git a/arch/mips/loongson64/lemote-2f/dma.c b/arch/mips/loongson2ef/lemote-2f/dma.c index abf0e39d7e46..abf0e39d7e46 100644 --- a/arch/mips/loongson64/lemote-2f/dma.c +++ b/arch/mips/loongson2ef/lemote-2f/dma.c diff --git a/arch/mips/loongson64/lemote-2f/ec_kb3310b.c b/arch/mips/loongson2ef/lemote-2f/ec_kb3310b.c index d138220e96a2..d138220e96a2 100644 --- a/arch/mips/loongson64/lemote-2f/ec_kb3310b.c +++ b/arch/mips/loongson2ef/lemote-2f/ec_kb3310b.c diff --git a/arch/mips/loongson64/lemote-2f/ec_kb3310b.h b/arch/mips/loongson2ef/lemote-2f/ec_kb3310b.h index aecdbc9c875a..aecdbc9c875a 100644 --- a/arch/mips/loongson64/lemote-2f/ec_kb3310b.h +++ b/arch/mips/loongson2ef/lemote-2f/ec_kb3310b.h diff --git a/arch/mips/loongson64/lemote-2f/irq.c b/arch/mips/loongson2ef/lemote-2f/irq.c index c58a044c6c07..c58a044c6c07 100644 --- a/arch/mips/loongson64/lemote-2f/irq.c +++ b/arch/mips/loongson2ef/lemote-2f/irq.c diff --git a/arch/mips/loongson64/lemote-2f/machtype.c b/arch/mips/loongson2ef/lemote-2f/machtype.c index 9462a3ab57be..9462a3ab57be 100644 --- a/arch/mips/loongson64/lemote-2f/machtype.c +++ b/arch/mips/loongson2ef/lemote-2f/machtype.c diff --git a/arch/mips/loongson64/lemote-2f/pm.c b/arch/mips/loongson2ef/lemote-2f/pm.c index 3d0027229e3c..3d0027229e3c 100644 --- a/arch/mips/loongson64/lemote-2f/pm.c +++ b/arch/mips/loongson2ef/lemote-2f/pm.c diff --git a/arch/mips/loongson64/lemote-2f/reset.c b/arch/mips/loongson2ef/lemote-2f/reset.c index 0db0934302ea..197dae4ffd23 100644 --- a/arch/mips/loongson64/lemote-2f/reset.c +++ b/arch/mips/loongson2ef/lemote-2f/reset.c @@ -24,7 +24,7 @@ static void reset_cpu(void) * reset cpu to full speed, this is needed when enabling cpu frequency * scalling */ - LOONGSON_CHIPCFG(0) |= 0x7; + writel(readl(LOONGSON_CHIPCFG) | 0x7, LOONGSON_CHIPCFG); } /* reset support for fuloong2f */ diff --git a/arch/mips/loongson32/Kconfig b/arch/mips/loongson32/Kconfig index 6dacc1438906..e27879b4813b 100644 --- a/arch/mips/loongson32/Kconfig +++ b/arch/mips/loongson32/Kconfig @@ -38,7 +38,7 @@ endchoice menuconfig CEVT_CSRC_LS1X bool "Use PWM Timer for clockevent/clocksource" select MIPS_EXTERNAL_TIMER - depends on CPU_LOONGSON1 + depends on CPU_LOONGSON32 help This option changes the default clockevent/clocksource to PWM Timer, and is required by Loongson1 CPUFreq support. 
diff --git a/arch/mips/loongson32/Platform b/arch/mips/loongson32/Platform index 333215593092..7f8e342f1ef5 100644 --- a/arch/mips/loongson32/Platform +++ b/arch/mips/loongson32/Platform @@ -1,4 +1,4 @@ -cflags-$(CONFIG_CPU_LOONGSON1) += -march=mips32r2 -Wa,--trap +cflags-$(CONFIG_CPU_LOONGSON32) += -march=mips32r2 -Wa,--trap platform-$(CONFIG_MACH_LOONGSON32) += loongson32/ cflags-$(CONFIG_MACH_LOONGSON32) += -I$(srctree)/arch/mips/include/asm/mach-loongson32 -load-$(CONFIG_CPU_LOONGSON1) += 0xffffffff80200000 +load-$(CONFIG_CPU_LOONGSON32) += 0xffffffff80200000 diff --git a/arch/mips/loongson32/common/prom.c b/arch/mips/loongson32/common/prom.c index c4e043ee53ff..73dd25142484 100644 --- a/arch/mips/loongson32/common/prom.c +++ b/arch/mips/loongson32/common/prom.c @@ -5,63 +5,25 @@ * Modified from arch/mips/pnx833x/common/prom.c. */ +#include <linux/io.h> +#include <linux/init.h> #include <linux/serial_reg.h> #include <asm/bootinfo.h> +#include <asm/fw/fw.h> #include <loongson1.h> -#include <prom.h> -int prom_argc; -char **prom_argv, **prom_envp; -unsigned long memsize, highmemsize; -char *prom_getenv(char *envname) -{ - char **env = prom_envp; - int i; - - i = strlen(envname); - - while (*env) { - if (strncmp(envname, *env, i) == 0 && *(*env + i) == '=') - return *env + i + 1; - env++; - } - - return 0; -} - -static inline unsigned long env_or_default(char *env, unsigned long dfl) -{ - char *str = prom_getenv(env); - return str ? simple_strtol(str, 0, 0) : dfl; -} - -void __init prom_init_cmdline(void) -{ - char *c = &(arcs_cmdline[0]); - int i; - - for (i = 1; i < prom_argc; i++) { - strcpy(c, prom_argv[i]); - c += strlen(prom_argv[i]); - if (i < prom_argc - 1) - *c++ = ' '; - } - *c = 0; -} +unsigned long memsize; void __init prom_init(void) { void __iomem *uart_base; - prom_argc = fw_arg0; - prom_argv = (char **)fw_arg1; - prom_envp = (char **)fw_arg2; - prom_init_cmdline(); + fw_init_cmdline(); - memsize = env_or_default("memsize", DEFAULT_MEMSIZE); - highmemsize = env_or_default("highmemsize", 0x0); + memsize = fw_getenvl("memsize"); + if(!memsize) + memsize = DEFAULT_MEMSIZE; if (strstr(arcs_cmdline, "console=ttyS3")) uart_base = ioremap_nocache(LS1X_UART3_BASE, 0x0f); @@ -77,3 +39,8 @@ void __init prom_init(void) void __init prom_free_prom_memory(void) { } + +void __init plat_mem_setup(void) +{ + add_memory_region(0x0, (memsize << 20), BOOT_MEM_RAM); +} diff --git a/arch/mips/loongson32/common/setup.c b/arch/mips/loongson32/common/setup.c index 8b03e18fc4d8..4733fe037176 100644 --- a/arch/mips/loongson32/common/setup.c +++ b/arch/mips/loongson32/common/setup.c @@ -3,15 +3,12 @@ * Copyright (c) 2011 Zhang, Keguang <keguang.zhang@gmail.com> */ +#include <linux/io.h> +#include <linux/init.h> +#include <linux/smp.h> +#include <asm/cpu-info.h> #include <asm/bootinfo.h> -#include <prom.h> - -void __init plat_mem_setup(void) -{ - add_memory_region(0x0, (memsize << 20), BOOT_MEM_RAM); -} - const char *get_system_type(void) { unsigned int processor_id = (&current_cpu_data)->processor_id; diff --git a/arch/mips/loongson64/Kconfig b/arch/mips/loongson64/Kconfig index 4c14a11525f4..48b29c198acf 100644 --- a/arch/mips/loongson64/Kconfig +++ b/arch/mips/loongson64/Kconfig @@ -1,119 +1,9 @@ # SPDX-License-Identifier: GPL-2.0 if MACH_LOONGSON64 -choice - prompt "Machine Type" - -config LEMOTE_FULOONG2E - bool "Lemote Fuloong(2e) mini-PC" - select ARCH_SPARSEMEM_ENABLE - select ARCH_MIGHT_HAVE_PC_PARPORT - select ARCH_MIGHT_HAVE_PC_SERIO - select CEVT_R4K - select CSRC_R4K - select 
SYS_HAS_CPU_LOONGSON2E - select DMA_NONCOHERENT - select BOOT_ELF32 - select BOARD_SCACHE - select HAVE_PCI - select I8259 - select ISA - select IRQ_MIPS_CPU - select SYS_SUPPORTS_64BIT_KERNEL - select SYS_SUPPORTS_LITTLE_ENDIAN - select SYS_SUPPORTS_HIGHMEM - select SYS_HAS_EARLY_PRINTK - select GENERIC_ISA_DMA_SUPPORT_BROKEN - select CPU_HAS_WB - select LOONGSON_MC146818 - help - Lemote Fuloong(2e) mini-PC board based on the Chinese Loongson-2E CPU and - an FPGA northbridge - - Lemote Fuloong(2e) mini PC have a VIA686B south bridge. - -config LEMOTE_MACH2F - bool "Lemote Loongson 2F family machines" - select ARCH_SPARSEMEM_ENABLE - select ARCH_MIGHT_HAVE_PC_PARPORT - select ARCH_MIGHT_HAVE_PC_SERIO - select BOARD_SCACHE - select BOOT_ELF32 - select CEVT_R4K if ! MIPS_EXTERNAL_TIMER - select CPU_HAS_WB - select CS5536 - select CSRC_R4K if ! MIPS_EXTERNAL_TIMER - select DMA_NONCOHERENT - select GENERIC_ISA_DMA_SUPPORT_BROKEN - select HAVE_CLK - select HAVE_PCI - select I8259 - select IRQ_MIPS_CPU - select ISA - select SYS_HAS_CPU_LOONGSON2F - select SYS_HAS_EARLY_PRINTK - select SYS_SUPPORTS_64BIT_KERNEL - select SYS_SUPPORTS_HIGHMEM - select SYS_SUPPORTS_LITTLE_ENDIAN - select LOONGSON_MC146818 - help - Lemote Loongson 2F family machines utilize the 2F revision of - Loongson processor and the AMD CS5536 south bridge. - - These family machines include fuloong2f mini PC, yeeloong2f notebook, - LingLoong allinone PC and so forth. - -config LOONGSON_MACH3X - bool "Generic Loongson 3 family machines" - select ARCH_SPARSEMEM_ENABLE - select ARCH_MIGHT_HAVE_PC_PARPORT - select ARCH_MIGHT_HAVE_PC_SERIO - select GENERIC_ISA_DMA_SUPPORT_BROKEN - select BOOT_ELF32 - select BOARD_SCACHE - select CSRC_R4K - select CEVT_R4K - select CPU_HAS_WB - select FORCE_PCI - select ISA - select I8259 - select IRQ_MIPS_CPU - select NR_CPUS_DEFAULT_4 - select SYS_HAS_CPU_LOONGSON3 - select SYS_HAS_EARLY_PRINTK - select SYS_SUPPORTS_SMP - select SYS_SUPPORTS_HOTPLUG_CPU - select SYS_SUPPORTS_NUMA - select SYS_SUPPORTS_64BIT_KERNEL - select SYS_SUPPORTS_HIGHMEM - select SYS_SUPPORTS_LITTLE_ENDIAN - select LOONGSON_MC146818 - select ZONE_DMA32 - select LEFI_FIRMWARE_INTERFACE - help - Generic Loongson 3 family machines utilize the 3A/3B revision - of Loongson processor and RS780/SBX00 chipset. -endchoice - -config CS5536 - bool - -config CS5536_MFGPT - bool "CS5536 MFGPT Timer" - depends on CS5536 && !HIGH_RES_TIMERS - select MIPS_EXTERNAL_TIMER - help - This option enables the mfgpt0 timer of AMD CS5536. With this timer - switched on you can not use high resolution timers. - - If you want to enable the Loongson2 CPUFreq Driver, Please enable - this option at first, otherwise, You will get wrong system time. - - If unsure, say Yes. - config RS780_HPET bool "RS780/SBX00 HPET Timer" - depends on LOONGSON_MACH3X + depends on MACH_LOONGSON64 select MIPS_EXTERNAL_TIMER help This option enables the hpet timer of AMD RS780/SBX00. @@ -123,16 +13,9 @@ config RS780_HPET If unsure, say Yes. 
-config LOONGSON_UART_BASE - bool - default y - depends on EARLY_PRINTK || SERIAL_8250 config LOONGSON_MC146818 bool default n -config LEFI_FIRMWARE_INTERFACE - bool - endif # MACH_LOONGSON64 diff --git a/arch/mips/loongson64/Makefile b/arch/mips/loongson64/Makefile index 1a5df773707d..7821891bc5d0 100644 --- a/arch/mips/loongson64/Makefile +++ b/arch/mips/loongson64/Makefile @@ -1,24 +1,13 @@ # SPDX-License-Identifier: GPL-2.0-only # -# Common code for all Loongson based systems +# Makefile for Loongson-3 family machines # +obj-$(CONFIG_MACH_LOONGSON64) += irq.o cop2-ex.o platform.o acpi_init.o dma.o \ + setup.o init.o env.o time.o reset.o \ -obj-$(CONFIG_MACH_LOONGSON64) += common/ - -# -# Lemote Fuloong mini-PC (Loongson 2E-based) -# - -obj-$(CONFIG_LEMOTE_FULOONG2E) += fuloong-2e/ - -# -# Lemote loongson2f family machines -# - -obj-$(CONFIG_LEMOTE_MACH2F) += lemote-2f/ - -# -# All Loongson-3 family machines -# - -obj-$(CONFIG_CPU_LOONGSON3) += loongson-3/ +obj-$(CONFIG_SMP) += smp.o +obj-$(CONFIG_NUMA) += numa.o +obj-$(CONFIG_RS780_HPET) += hpet.o +obj-$(CONFIG_PCI) += pci.o +obj-$(CONFIG_LOONGSON_MC146818) += rtc.o +obj-$(CONFIG_SUSPEND) += pm.o diff --git a/arch/mips/loongson64/Platform b/arch/mips/loongson64/Platform index c1a4d4dc4665..d5eb94c9edb4 100644 --- a/arch/mips/loongson64/Platform +++ b/arch/mips/loongson64/Platform @@ -2,32 +2,13 @@ # Loongson Processors' Support # -# Only gcc >= 4.4 have Loongson specific support -cflags-$(CONFIG_CPU_LOONGSON2) += -Wa,--trap -cflags-$(CONFIG_CPU_LOONGSON2E) += \ - $(call cc-option,-march=loongson2e,-march=r4600) -cflags-$(CONFIG_CPU_LOONGSON2F) += \ - $(call cc-option,-march=loongson2f,-march=r4600) -# Enable the workarounds for Loongson2f -ifdef CONFIG_CPU_LOONGSON2F_WORKAROUNDS - ifeq ($(call as-option,-Wa$(comma)-mfix-loongson2f-nop,),) - $(error only binutils >= 2.20.2 have needed option -mfix-loongson2f-nop) - else - cflags-$(CONFIG_CPU_NOP_WORKAROUNDS) += -Wa$(comma)-mfix-loongson2f-nop - endif - ifeq ($(call as-option,-Wa$(comma)-mfix-loongson2f-jump,),) - $(error only binutils >= 2.20.2 have needed option -mfix-loongson2f-jump) - else - cflags-$(CONFIG_CPU_JUMP_WORKAROUNDS) += -Wa$(comma)-mfix-loongson2f-jump - endif -endif -cflags-$(CONFIG_CPU_LOONGSON3) += -Wa,--trap +cflags-$(CONFIG_CPU_LOONGSON64) += -Wa,--trap # # Some versions of binutils, not currently mainline as of 2019/02/04, support # an -mfix-loongson3-llsc flag which emits a sync prior to each ll instruction -# to work around a CPU bug (see loongson_llsc_mb() in asm/barrier.h for a +# to work around a CPU bug (see __SYNC_loongson3_war in asm/sync.h for a # description). # # We disable this in order to prevent the assembler meddling with the @@ -44,7 +25,7 @@ cflags-$(CONFIG_CPU_LOONGSON3) += -Wa,--trap # binutils does not merge support for the flag then we can revisit & remove # this later - for now it ensures vendor toolchains don't cause problems. 
# -cflags-$(CONFIG_CPU_LOONGSON3) += $(call as-option,-Wa$(comma)-mno-fix-loongson3-llsc,) +cflags-$(CONFIG_CPU_LOONGSON64) += $(call as-option,-Wa$(comma)-mno-fix-loongson3-llsc,) # # binutils from v2.25 on and gcc starting from v4.9.0 treat -march=loongson3a @@ -55,23 +36,25 @@ cflags-$(CONFIG_CPU_LOONGSON3) += $(call as-option,-Wa$(comma)-mno-fix-loongson3 # ifeq ($(call cc-ifversion, -ge, 0409, y), y) ifeq ($(call ld-ifversion, -ge, 225000000, y), y) - cflags-$(CONFIG_CPU_LOONGSON3) += \ + cflags-$(CONFIG_CPU_LOONGSON64) += \ $(call cc-option,-march=loongson3a -U_MIPS_ISA -D_MIPS_ISA=_MIPS_ISA_MIPS64) else - cflags-$(CONFIG_CPU_LOONGSON3) += \ + cflags-$(CONFIG_CPU_LOONGSON64) += \ $(call cc-option,-march=mips64r2,-mips64r2 -U_MIPS_ISA -D_MIPS_ISA=_MIPS_ISA_MIPS64) endif else - cflags-$(CONFIG_CPU_LOONGSON3) += \ + cflags-$(CONFIG_CPU_LOONGSON64) += \ $(call cc-option,-march=mips64r2,-mips64r2 -U_MIPS_ISA -D_MIPS_ISA=_MIPS_ISA_MIPS64) endif +# Some -march= flags enable MMI instructions, and GCC complains about that +# support being enabled alongside -msoft-float. Thus explicitly disable MMI. +cflags-y += $(call cc-option,-mno-loongson-mmi) + # # Loongson Machines' Support # platform-$(CONFIG_MACH_LOONGSON64) += loongson64/ cflags-$(CONFIG_MACH_LOONGSON64) += -I$(srctree)/arch/mips/include/asm/mach-loongson64 -mno-branch-likely -load-$(CONFIG_LEMOTE_FULOONG2E) += 0xffffffff80100000 -load-$(CONFIG_LEMOTE_MACH2F) += 0xffffffff80200000 -load-$(CONFIG_LOONGSON_MACH3X) += 0xffffffff80200000 +load-$(CONFIG_CPU_LOONGSON64) += 0xffffffff80200000 diff --git a/arch/mips/loongson64/loongson-3/acpi_init.c b/arch/mips/loongson64/acpi_init.c index 8d7c119ddf91..8d7c119ddf91 100644 --- a/arch/mips/loongson64/loongson-3/acpi_init.c +++ b/arch/mips/loongson64/acpi_init.c diff --git a/arch/mips/loongson64/common/cmdline.c b/arch/mips/loongson64/common/cmdline.c deleted file mode 100644 index a735460682cf..000000000000 --- a/arch/mips/loongson64/common/cmdline.c +++ /dev/null @@ -1,44 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - * Based on Ocelot Linux port, which is - * Copyright 2001 MontaVista Software Inc. - * Author: jsun@mvista.com or jsun@junsun.net - * - * Copyright 2003 ICT CAS - * Author: Michael Guo <guoyi@ict.ac.cn> - * - * Copyright (C) 2007 Lemote Inc. & Institute of Computing Technology - * Author: Fuxin Zhang, zhangfx@lemote.com - * - * Copyright (C) 2009 Lemote Inc. - * Author: Wu Zhangjin, wuzhangjin@gmail.com - */ -#include <asm/bootinfo.h> - -#include <loongson.h> - -void __init prom_init_cmdline(void) -{ - int prom_argc; - /* pmon passes arguments in 32bit pointers */ - int *_prom_argv; - int i; - long l; - - /* firmware arguments are initialized in head.S */ - prom_argc = fw_arg0; - _prom_argv = (int *)fw_arg1; - - /* arg[0] is "g", the rest is boot parameters */ - arcs_cmdline[0] = '\0'; - for (i = 1; i < prom_argc; i++) { - l = (long)_prom_argv[i]; - if (strlen(arcs_cmdline) + strlen(((char *)l) + 1) - >= sizeof(arcs_cmdline)) - break; - strcat(arcs_cmdline, ((char *)l)); - strcat(arcs_cmdline, " "); - } - - prom_init_machtype(); -} diff --git a/arch/mips/loongson64/common/early_printk.c b/arch/mips/loongson64/common/early_printk.c deleted file mode 100644 index 5e2a151aa30c..000000000000 --- a/arch/mips/loongson64/common/early_printk.c +++ /dev/null @@ -1,38 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* early printk support - * - * Copyright (c) 2009 Philippe Vachon <philippe@cowpig.ca> - * Copyright (c) 2009 Lemote Inc. 
- * Author: Wu Zhangjin, wuzhangjin@gmail.com - */ -#include <linux/serial_reg.h> -#include <asm/setup.h> - -#include <loongson.h> - -#define PORT(base, offset) (u8 *)(base + offset) - -static inline unsigned int serial_in(unsigned char *base, int offset) -{ - return readb(PORT(base, offset)); -} - -static inline void serial_out(unsigned char *base, int offset, int value) -{ - writeb(value, PORT(base, offset)); -} - -void prom_putchar(char c) -{ - int timeout; - unsigned char *uart_base; - - uart_base = (unsigned char *)_loongson_uart_base[0]; - timeout = 1024; - - while (((serial_in(uart_base, UART_LSR) & UART_LSR_THRE) == 0) && - (timeout-- > 0)) - ; - - serial_out(uart_base, UART_TX, c); -} diff --git a/arch/mips/loongson64/common/mem.c b/arch/mips/loongson64/common/mem.c deleted file mode 100644 index 4abb92e0fc39..000000000000 --- a/arch/mips/loongson64/common/mem.c +++ /dev/null @@ -1,158 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - */ -#include <linux/fs.h> -#include <linux/fcntl.h> -#include <linux/mm.h> - -#include <asm/bootinfo.h> - -#include <loongson.h> -#include <boot_param.h> -#include <mem.h> -#include <pci.h> - -#ifndef CONFIG_LEFI_FIRMWARE_INTERFACE - -u32 memsize, highmemsize; - -void __init prom_init_memory(void) -{ - add_memory_region(0x0, (memsize << 20), BOOT_MEM_RAM); - - add_memory_region(memsize << 20, LOONGSON_PCI_MEM_START - (memsize << - 20), BOOT_MEM_RESERVED); - -#ifdef CONFIG_CPU_SUPPORTS_ADDRWINCFG - { - int bit; - - bit = fls(memsize + highmemsize); - if (bit != ffs(memsize + highmemsize)) - bit += 20; - else - bit = bit + 20 - 1; - - /* set cpu window3 to map CPU to DDR: 2G -> 2G */ - LOONGSON_ADDRWIN_CPUTODDR(ADDRWIN_WIN3, 0x80000000ul, - 0x80000000ul, (1 << bit)); - mmiowb(); - } -#endif /* !CONFIG_CPU_SUPPORTS_ADDRWINCFG */ - -#ifdef CONFIG_64BIT - if (highmemsize > 0) - add_memory_region(LOONGSON_HIGHMEM_START, - highmemsize << 20, BOOT_MEM_RAM); - - add_memory_region(LOONGSON_PCI_MEM_END + 1, LOONGSON_HIGHMEM_START - - LOONGSON_PCI_MEM_END - 1, BOOT_MEM_RESERVED); - -#endif /* !CONFIG_64BIT */ -} - -#else /* CONFIG_LEFI_FIRMWARE_INTERFACE */ - -void __init prom_init_memory(void) -{ - int i; - u32 node_id; - u32 mem_type; - - /* parse memory information */ - for (i = 0; i < loongson_memmap->nr_map; i++) { - node_id = loongson_memmap->map[i].node_id; - mem_type = loongson_memmap->map[i].mem_type; - - if (node_id == 0) { - switch (mem_type) { - case SYSTEM_RAM_LOW: - add_memory_region(loongson_memmap->map[i].mem_start, - (u64)loongson_memmap->map[i].mem_size << 20, - BOOT_MEM_RAM); - break; - case SYSTEM_RAM_HIGH: - add_memory_region(loongson_memmap->map[i].mem_start, - (u64)loongson_memmap->map[i].mem_size << 20, - BOOT_MEM_RAM); - break; - case SYSTEM_RAM_RESERVED: - add_memory_region(loongson_memmap->map[i].mem_start, - (u64)loongson_memmap->map[i].mem_size << 20, - BOOT_MEM_RESERVED); - break; - } - } - } -} - -#endif /* CONFIG_LEFI_FIRMWARE_INTERFACE */ - -/* override of arch/mips/mm/cache.c: __uncached_access */ -int __uncached_access(struct file *file, unsigned long addr) -{ - if (file->f_flags & O_DSYNC) - return 1; - - return addr >= __pa(high_memory) || - ((addr >= LOONGSON_MMIO_MEM_START) && - (addr < LOONGSON_MMIO_MEM_END)); -} - -#ifdef CONFIG_CPU_SUPPORTS_UNCACHED_ACCELERATED - -#include <linux/pci.h> -#include <linux/sched.h> -#include <asm/current.h> - -static unsigned long uca_start, uca_end; - -pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn, - unsigned long size, pgprot_t vma_prot) -{ - unsigned long 
offset = pfn << PAGE_SHIFT; - unsigned long end = offset + size; - - if (__uncached_access(file, offset)) { - if (uca_start && (offset >= uca_start) && - (end <= uca_end)) - return __pgprot((pgprot_val(vma_prot) & - ~_CACHE_MASK) | - _CACHE_UNCACHED_ACCELERATED); - else - return pgprot_noncached(vma_prot); - } - return vma_prot; -} - -static int __init find_vga_mem_init(void) -{ - struct pci_dev *dev = 0; - struct resource *r; - int idx; - - if (uca_start) - return 0; - - for_each_pci_dev(dev) { - if ((dev->class >> 16) == PCI_BASE_CLASS_DISPLAY) { - for (idx = 0; idx < PCI_NUM_RESOURCES; idx++) { - r = &dev->resource[idx]; - if (!r->start && r->end) - continue; - if (r->flags & IORESOURCE_IO) - continue; - if (r->flags & IORESOURCE_MEM) { - uca_start = r->start; - uca_end = r->end; - return 0; - } - } - } - } - - return 0; -} - -late_initcall(find_vga_mem_init); -#endif /* !CONFIG_CPU_SUPPORTS_UNCACHED_ACCELERATED */ diff --git a/arch/mips/loongson64/common/serial.c b/arch/mips/loongson64/common/serial.c deleted file mode 100644 index ffefc1cb2612..000000000000 --- a/arch/mips/loongson64/common/serial.c +++ /dev/null @@ -1,117 +0,0 @@ -/* - * This file is subject to the terms and conditions of the GNU General Public - * License. See the file "COPYING" in the main directory of this archive - * for more details. - * - * Copyright (C) 2007 Ralf Baechle (ralf@linux-mips.org) - * - * Copyright (C) 2009 Lemote, Inc. - * Author: Yan hua (yanhua@lemote.com) - * Author: Wu Zhangjin (wuzhangjin@gmail.com) - */ - -#include <linux/io.h> -#include <linux/module.h> -#include <linux/serial_8250.h> - -#include <asm/bootinfo.h> - -#include <loongson.h> -#include <machine.h> - -#define PORT(int, clk) \ -{ \ - .irq = int, \ - .uartclk = clk, \ - .iotype = UPIO_PORT, \ - .flags = UPF_BOOT_AUTOCONF | UPF_SKIP_TEST, \ - .regshift = 0, \ -} - -#define PORT_M(int, clk) \ -{ \ - .irq = MIPS_CPU_IRQ_BASE + (int), \ - .uartclk = clk, \ - .iotype = UPIO_MEM, \ - .membase = (void __iomem *)NULL, \ - .flags = UPF_BOOT_AUTOCONF | UPF_SKIP_TEST, \ - .regshift = 0, \ -} - -static struct plat_serial8250_port uart8250_data[][MAX_UARTS + 1] = { - [MACH_LOONGSON_UNKNOWN] = {}, - [MACH_LEMOTE_FL2E] = {PORT(4, 1843200), {} }, - [MACH_LEMOTE_FL2F] = {PORT(3, 1843200), {} }, - [MACH_LEMOTE_ML2F7] = {PORT_M(3, 3686400), {} }, - [MACH_LEMOTE_YL2F89] = {PORT_M(3, 3686400), {} }, - [MACH_DEXXON_GDIUM2F10] = {PORT_M(3, 3686400), {} }, - [MACH_LEMOTE_NAS] = {PORT_M(3, 3686400), {} }, - [MACH_LEMOTE_LL2F] = {PORT(3, 1843200), {} }, - [MACH_LOONGSON_GENERIC] = {PORT_M(2, 25000000), {} }, - [MACH_LOONGSON_END] = {}, -}; - -static struct platform_device uart8250_device = { - .name = "serial8250", - .id = PLAT8250_DEV_PLATFORM, -}; - -static int __init serial_init(void) -{ - int i; - unsigned char iotype; - - iotype = uart8250_data[mips_machtype][0].iotype; - - if (UPIO_MEM == iotype) { - uart8250_data[mips_machtype][0].mapbase = - loongson_uart_base[0]; - uart8250_data[mips_machtype][0].membase = - (void __iomem *)_loongson_uart_base[0]; - } - else if (UPIO_PORT == iotype) - uart8250_data[mips_machtype][0].iobase = - loongson_uart_base[0] - LOONGSON_PCIIO_BASE; - - if (loongson_sysconf.uarts[0].uartclk) - uart8250_data[mips_machtype][0].uartclk = - loongson_sysconf.uarts[0].uartclk; - - for (i = 1; i < loongson_sysconf.nr_uarts; i++) { - iotype = loongson_sysconf.uarts[i].iotype; - uart8250_data[mips_machtype][i].iotype = iotype; - loongson_uart_base[i] = loongson_sysconf.uarts[i].uart_base; - - if (UPIO_MEM == iotype) { - 
uart8250_data[mips_machtype][i].irq = - MIPS_CPU_IRQ_BASE + loongson_sysconf.uarts[i].int_offset; - uart8250_data[mips_machtype][i].mapbase = - loongson_uart_base[i]; - uart8250_data[mips_machtype][i].membase = - ioremap_nocache(loongson_uart_base[i], 8); - } else if (UPIO_PORT == iotype) { - uart8250_data[mips_machtype][i].irq = - loongson_sysconf.uarts[i].int_offset; - uart8250_data[mips_machtype][i].iobase = - loongson_uart_base[i] - LOONGSON_PCIIO_BASE; - } - - uart8250_data[mips_machtype][i].uartclk = - loongson_sysconf.uarts[i].uartclk; - uart8250_data[mips_machtype][i].flags = - UPF_BOOT_AUTOCONF | UPF_SKIP_TEST; - } - - memset(&uart8250_data[mips_machtype][loongson_sysconf.nr_uarts], - 0, sizeof(struct plat_serial8250_port)); - uart8250_device.dev.platform_data = uart8250_data[mips_machtype]; - - return platform_device_register(&uart8250_device); -} -module_init(serial_init); - -static void __init serial_exit(void) -{ - platform_device_unregister(&uart8250_device); -} -module_exit(serial_exit); diff --git a/arch/mips/loongson64/loongson-3/cop2-ex.c b/arch/mips/loongson64/cop2-ex.c index 9efdfe430ff0..9efdfe430ff0 100644 --- a/arch/mips/loongson64/loongson-3/cop2-ex.c +++ b/arch/mips/loongson64/cop2-ex.c diff --git a/arch/mips/loongson64/loongson-3/dma.c b/arch/mips/loongson64/dma.c index 5e86635f71db..5e86635f71db 100644 --- a/arch/mips/loongson64/loongson-3/dma.c +++ b/arch/mips/loongson64/dma.c diff --git a/arch/mips/loongson64/common/env.c b/arch/mips/loongson64/env.c index 09d5cf4676ca..0daeb7bcf023 100644 --- a/arch/mips/loongson64/common/env.c +++ b/arch/mips/loongson64/env.c @@ -30,41 +30,13 @@ u64 loongson_freqctrl[MAX_PACKAGES]; unsigned long long smp_group[4]; -#define parse_even_earlier(res, option, p) \ -do { \ - unsigned int tmp __maybe_unused; \ - \ - if (strncmp(option, (char *)p, strlen(option)) == 0) \ - tmp = kstrtou32((char *)p + strlen(option"="), 10, &res); \ -} while (0) +const char *get_system_type(void) +{ + return "Generic Loongson64 System"; +} void __init prom_init_env(void) { - /* pmon passes arguments in 32bit pointers */ - unsigned int processor_id; - -#ifndef CONFIG_LEFI_FIRMWARE_INTERFACE - int *_prom_envp; - long l; - - /* firmware arguments are initialized in head.S */ - _prom_envp = (int *)fw_arg2; - - l = (long)*_prom_envp; - while (l != 0) { - parse_even_earlier(cpu_clock_freq, "cpuclock", l); - parse_even_earlier(memsize, "memsize", l); - parse_even_earlier(highmemsize, "highmemsize", l); - _prom_envp++; - l = (long)*_prom_envp; - } - if (memsize == 0) - memsize = 256; - - loongson_sysconf.nr_uarts = 1; - - pr_info("memsize=%u, highmemsize=%u\n", memsize, highmemsize); -#else struct boot_params *boot_p; struct loongson_params *loongson_p; struct system_loongson *esys; @@ -182,31 +154,5 @@ void __init prom_init_env(void) if (loongson_sysconf.nr_sensors) memcpy(loongson_sysconf.sensors, esys->sensors, sizeof(struct sensor_device) * loongson_sysconf.nr_sensors); -#endif - if (cpu_clock_freq == 0) { - processor_id = (&current_cpu_data)->processor_id; - switch (processor_id & PRID_REV_MASK) { - case PRID_REV_LOONGSON2E: - cpu_clock_freq = 533080000; - break; - case PRID_REV_LOONGSON2F: - cpu_clock_freq = 797000000; - break; - case PRID_REV_LOONGSON3A_R1: - case PRID_REV_LOONGSON3A_R2_0: - case PRID_REV_LOONGSON3A_R2_1: - case PRID_REV_LOONGSON3A_R3_0: - case PRID_REV_LOONGSON3A_R3_1: - cpu_clock_freq = 900000000; - break; - case PRID_REV_LOONGSON3B_R1: - case PRID_REV_LOONGSON3B_R2: - cpu_clock_freq = 1000000000; - break; - default: - cpu_clock_freq = 
100000000; - break; - } - } pr_info("CpuClock = %u\n", cpu_clock_freq); } diff --git a/arch/mips/loongson64/loongson-3/hpet.c b/arch/mips/loongson64/hpet.c index ed15430ad64f..ed15430ad64f 100644 --- a/arch/mips/loongson64/loongson-3/hpet.c +++ b/arch/mips/loongson64/hpet.c diff --git a/arch/mips/loongson64/init.c b/arch/mips/loongson64/init.c new file mode 100644 index 000000000000..5ac1a0f35ca4 --- /dev/null +++ b/arch/mips/loongson64/init.c @@ -0,0 +1,46 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Copyright (C) 2009 Lemote Inc. + * Author: Wu Zhangjin, wuzhangjin@gmail.com + */ + +#include <linux/memblock.h> +#include <asm/bootinfo.h> +#include <asm/traps.h> +#include <asm/smp-ops.h> +#include <asm/cacheflush.h> +#include <asm/fw/fw.h> + +#include <loongson.h> + +static void __init mips_nmi_setup(void) +{ + void *base; + extern char except_vec_nmi; + + base = (void *)(CAC_BASE + 0x380); + memcpy(base, &except_vec_nmi, 0x80); + flush_icache_range((unsigned long)base, (unsigned long)base + 0x80); +} + +void __init prom_init(void) +{ + fw_init_cmdline(); + prom_init_env(); + + /* init base address of io space */ + set_io_port_base((unsigned long) + ioremap(LOONGSON_PCIIO_BASE, LOONGSON_PCIIO_SIZE)); + + prom_init_numa_memory(); + + /* Hardcode to CPU UART 0 */ + setup_8250_early_printk_port(TO_UNCAC(LOONGSON_REG_BASE + 0x1e0), 0, 1024); + + register_smp_ops(&loongson3_smp_ops); + board_nmi_handler_setup = mips_nmi_setup; +} + +void __init prom_free_prom_memory(void) +{ +} diff --git a/arch/mips/loongson64/loongson-3/irq.c b/arch/mips/loongson64/irq.c index 5605061f5f98..79ad797497e4 100644 --- a/arch/mips/loongson64/loongson-3/irq.c +++ b/arch/mips/loongson64/irq.c @@ -78,8 +78,12 @@ static void ht_irqdispatch(void) #define UNUSED_IPS (CAUSEF_IP5 | CAUSEF_IP4 | CAUSEF_IP1 | CAUSEF_IP0) -void mach_irq_dispatch(unsigned int pending) +asmlinkage void plat_irq_dispatch(void) { + unsigned int pending; + + pending = read_c0_cause() & read_c0_status() & ST0_IM; + if (pending & CAUSEF_IP7) do_IRQ(LOONGSON_TIMER_IRQ); #if defined(CONFIG_SMP) @@ -127,7 +131,7 @@ void irq_router_init(void) LOONGSON_INT_ROUTER_INTEN | (0xffff << 16) | 0x1 << 10; } -void __init mach_init_irq(void) +void __init arch_init_irq(void) { struct irq_chip *chip; diff --git a/arch/mips/loongson64/loongson-3/Makefile b/arch/mips/loongson64/loongson-3/Makefile deleted file mode 100644 index df39598742b2..000000000000 --- a/arch/mips/loongson64/loongson-3/Makefile +++ /dev/null @@ -1,11 +0,0 @@ -# SPDX-License-Identifier: GPL-2.0-only -# -# Makefile for Loongson-3 family machines -# -obj-y += irq.o cop2-ex.o platform.o acpi_init.o dma.o - -obj-$(CONFIG_SMP) += smp.o - -obj-$(CONFIG_NUMA) += numa.o - -obj-$(CONFIG_RS780_HPET) += hpet.o diff --git a/arch/mips/loongson64/loongson-3/numa.c b/arch/mips/loongson64/numa.c index 414e97de5dc0..ef94a2278561 100644 --- a/arch/mips/loongson64/loongson-3/numa.c +++ b/arch/mips/loongson64/numa.c @@ -26,12 +26,15 @@ #include <asm/wbflush.h> #include <boot_param.h> -static struct node_data prealloc__node_data[MAX_NUMNODES]; +static struct pglist_data prealloc__node_data[MAX_NUMNODES]; unsigned char __node_distances[MAX_NUMNODES][MAX_NUMNODES]; EXPORT_SYMBOL(__node_distances); -struct node_data *__node_data[MAX_NUMNODES]; +struct pglist_data *__node_data[MAX_NUMNODES]; EXPORT_SYMBOL(__node_data); +cpumask_t __node_cpumask[MAX_NUMNODES]; +EXPORT_SYMBOL(__node_cpumask); + static void enable_lpa(void) { unsigned long value; @@ -142,8 +145,6 @@ static void __init szmem(unsigned int node) 
(u32)node_id, mem_type, mem_start, mem_size); pr_info(" start_pfn:0x%llx, end_pfn:0x%llx, num_physpages:0x%lx\n", start_pfn, end_pfn, num_physpages); - add_memory_region((node_id << 44) + mem_start, - (u64)mem_size << 20, BOOT_MEM_RAM); memblock_add_node(PFN_PHYS(start_pfn), PFN_PHYS(end_pfn - start_pfn), node); break; @@ -156,16 +157,12 @@ static void __init szmem(unsigned int node) (u32)node_id, mem_type, mem_start, mem_size); pr_info(" start_pfn:0x%llx, end_pfn:0x%llx, num_physpages:0x%lx\n", start_pfn, end_pfn, num_physpages); - add_memory_region((node_id << 44) + mem_start, - (u64)mem_size << 20, BOOT_MEM_RAM); memblock_add_node(PFN_PHYS(start_pfn), PFN_PHYS(end_pfn - start_pfn), node); break; case SYSTEM_RAM_RESERVED: pr_info("Node%d: mem_type:%d, mem_start:0x%llx, mem_size:0x%llx MB\n", (u32)node_id, mem_type, mem_start, mem_size); - add_memory_region((node_id << 44) + mem_start, - (u64)mem_size << 20, BOOT_MEM_RESERVED); memblock_reserve(((node_id << 44) + mem_start), mem_size << 20); break; @@ -191,8 +188,6 @@ static void __init node_mem_init(unsigned int node) NODE_DATA(node)->node_start_pfn = start_pfn; NODE_DATA(node)->node_spanned_pages = end_pfn - start_pfn; - free_bootmem_with_active_regions(node, end_pfn); - if (node == 0) { /* kernel end address */ unsigned long kernel_end_pfn = PFN_UP(__pa_symbol(&_end)); @@ -209,8 +204,6 @@ static void __init node_mem_init(unsigned int node) memblock_reserve((node_addrspace_offset | 0xfe000000), 32 << 20); } - - sparse_memory_present_with_active_regions(node); } static __init void prom_meminit(void) @@ -224,9 +217,10 @@ static __init void prom_meminit(void) if (node_online(node)) { szmem(node); node_mem_init(node); - cpumask_clear(&__node_data[(node)]->cpumask); + cpumask_clear(&__node_cpumask[node]); } } + memblocks_present(); max_low_pfn = PHYS_PFN(memblock_end_of_DRAM()); for (cpu = 0; cpu < loongson_sysconf.nr_cpus; cpu++) { @@ -237,7 +231,7 @@ static __init void prom_meminit(void) if (loongson_sysconf.reserved_cpus_mask & (1<<cpu)) continue; - cpumask_set_cpu(active_cpu, &__node_data[(node)]->cpumask); + cpumask_set_cpu(active_cpu, &__node_cpumask[node]); pr_info("NUMA: set cpumask cpu %d on node %d\n", active_cpu, node); active_cpu++; diff --git a/arch/mips/loongson64/pci.c b/arch/mips/loongson64/pci.c new file mode 100644 index 000000000000..e84ae20c3290 --- /dev/null +++ b/arch/mips/loongson64/pci.c @@ -0,0 +1,51 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Copyright (C) 2007 Lemote, Inc. 
& Institute of Computing Technology + * Author: Fuxin Zhang, zhangfx@lemote.com + */ +#include <linux/pci.h> + +#include <pci.h> +#include <loongson.h> +#include <boot_param.h> + +static struct resource loongson_pci_mem_resource = { + .name = "pci memory space", + .start = LOONGSON_PCI_MEM_START, + .end = LOONGSON_PCI_MEM_END, + .flags = IORESOURCE_MEM, +}; + +static struct resource loongson_pci_io_resource = { + .name = "pci io space", + .start = LOONGSON_PCI_IO_START, + .end = IO_SPACE_LIMIT, + .flags = IORESOURCE_IO, +}; + +static struct pci_controller loongson_pci_controller = { + .pci_ops = &loongson_pci_ops, + .io_resource = &loongson_pci_io_resource, + .mem_resource = &loongson_pci_mem_resource, + .mem_offset = 0x00000000UL, + .io_offset = 0x00000000UL, +}; + + +extern int sbx00_acpi_init(void); + +static int __init pcibios_init(void) +{ + + loongson_pci_controller.io_map_base = mips_io_port_base; + loongson_pci_mem_resource.start = loongson_sysconf.pci_mem_start_addr; + loongson_pci_mem_resource.end = loongson_sysconf.pci_mem_end_addr; + + register_pci_controller(&loongson_pci_controller); + + sbx00_acpi_init(); + + return 0; +} + +arch_initcall(pcibios_init); diff --git a/arch/mips/loongson64/loongson-3/platform.c b/arch/mips/loongson64/platform.c index 13f3404f0030..13f3404f0030 100644 --- a/arch/mips/loongson64/loongson-3/platform.c +++ b/arch/mips/loongson64/platform.c diff --git a/arch/mips/loongson64/pm.c b/arch/mips/loongson64/pm.c new file mode 100644 index 000000000000..7c8556f09781 --- /dev/null +++ b/arch/mips/loongson64/pm.c @@ -0,0 +1,104 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * loongson-specific suspend support + * + * Copyright (C) 2009 Lemote Inc. + * Author: Wu Zhangjin <wuzhangjin@gmail.com> + */ +#include <linux/suspend.h> +#include <linux/interrupt.h> +#include <linux/pm.h> + +#include <asm/i8259.h> +#include <asm/mipsregs.h> + +#include <loongson.h> + +static unsigned int __maybe_unused cached_master_mask; /* i8259A */ +static unsigned int __maybe_unused cached_slave_mask; +static unsigned int __maybe_unused cached_bonito_irq_mask; /* bonito */ + +void arch_suspend_disable_irqs(void) +{ + /* disable all mips events */ + local_irq_disable(); + +#ifdef CONFIG_I8259 + /* disable all events of i8259A */ + cached_slave_mask = inb(PIC_SLAVE_IMR); + cached_master_mask = inb(PIC_MASTER_IMR); + + outb(0xff, PIC_SLAVE_IMR); + inb(PIC_SLAVE_IMR); + outb(0xff, PIC_MASTER_IMR); + inb(PIC_MASTER_IMR); +#endif + /* disable all events of bonito */ + cached_bonito_irq_mask = LOONGSON_INTEN; + LOONGSON_INTENCLR = 0xffff; + (void)LOONGSON_INTENCLR; +} + +void arch_suspend_enable_irqs(void) +{ + /* enable all mips events */ + local_irq_enable(); +#ifdef CONFIG_I8259 + /* only enable the cached events of i8259A */ + outb(cached_slave_mask, PIC_SLAVE_IMR); + outb(cached_master_mask, PIC_MASTER_IMR); +#endif + /* enable all cached events of bonito */ + LOONGSON_INTENSET = cached_bonito_irq_mask; + (void)LOONGSON_INTENSET; +} + +/* + * Setup the board-specific events for waking up loongson from wait mode + */ +void __weak setup_wakeup_events(void) +{ +} + +void __weak mach_suspend(void) +{ +} + +void __weak mach_resume(void) +{ +} + +static int loongson_pm_enter(suspend_state_t state) +{ + mach_suspend(); + + mach_resume(); + + return 0; +} + +static int loongson_pm_valid_state(suspend_state_t state) +{ + switch (state) { + case PM_SUSPEND_ON: + case PM_SUSPEND_STANDBY: + case PM_SUSPEND_MEM: + return 1; + + default: + return 0; + } +} + +static const struct 
platform_suspend_ops loongson_pm_ops = { + .valid = loongson_pm_valid_state, + .enter = loongson_pm_enter, +}; + +static int __init loongson_pm_init(void) +{ + suspend_set_ops(&loongson_pm_ops); + + return 0; +} +arch_initcall(loongson_pm_init); diff --git a/arch/mips/loongson64/reset.c b/arch/mips/loongson64/reset.c new file mode 100644 index 000000000000..88b3bd5fed25 --- /dev/null +++ b/arch/mips/loongson64/reset.c @@ -0,0 +1,64 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * + * Copyright (C) 2007 Lemote, Inc. & Institute of Computing Technology + * Author: Fuxin Zhang, zhangfx@lemote.com + * Copyright (C) 2009 Lemote, Inc. + * Author: Zhangjin Wu, wuzhangjin@gmail.com + */ +#include <linux/init.h> +#include <linux/pm.h> + +#include <asm/idle.h> +#include <asm/reboot.h> + +#include <loongson.h> +#include <boot_param.h> + +static inline void loongson_reboot(void) +{ + ((void (*)(void))ioremap_nocache(LOONGSON_BOOT_BASE, 4)) (); +} + +static void loongson_restart(char *command) +{ + + void (*fw_restart)(void) = (void *)loongson_sysconf.restart_addr; + + fw_restart(); + while (1) { + if (cpu_wait) + cpu_wait(); + } +} + +static void loongson_poweroff(void) +{ + void (*fw_poweroff)(void) = (void *)loongson_sysconf.poweroff_addr; + + fw_poweroff(); + while (1) { + if (cpu_wait) + cpu_wait(); + } +} + +static void loongson_halt(void) +{ + pr_notice("\n\n** You can safely turn off the power now **\n\n"); + while (1) { + if (cpu_wait) + cpu_wait(); + } +} + +static int __init mips_reboot_setup(void) +{ + _machine_restart = loongson_restart; + _machine_halt = loongson_halt; + pm_power_off = loongson_poweroff; + + return 0; +} + +arch_initcall(mips_reboot_setup); diff --git a/arch/mips/loongson64/rtc.c b/arch/mips/loongson64/rtc.c new file mode 100644 index 000000000000..8d7628c0f513 --- /dev/null +++ b/arch/mips/loongson64/rtc.c @@ -0,0 +1,39 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Lemote Fuloong platform support + * + * Copyright(c) 2010 Arnaud Patard <apatard@mandriva.com> + */ + +#include <linux/init.h> +#include <linux/kernel.h> +#include <linux/platform_device.h> +#include <linux/mc146818rtc.h> + +static struct resource loongson_rtc_resources[] = { + { + .start = RTC_PORT(0), + .end = RTC_PORT(1), + .flags = IORESOURCE_IO, + }, { + .start = RTC_IRQ, + .end = RTC_IRQ, + .flags = IORESOURCE_IRQ, + } +}; + +static struct platform_device loongson_rtc_device = { + .name = "rtc_cmos", + .id = -1, + .resource = loongson_rtc_resources, + .num_resources = ARRAY_SIZE(loongson_rtc_resources), +}; + + +static int __init loongson_rtc_platform_init(void) +{ + platform_device_register(&loongson_rtc_device); + return 0; +} + +device_initcall(loongson_rtc_platform_init); diff --git a/arch/mips/loongson64/setup.c b/arch/mips/loongson64/setup.c new file mode 100644 index 000000000000..4fd27f4f90ed --- /dev/null +++ b/arch/mips/loongson64/setup.c @@ -0,0 +1,30 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Copyright (C) 2007 Lemote Inc. 
& Institute of Computing Technology + * Author: Fuxin Zhang, zhangfx@lemote.com + */ +#include <linux/export.h> +#include <linux/init.h> + +#include <asm/wbflush.h> +#include <asm/bootinfo.h> + +#include <loongson.h> + +static void wbflush_loongson(void) +{ + asm(".set\tpush\n\t" + ".set\tnoreorder\n\t" + ".set mips3\n\t" + "sync\n\t" + "nop\n\t" + ".set\tpop\n\t" + ".set mips0\n\t"); +} + +void (*__wbflush)(void) = wbflush_loongson; +EXPORT_SYMBOL(__wbflush); + +void __init plat_mem_setup(void) +{ +} diff --git a/arch/mips/loongson64/loongson-3/smp.c b/arch/mips/loongson64/smp.c index ce68cdaaf33c..de8e0741ce2d 100644 --- a/arch/mips/loongson64/loongson-3/smp.c +++ b/arch/mips/loongson64/smp.c @@ -18,6 +18,7 @@ #include <asm/tlbflush.h> #include <asm/cacheflush.h> #include <loongson.h> +#include <loongson_regs.h> #include <workarounds.h> #include "smp.h" @@ -48,6 +49,62 @@ static uint32_t core0_c0count[NR_CPUS]; __wbflush(); \ } while (0) +u32 (*ipi_read_clear)(int cpu); +void (*ipi_write_action)(int cpu, u32 action); + +static u32 csr_ipi_read_clear(int cpu) +{ + u32 action; + + /* Load the ipi register to figure out what we're supposed to do */ + action = csr_readl(LOONGSON_CSR_IPI_STATUS); + /* Clear the ipi register to clear the interrupt */ + csr_writel(action, LOONGSON_CSR_IPI_CLEAR); + + return action; +} + +static void csr_ipi_write_action(int cpu, u32 action) +{ + unsigned int irq = 0; + + while ((irq = ffs(action))) { + uint32_t val = CSR_IPI_SEND_BLOCK; + val |= (irq - 1); + val |= (cpu << CSR_IPI_SEND_CPU_SHIFT); + csr_writel(val, LOONGSON_CSR_IPI_SEND); + action &= ~BIT(irq - 1); + } +} + +static u32 legacy_ipi_read_clear(int cpu) +{ + u32 action; + + /* Load the ipi register to figure out what we're supposed to do */ + action = loongson3_ipi_read32(ipi_status0_regs[cpu_logical_map(cpu)]); + /* Clear the ipi register to clear the interrupt */ + loongson3_ipi_write32(action, ipi_clear0_regs[cpu_logical_map(cpu)]); + + return action; +} + +static void legacy_ipi_write_action(int cpu, u32 action) +{ + loongson3_ipi_write32((u32)action, ipi_set0_regs[cpu]); +} + +static void csr_ipi_probe(void) +{ + if (cpu_has_csr() && csr_readl(LOONGSON_CSR_FEATURES) & LOONGSON_CSRF_IPI) { + ipi_read_clear = csr_ipi_read_clear; + ipi_write_action = csr_ipi_write_action; + } else { + ipi_read_clear = legacy_ipi_read_clear; + ipi_write_action = legacy_ipi_write_action; + } +} + static void ipi_set0_regs_init(void) { ipi_set0_regs[0] = (void *) @@ -233,7 +290,7 @@ static void ipi_mailbox_buf_init(void) */ static void loongson3_send_ipi_single(int cpu, unsigned int action) { - loongson3_ipi_write32((u32)action, ipi_set0_regs[cpu_logical_map(cpu)]); + ipi_write_action(cpu_logical_map(cpu), (u32)action); } static void @@ -242,14 +299,14 @@ loongson3_send_ipi_mask(const struct cpumask *mask, unsigned int action) unsigned int i; for_each_cpu(i, mask) - loongson3_ipi_write32((u32)action, ipi_set0_regs[cpu_logical_map(i)]); + ipi_write_action(cpu_logical_map(i), (u32)action); } #define IPI_IRQ_OFFSET 6 void loongson3_send_irq_by_ipi(int cpu, int irqs) { - loongson3_ipi_write32(irqs << IPI_IRQ_OFFSET, ipi_set0_regs[cpu_logical_map(cpu)]); + ipi_write_action(cpu_logical_map(cpu), irqs << IPI_IRQ_OFFSET); } void loongson3_ipi_interrupt(struct pt_regs *regs) @@ -257,13 +314,9 @@ void loongson3_ipi_interrupt(struct pt_regs *regs) int i, cpu = smp_processor_id(); unsigned int action, c0count, irqs; - /* Load the ipi register to figure out what we're supposed to do */ - action = 
loongson3_ipi_read32(ipi_status0_regs[cpu_logical_map(cpu)]); + action = ipi_read_clear(cpu); irqs = action >> IPI_IRQ_OFFSET; - /* Clear the ipi register to clear the interrupt */ - loongson3_ipi_write32((u32)action, ipi_clear0_regs[cpu_logical_map(cpu)]); - if (action & SMP_RESCHEDULE_YOURSELF) scheduler_ipi(); @@ -372,6 +425,7 @@ static void __init loongson3_smp_setup(void) num++; } + csr_ipi_probe(); ipi_set0_regs_init(); ipi_clear0_regs_init(); ipi_status0_regs_init(); @@ -450,7 +504,7 @@ static void loongson3_cpu_die(unsigned int cpu) * flush all L1 entries at first. Then, another core (usually Core 0) can * safely disable the clock of the target core. loongson3_play_dead() is * called via CKSEG1 (uncached and unmmaped) */ -static void loongson3a_r1_play_dead(int *state_addr) +static void loongson3_type1_play_dead(int *state_addr) { register int val; register long cpuid, core, node, count; @@ -512,7 +566,7 @@ static void loongson3a_r1_play_dead(int *state_addr) : "a1"); } -static void loongson3a_r2r3_play_dead(int *state_addr) +static void loongson3_type2_play_dead(int *state_addr) { register int val; register long cpuid, core, node, count; @@ -532,27 +586,7 @@ static void loongson3a_r2r3_play_dead(int *state_addr) " cache 1, 3(%[addr]) \n" " addiu %[sets], %[sets], -1 \n" " bnez %[sets], 1b \n" - " addiu %[addr], %[addr], 0x40 \n" - " li %[addr], 0x80000000 \n" /* KSEG0 */ - "2: cache 2, 0(%[addr]) \n" /* flush L1 VCache */ - " cache 2, 1(%[addr]) \n" - " cache 2, 2(%[addr]) \n" - " cache 2, 3(%[addr]) \n" - " cache 2, 4(%[addr]) \n" - " cache 2, 5(%[addr]) \n" - " cache 2, 6(%[addr]) \n" - " cache 2, 7(%[addr]) \n" - " cache 2, 8(%[addr]) \n" - " cache 2, 9(%[addr]) \n" - " cache 2, 10(%[addr]) \n" - " cache 2, 11(%[addr]) \n" - " cache 2, 12(%[addr]) \n" - " cache 2, 13(%[addr]) \n" - " cache 2, 14(%[addr]) \n" - " cache 2, 15(%[addr]) \n" - " addiu %[vsets], %[vsets], -1 \n" - " bnez %[vsets], 2b \n" - " addiu %[addr], %[addr], 0x40 \n" + " addiu %[addr], %[addr], 0x20 \n" " li %[val], 0x7 \n" /* *state_addr = CPU_DEAD; */ " sw %[val], (%[state_addr]) \n" " sync \n" @@ -560,8 +594,7 @@ static void loongson3a_r2r3_play_dead(int *state_addr) " .set pop \n" : [addr] "=&r" (addr), [val] "=&r" (val) : [state_addr] "r" (state_addr), - [sets] "r" (cpu_data[smp_processor_id()].dcache.sets), - [vsets] "r" (cpu_data[smp_processor_id()].vcache.sets)); + [sets] "r" (cpu_data[smp_processor_id()].dcache.sets)); __asm__ __volatile__( " .set push \n" @@ -576,6 +609,8 @@ static void loongson3a_r2r3_play_dead(int *state_addr) " andi %[node], %[cpuid], 0xc \n" " dsll %[node], 42 \n" /* get node id */ " or %[base], %[base], %[node] \n" + " dsrl %[node], 30 \n" /* 15:14 */ + " or %[base], %[base], %[node] \n" "1: li %[count], 0x100 \n" /* wait for init loop */ "2: bnez %[count], 2b \n" /* limit mailbox access */ " addiu %[count], -1 \n" @@ -595,7 +630,7 @@ static void loongson3a_r2r3_play_dead(int *state_addr) : "a1"); } -static void loongson3b_play_dead(int *state_addr) +static void loongson3_type3_play_dead(int *state_addr) { register int val; register long cpuid, core, node, count; @@ -615,7 +650,27 @@ static void loongson3b_play_dead(int *state_addr) " cache 1, 3(%[addr]) \n" " addiu %[sets], %[sets], -1 \n" " bnez %[sets], 1b \n" - " addiu %[addr], %[addr], 0x20 \n" + " addiu %[addr], %[addr], 0x40 \n" + " li %[addr], 0x80000000 \n" /* KSEG0 */ + "2: cache 2, 0(%[addr]) \n" /* flush L1 VCache */ + " cache 2, 1(%[addr]) \n" + " cache 2, 2(%[addr]) \n" + " cache 2, 3(%[addr]) \n" + " cache 2, 
4(%[addr]) \n" + " cache 2, 5(%[addr]) \n" + " cache 2, 6(%[addr]) \n" + " cache 2, 7(%[addr]) \n" + " cache 2, 8(%[addr]) \n" + " cache 2, 9(%[addr]) \n" + " cache 2, 10(%[addr]) \n" + " cache 2, 11(%[addr]) \n" + " cache 2, 12(%[addr]) \n" + " cache 2, 13(%[addr]) \n" + " cache 2, 14(%[addr]) \n" + " cache 2, 15(%[addr]) \n" + " addiu %[vsets], %[vsets], -1 \n" + " bnez %[vsets], 2b \n" + " addiu %[addr], %[addr], 0x40 \n" " li %[val], 0x7 \n" /* *state_addr = CPU_DEAD; */ " sw %[val], (%[state_addr]) \n" " sync \n" @@ -623,7 +678,8 @@ static void loongson3b_play_dead(int *state_addr) " .set pop \n" : [addr] "=&r" (addr), [val] "=&r" (val) : [state_addr] "r" (state_addr), - [sets] "r" (cpu_data[smp_processor_id()].dcache.sets)); + [sets] "r" (cpu_data[smp_processor_id()].dcache.sets), + [vsets] "r" (cpu_data[smp_processor_id()].vcache.sets)); __asm__ __volatile__( " .set push \n" @@ -638,8 +694,6 @@ static void loongson3b_play_dead(int *state_addr) " andi %[node], %[cpuid], 0xc \n" " dsll %[node], 42 \n" /* get node id */ " or %[base], %[base], %[node] \n" - " dsrl %[node], 30 \n" /* 15:14 */ - " or %[base], %[base], %[node] \n" "1: li %[count], 0x100 \n" /* wait for init loop */ "2: bnez %[count], 2b \n" /* limit mailbox access */ " addiu %[count], -1 \n" @@ -661,30 +715,42 @@ static void loongson3b_play_dead(int *state_addr) void play_dead(void) { - int *state_addr; + int prid_imp, prid_rev, *state_addr; unsigned int cpu = smp_processor_id(); void (*play_dead_at_ckseg1)(int *); idle_task_exit(); - switch (read_c0_prid() & PRID_REV_MASK) { + + prid_imp = read_c0_prid() & PRID_IMP_MASK; + prid_rev = read_c0_prid() & PRID_REV_MASK; + + if (prid_imp == PRID_IMP_LOONGSON_64G) { + play_dead_at_ckseg1 = + (void *)CKSEG1ADDR((unsigned long)loongson3_type3_play_dead); + goto out; + } + + switch (prid_rev) { case PRID_REV_LOONGSON3A_R1: default: play_dead_at_ckseg1 = - (void *)CKSEG1ADDR((unsigned long)loongson3a_r1_play_dead); + (void *)CKSEG1ADDR((unsigned long)loongson3_type1_play_dead); + break; + case PRID_REV_LOONGSON3B_R1: + case PRID_REV_LOONGSON3B_R2: + play_dead_at_ckseg1 = + (void *)CKSEG1ADDR((unsigned long)loongson3_type2_play_dead); break; case PRID_REV_LOONGSON3A_R2_0: case PRID_REV_LOONGSON3A_R2_1: case PRID_REV_LOONGSON3A_R3_0: case PRID_REV_LOONGSON3A_R3_1: play_dead_at_ckseg1 = - (void *)CKSEG1ADDR((unsigned long)loongson3a_r2r3_play_dead); - break; - case PRID_REV_LOONGSON3B_R1: - case PRID_REV_LOONGSON3B_R2: - play_dead_at_ckseg1 = - (void *)CKSEG1ADDR((unsigned long)loongson3b_play_dead); + (void *)CKSEG1ADDR((unsigned long)loongson3_type3_play_dead); break; } + +out: state_addr = &per_cpu(cpu_state, cpu); mb(); play_dead_at_ckseg1(state_addr); diff --git a/arch/mips/loongson64/loongson-3/smp.h b/arch/mips/loongson64/smp.h index 957bde81e0e4..957bde81e0e4 100644 --- a/arch/mips/loongson64/loongson-3/smp.h +++ b/arch/mips/loongson64/smp.h diff --git a/arch/mips/loongson64/time.c b/arch/mips/loongson64/time.c new file mode 100644 index 000000000000..1245f22cec84 --- /dev/null +++ b/arch/mips/loongson64/time.c @@ -0,0 +1,29 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Copyright (C) 2007 Lemote, Inc. & Institute of Computing Technology + * Author: Fuxin Zhang, zhangfx@lemote.com + * + * Copyright (C) 2009 Lemote Inc. 
+ * Author: Wu Zhangjin, wuzhangjin@gmail.com + */ +#include <asm/mc146818-time.h> +#include <asm/time.h> +#include <asm/hpet.h> + +#include <loongson.h> + +void __init plat_time_init(void) +{ + /* setup mips r4k timer */ + mips_hpt_frequency = cpu_clock_freq / 2; + +#ifdef CONFIG_RS780_HPET + setup_hpet_timer(); +#endif +} + +void read_persistent_clock64(struct timespec64 *ts) +{ + ts->tv_sec = mc146818_get_cmos_time(); + ts->tv_nsec = 0; +} diff --git a/arch/mips/math-emu/me-debugfs.c b/arch/mips/math-emu/me-debugfs.c index 387724860fa6..d5ad76b2bb67 100644 --- a/arch/mips/math-emu/me-debugfs.c +++ b/arch/mips/math-emu/me-debugfs.c @@ -189,6 +189,7 @@ static int __init debugfs_fpuemu(void) { struct dentry *fpuemu_debugfs_base_dir; struct dentry *fpuemu_debugfs_inst_dir; + char name[32]; fpuemu_debugfs_base_dir = debugfs_create_dir("fpuemustats", mips_debugfs_dir); @@ -225,8 +226,6 @@ do { \ #define FPU_STAT_CREATE_EX(m) \ do { \ - char name[32]; \ - \ adjust_instruction_counter_name(name, #m); \ \ debugfs_create_file(name, 0444, fpuemu_debugfs_inst_dir, \ diff --git a/arch/mips/mm/c-r3k.c b/arch/mips/mm/c-r3k.c index 0ca401ddf3b7..15bb8cf59828 100644 --- a/arch/mips/mm/c-r3k.c +++ b/arch/mips/mm/c-r3k.c @@ -241,6 +241,7 @@ static void r3k_flush_cache_page(struct vm_area_struct *vma, int exec = vma->vm_flags & VM_EXEC; struct mm_struct *mm = vma->vm_mm; pgd_t *pgdp; + p4d_t *p4dp; pud_t *pudp; pmd_t *pmdp; pte_t *ptep; @@ -253,7 +254,8 @@ static void r3k_flush_cache_page(struct vm_area_struct *vma, return; pgdp = pgd_offset(mm, addr); - pudp = pud_offset(pgdp, addr); + p4dp = p4d_offset(pgdp, addr); + pudp = pud_offset(p4dp, addr); pmdp = pmd_offset(pudp, addr); ptep = pte_offset(pmdp, addr); diff --git a/arch/mips/mm/c-r4k.c b/arch/mips/mm/c-r4k.c index 89b9c851d822..5f3d0103b95d 100644 --- a/arch/mips/mm/c-r4k.c +++ b/arch/mips/mm/c-r4k.c @@ -271,12 +271,14 @@ static inline void tx49_blast_icache32(void) /* I'm in even chunk. blast odd chunks */ for (ws = 0; ws < ws_end; ws += ws_inc) for (addr = start + 0x400; addr < end; addr += 0x400 * 2) - cache32_unroll32(addr|ws, Index_Invalidate_I); + cache_unroll(32, kernel_cache, Index_Invalidate_I, + addr | ws, 32); CACHE32_UNROLL32_ALIGN; /* I'm in odd chunk. blast even chunks */ for (ws = 0; ws < ws_end; ws += ws_inc) for (addr = start; addr < end; addr += 0x400 * 2) - cache32_unroll32(addr|ws, Index_Invalidate_I); + cache_unroll(32, kernel_cache, Index_Invalidate_I, + addr | ws, 32); } static inline void blast_icache32_r4600_v1_page_indexed(unsigned long page) @@ -302,12 +304,14 @@ static inline void tx49_blast_icache32_page_indexed(unsigned long page) /* I'm in even chunk. blast odd chunks */ for (ws = 0; ws < ws_end; ws += ws_inc) for (addr = start + 0x400; addr < end; addr += 0x400 * 2) - cache32_unroll32(addr|ws, Index_Invalidate_I); + cache_unroll(32, kernel_cache, Index_Invalidate_I, + addr | ws, 32); CACHE32_UNROLL32_ALIGN; /* I'm in odd chunk. 
blast even chunks */ for (ws = 0; ws < ws_end; ws += ws_inc) for (addr = start; addr < end; addr += 0x400 * 2) - cache32_unroll32(addr|ws, Index_Invalidate_I); + cache_unroll(32, kernel_cache, Index_Invalidate_I, + addr | ws, 32); } static void (* r4k_blast_icache_page)(unsigned long addr); @@ -320,7 +324,7 @@ static void r4k_blast_icache_page_setup(void) r4k_blast_icache_page = (void *)cache_noop; else if (ic_lsize == 16) r4k_blast_icache_page = blast_icache16_page; - else if (ic_lsize == 32 && current_cpu_type() == CPU_LOONGSON2) + else if (ic_lsize == 32 && current_cpu_type() == CPU_LOONGSON2EF) r4k_blast_icache_page = loongson2_blast_icache32_page; else if (ic_lsize == 32) r4k_blast_icache_page = blast_icache32_page; @@ -369,7 +373,7 @@ static void r4k_blast_icache_page_indexed_setup(void) else if (TX49XX_ICACHE_INDEX_INV_WAR) r4k_blast_icache_page_indexed = tx49_blast_icache32_page_indexed; - else if (current_cpu_type() == CPU_LOONGSON2) + else if (current_cpu_type() == CPU_LOONGSON2EF) r4k_blast_icache_page_indexed = loongson2_blast_icache32_page_indexed; else @@ -395,7 +399,7 @@ static void r4k_blast_icache_setup(void) r4k_blast_icache = blast_r4600_v1_icache32; else if (TX49XX_ICACHE_INDEX_INV_WAR) r4k_blast_icache = tx49_blast_icache32; - else if (current_cpu_type() == CPU_LOONGSON2) + else if (current_cpu_type() == CPU_LOONGSON2EF) r4k_blast_icache = loongson2_blast_icache32; else r4k_blast_icache = blast_icache32; @@ -465,7 +469,7 @@ static void r4k_blast_scache_node_setup(void) { unsigned long sc_lsize = cpu_scache_line_size(); - if (current_cpu_type() != CPU_LOONGSON3) + if (current_cpu_type() != CPU_LOONGSON64) r4k_blast_scache_node = (void *)cache_noop; else if (sc_lsize == 16) r4k_blast_scache_node = blast_scache16_node; @@ -480,7 +484,7 @@ static void r4k_blast_scache_node_setup(void) static inline void local_r4k___flush_cache_all(void * args) { switch (current_cpu_type()) { - case CPU_LOONGSON2: + case CPU_LOONGSON2EF: case CPU_R4000SC: case CPU_R4000MC: case CPU_R4400SC: @@ -497,7 +501,7 @@ static inline void local_r4k___flush_cache_all(void * args) r4k_blast_scache(); break; - case CPU_LOONGSON3: + case CPU_LOONGSON64: /* Use get_ebase_cpunum() for both NUMA=y/n */ r4k_blast_scache_node(get_ebase_cpunum() >> 2); break; @@ -650,6 +654,7 @@ static inline void local_r4k_flush_cache_page(void *args) struct mm_struct *mm = vma->vm_mm; int map_coherent = 0; pgd_t *pgdp; + p4d_t *p4dp; pud_t *pudp; pmd_t *pmdp; pte_t *ptep; @@ -664,7 +669,8 @@ static inline void local_r4k_flush_cache_page(void *args) addr &= PAGE_MASK; pgdp = pgd_offset(mm, addr); - pudp = pud_offset(pgdp, addr); + p4dp = p4d_offset(pgdp, addr); + pudp = pud_offset(p4dp, addr); pmdp = pmd_offset(pudp, addr); ptep = pte_offset(pmdp, addr); @@ -770,7 +776,7 @@ static inline void __local_r4k_flush_icache_range(unsigned long start, r4k_blast_icache(); else { switch (boot_cpu_type()) { - case CPU_LOONGSON2: + case CPU_LOONGSON2EF: protected_loongson2_blast_icache_range(start, end); break; @@ -863,7 +869,7 @@ static void r4k_dma_cache_wback_inv(unsigned long addr, unsigned long size) preempt_disable(); if (cpu_has_inclusive_pcaches) { if (size >= scache_size) { - if (current_cpu_type() != CPU_LOONGSON3) + if (current_cpu_type() != CPU_LOONGSON64) r4k_blast_scache(); else r4k_blast_scache_node(pa_to_nid(addr)); @@ -904,7 +910,7 @@ static void r4k_dma_cache_inv(unsigned long addr, unsigned long size) preempt_disable(); if (cpu_has_inclusive_pcaches) { if (size >= scache_size) { - if (current_cpu_type() != 
CPU_LOONGSON3) + if (current_cpu_type() != CPU_LOONGSON64) r4k_blast_scache(); else r4k_blast_scache_node(pa_to_nid(addr)); @@ -1224,7 +1230,7 @@ static void probe_pcache(void) c->options |= MIPS_CPU_PREFETCH; break; - case CPU_LOONGSON2: + case CPU_LOONGSON2EF: icache_size = 1 << (12 + ((config & CONF_IC) >> 9)); c->icache.linesz = 16 << ((config & CONF_IB) >> 5); if (prid & 0x3) @@ -1242,7 +1248,7 @@ static void probe_pcache(void) c->dcache.waybit = 0; break; - case CPU_LOONGSON3: + case CPU_LOONGSON64: config1 = read_c0_config1(); lsize = (config1 >> 19) & 7; if (lsize) @@ -1267,7 +1273,8 @@ static void probe_pcache(void) c->dcache.ways * c->dcache.linesz; c->dcache.waybit = 0; - if ((prid & PRID_REV_MASK) >= PRID_REV_LOONGSON3A_R2_0) + if ((c->processor_id & (PRID_IMP_MASK | PRID_REV_MASK)) >= + (PRID_IMP_LOONGSON_64C | PRID_REV_LOONGSON3A_R2_0)) c->options |= MIPS_CPU_PREFETCH; break; @@ -1452,7 +1459,7 @@ static void probe_pcache(void) c->dcache.flags &= ~MIPS_CACHE_ALIASES; break; - case CPU_LOONGSON2: + case CPU_LOONGSON2EF: /* * LOONGSON2 has 4 way icache, but when using indexed cache op, * one op will act on all 4 ways @@ -1478,7 +1485,7 @@ static void probe_vcache(void) struct cpuinfo_mips *c = &current_cpu_data; unsigned int config2, lsize; - if (current_cpu_type() != CPU_LOONGSON3) + if (current_cpu_type() != CPU_LOONGSON64) return; config2 = read_c0_config2(); @@ -1653,11 +1660,11 @@ static void setup_scache(void) #endif return; - case CPU_LOONGSON2: + case CPU_LOONGSON2EF: loongson2_sc_init(); return; - case CPU_LOONGSON3: + case CPU_LOONGSON64: loongson3_sc_init(); return; @@ -1926,7 +1933,7 @@ void r4k_cache_init(void) /* Optimization: an L2 flush implicitly flushes the L1 */ current_cpu_data.options |= MIPS_CPU_INCLUSIVE_CACHES; break; - case CPU_LOONGSON3: + case CPU_LOONGSON64: /* Loongson-3 maintains cache coherency by hardware */ __flush_cache_all = cache_noop; __flush_cache_vmap = cache_noop; diff --git a/arch/mips/mm/c-tx39.c b/arch/mips/mm/c-tx39.c index b7c8a9d79c35..686867270627 100644 --- a/arch/mips/mm/c-tx39.c +++ b/arch/mips/mm/c-tx39.c @@ -170,6 +170,7 @@ static void tx39_flush_cache_page(struct vm_area_struct *vma, unsigned long page int exec = vma->vm_flags & VM_EXEC; struct mm_struct *mm = vma->vm_mm; pgd_t *pgdp; + p4d_t *p4dp; pud_t *pudp; pmd_t *pmdp; pte_t *ptep; @@ -183,7 +184,8 @@ static void tx39_flush_cache_page(struct vm_area_struct *vma, unsigned long page page &= PAGE_MASK; pgdp = pgd_offset(mm, page); - pudp = pud_offset(pgdp, page); + p4dp = p4d_offset(pgdp, page); + pudp = pud_offset(p4dp, page); pmdp = pmd_offset(pudp, page); ptep = pte_offset(pmdp, page); diff --git a/arch/mips/mm/fault.c b/arch/mips/mm/fault.c index f589aa8f47d9..1e8d00793784 100644 --- a/arch/mips/mm/fault.c +++ b/arch/mips/mm/fault.c @@ -292,8 +292,9 @@ vmalloc_fault: * Do _not_ use "tsk" here. We might be inside * an interrupt in the middle of a task switch.. 
*/ - int offset = __pgd_offset(address); + int offset = pgd_index(address); pgd_t *pgd, *pgd_k; + p4d_t *p4d, *p4d_k; pud_t *pud, *pud_k; pmd_t *pmd, *pmd_k; pte_t *pte_k; @@ -305,8 +306,13 @@ vmalloc_fault: goto no_context; set_pgd(pgd, *pgd_k); - pud = pud_offset(pgd, address); - pud_k = pud_offset(pgd_k, address); + p4d = p4d_offset(pgd, address); + p4d_k = p4d_offset(pgd_k, address); + if (!p4d_present(*p4d_k)) + goto no_context; + + pud = pud_offset(p4d, address); + pud_k = pud_offset(p4d_k, address); if (!pud_present(*pud_k)) goto no_context; diff --git a/arch/mips/mm/hugetlbpage.c b/arch/mips/mm/hugetlbpage.c index cef152234312..77ffece9c270 100644 --- a/arch/mips/mm/hugetlbpage.c +++ b/arch/mips/mm/hugetlbpage.c @@ -25,11 +25,13 @@ pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr, unsigned long sz) { pgd_t *pgd; + p4d_t *p4d; pud_t *pud; pte_t *pte = NULL; pgd = pgd_offset(mm, addr); - pud = pud_alloc(mm, pgd, addr); + p4d = p4d_alloc(mm, pgd, addr); + pud = pud_alloc(mm, p4d, addr); if (pud) pte = (pte_t *)pmd_alloc(mm, pud, addr); @@ -40,14 +42,18 @@ pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr, unsigned long sz) { pgd_t *pgd; + p4d_t *p4d; pud_t *pud; pmd_t *pmd = NULL; pgd = pgd_offset(mm, addr); if (pgd_present(*pgd)) { - pud = pud_offset(pgd, addr); - if (pud_present(*pud)) - pmd = pmd_offset(pud, addr); + p4d = p4d_offset(pgd, addr); + if (p4d_present(*p4d)) { + pud = pud_offset(p4d, addr); + if (pud_present(*pud)) + pmd = pmd_offset(pud, addr); + } } return (pte_t *) pmd; } diff --git a/arch/mips/mm/init.c b/arch/mips/mm/init.c index 090fa653dfa9..50f9ed8c6c1b 100644 --- a/arch/mips/mm/init.c +++ b/arch/mips/mm/init.c @@ -239,9 +239,9 @@ void __init fixrange_init(unsigned long start, unsigned long end, unsigned long vaddr; vaddr = start; - i = __pgd_offset(vaddr); - j = __pud_offset(vaddr); - k = __pmd_offset(vaddr); + i = pgd_index(vaddr); + j = pud_index(vaddr); + k = pmd_index(vaddr); pgd = pgd_base + i; for ( ; (i < PTRS_PER_PGD) && (vaddr < end); pgd++, i++) { diff --git a/arch/mips/mm/ioremap.c b/arch/mips/mm/ioremap.c index 1601d90b087b..8317f337a86e 100644 --- a/arch/mips/mm/ioremap.c +++ b/arch/mips/mm/ioremap.c @@ -78,11 +78,15 @@ static int remap_area_pages(unsigned long address, phys_addr_t phys_addr, flush_cache_all(); BUG_ON(address >= end); do { + p4d_t *p4d; pud_t *pud; pmd_t *pmd; error = -ENOMEM; - pud = pud_alloc(&init_mm, dir, address); + p4d = p4d_alloc(&init_mm, dir, address); + if (!p4d) + break; + pud = pud_alloc(&init_mm, p4d, address); if (!pud) break; pmd = pmd_alloc(&init_mm, pud, address); diff --git a/arch/mips/mm/page.c b/arch/mips/mm/page.c index 56e4f8bffd4c..c5578897a4fa 100644 --- a/arch/mips/mm/page.c +++ b/arch/mips/mm/page.c @@ -187,7 +187,7 @@ static void set_prefetch_parameters(void) } break; - case CPU_LOONGSON3: + case CPU_LOONGSON64: /* Loongson-3 only support the Pref_Load/Pref_Store. 
*/ pref_bias_clear_store = 128; pref_bias_copy_load = 128; diff --git a/arch/mips/mm/pgtable-32.c b/arch/mips/mm/pgtable-32.c index 6416a531a4c3..37c7a01427d2 100644 --- a/arch/mips/mm/pgtable-32.c +++ b/arch/mips/mm/pgtable-32.c @@ -56,6 +56,7 @@ void __init pagetable_init(void) pgd_t *pgd_base; #ifdef CONFIG_HIGHMEM pgd_t *pgd; + p4d_t *p4d; pud_t *pud; pmd_t *pmd; pte_t *pte; @@ -81,8 +82,9 @@ void __init pagetable_init(void) vaddr = PKMAP_BASE; fixrange_init(vaddr & PMD_MASK, vaddr + PAGE_SIZE*LAST_PKMAP, pgd_base); - pgd = swapper_pg_dir + __pgd_offset(vaddr); - pud = pud_offset(pgd, vaddr); + pgd = swapper_pg_dir + pgd_index(vaddr); + p4d = p4d_offset(pgd, vaddr); + pud = pud_offset(p4d, vaddr); pmd = pmd_offset(pud, vaddr); pte = pte_offset_kernel(pmd, vaddr); pkmap_page_table = pte; diff --git a/arch/mips/mm/tlb-r4k.c b/arch/mips/mm/tlb-r4k.c index c13e46ced425..d7a9d5f211f0 100644 --- a/arch/mips/mm/tlb-r4k.c +++ b/arch/mips/mm/tlb-r4k.c @@ -35,10 +35,10 @@ extern void build_tlb_refill_handler(void); static inline void flush_micro_tlb(void) { switch (current_cpu_type()) { - case CPU_LOONGSON2: + case CPU_LOONGSON2EF: write_c0_diag(LOONGSON_DIAG_ITLB); break; - case CPU_LOONGSON3: + case CPU_LOONGSON64: write_c0_diag(LOONGSON_DIAG_ITLB | LOONGSON_DIAG_DTLB); break; default: @@ -295,6 +295,7 @@ void __update_tlb(struct vm_area_struct * vma, unsigned long address, pte_t pte) { unsigned long flags; pgd_t *pgdp; + p4d_t *p4dp; pud_t *pudp; pmd_t *pmdp; pte_t *ptep; @@ -320,7 +321,8 @@ void __update_tlb(struct vm_area_struct * vma, unsigned long address, pte_t pte) mtc0_tlbw_hazard(); tlb_probe(); tlb_probe_hazard(); - pudp = pud_offset(pgdp, address); + p4dp = p4d_offset(pgdp, address); + pudp = pud_offset(p4dp, address); pmdp = pmd_offset(pudp, address); idx = read_c0_index(); #ifdef CONFIG_MIPS_HUGE_TLB_SUPPORT diff --git a/arch/mips/mm/tlbex.c b/arch/mips/mm/tlbex.c index e01cb33bfa1a..344e6e9ea43b 100644 --- a/arch/mips/mm/tlbex.c +++ b/arch/mips/mm/tlbex.c @@ -571,8 +571,8 @@ void build_tlb_write_entry(u32 **p, struct uasm_label **l, case CPU_BMIPS4350: case CPU_BMIPS4380: case CPU_BMIPS5000: - case CPU_LOONGSON2: - case CPU_LOONGSON3: + case CPU_LOONGSON2EF: + case CPU_LOONGSON64: case CPU_R5500: if (m4kc_tlbp_war()) uasm_i_nop(p); @@ -653,6 +653,13 @@ static void build_restore_pagemask(u32 **p, struct uasm_reloc **r, int restore_scratch) { if (restore_scratch) { + /* + * Ensure the MFC0 below observes the value written to the + * KScratch register by the prior MTC0. + */ + if (scratch_reg >= 0) + uasm_i_ehb(p); + /* Reset default page size */ if (PM_DEFAULT_MASK >> 16) { uasm_i_lui(p, tmp, PM_DEFAULT_MASK >> 16); @@ -667,12 +674,10 @@ static void build_restore_pagemask(u32 **p, struct uasm_reloc **r, uasm_i_mtc0(p, 0, C0_PAGEMASK); uasm_il_b(p, r, lid); } - if (scratch_reg >= 0) { - uasm_i_ehb(p); + if (scratch_reg >= 0) UASM_i_MFC0(p, 1, c0_kscratch(), scratch_reg); - } else { + else UASM_i_LW(p, 1, scratchpad_offset(0), 0); - } } else { /* Reset default page size */ if (PM_DEFAULT_MASK >> 16) { @@ -921,6 +926,10 @@ build_get_pgd_vmalloc64(u32 **p, struct uasm_label **l, struct uasm_reloc **r, } if (mode != not_refill && check_for_high_segbits) { uasm_l_large_segbits_fault(l, *p); + + if (mode == refill_scratch && scratch_reg >= 0) + uasm_i_ehb(p); + /* * We get here if we are an xsseg address, or if we are * an xuseg address above (PGDIR_SHIFT+PGDIR_BITS) boundary. 
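[Editor's note] The page-table hunks in the preceding diffs (c-r4k.c, c-tx39.c, fault.c, hugetlbpage.c, init.c, ioremap.c, pgtable-32.c, tlb-r4k.c) all make the same mechanical change: insert the p4d level between pgd and pud so every walk follows the generic five-level sequence pgd -> p4d -> pud -> pmd -> pte, using p4d_offset()/p4d_alloc() and adding a p4d_present() check wherever a hole can legitimately occur (vmalloc faults, huge-page lookups). The sketch below is a stand-alone user-space model of that walk pattern only; the table geometry, helper names, and addresses are invented for illustration and are not the MIPS implementation.

/*
 * Toy model of the five-level walk the patch converts MIPS to:
 * pgd -> p4d -> pud -> pmd -> pte.  Sizes and shifts are illustrative.
 */
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

#define ENTRIES    16                  /* entries per level (toy value) */
#define PAGE_SHIFT 12
#define LEVEL_BITS 4                   /* log2(ENTRIES) */

/* index of 'addr' at a given level: 0 = pte table, 4 = pgd */
static unsigned int idx(uint64_t addr, int level)
{
	return (addr >> (PAGE_SHIFT + LEVEL_BITS * level)) & (ENTRIES - 1);
}

static void **new_table(void)
{
	return calloc(ENTRIES, sizeof(void *));
}

/* like p4d_alloc()/pud_alloc()/pmd_alloc(): allocate missing levels */
static void map(void **pgd, uint64_t addr, void *page)
{
	void **tbl = pgd;

	for (int level = 4; level > 0; level--) {
		if (!tbl[idx(addr, level)])
			tbl[idx(addr, level)] = new_table();
		tbl = tbl[idx(addr, level)];
	}
	tbl[idx(addr, 0)] = page;          /* install the "pte" */
}

/* like pgd_offset()/p4d_offset()/.../pte_offset(): walk, stop on a hole */
static void *lookup(void **pgd, uint64_t addr)
{
	void **tbl = pgd;

	for (int level = 4; level > 0; level--) {
		tbl = tbl[idx(addr, level)];
		if (!tbl)
			return NULL;       /* analogous to !p4d_present() etc. */
	}
	return tbl[idx(addr, 0)];
}

int main(void)
{
	void **pgd = new_table();
	static char page[1 << PAGE_SHIFT];

	map(pgd, 0xdeadbeef000ULL, page);
	printf("hit:  %p\n", lookup(pgd, 0xdeadbeef000ULL));
	printf("miss: %p\n", lookup(pgd, 0x1234000ULL));
	return 0;
}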
@@ -939,12 +948,10 @@ build_get_pgd_vmalloc64(u32 **p, struct uasm_label **l, struct uasm_reloc **r, uasm_i_jr(p, ptr); if (mode == refill_scratch) { - if (scratch_reg >= 0) { - uasm_i_ehb(p); + if (scratch_reg >= 0) UASM_i_MFC0(p, 1, c0_kscratch(), scratch_reg); - } else { + else UASM_i_LW(p, 1, scratchpad_offset(0), 0); - } } else { uasm_i_nop(p); } @@ -1370,7 +1377,7 @@ static void build_r4000_tlb_refill_handler(void) switch (boot_cpu_type()) { default: if (sizeof(long) == 4) { - case CPU_LOONGSON2: + case CPU_LOONGSON2EF: /* Loongson2 ebase is different than r4k, we have more space */ if ((p - tlb_handler) > 64) panic("TLB refill handler space exceeded"); diff --git a/arch/mips/oprofile/Makefile b/arch/mips/oprofile/Makefile index 011cf9f891e7..e10f216d0422 100644 --- a/arch/mips/oprofile/Makefile +++ b/arch/mips/oprofile/Makefile @@ -14,5 +14,5 @@ oprofile-$(CONFIG_CPU_MIPS64) += op_model_mipsxx.o oprofile-$(CONFIG_CPU_R10000) += op_model_mipsxx.o oprofile-$(CONFIG_CPU_SB1) += op_model_mipsxx.o oprofile-$(CONFIG_CPU_XLR) += op_model_mipsxx.o -oprofile-$(CONFIG_CPU_LOONGSON2) += op_model_loongson2.o -oprofile-$(CONFIG_CPU_LOONGSON3) += op_model_loongson3.o +oprofile-$(CONFIG_CPU_LOONGSON2EF) += op_model_loongson2.o +oprofile-$(CONFIG_CPU_LOONGSON64) += op_model_loongson3.o diff --git a/arch/mips/oprofile/common.c b/arch/mips/oprofile/common.c index 2f33992f6dff..03db268cba5c 100644 --- a/arch/mips/oprofile/common.c +++ b/arch/mips/oprofile/common.c @@ -93,7 +93,7 @@ int __init oprofile_arch_init(struct oprofile_operations *ops) case CPU_P5600: case CPU_I6400: case CPU_M5150: - case CPU_LOONGSON1: + case CPU_LOONGSON32: case CPU_SB1: case CPU_SB1A: case CPU_R10000: @@ -104,10 +104,10 @@ int __init oprofile_arch_init(struct oprofile_operations *ops) lmodel = &op_model_mipsxx_ops; break; - case CPU_LOONGSON2: + case CPU_LOONGSON2EF: lmodel = &op_model_loongson2_ops; break; - case CPU_LOONGSON3: + case CPU_LOONGSON64: lmodel = &op_model_loongson3_ops; break; }; diff --git a/arch/mips/oprofile/op_model_mipsxx.c b/arch/mips/oprofile/op_model_mipsxx.c index 96c13a0ab078..a537bf98912c 100644 --- a/arch/mips/oprofile/op_model_mipsxx.c +++ b/arch/mips/oprofile/op_model_mipsxx.c @@ -420,7 +420,7 @@ static int __init mipsxx_init(void) op_model_mipsxx_ops.cpu_type = "mips/sb1"; break; - case CPU_LOONGSON1: + case CPU_LOONGSON32: op_model_mipsxx_ops.cpu_type = "mips/loongson1"; break; diff --git a/arch/mips/pci/Makefile b/arch/mips/pci/Makefile index d6de4cb2e31c..342ce10ef593 100644 --- a/arch/mips/pci/Makefile +++ b/arch/mips/pci/Makefile @@ -35,7 +35,7 @@ obj-$(CONFIG_LASAT) += pci-lasat.o obj-$(CONFIG_MIPS_COBALT) += fixup-cobalt.o obj-$(CONFIG_LEMOTE_FULOONG2E) += fixup-fuloong2e.o ops-loongson2.o obj-$(CONFIG_LEMOTE_MACH2F) += fixup-lemote2f.o ops-loongson2.o -obj-$(CONFIG_LOONGSON_MACH3X) += fixup-loongson3.o ops-loongson3.o +obj-$(CONFIG_MACH_LOONGSON64) += fixup-loongson3.o ops-loongson3.o obj-$(CONFIG_MIPS_MALTA) += fixup-malta.o pci-malta.o obj-$(CONFIG_PMC_MSP7120_GW) += fixup-pmcmsp.o ops-pmcmsp.o obj-$(CONFIG_PMC_MSP7120_EVAL) += fixup-pmcmsp.o ops-pmcmsp.o diff --git a/arch/mips/pci/pci-ip27.c b/arch/mips/pci/pci-ip27.c index 441eb9383b20..8e26b120f994 100644 --- a/arch/mips/pci/pci-ip27.c +++ b/arch/mips/pci/pci-ip27.c @@ -7,21 +7,13 @@ * Copyright (C) 1999, 2000, 04 Ralf Baechle (ralf@linux-mips.org) * Copyright (C) 1999, 2000 Silicon Graphics, Inc. 
*/ +#include <asm/sn/addrs.h> +#include <asm/sn/types.h> +#include <asm/sn/klconfig.h> +#include <asm/sn/hub.h> +#include <asm/sn/ioc3.h> #include <asm/pci/bridge.h> -dma_addr_t __phys_to_dma(struct device *dev, phys_addr_t paddr) -{ - struct pci_dev *pdev = to_pci_dev(dev); - struct bridge_controller *bc = BRIDGE_CONTROLLER(pdev->bus); - - return bc->baddr + paddr; -} - -phys_addr_t __dma_to_phys(struct device *dev, dma_addr_t dma_addr) -{ - return dma_addr & ~(0xffUL << 56); -} - #ifdef CONFIG_NUMA int pcibus_to_node(struct pci_bus *bus) { @@ -31,3 +23,20 @@ int pcibus_to_node(struct pci_bus *bus) } EXPORT_SYMBOL(pcibus_to_node); #endif /* CONFIG_NUMA */ + +static void ip29_fixup_phy(struct pci_dev *dev) +{ + int nasid = pcibus_to_node(dev->bus); + u32 sid; + + if (nasid != 1) + return; /* only needed on second module */ + + /* enable ethernet PHY on IP29 systemboard */ + pci_read_config_dword(dev, PCI_SUBSYSTEM_VENDOR_ID, &sid); + if (sid == (PCI_VENDOR_ID_SGI | (IOC3_SUBSYS_IP29_SYSBOARD) << 16)) + REMOTE_HUB_S(nasid, MD_LED0, 0x09); +} + +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_SGI, PCI_DEVICE_ID_SGI_IOC3, + ip29_fixup_phy); diff --git a/arch/mips/pci/pci-xtalk-bridge.c b/arch/mips/pci/pci-xtalk-bridge.c index 7b4d40354ee7..5c1a196be0c5 100644 --- a/arch/mips/pci/pci-xtalk-bridge.c +++ b/arch/mips/pci/pci-xtalk-bridge.c @@ -11,16 +11,38 @@ #include <linux/dma-direct.h> #include <linux/platform_device.h> #include <linux/platform_data/xtalk-bridge.h> +#include <linux/nvmem-consumer.h> +#include <linux/crc16.h> #include <asm/pci/bridge.h> #include <asm/paccess.h> #include <asm/sn/irq_alloc.h> +#include <asm/sn/ioc3.h> + +#define CRC16_INIT 0 +#define CRC16_VALID 0xb001 + +/* + * Common phys<->dma mapping for platforms using pci xtalk bridge + */ +dma_addr_t __phys_to_dma(struct device *dev, phys_addr_t paddr) +{ + struct pci_dev *pdev = to_pci_dev(dev); + struct bridge_controller *bc = BRIDGE_CONTROLLER(pdev->bus); + + return bc->baddr + paddr; +} + +phys_addr_t __dma_to_phys(struct device *dev, dma_addr_t dma_addr) +{ + return dma_addr & ~(0xffUL << 56); +} /* * Most of the IOC3 PCI config register aren't present * we emulate what is needed for a normal PCI enumeration */ -static int ioc3_cfg_rd(void *addr, int where, int size, u32 *value) +static int ioc3_cfg_rd(void *addr, int where, int size, u32 *value, u32 sid) { u32 cf, shift, mask; @@ -30,6 +52,9 @@ static int ioc3_cfg_rd(void *addr, int where, int size, u32 *value) if (get_dbe(cf, (u32 *)addr)) return PCIBIOS_DEVICE_NOT_FOUND; break; + case 0x2c: + cf = sid; + break; case 0x3c: /* emulate sane interrupt pin value */ cf = 0x00000100; @@ -111,7 +136,8 @@ static int pci_conf0_read_config(struct pci_bus *bus, unsigned int devfn, */ if (cf == (PCI_VENDOR_ID_SGI | (PCI_DEVICE_ID_SGI_IOC3 << 16))) { addr = &bridge->b_type0_cfg_dev[slot].f[fn].l[where >> 2]; - return ioc3_cfg_rd(addr, where, size, value); + return ioc3_cfg_rd(addr, where, size, value, + bc->ioc3_sid[slot]); } addr = &bridge->b_type0_cfg_dev[slot].f[fn].c[where ^ (4 - size)]; @@ -149,7 +175,8 @@ static int pci_conf1_read_config(struct pci_bus *bus, unsigned int devfn, */ if (cf == (PCI_VENDOR_ID_SGI | (PCI_DEVICE_ID_SGI_IOC3 << 16))) { addr = &bridge->b_type1_cfg.c[(fn << 8) | (where & ~3)]; - return ioc3_cfg_rd(addr, where, size, value); + return ioc3_cfg_rd(addr, where, size, value, + bc->ioc3_sid[slot]); } addr = &bridge->b_type1_cfg.c[(fn << 8) | (where ^ (4 - size))]; @@ -279,16 +306,15 @@ static int bridge_set_affinity(struct irq_data *d, const struct cpumask *mask, 
struct bridge_irq_chip_data *data = d->chip_data; int bit = d->parent_data->hwirq; int pin = d->hwirq; - nasid_t nasid; int ret, cpu; ret = irq_chip_set_affinity_parent(d, mask, force); if (ret >= 0) { cpu = cpumask_first_and(mask, cpu_online_mask); - nasid = COMPACT_TO_NASID_NODEID(cpu_to_node(cpu)); + data->nasid = cpu_to_node(cpu); bridge_write(data->bc, b_int_addr[pin].addr, (((data->bc->intr_addr >> 30) & 0x30000) | - bit | (nasid << 8))); + bit | (data->nasid << 8))); bridge_read(data->bc, b_wid_tflush); } return ret; @@ -426,6 +452,117 @@ static int bridge_map_irq(const struct pci_dev *dev, u8 slot, u8 pin) return irq; } +#define IOC3_SID(sid) (PCI_VENDOR_ID_SGI | ((sid) << 16)) + +static void bridge_setup_ip27_baseio6g(struct bridge_controller *bc) +{ + bc->ioc3_sid[2] = IOC3_SID(IOC3_SUBSYS_IP27_BASEIO6G); + bc->ioc3_sid[6] = IOC3_SID(IOC3_SUBSYS_IP27_MIO); +} + +static void bridge_setup_ip27_baseio(struct bridge_controller *bc) +{ + bc->ioc3_sid[2] = IOC3_SID(IOC3_SUBSYS_IP27_BASEIO); +} + +static void bridge_setup_ip29_baseio(struct bridge_controller *bc) +{ + bc->ioc3_sid[2] = IOC3_SID(IOC3_SUBSYS_IP29_SYSBOARD); +} + +static void bridge_setup_ip30_sysboard(struct bridge_controller *bc) +{ + bc->ioc3_sid[2] = IOC3_SID(IOC3_SUBSYS_IP30_SYSBOARD); +} + +static void bridge_setup_menet(struct bridge_controller *bc) +{ + bc->ioc3_sid[0] = IOC3_SID(IOC3_SUBSYS_MENET); + bc->ioc3_sid[1] = IOC3_SID(IOC3_SUBSYS_MENET); + bc->ioc3_sid[2] = IOC3_SID(IOC3_SUBSYS_MENET); + bc->ioc3_sid[3] = IOC3_SID(IOC3_SUBSYS_MENET4); +} + +#define BRIDGE_BOARD_SETUP(_partno, _setup) \ + { .match = _partno, .setup = _setup } + +static const struct { + char *match; + void (*setup)(struct bridge_controller *bc); +} bridge_ioc3_devid[] = { + BRIDGE_BOARD_SETUP("030-0734-", bridge_setup_ip27_baseio6g), + BRIDGE_BOARD_SETUP("030-0880-", bridge_setup_ip27_baseio6g), + BRIDGE_BOARD_SETUP("030-1023-", bridge_setup_ip27_baseio), + BRIDGE_BOARD_SETUP("030-1124-", bridge_setup_ip27_baseio), + BRIDGE_BOARD_SETUP("030-1025-", bridge_setup_ip29_baseio), + BRIDGE_BOARD_SETUP("030-1244-", bridge_setup_ip29_baseio), + BRIDGE_BOARD_SETUP("030-1389-", bridge_setup_ip29_baseio), + BRIDGE_BOARD_SETUP("030-0887-", bridge_setup_ip30_sysboard), + BRIDGE_BOARD_SETUP("030-1467-", bridge_setup_ip30_sysboard), + BRIDGE_BOARD_SETUP("030-0873-", bridge_setup_menet), +}; + +static void bridge_setup_board(struct bridge_controller *bc, char *partnum) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(bridge_ioc3_devid); i++) + if (!strncmp(partnum, bridge_ioc3_devid[i].match, + strlen(bridge_ioc3_devid[i].match))) { + bridge_ioc3_devid[i].setup(bc); + } +} + +static int bridge_nvmem_match(struct device *dev, const void *data) +{ + const char *name = dev_name(dev); + const char *prefix = data; + + if (strlen(name) < strlen(prefix)) + return 0; + + return memcmp(prefix, dev_name(dev), strlen(prefix)) == 0; +} + +static int bridge_get_partnum(u64 baddr, char *partnum) +{ + struct nvmem_device *nvmem; + char prefix[24]; + u8 prom[64]; + int i, j; + int ret; + + snprintf(prefix, sizeof(prefix), "bridge-%012llx-0b-", baddr); + + nvmem = nvmem_device_find(prefix, bridge_nvmem_match); + if (IS_ERR(nvmem)) + return PTR_ERR(nvmem); + + ret = nvmem_device_read(nvmem, 0, 64, prom); + nvmem_device_put(nvmem); + + if (ret != 64) + return ret; + + if (crc16(CRC16_INIT, prom, 32) != CRC16_VALID || + crc16(CRC16_INIT, prom + 32, 32) != CRC16_VALID) + return -EINVAL; + + /* Assemble part number */ + j = 0; + for (i = 0; i < 19; i++) + if (prom[i + 11] != ' ') + 
partnum[j++] = prom[i + 11]; + + for (i = 0; i < 6; i++) + if (prom[i + 32] != ' ') + partnum[j++] = prom[i + 32]; + + partnum[j] = 0; + + return 0; +} + static int bridge_probe(struct platform_device *pdev) { struct xtalk_bridge_platform_data *bd = dev_get_platdata(&pdev->dev); @@ -434,9 +571,14 @@ static int bridge_probe(struct platform_device *pdev) struct pci_host_bridge *host; struct irq_domain *domain, *parent; struct fwnode_handle *fn; + char partnum[26]; int slot; int err; + /* get part number from one wire prom */ + if (bridge_get_partnum(virt_to_phys((void *)bd->bridge_addr), partnum)) + return -EPROBE_DEFER; /* not available yet */ + parent = irq_get_default_host(); if (!parent) return -ENODEV; @@ -517,6 +659,8 @@ static int bridge_probe(struct platform_device *pdev) } bridge_read(bc, b_wid_tflush); /* wait until Bridge PIO complete */ + bridge_setup_board(bc, partnum); + host->dev.parent = dev; host->sysdata = bc; host->busnr = 0; diff --git a/arch/mips/pmcs-msp71xx/msp_prom.c b/arch/mips/pmcs-msp71xx/msp_prom.c index dfb527961a27..800a21b8b8b0 100644 --- a/arch/mips/pmcs-msp71xx/msp_prom.c +++ b/arch/mips/pmcs-msp71xx/msp_prom.c @@ -61,6 +61,7 @@ int init_debug = 1; /* memory blocks */ struct prom_pmemblock mdesc[PROM_MAX_PMEMBLOCKS]; +#define MAX_PROM_MEM 5 static phys_addr_t prom_mem_base[MAX_PROM_MEM] __initdata; static phys_addr_t prom_mem_size[MAX_PROM_MEM] __initdata; static unsigned int nr_prom_mem __initdata; @@ -358,7 +359,7 @@ void __init prom_meminit(void) p++; if (type == BOOT_MEM_ROM_DATA) { - if (nr_prom_mem >= 5) { + if (nr_prom_mem >= MAX_PROM_MEM) { pr_err("Too many ROM DATA regions"); continue; } @@ -377,7 +378,6 @@ void __init prom_free_prom_memory(void) char *ptr; int len = 0; int i; - unsigned long addr; /* * preserve environment variables and command line from pmon/bbload diff --git a/arch/mips/power/cpu.c b/arch/mips/power/cpu.c index 3340a5530de3..a15e29dfc7b3 100644 --- a/arch/mips/power/cpu.c +++ b/arch/mips/power/cpu.c @@ -19,8 +19,8 @@ void save_processor_state(void) if (is_fpu_owner()) save_fp(current); - if (cpu_has_dsp) - save_dsp(current); + + save_dsp(current); } void restore_processor_state(void) @@ -29,8 +29,8 @@ void restore_processor_state(void) if (is_fpu_owner()) restore_fp(current); - if (cpu_has_dsp) - restore_dsp(current); + + restore_dsp(current); } int pfn_is_nosave(unsigned long pfn) diff --git a/arch/mips/sgi-ip22/ip22-mc.c b/arch/mips/sgi-ip22/ip22-mc.c index 1944d41507ef..74e5b9e27d6c 100644 --- a/arch/mips/sgi-ip22/ip22-mc.c +++ b/arch/mips/sgi-ip22/ip22-mc.c @@ -11,6 +11,7 @@ #include <linux/init.h> #include <linux/export.h> #include <linux/kernel.h> +#include <linux/memblock.h> #include <linux/spinlock.h> #include <asm/io.h> @@ -40,70 +41,36 @@ static inline unsigned int get_bank_config(int bank) return bank % 2 ? 
res & 0xffff : res >> 16; } -struct mem { - unsigned long addr; - unsigned long size; -}; - +#if defined(CONFIG_SGI_IP28) || defined(CONFIG_32BIT) +static void __init probe_memory(void) +{ + /* prom detects all usable memory */ +} +#else /* - * Detect installed memory, do some sanity checks and notify kernel about it + * Detect installed memory, which PROM misses */ static void __init probe_memory(void) { - int i, j, found, cnt = 0; - struct mem bank[4]; - struct mem space[2] = {{SGIMC_SEG0_BADDR, 0}, {SGIMC_SEG1_BADDR, 0}}; + unsigned long addr, size; + int i; printk(KERN_INFO "MC: Probing memory configuration:\n"); - for (i = 0; i < ARRAY_SIZE(bank); i++) { + for (i = 0; i < 4; i++) { unsigned int tmp = get_bank_config(i); if (!(tmp & SGIMC_MCONFIG_BVALID)) continue; - bank[cnt].size = get_bank_size(tmp); - bank[cnt].addr = get_bank_addr(tmp); + size = get_bank_size(tmp); + addr = get_bank_addr(tmp); printk(KERN_INFO " bank%d: %3ldM @ %08lx\n", - i, bank[cnt].size / 1024 / 1024, bank[cnt].addr); - cnt++; - } + i, size / 1024 / 1024, addr); - /* And you thought bubble sort is dead algorithm... */ - do { - unsigned long addr, size; - - found = 0; - for (i = 1; i < cnt; i++) - if (bank[i-1].addr > bank[i].addr) { - addr = bank[i].addr; - size = bank[i].size; - bank[i].addr = bank[i-1].addr; - bank[i].size = bank[i-1].size; - bank[i-1].addr = addr; - bank[i-1].size = size; - found = 1; - } - } while (found); - - /* Figure out how are memory banks mapped into spaces */ - for (i = 0; i < cnt; i++) { - found = 0; - for (j = 0; j < ARRAY_SIZE(space) && !found; j++) - if (space[j].addr + space[j].size == bank[i].addr) { - space[j].size += bank[i].size; - found = 1; - } - /* There is either hole or overlapping memory */ - if (!found) - printk(KERN_CRIT "MC: Memory configuration mismatch " - "(%08lx), expect Bus Error soon\n", - bank[i].addr); + if (addr >= SGIMC_SEG1_BADDR) + memblock_add(addr, size); } - - for (i = 0; i < ARRAY_SIZE(space); i++) - if (space[i].size) - add_memory_region(space[i].addr, space[i].size, - BOOT_MEM_RAM); } +#endif void __init sgimc_init(void) { @@ -205,10 +172,9 @@ void __init sgimc_init(void) probe_memory(); } -void __init prom_meminit(void) {} -void __init prom_free_prom_memory(void) -{ #ifdef CONFIG_SGI_IP28 +void __init prom_cleanup(void) +{ u32 mconfig1; unsigned long flags; spinlock_t lock; @@ -233,5 +199,5 @@ void __init prom_free_prom_memory(void) sgimc->mconfig1 = mconfig1; iob(); spin_unlock_irqrestore(&lock, flags); -#endif } +#endif diff --git a/arch/mips/sgi-ip27/Kconfig b/arch/mips/sgi-ip27/Kconfig index ef3847e7aee0..e5b6cadbec85 100644 --- a/arch/mips/sgi-ip27/Kconfig +++ b/arch/mips/sgi-ip27/Kconfig @@ -38,10 +38,3 @@ config REPLICATE_KTEXT Say Y here to enable replicating the kernel text across multiple nodes in a NUMA cluster. This trades memory for speed. -config REPLICATE_EXHANDLERS - bool "Exception handler replication support" - depends on SGI_IP27 - help - Say Y here to enable replicating the kernel exception handlers - across multiple nodes in a NUMA cluster. This trades memory for - speed. 
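[Editor's note] The pci-xtalk-bridge changes above teach the driver to identify the baseboard behind each bridge: bridge_probe() now defers until the one-wire PROM nvmem device is available, reads 64 bytes, checks both 32-byte halves with crc16() against CRC16_VALID, assembles a part number from the non-space characters of two fixed fields, and hands it to bridge_setup_board(), which prefix-matches it to preset per-slot IOC3 subsystem IDs. Below is a stand-alone sketch of just the part-number assembly and prefix match, assuming the field offsets used in the patch; the sample PROM contents and the shortened board list are invented, and the nvmem read and CRC step are left out.

/*
 * Sketch of bridge_get_partnum()'s part-number assembly and
 * bridge_setup_board()'s prefix match, on a fabricated PROM image.
 */
#include <stdio.h>
#include <string.h>

static void assemble_partnum(const unsigned char prom[64], char partnum[26])
{
	int i, j = 0;

	for (i = 0; i < 19; i++)            /* part-number field */
		if (prom[i + 11] != ' ')
			partnum[j++] = prom[i + 11];
	for (i = 0; i < 6; i++)             /* revision field */
		if (prom[i + 32] != ' ')
			partnum[j++] = prom[i + 32];
	partnum[j] = '\0';
}

/* abbreviated stand-in for the bridge_ioc3_devid[] match table */
static const char *board_prefixes[] = {
	"030-0734-", "030-1023-", "030-0887-", "030-0873-",
};

int main(void)
{
	unsigned char prom[64];
	char partnum[26];
	size_t i;

	/* fake PROM image: part "030-0887-003" and revision "B", space padded */
	memset(prom, ' ', sizeof(prom));
	memcpy(prom + 11, "030-0887-003", 12);
	prom[32] = 'B';

	assemble_partnum(prom, partnum);
	printf("part number: %s\n", partnum);

	for (i = 0; i < sizeof(board_prefixes) / sizeof(board_prefixes[0]); i++)
		if (!strncmp(partnum, board_prefixes[i],
			     strlen(board_prefixes[i])))
			printf("matched board prefix %s\n", board_prefixes[i]);
	return 0;
}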
diff --git a/arch/mips/sgi-ip27/ip27-common.h b/arch/mips/sgi-ip27/ip27-common.h new file mode 100644 index 000000000000..3ffbcf9bfd41 --- /dev/null +++ b/arch/mips/sgi-ip27/ip27-common.h @@ -0,0 +1,10 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ + +#ifndef __IP27_COMMON_H +#define __IP27_COMMON_H + +extern void ip27_reboot_setup(void); +extern void hub_rt_clock_event_init(void); +extern const struct plat_smp_ops ip27_smp_ops; + +#endif /* __IP27_COMMON_H */ diff --git a/arch/mips/sgi-ip27/ip27-hubio.c b/arch/mips/sgi-ip27/ip27-hubio.c index 6ebb8845a77c..a538d0ceb61d 100644 --- a/arch/mips/sgi-ip27/ip27-hubio.c +++ b/arch/mips/sgi-ip27/ip27-hubio.c @@ -25,10 +25,9 @@ static int force_fire_and_forget = 1; * @size: size of the PIO mapping * **/ -unsigned long hub_pio_map(cnodeid_t cnode, xwidgetnum_t widget, +unsigned long hub_pio_map(nasid_t nasid, xwidgetnum_t widget, unsigned long xtalk_addr, size_t size) { - nasid_t nasid = COMPACT_TO_NASID_NODEID(cnode); unsigned i; /* use small-window mapping if possible */ @@ -44,7 +43,7 @@ unsigned long hub_pio_map(cnodeid_t cnode, xwidgetnum_t widget, xtalk_addr &= ~(BWIN_SIZE-1); for (i = 0; i < HUB_NUM_BIG_WINDOW; i++) { - if (test_and_set_bit(i, hub_data(cnode)->h_bigwin_used)) + if (test_and_set_bit(i, hub_data(nasid)->h_bigwin_used)) continue; /* @@ -171,13 +170,12 @@ static void hub_set_piomode(nasid_t nasid) * * @hub: hubinfo structure for our hub */ -void hub_pio_init(cnodeid_t cnode) +void hub_pio_init(nasid_t nasid) { - nasid_t nasid = COMPACT_TO_NASID_NODEID(cnode); unsigned i; /* initialize big window piomaps for this hub */ - bitmap_zero(hub_data(cnode)->h_bigwin_used, HUB_NUM_BIG_WINDOW); + bitmap_zero(hub_data(nasid)->h_bigwin_used, HUB_NUM_BIG_WINDOW); for (i = 0; i < HUB_NUM_BIG_WINDOW; i++) IIO_ITTE_DISABLE(nasid, i); diff --git a/arch/mips/sgi-ip27/ip27-init.c b/arch/mips/sgi-ip27/ip27-init.c index 59d5375c9021..f597e1ee2df7 100644 --- a/arch/mips/sgi-ip27/ip27-init.c +++ b/arch/mips/sgi-ip27/ip27-init.c @@ -13,9 +13,11 @@ #include <linux/mm.h> #include <linux/export.h> #include <linux/cpumask.h> +#include <asm/bootinfo.h> #include <asm/cpu.h> #include <asm/io.h> #include <asm/pgtable.h> +#include <asm/sgialib.h> #include <asm/time.h> #include <asm/sn/types.h> #include <asm/sn/sn0/addrs.h> @@ -36,30 +38,23 @@ #include <asm/sn/sn0/ip27.h> #include <asm/sn/mapped_kernel.h> +#include "ip27-common.h" + #define CPU_NONE (cpuid_t)-1 -static DECLARE_BITMAP(hub_init_mask, MAX_COMPACT_NODES); +static DECLARE_BITMAP(hub_init_mask, MAX_NUMNODES); nasid_t master_nasid = INVALID_NASID; -cnodeid_t nasid_to_compact_node[MAX_NASIDS]; -nasid_t compact_to_nasid_node[MAX_COMPACT_NODES]; -cnodeid_t cpuid_to_compact_node[MAXCPUS]; - -EXPORT_SYMBOL(nasid_to_compact_node); - struct cpuinfo_ip27 sn_cpu_info[NR_CPUS]; EXPORT_SYMBOL_GPL(sn_cpu_info); -extern void pcibr_setup(cnodeid_t); - -static void per_hub_init(cnodeid_t cnode) +static void per_hub_init(nasid_t nasid) { - struct hub_data *hub = hub_data(cnode); - nasid_t nasid = COMPACT_TO_NASID_NODEID(cnode); + struct hub_data *hub = hub_data(nasid); cpumask_set_cpu(smp_processor_id(), &hub->h_cpus); - if (test_and_set_bit(cnode, hub_init_mask)) + if (test_and_set_bit(nasid, hub_init_mask)) return; /* * Set CRB timeout at 5ms, (< PI timeout of 10ms) @@ -67,40 +62,31 @@ static void per_hub_init(cnodeid_t cnode) REMOTE_HUB_S(nasid, IIO_ICTP, 0x800); REMOTE_HUB_S(nasid, IIO_ICTO, 0xff); - hub_rtc_init(cnode); + hub_rtc_init(nasid); -#ifdef CONFIG_REPLICATE_EXHANDLERS - /* - * If this is not a headless 
node initialization, - * copy over the caliased exception handlers. - */ - if (get_compact_nodeid() == cnode) { - extern char except_vec2_generic, except_vec3_generic; - extern void build_tlb_refill_handler(void); - - memcpy((void *)(CKSEG0 + 0x100), &except_vec2_generic, 0x80); - memcpy((void *)(CKSEG0 + 0x180), &except_vec3_generic, 0x80); - build_tlb_refill_handler(); - memcpy((void *)(CKSEG0 + 0x100), (void *) CKSEG0, 0x80); - memcpy((void *)(CKSEG0 + 0x180), &except_vec3_generic, 0x100); + if (nasid) { + /* copy exception handlers from first node to current node */ + memcpy((void *)NODE_OFFSET_TO_K0(nasid, 0), + (void *)CKSEG0, 0x200); __flush_cache_all(); + /* switch to node local exception handlers */ + REMOTE_HUB_S(nasid, PI_CALIAS_SIZE, PI_CALIAS_SIZE_8K); } -#endif } void per_cpu_init(void) { int cpu = smp_processor_id(); int slice = LOCAL_HUB_L(PI_CPU_NUM); - cnodeid_t cnode = get_compact_nodeid(); - struct hub_data *hub = hub_data(cnode); + nasid_t nasid = get_nasid(); + struct hub_data *hub = hub_data(nasid); if (test_and_set_bit(slice, &hub->slice_map)) return; clear_c0_status(ST0_IM); - per_hub_init(cnode); + per_hub_init(nasid); cpu_time_init(); install_ipi(); @@ -122,21 +108,13 @@ get_nasid(void) >> NSRI_NODEID_SHFT); } -/* - * Map the physical node id to a virtual node id (virtual node ids are contiguous). - */ -cnodeid_t get_compact_nodeid(void) -{ - return NASID_TO_COMPACT_NODEID(get_nasid()); -} - -extern void ip27_reboot_setup(void); - void __init plat_mem_setup(void) { u64 p, e, n_mode; nasid_t nid; + register_smp_ops(&ip27_smp_ops); + ip27_reboot_setup(); /* @@ -175,3 +153,15 @@ void __init plat_mem_setup(void) ioport_resource.end = ~0UL; set_io_port_base(IO_BASE); } + +const char *get_system_type(void) +{ + return "SGI Origin"; +} + +void __init prom_init(void) +{ + prom_init_cmdline(fw_arg0, (LONG *)fw_arg1); + prom_meminit(); +} + diff --git a/arch/mips/sgi-ip27/ip27-irq.c b/arch/mips/sgi-ip27/ip27-irq.c index 37be04975831..c72ae330ea93 100644 --- a/arch/mips/sgi-ip27/ip27-irq.c +++ b/arch/mips/sgi-ip27/ip27-irq.c @@ -73,7 +73,10 @@ static void setup_hub_mask(struct hub_irq_data *hd, const struct cpumask *mask) int cpu; cpu = cpumask_first_and(mask, cpu_online_mask); - nasid = COMPACT_TO_NASID_NODEID(cpu_to_node(cpu)); + if (cpu >= nr_cpu_ids) + cpu = cpumask_any(cpu_online_mask); + + nasid = cpu_to_node(cpu); hd->cpu = cpu; if (!cputoslice(cpu)) { hd->irq_mask[0] = REMOTE_HUB_PTR(nasid, PI_INT_MASK0_A); @@ -137,8 +140,9 @@ static int hub_domain_alloc(struct irq_domain *domain, unsigned int virq, handle_level_irq, NULL, NULL); /* use CPU connected to nearest hub */ - hub = hub_data(NASID_TO_COMPACT_NODEID(info->nasid)); + hub = hub_data(info->nasid); setup_hub_mask(hd, &hub->h_cpus); + info->nasid = cpu_to_node(hd->cpu); /* Make sure it's not already pending when we connect it. 
*/ REMOTE_HUB_CLR_INTR(info->nasid, swlevel); diff --git a/arch/mips/sgi-ip27/ip27-klconfig.c b/arch/mips/sgi-ip27/ip27-klconfig.c index 41171ff0c75e..6cb2160e7689 100644 --- a/arch/mips/sgi-ip27/ip27-klconfig.c +++ b/arch/mips/sgi-ip27/ip27-klconfig.c @@ -73,11 +73,6 @@ lboard_t *find_lboard_class(lboard_t *start, unsigned char brd_type) return (lboard_t *)NULL; } -cnodeid_t get_cpu_cnode(cpuid_t cpu) -{ - return CPUID_TO_COMPACT_NODEID(cpu); -} - klcpu_t *nasid_slice_to_cpuinfo(nasid_t nasid, int slice) { lboard_t *brd; @@ -102,19 +97,14 @@ klcpu_t *sn_get_cpuinfo(cpuid_t cpu) nasid_t nasid; int slice; klcpu_t *acpu; - gda_t *gdap = GDA; - cnodeid_t cnode; if (!(cpu < MAXCPUS)) { printk("sn_get_cpuinfo: illegal cpuid 0x%lx\n", cpu); return NULL; } - cnode = get_cpu_cnode(cpu); - if (cnode == INVALID_CNODEID) - return NULL; - - if ((nasid = gdap->g_nasidtable[cnode]) == INVALID_NASID) + nasid = cputonasid(cpu); + if (nasid == INVALID_NASID) return NULL; for (slice = 0; slice < CPUS_PER_NODE; slice++) { diff --git a/arch/mips/sgi-ip27/ip27-klnuma.c b/arch/mips/sgi-ip27/ip27-klnuma.c index a4f01328de62..ee1c6ff4aa00 100644 --- a/arch/mips/sgi-ip27/ip27-klnuma.c +++ b/arch/mips/sgi-ip27/ip27-klnuma.c @@ -38,13 +38,13 @@ void __init setup_replication_mask(void) #error Kernel replication works with mapped kernel support. No calias support. #endif { - cnodeid_t cnode; + nasid_t nasid; - for_each_online_node(cnode) { - if (cnode == 0) + for_each_online_node(nasid) { + if (nasid == 0) continue; /* Advertise that we have a copy of the kernel */ - cpumask_set_cpu(cnode, &ktext_repmask); + cpumask_set_cpu(nasid, &ktext_repmask); } } #endif @@ -85,7 +85,6 @@ static __init void copy_kernel(nasid_t dest_nasid) void __init replicate_kernel_text(void) { - cnodeid_t cnode; nasid_t client_nasid; nasid_t server_nasid; @@ -94,13 +93,12 @@ void __init replicate_kernel_text(void) /* Record where the master node should get its kernel text */ set_ktext_source(master_nasid, master_nasid); - for_each_online_node(cnode) { - if (cnode == 0) + for_each_online_node(client_nasid) { + if (client_nasid == 0) continue; - client_nasid = COMPACT_TO_NASID_NODEID(cnode); /* Check if this node should get a copy of the kernel */ - if (cpumask_test_cpu(cnode, &ktext_repmask)) { + if (cpumask_test_cpu(client_nasid, &ktext_repmask)) { server_nasid = client_nasid; copy_kernel(server_nasid); } @@ -115,17 +113,16 @@ void __init replicate_kernel_text(void) * data structures on the first couple of pages of the first slot of each * node. If this is the case, getfirstfree(node) > getslotstart(node, 0). 
*/ -unsigned long node_getfirstfree(cnodeid_t cnode) +unsigned long node_getfirstfree(nasid_t nasid) { unsigned long loadbase = REP_BASE; - nasid_t nasid = COMPACT_TO_NASID_NODEID(cnode); unsigned long offset; #ifdef CONFIG_MAPPED_KERNEL loadbase += 16777216; #endif offset = PAGE_ALIGN((unsigned long)(&_end)) - loadbase; - if ((cnode == 0) || (cpumask_test_cpu(cnode, &ktext_repmask))) + if ((nasid == 0) || (cpumask_test_cpu(nasid, &ktext_repmask))) return TO_NODE(nasid, offset) >> PAGE_SHIFT; else return KDM_TO_PHYS(PAGE_ALIGN(SYMMON_STK_ADDR(nasid, 0))) >> PAGE_SHIFT; diff --git a/arch/mips/sgi-ip27/ip27-memory.c b/arch/mips/sgi-ip27/ip27-memory.c index fb077a947575..563aad5e6398 100644 --- a/arch/mips/sgi-ip27/ip27-memory.c +++ b/arch/mips/sgi-ip27/ip27-memory.c @@ -33,7 +33,7 @@ #define SLOT_PFNSHIFT (SLOT_SHIFT - PAGE_SHIFT) #define PFN_NASIDSHFT (NASID_SHFT - PAGE_SHIFT) -struct node_data *__node_data[MAX_COMPACT_NODES]; +struct node_data *__node_data[MAX_NUMNODES]; EXPORT_SYMBOL(__node_data); @@ -44,23 +44,23 @@ static int is_fine_dirmode(void) return ((LOCAL_HUB_L(NI_STATUS_REV_ID) & NSRI_REGIONSIZE_MASK) >> NSRI_REGIONSIZE_SHFT) & REGIONSIZE_FINE; } -static u64 get_region(cnodeid_t cnode) +static u64 get_region(nasid_t nasid) { if (fine_mode) - return COMPACT_TO_NASID_NODEID(cnode) >> NASID_TO_FINEREG_SHFT; + return nasid >> NASID_TO_FINEREG_SHFT; else - return COMPACT_TO_NASID_NODEID(cnode) >> NASID_TO_COARSEREG_SHFT; + return nasid >> NASID_TO_COARSEREG_SHFT; } static u64 region_mask; static void gen_region_mask(u64 *region_mask) { - cnodeid_t cnode; + nasid_t nasid; (*region_mask) = 0; - for_each_online_node(cnode) { - (*region_mask) |= 1ULL << get_region(cnode); + for_each_online_node(nasid) { + (*region_mask) |= 1ULL << get_region(nasid); } } @@ -104,23 +104,18 @@ static void router_recurse(klrou_t *router_a, klrou_t *router_b, int depth) router_a->rou_rflag = 0; } -unsigned char __node_distances[MAX_COMPACT_NODES][MAX_COMPACT_NODES]; +unsigned char __node_distances[MAX_NUMNODES][MAX_NUMNODES]; EXPORT_SYMBOL(__node_distances); static int __init compute_node_distance(nasid_t nasid_a, nasid_t nasid_b) { klrou_t *router, *router_a = NULL, *router_b = NULL; lboard_t *brd, *dest_brd; - cnodeid_t cnode; nasid_t nasid; int port; /* Figure out which routers nodes in question are connected to */ - for_each_online_node(cnode) { - nasid = COMPACT_TO_NASID_NODEID(cnode); - - if (nasid == -1) continue; - + for_each_online_node(nasid) { brd = find_lboard_class((lboard_t *)KL_CONFIG_INFO(nasid), KLTYPE_ROUTER); @@ -176,19 +171,16 @@ static int __init compute_node_distance(nasid_t nasid_a, nasid_t nasid_b) static void __init init_topology_matrix(void) { - nasid_t nasid, nasid2; - cnodeid_t row, col; + nasid_t row, col; - for (row = 0; row < MAX_COMPACT_NODES; row++) - for (col = 0; col < MAX_COMPACT_NODES; col++) + for (row = 0; row < MAX_NUMNODES; row++) + for (col = 0; col < MAX_NUMNODES; col++) __node_distances[row][col] = -1; for_each_online_node(row) { - nasid = COMPACT_TO_NASID_NODEID(row); for_each_online_node(col) { - nasid2 = COMPACT_TO_NASID_NODEID(col); __node_distances[row][col] = - compute_node_distance(nasid, nasid2); + compute_node_distance(row, col); } } } @@ -196,12 +188,11 @@ static void __init init_topology_matrix(void) static void __init dump_topology(void) { nasid_t nasid; - cnodeid_t cnode; lboard_t *brd, *dest_brd; int port; int router_num = 0; klrou_t *router; - cnodeid_t row, col; + nasid_t row, col; pr_info("************** Topology ********************\n"); @@ 
-216,11 +207,7 @@ static void __init dump_topology(void) pr_cont("\n"); } - for_each_online_node(cnode) { - nasid = COMPACT_TO_NASID_NODEID(cnode); - - if (nasid == -1) continue; - + for_each_online_node(nasid) { brd = find_lboard_class((lboard_t *)KL_CONFIG_INFO(nasid), KLTYPE_ROUTER); @@ -254,21 +241,17 @@ static void __init dump_topology(void) } } -static unsigned long __init slot_getbasepfn(cnodeid_t cnode, int slot) +static unsigned long __init slot_getbasepfn(nasid_t nasid, int slot) { - nasid_t nasid = COMPACT_TO_NASID_NODEID(cnode); - return ((unsigned long)nasid << PFN_NASIDSHFT) | (slot << SLOT_PFNSHIFT); } -static unsigned long __init slot_psize_compute(cnodeid_t node, int slot) +static unsigned long __init slot_psize_compute(nasid_t nasid, int slot) { - nasid_t nasid; lboard_t *brd; klmembnk_t *banks; unsigned long size; - nasid = COMPACT_TO_NASID_NODEID(node); /* Find the node board */ brd = find_lboard((lboard_t *)KL_CONFIG_INFO(nasid), KLTYPE_IP27); if (!brd) @@ -298,7 +281,7 @@ static unsigned long __init slot_psize_compute(cnodeid_t node, int slot) static void __init mlreset(void) { - int i; + nasid_t nasid; master_nasid = get_nasid(); fine_mode = is_fine_dirmode(); @@ -321,22 +304,14 @@ static void __init mlreset(void) /* * Set all nodes' calias sizes to 8k */ - for_each_online_node(i) { - nasid_t nasid; - - nasid = COMPACT_TO_NASID_NODEID(i); - + for_each_online_node(nasid) { /* * Always have node 0 in the region mask, otherwise * CALIAS accesses get exceptions since the hub * thinks it is a node 0 address. */ REMOTE_HUB_S(nasid, PI_REGION_PRESENT, (region_mask | 1)); -#ifdef CONFIG_REPLICATE_EXHANDLERS - REMOTE_HUB_S(nasid, PI_CALIAS_SIZE, PI_CALIAS_SIZE_8K); -#else REMOTE_HUB_S(nasid, PI_CALIAS_SIZE, PI_CALIAS_SIZE_0); -#endif #ifdef LATER /* @@ -354,7 +329,7 @@ static void __init szmem(void) { unsigned long slot_psize, slot0sz = 0, nodebytes; /* Hack to detect problem configs */ int slot; - cnodeid_t node; + nasid_t node; for_each_online_node(node) { nodebytes = 0; @@ -384,7 +359,7 @@ static void __init szmem(void) } } -static void __init node_mem_init(cnodeid_t node) +static void __init node_mem_init(nasid_t node) { unsigned long slot_firstpfn = slot_getbasepfn(node, 0); unsigned long slot_freepfn = node_getfirstfree(node); @@ -406,12 +381,8 @@ static void __init node_mem_init(cnodeid_t node) slot_freepfn += PFN_UP(sizeof(struct pglist_data) + sizeof(struct hub_data)); - free_bootmem_with_active_regions(node, end_pfn); - memblock_reserve(slot_firstpfn << PAGE_SHIFT, ((slot_freepfn - slot_firstpfn) << PAGE_SHIFT)); - - sparse_memory_present_with_active_regions(node); } /* @@ -431,19 +402,21 @@ static struct node_data null_node = { */ void __init prom_meminit(void) { - cnodeid_t node; + nasid_t node; mlreset(); szmem(); max_low_pfn = PHYS_PFN(memblock_end_of_DRAM()); - for (node = 0; node < MAX_COMPACT_NODES; node++) { + for (node = 0; node < MAX_NUMNODES; node++) { if (node_online(node)) { node_mem_init(node); continue; } __node_data[node] = &null_node; } + + memblocks_present(); } void __init prom_free_prom_memory(void) diff --git a/arch/mips/sgi-ip27/ip27-nmi.c b/arch/mips/sgi-ip27/ip27-nmi.c index 3aae388561d9..daf3670d94e7 100644 --- a/arch/mips/sgi-ip27/ip27-nmi.c +++ b/arch/mips/sgi-ip27/ip27-nmi.c @@ -17,8 +17,6 @@ #define NODE_NUM_CPUS(n) CPUS_PER_NODE #endif -#define CNODEID_NONE (cnodeid_t)-1 - typedef unsigned long machreg_t; static arch_spinlock_t nmi_lock = __ARCH_SPIN_LOCK_UNLOCKED; @@ -152,16 +150,10 @@ void nmi_dump_hub_irq(nasid_t nasid, int slice) * 
Copy the cpu registers which have been saved in the IP27prom format * into the eframe format for the node under consideration. */ -void nmi_node_eframe_save(cnodeid_t cnode) +void nmi_node_eframe_save(nasid_t nasid) { - nasid_t nasid; int slice; - /* Make sure that we have a valid node */ - if (cnode == CNODEID_NONE) - return; - - nasid = COMPACT_TO_NASID_NODEID(cnode); if (nasid == INVALID_NASID) return; @@ -178,10 +170,10 @@ void nmi_node_eframe_save(cnodeid_t cnode) void nmi_eframes_save(void) { - cnodeid_t cnode; + nasid_t nasid; - for_each_online_node(cnode) - nmi_node_eframe_save(cnode); + for_each_online_node(nasid) + nmi_node_eframe_save(nasid); } void diff --git a/arch/mips/sgi-ip27/ip27-reset.c b/arch/mips/sgi-ip27/ip27-reset.c index e44a15d4f573..74d078247e49 100644 --- a/arch/mips/sgi-ip27/ip27-reset.c +++ b/arch/mips/sgi-ip27/ip27-reset.c @@ -26,6 +26,8 @@ #include <asm/sn/gda.h> #include <asm/sn/sn0/hub.h> +#include "ip27-common.h" + void machine_restart(char *command) __noreturn; void machine_halt(void) __noreturn; void machine_power_off(void) __noreturn; @@ -45,8 +47,7 @@ static void ip27_machine_restart(char *command) #endif #if 0 for_each_online_node(i) - REMOTE_HUB_S(COMPACT_TO_NASID_NODEID(i), PROMOP_REG, - PROMOP_REBOOT); + REMOTE_HUB_S(i, PROMOP_REG, PROMOP_REBOOT); #else LOCAL_HUB_S(NI_PORT_RESET, NPR_PORTRESET | NPR_LOCALRESET); #endif @@ -61,8 +62,7 @@ static void ip27_machine_halt(void) smp_send_stop(); #endif for_each_online_node(i) - REMOTE_HUB_S(COMPACT_TO_NASID_NODEID(i), PROMOP_REG, - PROMOP_RESTART); + REMOTE_HUB_S(i, PROMOP_REG, PROMOP_RESTART); LOCAL_HUB_S(NI_PORT_RESET, NPR_PORTRESET | NPR_LOCALRESET); noreturn; } diff --git a/arch/mips/sgi-ip27/ip27-smp.c b/arch/mips/sgi-ip27/ip27-smp.c index 20b81209c6b8..faa0244c8b0c 100644 --- a/arch/mips/sgi-ip27/ip27-smp.c +++ b/arch/mips/sgi-ip27/ip27-smp.c @@ -27,38 +27,19 @@ #include <asm/sn/sn0/hubio.h> #include <asm/sn/sn0/ip27.h> +#include "ip27-common.h" + /* * Takes as first input the PROM assigned cpu id, and the kernel * assigned cpu id as the second. */ -static void alloc_cpupda(cpuid_t cpu, int cpunum) +static void alloc_cpupda(nasid_t nasid, cpuid_t cpu, int cpunum) { - cnodeid_t node = get_cpu_cnode(cpu); - nasid_t nasid = COMPACT_TO_NASID_NODEID(node); - cputonasid(cpunum) = nasid; - sn_cpu_info[cpunum].p_nodeid = node; cputoslice(cpunum) = get_cpu_slice(cpu); } -static nasid_t get_actual_nasid(lboard_t *brd) -{ - klhub_t *hub; - - if (!brd) - return INVALID_NASID; - - /* find out if we are a completely disabled brd. 
*/ - hub = (klhub_t *)find_first_component(brd, KLSTRUCT_HUB); - if (!hub) - return INVALID_NASID; - if (!(hub->hub_info.flags & KLINFO_ENABLE)) /* disabled node brd */ - return hub->hub_info.physid; - else - return brd->brd_nasid; -} - -static int do_cpumask(cnodeid_t cnode, nasid_t nasid, int highest) +static int do_cpumask(nasid_t nasid, int highest) { static int tot_cpus_found = 0; lboard_t *brd; @@ -72,16 +53,13 @@ static int do_cpumask(cnodeid_t cnode, nasid_t nasid, int highest) acpu = (klcpu_t *)find_first_component(brd, KLSTRUCT_CPU); while (acpu) { cpuid = acpu->cpu_info.virtid; - /* cnode is not valid for completely disabled brds */ - if (get_actual_nasid(brd) == brd->brd_nasid) - cpuid_to_compact_node[cpuid] = cnode; - if (cpuid > highest) - highest = cpuid; /* Only let it join in if it's marked enabled */ if ((acpu->cpu_info.flags & KLINFO_ENABLE) && (tot_cpus_found != NR_CPUS)) { + if (cpuid > highest) + highest = cpuid; set_cpu_possible(cpuid, true); - alloc_cpupda(cpuid, tot_cpus_found); + alloc_cpupda(nasid, cpuid, tot_cpus_found); cpus_found++; tot_cpus_found++; } @@ -103,29 +81,13 @@ void cpu_node_probe(void) int i, highest = 0; gda_t *gdap = GDA; - /* - * Initialize the arrays to invalid nodeid (-1) - */ - for (i = 0; i < MAX_COMPACT_NODES; i++) - compact_to_nasid_node[i] = INVALID_NASID; - for (i = 0; i < MAX_NASIDS; i++) - nasid_to_compact_node[i] = INVALID_CNODEID; - for (i = 0; i < MAXCPUS; i++) - cpuid_to_compact_node[i] = INVALID_CNODEID; - - /* - * MCD - this whole "compact node" stuff can probably be dropped, - * as we can handle sparse numbering now - */ nodes_clear(node_online_map); - for (i = 0; i < MAX_COMPACT_NODES; i++) { + for (i = 0; i < MAX_NUMNODES; i++) { nasid_t nasid = gdap->g_nasidtable[i]; if (nasid == INVALID_NASID) break; - compact_to_nasid_node[i] = nasid; - nasid_to_compact_node[nasid] = i; - node_set_online(num_online_nodes()); - highest = do_cpumask(i, nasid, highest); + node_set_online(nasid); + highest = do_cpumask(nasid, highest); } printk("Discovered %d cpus on %d nodes\n", highest + 1, num_online_nodes()); @@ -162,11 +124,10 @@ static void ip27_send_ipi_single(int destid, unsigned int action) irq += cputoslice(destid); /* - * Convert the compact hub number to the NASID to get the correct - * part of the address space. Then set the interrupt bit associated - * with the CPU we want to send the interrupt to. + * Set the interrupt bit associated with the CPU we want to + * send the interrupt to. */ - REMOTE_HUB_SEND_INTR(COMPACT_TO_NASID_NODEID(cpu_to_node(destid)), irq); + REMOTE_HUB_SEND_INTR(cpu_to_node(destid), irq); } static void ip27_send_ipi_mask(const struct cpumask *mask, unsigned int action) @@ -184,8 +145,6 @@ static void ip27_init_cpu(void) static void ip27_smp_finish(void) { - extern void hub_rt_clock_event_init(void); - hub_rt_clock_event_init(); local_irq_enable(); } @@ -208,23 +167,20 @@ static int ip27_boot_secondary(int cpu, struct task_struct *idle) static void __init ip27_smp_setup(void) { - cnodeid_t cnode; + nasid_t nasid; - for_each_online_node(cnode) { - if (cnode == 0) + for_each_online_node(nasid) { + if (nasid == 0) continue; - intr_clear_all(COMPACT_TO_NASID_NODEID(cnode)); + intr_clear_all(nasid); } replicate_kernel_text(); /* - * Assumption to be fixed: we're always booted on logical / physical - * processor 0. While we're always running on logical processor 0 - * this still means this is physical processor zero; it might for - * example be disabled in the firmware. 
+ * PROM sets up system, that boot cpu is always first CPU on nasid 0 */ - alloc_cpupda(0, 0); + alloc_cpupda(0, 0, 0); } static void __init ip27_prepare_cpus(unsigned int max_cpus) diff --git a/arch/mips/sgi-ip27/ip27-timer.c b/arch/mips/sgi-ip27/ip27-timer.c index 9b4b9ac621a3..17302bbfa7a6 100644 --- a/arch/mips/sgi-ip27/ip27-timer.c +++ b/arch/mips/sgi-ip27/ip27-timer.c @@ -38,6 +38,8 @@ #include <asm/sn/sn0/hubio.h> #include <asm/pci/bridge.h> +#include "ip27-common.h" + static int rt_next_event(unsigned long delta, struct clock_event_device *evt) { unsigned int cpu = smp_processor_id(); @@ -170,7 +172,7 @@ void cpu_time_init(void) printk("CPU %d clock is %dMHz.\n", smp_processor_id(), cpu->cpu_speed); } -void hub_rtc_init(cnodeid_t cnode) +void hub_rtc_init(nasid_t nasid) { /* @@ -178,7 +180,7 @@ void hub_rtc_init(cnodeid_t cnode) * If this is not the current node then it is a cpuless * node and timeouts will not happen there. */ - if (get_compact_nodeid() == cnode) { + if (get_nasid() == nasid) { LOCAL_HUB_S(PI_RT_EN_A, 1); LOCAL_HUB_S(PI_RT_EN_B, 1); LOCAL_HUB_S(PI_PROF_EN_A, 0); diff --git a/arch/mips/sgi-ip27/ip27-xtalk.c b/arch/mips/sgi-ip27/ip27-xtalk.c index 4a1f0b0c29e2..5218b900f855 100644 --- a/arch/mips/sgi-ip27/ip27-xtalk.c +++ b/arch/mips/sgi-ip27/ip27-xtalk.c @@ -10,6 +10,7 @@ #include <linux/kernel.h> #include <linux/smp.h> #include <linux/platform_device.h> +#include <linux/platform_data/sgi-w1.h> #include <linux/platform_data/xtalk-bridge.h> #include <asm/sn/addrs.h> #include <asm/sn/types.h> @@ -26,9 +27,35 @@ static void bridge_platform_create(nasid_t nasid, int widget, int masterwid) { struct xtalk_bridge_platform_data *bd; + struct sgi_w1_platform_data *wd; struct platform_device *pdev; + struct resource w1_res; unsigned long offset; + offset = NODE_OFFSET(nasid); + + wd = kzalloc(sizeof(*wd), GFP_KERNEL); + if (!wd) + goto no_mem; + + snprintf(wd->dev_id, sizeof(wd->dev_id), "bridge-%012lx", + offset + (widget << SWIN_SIZE_BITS)); + + memset(&w1_res, 0, sizeof(w1_res)); + w1_res.start = offset + (widget << SWIN_SIZE_BITS) + + offsetof(struct bridge_regs, b_nic); + w1_res.end = w1_res.start + 3; + w1_res.flags = IORESOURCE_MEM; + + pdev = platform_device_alloc("sgi_w1", PLATFORM_DEVID_AUTO); + if (!pdev) { + kfree(wd); + goto no_mem; + } + platform_device_add_resources(pdev, &w1_res, 1); + platform_device_add_data(pdev, wd, sizeof(*wd)); + platform_device_add(pdev); + bd = kzalloc(sizeof(*bd), GFP_KERNEL); if (!bd) goto no_mem; @@ -38,7 +65,6 @@ static void bridge_platform_create(nasid_t nasid, int widget, int masterwid) goto no_mem; } - offset = NODE_OFFSET(nasid); bd->bridge_addr = RAW_NODE_SWIN_BASE(nasid, widget); bd->intr_addr = BIT_ULL(47) + 0x01800000 + PI_INT_PEND_MOD; @@ -46,14 +72,14 @@ static void bridge_platform_create(nasid_t nasid, int widget, int masterwid) bd->masterwid = masterwid; bd->mem.name = "Bridge PCI MEM"; - bd->mem.start = offset + (widget << SWIN_SIZE_BITS); - bd->mem.end = bd->mem.start + SWIN_SIZE - 1; + bd->mem.start = offset + (widget << SWIN_SIZE_BITS) + BRIDGE_DEVIO0; + bd->mem.end = offset + (widget << SWIN_SIZE_BITS) + SWIN_SIZE - 1; bd->mem.flags = IORESOURCE_MEM; bd->mem_offset = offset; bd->io.name = "Bridge PCI IO"; - bd->io.start = offset + (widget << SWIN_SIZE_BITS); - bd->io.end = bd->io.start + SWIN_SIZE - 1; + bd->io.start = offset + (widget << SWIN_SIZE_BITS) + BRIDGE_DEVIO0; + bd->io.end = offset + (widget << SWIN_SIZE_BITS) + SWIN_SIZE - 1; bd->io.flags = IORESOURCE_IO; bd->io_offset = offset; @@ -81,6 +107,8 @@ 
static int probe_one_port(nasid_t nasid, int widget, int masterwid) bridge_platform_create(nasid, widget, masterwid); break; default: + pr_info("xtalk:n%d/%d unknown widget (0x%x)\n", + nasid, widget, partnum); break; } @@ -138,14 +166,12 @@ static int xbow_probe(nasid_t nasid) return 0; } -static void xtalk_probe_node(cnodeid_t nid) +static void xtalk_probe_node(nasid_t nasid) { volatile u64 hubreg; - nasid_t nasid; xwidget_part_num_t partnum; widgetreg_t widget_id; - nasid = COMPACT_TO_NASID_NODEID(nid); hubreg = REMOTE_HUB_L(nasid, IIO_LLP_CSR); /* check whether the link is up */ @@ -173,10 +199,10 @@ static void xtalk_probe_node(cnodeid_t nid) static int __init xtalk_init(void) { - cnodeid_t cnode; + nasid_t nasid; - for_each_online_node(cnode) - xtalk_probe_node(cnode); + for_each_online_node(nasid) + xtalk_probe_node(nasid); return 0; } diff --git a/arch/mips/sgi-ip30/Makefile b/arch/mips/sgi-ip30/Makefile new file mode 100644 index 000000000000..18cf561b3d61 --- /dev/null +++ b/arch/mips/sgi-ip30/Makefile @@ -0,0 +1,9 @@ +# SPDX-License-Identifier: GPL-2.0 +# +# Makefile for the IP30 specific kernel interface routines under Linux. +# + +obj-y := ip30-irq.o ip30-power.o ip30-setup.o ip30-timer.o ip30-xtalk.o + +obj-$(CONFIG_EARLY_PRINTK) += ip30-console.o +obj-$(CONFIG_SMP) += ip30-smp.o diff --git a/arch/mips/sgi-ip30/Platform b/arch/mips/sgi-ip30/Platform new file mode 100644 index 000000000000..2b5695c2049a --- /dev/null +++ b/arch/mips/sgi-ip30/Platform @@ -0,0 +1,8 @@ +# +# SGI-IP30 (Octane/Octane2) +# +ifdef CONFIG_SGI_IP30 +platform-$(CONFIG_SGI_IP30) += sgi-ip30/ +cflags-$(CONFIG_SGI_IP30) += -I$(srctree)/arch/mips/include/asm/mach-ip30 +load-$(CONFIG_SGI_IP30) += 0xa800000020004000 +endif diff --git a/arch/mips/sgi-ip30/ip30-common.h b/arch/mips/sgi-ip30/ip30-common.h new file mode 100644 index 000000000000..d2bcaee712f3 --- /dev/null +++ b/arch/mips/sgi-ip30/ip30-common.h @@ -0,0 +1,9 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ + +#ifndef __IP30_COMMON_H +#define __IP30_COMMON_H + +extern struct plat_smp_ops ip30_smp_ops; +extern void __init ip30_per_cpu_init(void); + +#endif /* __IP30_COMMON_H */ diff --git a/arch/mips/sgi-ip30/ip30-console.c b/arch/mips/sgi-ip30/ip30-console.c new file mode 100644 index 000000000000..b91f8c4fdc78 --- /dev/null +++ b/arch/mips/sgi-ip30/ip30-console.c @@ -0,0 +1,23 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include <linux/io.h> + +#include <asm/sn/ioc3.h> + +static inline struct ioc3_uartregs *console_uart(void) +{ + struct ioc3 *ioc3; + + ioc3 = (struct ioc3 *)((void *)(0x900000001f600000)); + return &ioc3->sregs.uarta; +} + +void prom_putchar(char c) +{ + struct ioc3_uartregs *uart = console_uart(); + + while ((readb(&uart->iu_lsr) & 0x20) == 0) + cpu_relax(); + + writeb(c, &uart->iu_thr); +} diff --git a/arch/mips/sgi-ip30/ip30-irq.c b/arch/mips/sgi-ip30/ip30-irq.c new file mode 100644 index 000000000000..d46655b914f1 --- /dev/null +++ b/arch/mips/sgi-ip30/ip30-irq.c @@ -0,0 +1,328 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * ip30-irq.c: Highlevel interrupt handling for IP30 architecture. 
+ */ +#include <linux/errno.h> +#include <linux/init.h> +#include <linux/interrupt.h> +#include <linux/irq.h> +#include <linux/percpu.h> +#include <linux/spinlock.h> +#include <linux/tick.h> +#include <linux/types.h> + +#include <asm/irq_cpu.h> +#include <asm/sgi/heart.h> + +struct heart_irq_data { + u64 *irq_mask; + int cpu; +}; + +static DECLARE_BITMAP(heart_irq_map, HEART_NUM_IRQS); + +static DEFINE_PER_CPU(unsigned long, irq_enable_mask); + +static inline int heart_alloc_int(void) +{ + int bit; + +again: + bit = find_first_zero_bit(heart_irq_map, HEART_NUM_IRQS); + if (bit >= HEART_NUM_IRQS) + return -ENOSPC; + + if (test_and_set_bit(bit, heart_irq_map)) + goto again; + + return bit; +} + +static void ip30_error_irq(struct irq_desc *desc) +{ + u64 pending, mask, cause, error_irqs, err_reg; + int cpu = smp_processor_id(); + int i; + + pending = heart_read(&heart_regs->isr); + mask = heart_read(&heart_regs->imr[cpu]); + cause = heart_read(&heart_regs->cause); + error_irqs = (pending & HEART_L4_INT_MASK & mask); + + /* Bail if there's nothing to process (how did we get here, then?) */ + if (unlikely(!error_irqs)) + return; + + /* Prevent any of the error IRQs from firing again. */ + heart_write(mask & ~(pending), &heart_regs->imr[cpu]); + + /* Ack all error IRQs. */ + heart_write(HEART_L4_INT_MASK, &heart_regs->clear_isr); + + /* + * If we also have a cause value, then something happened, so loop + * through the error IRQs and report a "heart attack" for each one + * and print the value of the HEART cause register. This is really + * primitive right now, but it should hopefully work until a more + * robust error handling routine can be put together. + * + * Refer to heart.h for the HC_* macros to work out the cause + * that got us here. + */ + if (cause) { + pr_alert("IP30: CPU%d: HEART ATTACK! ISR = 0x%.16llx, IMR = 0x%.16llx, CAUSE = 0x%.16llx\n", + cpu, pending, mask, cause); + + if (cause & HC_COR_MEM_ERR) { + err_reg = heart_read(&heart_regs->mem_err_addr); + pr_alert(" HEART_MEMERR_ADDR = 0x%.16llx\n", err_reg); + } + + /* i = 63; i >= 51; i-- */ + for (i = HEART_ERR_MASK_END; i >= HEART_ERR_MASK_START; i--) + if ((pending >> i) & 1) + pr_alert(" HEART Error IRQ #%d\n", i); + + /* XXX: Seems possible to loop forever here, so panic(). */ + panic("IP30: Fatal Error !\n"); + } + + /* Unmask the error IRQs. 
*/ + heart_write(mask, &heart_regs->imr[cpu]); +} + +static void ip30_normal_irq(struct irq_desc *desc) +{ + int cpu = smp_processor_id(); + struct irq_domain *domain; + u64 pend, mask; + int irq; + + pend = heart_read(&heart_regs->isr); + mask = (heart_read(&heart_regs->imr[cpu]) & + (HEART_L0_INT_MASK | HEART_L1_INT_MASK | HEART_L2_INT_MASK)); + + pend &= mask; + if (unlikely(!pend)) + return; + +#ifdef CONFIG_SMP + if (pend & BIT_ULL(HEART_L2_INT_RESCHED_CPU_0)) { + heart_write(BIT_ULL(HEART_L2_INT_RESCHED_CPU_0), + &heart_regs->clear_isr); + scheduler_ipi(); + } else if (pend & BIT_ULL(HEART_L2_INT_RESCHED_CPU_1)) { + heart_write(BIT_ULL(HEART_L2_INT_RESCHED_CPU_1), + &heart_regs->clear_isr); + scheduler_ipi(); + } else if (pend & BIT_ULL(HEART_L2_INT_CALL_CPU_0)) { + heart_write(BIT_ULL(HEART_L2_INT_CALL_CPU_0), + &heart_regs->clear_isr); + generic_smp_call_function_interrupt(); + } else if (pend & BIT_ULL(HEART_L2_INT_CALL_CPU_1)) { + heart_write(BIT_ULL(HEART_L2_INT_CALL_CPU_1), + &heart_regs->clear_isr); + generic_smp_call_function_interrupt(); + } else +#endif + { + domain = irq_desc_get_handler_data(desc); + irq = irq_linear_revmap(domain, __ffs(pend)); + if (irq) + generic_handle_irq(irq); + else + spurious_interrupt(); + } +} + +static void ip30_ack_heart_irq(struct irq_data *d) +{ + heart_write(BIT_ULL(d->hwirq), &heart_regs->clear_isr); +} + +static void ip30_mask_heart_irq(struct irq_data *d) +{ + struct heart_irq_data *hd = irq_data_get_irq_chip_data(d); + unsigned long *mask = &per_cpu(irq_enable_mask, hd->cpu); + + clear_bit(d->hwirq, mask); + heart_write(*mask, &heart_regs->imr[hd->cpu]); +} + +static void ip30_mask_and_ack_heart_irq(struct irq_data *d) +{ + struct heart_irq_data *hd = irq_data_get_irq_chip_data(d); + unsigned long *mask = &per_cpu(irq_enable_mask, hd->cpu); + + clear_bit(d->hwirq, mask); + heart_write(*mask, &heart_regs->imr[hd->cpu]); + heart_write(BIT_ULL(d->hwirq), &heart_regs->clear_isr); +} + +static void ip30_unmask_heart_irq(struct irq_data *d) +{ + struct heart_irq_data *hd = irq_data_get_irq_chip_data(d); + unsigned long *mask = &per_cpu(irq_enable_mask, hd->cpu); + + set_bit(d->hwirq, mask); + heart_write(*mask, &heart_regs->imr[hd->cpu]); +} + +static int ip30_set_heart_irq_affinity(struct irq_data *d, + const struct cpumask *mask, bool force) +{ + struct heart_irq_data *hd = irq_data_get_irq_chip_data(d); + + if (!hd) + return -EINVAL; + + if (irqd_is_started(d)) + ip30_mask_and_ack_heart_irq(d); + + hd->cpu = cpumask_first_and(mask, cpu_online_mask); + + if (irqd_is_started(d)) + ip30_unmask_heart_irq(d); + + irq_data_update_effective_affinity(d, cpumask_of(hd->cpu)); + + return 0; +} + +static struct irq_chip heart_irq_chip = { + .name = "HEART", + .irq_ack = ip30_ack_heart_irq, + .irq_mask = ip30_mask_heart_irq, + .irq_mask_ack = ip30_mask_and_ack_heart_irq, + .irq_unmask = ip30_unmask_heart_irq, + .irq_set_affinity = ip30_set_heart_irq_affinity, +}; + +static int heart_domain_alloc(struct irq_domain *domain, unsigned int virq, + unsigned int nr_irqs, void *arg) +{ + struct irq_alloc_info *info = arg; + struct heart_irq_data *hd; + int hwirq; + + if (nr_irqs > 1 || !info) + return -EINVAL; + + hd = kzalloc(sizeof(*hd), GFP_KERNEL); + if (!hd) + return -ENOMEM; + + hwirq = heart_alloc_int(); + if (hwirq < 0) { + kfree(hd); + return -EAGAIN; + } + irq_domain_set_info(domain, virq, hwirq, &heart_irq_chip, hd, + handle_level_irq, NULL, NULL); + + return 0; +} + +static void heart_domain_free(struct irq_domain *domain, + unsigned int virq, 
unsigned int nr_irqs) +{ + struct irq_data *irqd; + + if (nr_irqs > 1) + return; + + irqd = irq_domain_get_irq_data(domain, virq); + if (irqd) { + clear_bit(irqd->hwirq, heart_irq_map); + kfree(irqd->chip_data); + } +} + +static const struct irq_domain_ops heart_domain_ops = { + .alloc = heart_domain_alloc, + .free = heart_domain_free, +}; + +void __init ip30_install_ipi(void) +{ + int cpu = smp_processor_id(); + unsigned long *mask = &per_cpu(irq_enable_mask, cpu); + + set_bit(HEART_L2_INT_RESCHED_CPU_0 + cpu, mask); + heart_write(BIT_ULL(HEART_L2_INT_RESCHED_CPU_0 + cpu), + &heart_regs->clear_isr); + set_bit(HEART_L2_INT_CALL_CPU_0 + cpu, mask); + heart_write(BIT_ULL(HEART_L2_INT_CALL_CPU_0 + cpu), + &heart_regs->clear_isr); + + heart_write(*mask, &heart_regs->imr[cpu]); +} + +void __init arch_init_irq(void) +{ + struct irq_domain *domain; + struct fwnode_handle *fn; + unsigned long *mask; + int i; + + mips_cpu_irq_init(); + + /* Mask all IRQs. */ + heart_write(HEART_CLR_ALL_MASK, &heart_regs->imr[0]); + heart_write(HEART_CLR_ALL_MASK, &heart_regs->imr[1]); + heart_write(HEART_CLR_ALL_MASK, &heart_regs->imr[2]); + heart_write(HEART_CLR_ALL_MASK, &heart_regs->imr[3]); + + /* Ack everything. */ + heart_write(HEART_ACK_ALL_MASK, &heart_regs->clear_isr); + + /* Enable specific HEART error IRQs for each CPU. */ + mask = &per_cpu(irq_enable_mask, 0); + *mask |= HEART_CPU0_ERR_MASK; + heart_write(*mask, &heart_regs->imr[0]); + mask = &per_cpu(irq_enable_mask, 1); + *mask |= HEART_CPU1_ERR_MASK; + heart_write(*mask, &heart_regs->imr[1]); + + /* + * Some HEART bits are reserved by hardware or by software convention. + * Mark these as reserved right away so they won't be accidentally + * used later. + */ + set_bit(HEART_L0_INT_GENERIC, heart_irq_map); + set_bit(HEART_L0_INT_FLOW_CTRL_HWTR_0, heart_irq_map); + set_bit(HEART_L0_INT_FLOW_CTRL_HWTR_1, heart_irq_map); + set_bit(HEART_L2_INT_RESCHED_CPU_0, heart_irq_map); + set_bit(HEART_L2_INT_RESCHED_CPU_1, heart_irq_map); + set_bit(HEART_L2_INT_CALL_CPU_0, heart_irq_map); + set_bit(HEART_L2_INT_CALL_CPU_1, heart_irq_map); + set_bit(HEART_L3_INT_TIMER, heart_irq_map); + + /* Reserve the error interrupts (#51 to #63). */ + for (i = HEART_L4_INT_XWID_ERR_9; i <= HEART_L4_INT_HEART_EXCP; i++) + set_bit(i, heart_irq_map); + + fn = irq_domain_alloc_named_fwnode("HEART"); + WARN_ON(fn == NULL); + if (!fn) + return; + domain = irq_domain_create_linear(fn, HEART_NUM_IRQS, + &heart_domain_ops, NULL); + WARN_ON(domain == NULL); + if (!domain) + return; + + irq_set_default_host(domain); + + irq_set_percpu_devid(IP30_HEART_L0_IRQ); + irq_set_chained_handler_and_data(IP30_HEART_L0_IRQ, ip30_normal_irq, + domain); + irq_set_percpu_devid(IP30_HEART_L1_IRQ); + irq_set_chained_handler_and_data(IP30_HEART_L1_IRQ, ip30_normal_irq, + domain); + irq_set_percpu_devid(IP30_HEART_L2_IRQ); + irq_set_chained_handler_and_data(IP30_HEART_L2_IRQ, ip30_normal_irq, + domain); + irq_set_percpu_devid(IP30_HEART_ERR_IRQ); + irq_set_chained_handler_and_data(IP30_HEART_ERR_IRQ, ip30_error_irq, + domain); +} diff --git a/arch/mips/sgi-ip30/ip30-power.c b/arch/mips/sgi-ip30/ip30-power.c new file mode 100644 index 000000000000..120b3f3d5108 --- /dev/null +++ b/arch/mips/sgi-ip30/ip30-power.c @@ -0,0 +1,41 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * ip30-power.c: Software powerdown and reset handling for IP30 architecture.
+ * + * Copyright (C) 2004-2007 Stanislaw Skowronek <skylark@unaligned.org> + * 2014 Joshua Kinard <kumba@gentoo.org> + * 2009 Johannes Dickgreber <tanzy@gmx.de> + */ + +#include <linux/init.h> +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/sched.h> +#include <linux/notifier.h> +#include <linux/delay.h> +#include <linux/rtc/ds1685.h> +#include <linux/interrupt.h> +#include <linux/pm.h> + +#include <asm/reboot.h> +#include <asm/sgi/heart.h> + +static void __noreturn ip30_machine_restart(char *cmd) +{ + /* + * Execute HEART cold reset + * Yes, it's cold-HEARTed! + */ + heart_write((heart_read(&heart_regs->mode) | HM_COLD_RST), + &heart_regs->mode); + unreachable(); +} + +static int __init ip30_reboot_setup(void) +{ + _machine_restart = ip30_machine_restart; + + return 0; +} + +subsys_initcall(ip30_reboot_setup); diff --git a/arch/mips/sgi-ip30/ip30-setup.c b/arch/mips/sgi-ip30/ip30-setup.c new file mode 100644 index 000000000000..44b1607e964d --- /dev/null +++ b/arch/mips/sgi-ip30/ip30-setup.c @@ -0,0 +1,138 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * SGI IP30 miscellaneous setup bits. + * + * Copyright (C) 2004-2007 Stanislaw Skowronek <skylark@unaligned.org> + * 2007 Joshua Kinard <kumba@gentoo.org> + * 2009 Johannes Dickgreber <tanzy@gmx.de> + */ + +#include <linux/init.h> +#include <linux/io.h> +#include <linux/kernel.h> +#include <linux/types.h> +#include <linux/percpu.h> +#include <linux/memblock.h> + +#include <asm/smp-ops.h> +#include <asm/sgialib.h> +#include <asm/time.h> +#include <asm/sgi/heart.h> + +#include "ip30-common.h" + +/* Structure of accessible HEART registers located in XKPHYS space. */ +struct ip30_heart_regs __iomem *heart_regs = HEART_XKPHYS_BASE; + +/* + * ARCS will report up to the first 1GB of + * memory if queried. Anything beyond that + * is marked as reserved. + */ +#define IP30_MAX_PROM_MEMORY _AC(0x40000000, UL) + +/* + * Memory in the Octane starts at 512MB + */ +#define IP30_MEMORY_BASE _AC(0x20000000, UL) + +/* + * If using ARCS to probe for memory, then + * remaining memory will start at this offset. + */ +#define IP30_REAL_MEMORY_START (IP30_MEMORY_BASE + IP30_MAX_PROM_MEMORY) + +#define MEM_SHIFT(x) ((x) >> 20) + +static void __init ip30_mem_init(void) +{ + unsigned long total_mem; + phys_addr_t addr; + phys_addr_t size; + u32 memcfg; + int i; + + total_mem = 0; + for (i = 0; i < HEART_MEMORY_BANKS; i++) { + memcfg = __raw_readl(&heart_regs->mem_cfg.l[i]); + if (!(memcfg & HEART_MEMCFG_VALID)) + continue; + + addr = memcfg & HEART_MEMCFG_ADDR_MASK; + addr <<= HEART_MEMCFG_UNIT_SHIFT; + addr += IP30_MEMORY_BASE; + size = memcfg & HEART_MEMCFG_SIZE_MASK; + size >>= HEART_MEMCFG_SIZE_SHIFT; + size += 1; + size <<= HEART_MEMCFG_UNIT_SHIFT; + + total_mem += size; + + if (addr >= IP30_REAL_MEMORY_START) + memblock_free(addr, size); + else if ((addr + size) > IP30_REAL_MEMORY_START) + memblock_free(IP30_REAL_MEMORY_START, + size - IP30_MAX_PROM_MEMORY); + } + pr_info("Detected %luMB of physical memory.\n", MEM_SHIFT(total_mem)); +} + +/** + * ip30_cpu_time_init - platform time initialization. 
+ */ +static void __init ip30_cpu_time_init(void) +{ + int cpu = smp_processor_id(); + u64 heart_compare; + unsigned int start, end; + int time_diff; + + heart_compare = (heart_read(&heart_regs->count) + + (HEART_CYCLES_PER_SEC / 10)); + start = read_c0_count(); + while ((heart_read(&heart_regs->count) - heart_compare) & 0x800000) + cpu_relax(); + + end = read_c0_count(); + time_diff = (int)end - (int)start; + mips_hpt_frequency = time_diff * 10; + pr_info("IP30: CPU%d: %d MHz CPU detected.\n", cpu, + (mips_hpt_frequency * 2) / 1000000); +} + +void __init ip30_per_cpu_init(void) +{ + /* Disable all interrupts. */ + clear_c0_status(ST0_IM); + + ip30_cpu_time_init(); +#ifdef CONFIG_SMP + ip30_install_ipi(); +#endif + + enable_percpu_irq(IP30_HEART_L0_IRQ, IRQ_TYPE_NONE); + enable_percpu_irq(IP30_HEART_L1_IRQ, IRQ_TYPE_NONE); + enable_percpu_irq(IP30_HEART_L2_IRQ, IRQ_TYPE_NONE); + enable_percpu_irq(IP30_HEART_ERR_IRQ, IRQ_TYPE_NONE); +} + +/** + * plat_mem_setup - despite the name, misc setup happens here. + */ +void __init plat_mem_setup(void) +{ + ip30_mem_init(); + + /* XXX: Hard lock on /sbin/init if this flag isn't specified. */ + prom_flags |= PROM_FLAG_DONT_FREE_TEMP; + +#ifdef CONFIG_SMP + register_smp_ops(&ip30_smp_ops); +#else + ip30_per_cpu_init(); +#endif + + ioport_resource.start = 0; + ioport_resource.end = ~0UL; + set_io_port_base(IO_BASE); +} diff --git a/arch/mips/sgi-ip30/ip30-smp.c b/arch/mips/sgi-ip30/ip30-smp.c new file mode 100644 index 000000000000..4bfe654602b1 --- /dev/null +++ b/arch/mips/sgi-ip30/ip30-smp.c @@ -0,0 +1,149 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * ip30-smp.c: SMP on IP30 architecture. + * Based off of the original IP30 SMP code, with inspiration from ip27-smp.c + * and smp-bmips.c. + * + * Copyright (C) 2005-2007 Stanislaw Skowronek <skylark@unaligned.org> + * 2006-2007, 2014-2015 Joshua Kinard <kumba@gentoo.org> + * 2009 Johannes Dickgreber <tanzy@gmx.de> + */ + +#include <linux/init.h> +#include <linux/sched.h> +#include <linux/sched/task_stack.h> + +#include <asm/time.h> +#include <asm/sgi/heart.h> + +#include "ip30-common.h" + +#define MPCONF_MAGIC 0xbaddeed2 +#define MPCONF_ADDR 0xa800000000000600L +#define MPCONF_SIZE 0x80 +#define MPCONF(x) (MPCONF_ADDR + (x) * MPCONF_SIZE) + +/* HEART can theoretically do 4 CPUs, but only 2 are physically possible */ +#define MP_NCPU 2 + +struct mpconf { + u32 magic; + u32 prid; + u32 physid; + u32 virtid; + u32 scachesz; + u16 fanloads; + u16 res; + void *launch; + void *rendezvous; + u64 res2[3]; + void *stackaddr; + void *lnch_parm; + void *rndv_parm; + u32 idleflag; +}; + +static void ip30_smp_send_ipi_single(int cpu, u32 action) +{ + int irq; + + switch (action) { + case SMP_RESCHEDULE_YOURSELF: + irq = HEART_L2_INT_RESCHED_CPU_0; + break; + case SMP_CALL_FUNCTION: + irq = HEART_L2_INT_CALL_CPU_0; + break; + default: + panic("IP30: Unknown action value in %s!\n", __func__); + } + + irq += cpu; + + /* Poke the other CPU -- it's got mail! */ + heart_write(BIT_ULL(irq), &heart_regs->set_isr); +} + +static void ip30_smp_send_ipi_mask(const struct cpumask *mask, u32 action) +{ + u32 i; + + for_each_cpu(i, mask) + ip30_smp_send_ipi_single(i, action); +} + +static void __init ip30_smp_setup(void) +{ + int i; + int ncpu = 0; + struct mpconf *mpc; + + init_cpu_possible(cpumask_of(0)); + + /* Scan the MPCONF structure and enumerate available CPUs. 
*/ + for (i = 0; i < MP_NCPU; i++) { + mpc = (struct mpconf *)MPCONF(i); + if (mpc->magic == MPCONF_MAGIC) { + set_cpu_possible(i, true); + __cpu_number_map[i] = ++ncpu; + __cpu_logical_map[ncpu] = i; + pr_info("IP30: Slot: %d, PrID: %.8x, PhyID: %d, VirtID: %d\n", + i, mpc->prid, mpc->physid, mpc->virtid); + } + } + pr_info("IP30: Detected %d CPU(s) present.\n", ncpu); + + /* + * Set the coherency algorithm to '5' (cacheable coherent + * exclusive on write). This is needed on IP30 SMP, especially + * for R14000 CPUs, otherwise, instruction bus errors will + * occur upon reaching userland. + */ + change_c0_config(CONF_CM_CMASK, CONF_CM_CACHABLE_COW); +} + +static void __init ip30_smp_prepare_cpus(unsigned int max_cpus) +{ + /* nothing to do here */ +} + +static int __init ip30_smp_boot_secondary(int cpu, struct task_struct *idle) +{ + struct mpconf *mpc = (struct mpconf *)MPCONF(cpu); + + /* Stack pointer (sp). */ + mpc->stackaddr = (void *)__KSTK_TOS(idle); + + /* Global pointer (gp). */ + mpc->lnch_parm = task_thread_info(idle); + + mb(); /* make sure stack and lparm are written */ + + /* Boot CPUx. */ + mpc->launch = smp_bootstrap; + + /* CPUx now executes smp_bootstrap, then ip30_smp_finish */ + return 0; +} + +static void __init ip30_smp_init_cpu(void) +{ + ip30_per_cpu_init(); +} + +static void __init ip30_smp_finish(void) +{ + enable_percpu_irq(get_c0_compare_int(), IRQ_TYPE_NONE); + local_irq_enable(); +} + +struct plat_smp_ops __read_mostly ip30_smp_ops = { + .send_ipi_single = ip30_smp_send_ipi_single, + .send_ipi_mask = ip30_smp_send_ipi_mask, + .smp_setup = ip30_smp_setup, + .prepare_cpus = ip30_smp_prepare_cpus, + .boot_secondary = ip30_smp_boot_secondary, + .init_secondary = ip30_smp_init_cpu, + .smp_finish = ip30_smp_finish, + .prepare_boot_cpu = ip30_smp_init_cpu, +}; diff --git a/arch/mips/sgi-ip30/ip30-timer.c b/arch/mips/sgi-ip30/ip30-timer.c new file mode 100644 index 000000000000..d13e105478ae --- /dev/null +++ b/arch/mips/sgi-ip30/ip30-timer.c @@ -0,0 +1,63 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * ip30-timer.c: Clocksource/clockevent support for the + * HEART chip in SGI Octane (IP30) systems. 
+ * + * Copyright (C) 2004-2007 Stanislaw Skowronek <skylark@unaligned.org> + * Copyright (C) 2009 Johannes Dickgreber <tanzy@gmx.de> + * Copyright (C) 2011 Joshua Kinard <kumba@gentoo.org> + */ + +#include <linux/clocksource.h> +#include <linux/cpumask.h> +#include <linux/init.h> +#include <linux/kernel.h> +#include <linux/percpu.h> +#include <linux/sched_clock.h> + +#include <asm/time.h> +#include <asm/cevt-r4k.h> +#include <asm/sgi/heart.h> + +static u64 ip30_heart_counter_read(struct clocksource *cs) +{ + return heart_read(&heart_regs->count); +} + +struct clocksource ip30_heart_clocksource = { + .name = "HEART", + .rating = 400, + .read = ip30_heart_counter_read, + .mask = CLOCKSOURCE_MASK(52), + .flags = (CLOCK_SOURCE_IS_CONTINUOUS | CLOCK_SOURCE_VALID_FOR_HRES), +}; + +static u64 notrace ip30_heart_read_sched_clock(void) +{ + return heart_read(&heart_regs->count); +} + +static void __init ip30_heart_clocksource_init(void) +{ + struct clocksource *cs = &ip30_heart_clocksource; + + clocksource_register_hz(cs, HEART_CYCLES_PER_SEC); + + sched_clock_register(ip30_heart_read_sched_clock, 52, + HEART_CYCLES_PER_SEC); +} + +void __init plat_time_init(void) +{ + int irq = get_c0_compare_int(); + + cp0_timer_irq_installed = 1; + c0_compare_irqaction.percpu_dev_id = &mips_clockevent_device; + c0_compare_irqaction.flags &= ~IRQF_SHARED; + irq_set_handler(irq, handle_percpu_devid_irq); + irq_set_percpu_devid(irq); + setup_percpu_irq(irq, &c0_compare_irqaction); + enable_percpu_irq(irq, IRQ_TYPE_NONE); + + ip30_heart_clocksource_init(); +} diff --git a/arch/mips/sgi-ip30/ip30-xtalk.c b/arch/mips/sgi-ip30/ip30-xtalk.c new file mode 100644 index 000000000000..8a2894645529 --- /dev/null +++ b/arch/mips/sgi-ip30/ip30-xtalk.c @@ -0,0 +1,152 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * ip30-xtalk.c - Very basic Crosstalk (XIO) detection support. 
+ * Copyright (C) 2004-2007 Stanislaw Skowronek <skylark@unaligned.org> + * Copyright (C) 2009 Johannes Dickgreber <tanzy@gmx.de> + * Copyright (C) 2007, 2014-2016 Joshua Kinard <kumba@gentoo.org> + */ + +#include <linux/init.h> +#include <linux/kernel.h> +#include <linux/platform_device.h> +#include <linux/platform_data/sgi-w1.h> +#include <linux/platform_data/xtalk-bridge.h> + +#include <asm/xtalk/xwidget.h> +#include <asm/pci/bridge.h> + +#define IP30_SWIN_BASE(widget) \ + (0x0000000010000000 | (((unsigned long)(widget)) << 24)) + +#define IP30_RAW_SWIN_BASE(widget) (IO_BASE + IP30_SWIN_BASE(widget)) + +#define IP30_SWIN_SIZE (1 << 24) + +#define IP30_WIDGET_XBOW _AC(0x0, UL) /* XBow is always 0 */ +#define IP30_WIDGET_HEART _AC(0x8, UL) /* HEART is always 8 */ +#define IP30_WIDGET_PCI_BASE _AC(0xf, UL) /* BaseIO PCI is always 15 */ + +#define XTALK_NODEV 0xffffffff + +#define XBOW_REG_LINK_STAT_0 0x114 +#define XBOW_REG_LINK_BLK_SIZE 0x40 +#define XBOW_REG_LINK_ALIVE 0x80000000 + +#define HEART_INTR_ADDR 0x00000080 + +#define xtalk_read __raw_readl + +static void bridge_platform_create(int widget, int masterwid) +{ + struct xtalk_bridge_platform_data *bd; + struct sgi_w1_platform_data *wd; + struct platform_device *pdev; + struct resource w1_res; + + wd = kzalloc(sizeof(*wd), GFP_KERNEL); + if (!wd) + goto no_mem; + + snprintf(wd->dev_id, sizeof(wd->dev_id), "bridge-%012lx", + IP30_SWIN_BASE(widget)); + + memset(&w1_res, 0, sizeof(w1_res)); + w1_res.start = IP30_SWIN_BASE(widget) + + offsetof(struct bridge_regs, b_nic); + w1_res.end = w1_res.start + 3; + w1_res.flags = IORESOURCE_MEM; + + pdev = platform_device_alloc("sgi_w1", PLATFORM_DEVID_AUTO); + if (!pdev) { + kfree(wd); + goto no_mem; + } + platform_device_add_resources(pdev, &w1_res, 1); + platform_device_add_data(pdev, wd, sizeof(*wd)); + platform_device_add(pdev); + + bd = kzalloc(sizeof(*bd), GFP_KERNEL); + if (!bd) + goto no_mem; + pdev = platform_device_alloc("xtalk-bridge", PLATFORM_DEVID_AUTO); + if (!pdev) { + kfree(bd); + goto no_mem; + } + + bd->bridge_addr = IP30_RAW_SWIN_BASE(widget); + bd->intr_addr = HEART_INTR_ADDR; + bd->nasid = 0; + bd->masterwid = masterwid; + + bd->mem.name = "Bridge PCI MEM"; + bd->mem.start = IP30_SWIN_BASE(widget) + BRIDGE_DEVIO0; + bd->mem.end = IP30_SWIN_BASE(widget) + IP30_SWIN_SIZE - 1; + bd->mem.flags = IORESOURCE_MEM; + bd->mem_offset = IP30_SWIN_BASE(widget); + + bd->io.name = "Bridge PCI IO"; + bd->io.start = IP30_SWIN_BASE(widget) + BRIDGE_DEVIO0; + bd->io.end = IP30_SWIN_BASE(widget) + IP30_SWIN_SIZE - 1; + bd->io.flags = IORESOURCE_IO; + bd->io_offset = IP30_SWIN_BASE(widget); + + platform_device_add_data(pdev, bd, sizeof(*bd)); + platform_device_add(pdev); + pr_info("xtalk:%x bridge widget\n", widget); + return; + +no_mem: + pr_warn("xtalk:%x bridge create out of memory\n", widget); +} + +static unsigned int __init xbow_widget_active(s8 wid) +{ + unsigned int link_stat; + + link_stat = xtalk_read((void *)(IP30_RAW_SWIN_BASE(IP30_WIDGET_XBOW) + + XBOW_REG_LINK_STAT_0 + + XBOW_REG_LINK_BLK_SIZE * + (wid - 8))); + + return (link_stat & XBOW_REG_LINK_ALIVE) ? 
1 : 0; +} + +static void __init xtalk_init_widget(s8 wid, s8 masterwid) +{ + xwidget_part_num_t partnum; + widgetreg_t widget_id; + + if (!xbow_widget_active(wid)) + return; + + widget_id = xtalk_read((void *)(IP30_RAW_SWIN_BASE(wid) + WIDGET_ID)); + + partnum = XWIDGET_PART_NUM(widget_id); + + switch (partnum) { + case BRIDGE_WIDGET_PART_NUM: + case XBRIDGE_WIDGET_PART_NUM: + bridge_platform_create(wid, masterwid); + break; + default: + pr_info("xtalk:%x unknown widget (0x%x)\n", wid, partnum); + break; + } +} + +static int __init ip30_xtalk_init(void) +{ + int i; + + /* + * Walk widget IDs backwards so that BaseIO is probed first. This + * ensures that the BaseIO IOC3 is always detected as eth0. + */ + for (i = IP30_WIDGET_PCI_BASE; i > IP30_WIDGET_HEART; i--) + xtalk_init_widget(i, IP30_WIDGET_HEART); + + return 0; +} + +arch_initcall(ip30_xtalk_init); diff --git a/arch/mips/tools/.gitignore b/arch/mips/tools/.gitignore index 56d34ccccce4..b0209450d9ff 100644 --- a/arch/mips/tools/.gitignore +++ b/arch/mips/tools/.gitignore @@ -1 +1,2 @@ elf-entry +loongson3-llsc-check diff --git a/arch/mips/tools/Makefile b/arch/mips/tools/Makefile index 3baee4bc6775..aaef688749f5 100644 --- a/arch/mips/tools/Makefile +++ b/arch/mips/tools/Makefile @@ -3,3 +3,8 @@ hostprogs-y := elf-entry PHONY += elf-entry elf-entry: $(obj)/elf-entry @: + +hostprogs-$(CONFIG_CPU_LOONGSON3_WORKAROUNDS) += loongson3-llsc-check +PHONY += loongson3-llsc-check +loongson3-llsc-check: $(obj)/loongson3-llsc-check + @: diff --git a/arch/mips/tools/loongson3-llsc-check.c b/arch/mips/tools/loongson3-llsc-check.c new file mode 100644 index 000000000000..0ebddd0ae46f --- /dev/null +++ b/arch/mips/tools/loongson3-llsc-check.c @@ -0,0 +1,307 @@ +// SPDX-License-Identifier: GPL-2.0-only +#include <byteswap.h> +#include <elf.h> +#include <endian.h> +#include <errno.h> +#include <fcntl.h> +#include <inttypes.h> +#include <stdbool.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <sys/mman.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <unistd.h> + +#ifdef be32toh +/* If libc provides le{16,32,64}toh() then we'll use them */ +#elif BYTE_ORDER == LITTLE_ENDIAN +# define le16toh(x) (x) +# define le32toh(x) (x) +# define le64toh(x) (x) +#elif BYTE_ORDER == BIG_ENDIAN +# define le16toh(x) bswap_16(x) +# define le32toh(x) bswap_32(x) +# define le64toh(x) bswap_64(x) +#endif + +/* MIPS opcodes, in bits 31:26 of an instruction */ +#define OP_SPECIAL 0x00 +#define OP_REGIMM 0x01 +#define OP_BEQ 0x04 +#define OP_BNE 0x05 +#define OP_BLEZ 0x06 +#define OP_BGTZ 0x07 +#define OP_BEQL 0x14 +#define OP_BNEL 0x15 +#define OP_BLEZL 0x16 +#define OP_BGTZL 0x17 +#define OP_LL 0x30 +#define OP_LLD 0x34 +#define OP_SC 0x38 +#define OP_SCD 0x3c + +/* Bits 20:16 of OP_REGIMM instructions */ +#define REGIMM_BLTZ 0x00 +#define REGIMM_BGEZ 0x01 +#define REGIMM_BLTZL 0x02 +#define REGIMM_BGEZL 0x03 +#define REGIMM_BLTZAL 0x10 +#define REGIMM_BGEZAL 0x11 +#define REGIMM_BLTZALL 0x12 +#define REGIMM_BGEZALL 0x13 + +/* Bits 5:0 of OP_SPECIAL instructions */ +#define SPECIAL_SYNC 0x0f + +static void usage(FILE *f) +{ + fprintf(f, "Usage: loongson3-llsc-check /path/to/vmlinux\n"); +} + +static int se16(uint16_t x) +{ + return (int16_t)x; +} + +static bool is_ll(uint32_t insn) +{ + switch (insn >> 26) { + case OP_LL: + case OP_LLD: + return true; + + default: + return false; + } +} + +static bool is_sc(uint32_t insn) +{ + switch (insn >> 26) { + case OP_SC: + case OP_SCD: + return true; + + default: + return false; + } +} + 
+static bool is_sync(uint32_t insn) +{ + /* Bits 31:11 should all be zeroes */ + if (insn >> 11) + return false; + + /* Bits 5:0 specify the SYNC special encoding */ + if ((insn & 0x3f) != SPECIAL_SYNC) + return false; + + return true; +} + +static bool is_branch(uint32_t insn, int *off) +{ + switch (insn >> 26) { + case OP_BEQ: + case OP_BEQL: + case OP_BNE: + case OP_BNEL: + case OP_BGTZ: + case OP_BGTZL: + case OP_BLEZ: + case OP_BLEZL: + *off = se16(insn) + 1; + return true; + + case OP_REGIMM: + switch ((insn >> 16) & 0x1f) { + case REGIMM_BGEZ: + case REGIMM_BGEZL: + case REGIMM_BGEZAL: + case REGIMM_BGEZALL: + case REGIMM_BLTZ: + case REGIMM_BLTZL: + case REGIMM_BLTZAL: + case REGIMM_BLTZALL: + *off = se16(insn) + 1; + return true; + + default: + return false; + } + + default: + return false; + } +} + +static int check_ll(uint64_t pc, uint32_t *code, size_t sz) +{ + ssize_t i, max, sc_pos; + int off; + + /* + * Every LL must be preceded by a sync instruction in order to ensure + * that instruction reordering doesn't allow a prior memory access to + * execute after the LL & cause erroneous results. + */ + if (!is_sync(le32toh(code[-1]))) { + fprintf(stderr, "%" PRIx64 ": LL not preceded by sync\n", pc); + return -EINVAL; + } + + /* Find the matching SC instruction */ + max = sz / 4; + for (sc_pos = 0; sc_pos < max; sc_pos++) { + if (is_sc(le32toh(code[sc_pos]))) + break; + } + if (sc_pos >= max) { + fprintf(stderr, "%" PRIx64 ": LL has no matching SC\n", pc); + return -EINVAL; + } + + /* + * Check that branches within the LL/SC loop target sync instructions, + * ensuring that speculative execution can't generate memory accesses + * due to instructions outside of the loop. + */ + for (i = 0; i < sc_pos; i++) { + if (!is_branch(le32toh(code[i]), &off)) + continue; + + /* + * If the branch target is within the LL/SC loop then we don't + * need to worry about it. + */ + if ((off >= -i) && (off <= sc_pos)) + continue; + + /* If the branch targets a sync instruction we're all good... */ + if (is_sync(le32toh(code[i + off]))) + continue; + + /* ...but if not, we have a problem */ + fprintf(stderr, "%" PRIx64 ": Branch target not a sync\n", + pc + (i * 4)); + return -EINVAL; + } + + return 0; +} + +static int check_code(uint64_t pc, uint32_t *code, size_t sz) +{ + int err = 0; + + if (sz % 4) { + fprintf(stderr, "%" PRIx64 ": Section size not a multiple of 4\n", + pc); + err = -EINVAL; + sz -= (sz % 4); + } + + if (is_ll(le32toh(code[0]))) { + fprintf(stderr, "%" PRIx64 ": First instruction in section is an LL\n", + pc); + err = -EINVAL; + } + +#define advance() ( \ + code++, \ + pc += 4, \ + sz -= 4 \ +) + + /* + * Skip the first instruction, allowing check_ll to look backwards + * unconditionally.
+ */ + advance(); + + /* Now scan through the code looking for LL instructions */ + for (; sz; advance()) { + if (is_ll(le32toh(code[0]))) + err |= check_ll(pc, code, sz); + } + + return err; +} + +int main(int argc, char *argv[]) +{ + int vmlinux_fd, status, err, i; + const char *vmlinux_path; + struct stat st; + Elf64_Ehdr *eh; + Elf64_Shdr *sh; + void *vmlinux; + + status = EXIT_FAILURE; + + if (argc < 2) { + usage(stderr); + goto out_ret; + } + + vmlinux_path = argv[1]; + vmlinux_fd = open(vmlinux_path, O_RDONLY); + if (vmlinux_fd == -1) { + perror("Unable to open vmlinux"); + goto out_ret; + } + + err = fstat(vmlinux_fd, &st); + if (err) { + perror("Unable to stat vmlinux"); + goto out_close; + } + + vmlinux = mmap(NULL, st.st_size, PROT_READ, MAP_PRIVATE, vmlinux_fd, 0); + if (vmlinux == MAP_FAILED) { + perror("Unable to mmap vmlinux"); + goto out_close; + } + + eh = vmlinux; + if (memcmp(eh->e_ident, ELFMAG, SELFMAG)) { + fprintf(stderr, "vmlinux is not an ELF?\n"); + goto out_munmap; + } + + if (eh->e_ident[EI_CLASS] != ELFCLASS64) { + fprintf(stderr, "vmlinux is not 64b?\n"); + goto out_munmap; + } + + if (eh->e_ident[EI_DATA] != ELFDATA2LSB) { + fprintf(stderr, "vmlinux is not little endian?\n"); + goto out_munmap; + } + + for (i = 0; i < le16toh(eh->e_shnum); i++) { + sh = vmlinux + le64toh(eh->e_shoff) + (i * le16toh(eh->e_shentsize)); + + if (sh->sh_type != SHT_PROGBITS) + continue; + if (!(sh->sh_flags & SHF_EXECINSTR)) + continue; + + err = check_code(le64toh(sh->sh_addr), + vmlinux + le64toh(sh->sh_offset), + le64toh(sh->sh_size)); + if (err) + goto out_munmap; + } + + status = EXIT_SUCCESS; +out_munmap: + munmap(vmlinux, st.st_size); +out_close: + close(vmlinux_fd); +out_ret: + return status; +} diff --git a/arch/mips/vdso/Makefile b/arch/mips/vdso/Makefile index 69cfa0a5339e..e05938997e69 100644 --- a/arch/mips/vdso/Makefile +++ b/arch/mips/vdso/Makefile @@ -15,6 +15,7 @@ ccflags-vdso := \ $(filter -mmicromips,$(KBUILD_CFLAGS)) \ $(filter -march=%,$(KBUILD_CFLAGS)) \ $(filter -m%-float,$(KBUILD_CFLAGS)) \ + $(filter -mno-loongson-%,$(KBUILD_CFLAGS)) \ -D__VDSO__ ifdef CONFIG_CC_IS_CLANG @@ -59,7 +60,7 @@ CFLAGS_REMOVE_vgettimeofday.o = -pg ifndef CONFIG_CPU_MIPSR6 ifeq ($(call ld-ifversion, -lt, 225000000, y),y) $(warning MIPS VDSO requires binutils >= 2.25) - obj-vdso-y := $(filter-out gettimeofday.o, $(obj-vdso-y)) + obj-vdso-y := $(filter-out vgettimeofday.o, $(obj-vdso-y)) ccflags-vdso += -DDISABLE_MIPS_VDSO endif endif @@ -74,6 +75,7 @@ CFLAGS_REMOVE_vdso.o = -pg GCOV_PROFILE := n UBSAN_SANITIZE := n +KCOV_INSTRUMENT := n # # Shared build commands. 
diff --git a/arch/mips/vdso/gettimeofday.c b/arch/mips/vdso/gettimeofday.c deleted file mode 100644 index e8243c7fd5b5..000000000000 --- a/arch/mips/vdso/gettimeofday.c +++ /dev/null @@ -1,269 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - * Copyright (C) 2015 Imagination Technologies - * Author: Alex Smith <alex.smith@imgtec.com> - */ - -#include "vdso.h" - -#include <linux/compiler.h> -#include <linux/time.h> - -#include <asm/clocksource.h> -#include <asm/io.h> -#include <asm/unistd.h> -#include <asm/vdso.h> - -#ifdef CONFIG_MIPS_CLOCK_VSYSCALL - -static __always_inline long gettimeofday_fallback(struct timeval *_tv, - struct timezone *_tz) -{ - register struct timezone *tz asm("a1") = _tz; - register struct timeval *tv asm("a0") = _tv; - register long ret asm("v0"); - register long nr asm("v0") = __NR_gettimeofday; - register long error asm("a3"); - - asm volatile( - " syscall\n" - : "=r" (ret), "=r" (error) - : "r" (tv), "r" (tz), "r" (nr) - : "$1", "$3", "$8", "$9", "$10", "$11", "$12", "$13", - "$14", "$15", "$24", "$25", "hi", "lo", "memory"); - - return error ? -ret : ret; -} - -#endif - -static __always_inline long clock_gettime_fallback(clockid_t _clkid, - struct timespec *_ts) -{ - register struct timespec *ts asm("a1") = _ts; - register clockid_t clkid asm("a0") = _clkid; - register long ret asm("v0"); - register long nr asm("v0") = __NR_clock_gettime; - register long error asm("a3"); - - asm volatile( - " syscall\n" - : "=r" (ret), "=r" (error) - : "r" (clkid), "r" (ts), "r" (nr) - : "$1", "$3", "$8", "$9", "$10", "$11", "$12", "$13", - "$14", "$15", "$24", "$25", "hi", "lo", "memory"); - - return error ? -ret : ret; -} - -static __always_inline int do_realtime_coarse(struct timespec *ts, - const union mips_vdso_data *data) -{ - u32 start_seq; - - do { - start_seq = vdso_data_read_begin(data); - - ts->tv_sec = data->xtime_sec; - ts->tv_nsec = data->xtime_nsec >> data->cs_shift; - } while (vdso_data_read_retry(data, start_seq)); - - return 0; -} - -static __always_inline int do_monotonic_coarse(struct timespec *ts, - const union mips_vdso_data *data) -{ - u32 start_seq; - u64 to_mono_sec; - u64 to_mono_nsec; - - do { - start_seq = vdso_data_read_begin(data); - - ts->tv_sec = data->xtime_sec; - ts->tv_nsec = data->xtime_nsec >> data->cs_shift; - - to_mono_sec = data->wall_to_mono_sec; - to_mono_nsec = data->wall_to_mono_nsec; - } while (vdso_data_read_retry(data, start_seq)); - - ts->tv_sec += to_mono_sec; - timespec_add_ns(ts, to_mono_nsec); - - return 0; -} - -#ifdef CONFIG_CSRC_R4K - -static __always_inline u64 read_r4k_count(void) -{ - unsigned int count; - - __asm__ __volatile__( - " .set push\n" - " .set mips32r2\n" - " rdhwr %0, $2\n" - " .set pop\n" - : "=r" (count)); - - return count; -} - -#endif - -#ifdef CONFIG_CLKSRC_MIPS_GIC - -static __always_inline u64 read_gic_count(const union mips_vdso_data *data) -{ - void __iomem *gic = get_gic(data); - u32 hi, hi2, lo; - - do { - hi = __raw_readl(gic + sizeof(lo)); - lo = __raw_readl(gic); - hi2 = __raw_readl(gic + sizeof(lo)); - } while (hi2 != hi); - - return (((u64)hi) << 32) + lo; -} - -#endif - -static __always_inline u64 get_ns(const union mips_vdso_data *data) -{ - u64 cycle_now, delta, nsec; - - switch (data->clock_mode) { -#ifdef CONFIG_CSRC_R4K - case VDSO_CLOCK_R4K: - cycle_now = read_r4k_count(); - break; -#endif -#ifdef CONFIG_CLKSRC_MIPS_GIC - case VDSO_CLOCK_GIC: - cycle_now = read_gic_count(data); - break; -#endif - default: - return 0; - } - - delta = (cycle_now - data->cs_cycle_last) & 
data->cs_mask; - - nsec = (delta * data->cs_mult) + data->xtime_nsec; - nsec >>= data->cs_shift; - - return nsec; -} - -static __always_inline int do_realtime(struct timespec *ts, - const union mips_vdso_data *data) -{ - u32 start_seq; - u64 ns; - - do { - start_seq = vdso_data_read_begin(data); - - if (data->clock_mode == VDSO_CLOCK_NONE) - return -ENOSYS; - - ts->tv_sec = data->xtime_sec; - ns = get_ns(data); - } while (vdso_data_read_retry(data, start_seq)); - - ts->tv_nsec = 0; - timespec_add_ns(ts, ns); - - return 0; -} - -static __always_inline int do_monotonic(struct timespec *ts, - const union mips_vdso_data *data) -{ - u32 start_seq; - u64 ns; - u64 to_mono_sec; - u64 to_mono_nsec; - - do { - start_seq = vdso_data_read_begin(data); - - if (data->clock_mode == VDSO_CLOCK_NONE) - return -ENOSYS; - - ts->tv_sec = data->xtime_sec; - ns = get_ns(data); - - to_mono_sec = data->wall_to_mono_sec; - to_mono_nsec = data->wall_to_mono_nsec; - } while (vdso_data_read_retry(data, start_seq)); - - ts->tv_sec += to_mono_sec; - ts->tv_nsec = 0; - timespec_add_ns(ts, ns + to_mono_nsec); - - return 0; -} - -#ifdef CONFIG_MIPS_CLOCK_VSYSCALL - -/* - * This is behind the ifdef so that we don't provide the symbol when there's no - * possibility of there being a usable clocksource, because there's nothing we - * can do without it. When libc fails the symbol lookup it should fall back on - * the standard syscall path. - */ -int __vdso_gettimeofday(struct timeval *tv, struct timezone *tz) -{ - const union mips_vdso_data *data = get_vdso_data(); - struct timespec ts; - int ret; - - ret = do_realtime(&ts, data); - if (ret) - return gettimeofday_fallback(tv, tz); - - if (tv) { - tv->tv_sec = ts.tv_sec; - tv->tv_usec = ts.tv_nsec / 1000; - } - - if (tz) { - tz->tz_minuteswest = data->tz_minuteswest; - tz->tz_dsttime = data->tz_dsttime; - } - - return 0; -} - -#endif /* CONFIG_MIPS_CLOCK_VSYSCALL */ - -int __vdso_clock_gettime(clockid_t clkid, struct timespec *ts) -{ - const union mips_vdso_data *data = get_vdso_data(); - int ret = -1; - - switch (clkid) { - case CLOCK_REALTIME_COARSE: - ret = do_realtime_coarse(ts, data); - break; - case CLOCK_MONOTONIC_COARSE: - ret = do_monotonic_coarse(ts, data); - break; - case CLOCK_REALTIME: - ret = do_realtime(ts, data); - break; - case CLOCK_MONOTONIC: - ret = do_monotonic(ts, data); - break; - default: - break; - } - - if (ret) - ret = clock_gettime_fallback(clkid, ts); - - return ret; -} diff --git a/arch/parisc/Makefile b/arch/parisc/Makefile index 36b834f1c933..dca8f2de8cf5 100644 --- a/arch/parisc/Makefile +++ b/arch/parisc/Makefile @@ -60,7 +60,6 @@ KBUILD_CFLAGS += -DCC_USING_PATCHABLE_FUNCTION_ENTRY=1 \ -DFTRACE_PATCHABLE_FUNCTION_SIZE=$(NOP_COUNT) CC_FLAGS_FTRACE := -fpatchable-function-entry=$(NOP_COUNT),$(shell echo $$(($(NOP_COUNT)-1))) -KBUILD_LDS_MODULE += $(srctree)/arch/parisc/kernel/module.lds endif OBJCOPY_FLAGS =-O binary -R .note -R .comment -S diff --git a/arch/parisc/include/asm/cache.h b/arch/parisc/include/asm/cache.h index 73ca89a47f49..e5de3f897633 100644 --- a/arch/parisc/include/asm/cache.h +++ b/arch/parisc/include/asm/cache.h @@ -22,7 +22,7 @@ #define ARCH_DMA_MINALIGN L1_CACHE_BYTES -#define __read_mostly __attribute__((__section__(".data..read_mostly"))) +#define __read_mostly __section(.data..read_mostly) void parisc_cache_init(void); /* initializes cache-flushing */ void disable_sr_hashing_asm(int); /* low level support for above */ diff --git a/arch/parisc/include/asm/ldcw.h b/arch/parisc/include/asm/ldcw.h index 
3eb4bfc1fb36..e080143e79a3 100644 --- a/arch/parisc/include/asm/ldcw.h +++ b/arch/parisc/include/asm/ldcw.h @@ -52,7 +52,7 @@ }) #ifdef CONFIG_SMP -# define __lock_aligned __attribute__((__section__(".data..lock_aligned"))) +# define __lock_aligned __section(.data..lock_aligned) #endif #endif /* __PARISC_LDCW_H */ diff --git a/arch/parisc/kernel/entry.S b/arch/parisc/kernel/entry.S index 1d1d748c227f..b96d74496977 100644 --- a/arch/parisc/kernel/entry.S +++ b/arch/parisc/kernel/entry.S @@ -2125,7 +2125,7 @@ ftrace_regs_caller: copy %rp, %r26 LDREG -FTRACE_FRAME_SIZE-PT_SZ_ALGN(%sp), %r25 ldo -8(%r25), %r25 - copy %r3, %arg2 + ldo -FTRACE_FRAME_SIZE(%r1), %arg2 b,l ftrace_function_trampoline, %rp copy %r1, %arg3 /* struct pt_regs */ diff --git a/arch/parisc/kernel/module.c b/arch/parisc/kernel/module.c index ac5f34993b53..1c50093e2ebe 100644 --- a/arch/parisc/kernel/module.c +++ b/arch/parisc/kernel/module.c @@ -43,6 +43,7 @@ #include <linux/elf.h> #include <linux/vmalloc.h> #include <linux/fs.h> +#include <linux/ftrace.h> #include <linux/string.h> #include <linux/kernel.h> #include <linux/bug.h> @@ -862,7 +863,7 @@ int module_finalize(const Elf_Ehdr *hdr, const char *strtab = NULL; const Elf_Shdr *s; char *secstrings; - int err, symindex = -1; + int symindex = -1; Elf_Sym *newptr, *oldptr; Elf_Shdr *symhdr = NULL; #ifdef DEBUG @@ -946,11 +947,13 @@ int module_finalize(const Elf_Ehdr *hdr, /* patch .altinstructions */ apply_alternatives(aseg, aseg + s->sh_size, me->name); +#ifdef CONFIG_DYNAMIC_FTRACE /* For 32 bit kernels we're compiling modules with * -ffunction-sections so we must relocate the addresses in the - *__mcount_loc section. + * ftrace callsite section. */ - if (symindex != -1 && !strcmp(secname, "__mcount_loc")) { + if (symindex != -1 && !strcmp(secname, FTRACE_CALLSITE_SECTION)) { + int err; if (s->sh_type == SHT_REL) err = apply_relocate((Elf_Shdr *)sechdrs, strtab, symindex, @@ -962,6 +965,7 @@ int module_finalize(const Elf_Ehdr *hdr, if (err) return err; } +#endif } return 0; } diff --git a/arch/parisc/kernel/module.lds b/arch/parisc/kernel/module.lds deleted file mode 100644 index 1a9a92aca5c8..000000000000 --- a/arch/parisc/kernel/module.lds +++ /dev/null @@ -1,7 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ - -SECTIONS { - __mcount_loc : { - *(__patchable_function_entries) - } -} diff --git a/arch/parisc/mm/ioremap.c b/arch/parisc/mm/ioremap.c index 92a9b5f12f98..f29f682352f0 100644 --- a/arch/parisc/mm/ioremap.c +++ b/arch/parisc/mm/ioremap.c @@ -3,7 +3,7 @@ * arch/parisc/mm/ioremap.c * * (C) Copyright 1995 1996 Linus Torvalds - * (C) Copyright 2001-2006 Helge Deller <deller@gmx.de> + * (C) Copyright 2001-2019 Helge Deller <deller@gmx.de> * (C) Copyright 2005 Kyle McMartin <kyle@parisc-linux.org> */ @@ -84,7 +84,7 @@ void __iomem * __ioremap(unsigned long phys_addr, unsigned long size, unsigned l addr = (void __iomem *) area->addr; if (ioremap_page_range((unsigned long)addr, (unsigned long)addr + size, phys_addr, pgprot)) { - vfree(addr); + vunmap(addr); return NULL; } @@ -92,9 +92,11 @@ void __iomem * __ioremap(unsigned long phys_addr, unsigned long size, unsigned l } EXPORT_SYMBOL(__ioremap); -void iounmap(const volatile void __iomem *addr) +void iounmap(const volatile void __iomem *io_addr) { - if (addr > high_memory) - return vfree((void *) (PAGE_MASK & (unsigned long __force) addr)); + unsigned long addr = (unsigned long)io_addr & PAGE_MASK; + + if (is_vmalloc_addr((void *)addr)) + vunmap((void *)addr); } EXPORT_SYMBOL(iounmap); diff --git 
a/arch/powerpc/boot/Makefile b/arch/powerpc/boot/Makefile index 6841bd52738b..dfbd7f22eef5 100644 --- a/arch/powerpc/boot/Makefile +++ b/arch/powerpc/boot/Makefile @@ -50,7 +50,7 @@ endif BOOTAFLAGS := -D__ASSEMBLY__ $(BOOTCFLAGS) -nostdinc -BOOTARFLAGS := -cr$(KBUILD_ARFLAGS) +BOOTARFLAGS := -crD ifdef CONFIG_CC_IS_CLANG BOOTCFLAGS += $(CLANG_FLAGS) diff --git a/arch/powerpc/crypto/aes-spe-glue.c b/arch/powerpc/crypto/aes-spe-glue.c index 3a4ca7d32477..1fad5d4c658d 100644 --- a/arch/powerpc/crypto/aes-spe-glue.c +++ b/arch/powerpc/crypto/aes-spe-glue.c @@ -17,7 +17,10 @@ #include <asm/byteorder.h> #include <asm/switch_to.h> #include <crypto/algapi.h> +#include <crypto/internal/skcipher.h> #include <crypto/xts.h> +#include <crypto/gf128mul.h> +#include <crypto/scatterwalk.h> /* * MAX_BYTES defines the number of bytes that are allowed to be processed @@ -118,13 +121,19 @@ static int ppc_aes_setkey(struct crypto_tfm *tfm, const u8 *in_key, return 0; } -static int ppc_xts_setkey(struct crypto_tfm *tfm, const u8 *in_key, +static int ppc_aes_setkey_skcipher(struct crypto_skcipher *tfm, + const u8 *in_key, unsigned int key_len) +{ + return ppc_aes_setkey(crypto_skcipher_tfm(tfm), in_key, key_len); +} + +static int ppc_xts_setkey(struct crypto_skcipher *tfm, const u8 *in_key, unsigned int key_len) { - struct ppc_xts_ctx *ctx = crypto_tfm_ctx(tfm); + struct ppc_xts_ctx *ctx = crypto_skcipher_ctx(tfm); int err; - err = xts_check_key(tfm, in_key, key_len); + err = xts_verify_key(tfm, in_key, key_len); if (err) return err; @@ -133,7 +142,7 @@ static int ppc_xts_setkey(struct crypto_tfm *tfm, const u8 *in_key, if (key_len != AES_KEYSIZE_128 && key_len != AES_KEYSIZE_192 && key_len != AES_KEYSIZE_256) { - tfm->crt_flags |= CRYPTO_TFM_RES_BAD_KEY_LEN; + crypto_skcipher_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN); return -EINVAL; } @@ -178,208 +187,229 @@ static void ppc_aes_decrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in) spe_end(); } -static int ppc_ecb_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) +static int ppc_ecb_crypt(struct skcipher_request *req, bool enc) { - struct ppc_aes_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); - struct blkcipher_walk walk; - unsigned int ubytes; + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct ppc_aes_ctx *ctx = crypto_skcipher_ctx(tfm); + struct skcipher_walk walk; + unsigned int nbytes; int err; - desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; - blkcipher_walk_init(&walk, dst, src, nbytes); - err = blkcipher_walk_virt(desc, &walk); + err = skcipher_walk_virt(&walk, req, false); - while ((nbytes = walk.nbytes)) { - ubytes = nbytes > MAX_BYTES ? 
- nbytes - MAX_BYTES : nbytes & (AES_BLOCK_SIZE - 1); - nbytes -= ubytes; + while ((nbytes = walk.nbytes) != 0) { + nbytes = min_t(unsigned int, nbytes, MAX_BYTES); + nbytes = round_down(nbytes, AES_BLOCK_SIZE); spe_begin(); - ppc_encrypt_ecb(walk.dst.virt.addr, walk.src.virt.addr, - ctx->key_enc, ctx->rounds, nbytes); + if (enc) + ppc_encrypt_ecb(walk.dst.virt.addr, walk.src.virt.addr, + ctx->key_enc, ctx->rounds, nbytes); + else + ppc_decrypt_ecb(walk.dst.virt.addr, walk.src.virt.addr, + ctx->key_dec, ctx->rounds, nbytes); spe_end(); - err = blkcipher_walk_done(desc, &walk, ubytes); + err = skcipher_walk_done(&walk, walk.nbytes - nbytes); } return err; } -static int ppc_ecb_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) +static int ppc_ecb_encrypt(struct skcipher_request *req) { - struct ppc_aes_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); - struct blkcipher_walk walk; - unsigned int ubytes; - int err; - - desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; - blkcipher_walk_init(&walk, dst, src, nbytes); - err = blkcipher_walk_virt(desc, &walk); - - while ((nbytes = walk.nbytes)) { - ubytes = nbytes > MAX_BYTES ? - nbytes - MAX_BYTES : nbytes & (AES_BLOCK_SIZE - 1); - nbytes -= ubytes; - - spe_begin(); - ppc_decrypt_ecb(walk.dst.virt.addr, walk.src.virt.addr, - ctx->key_dec, ctx->rounds, nbytes); - spe_end(); - - err = blkcipher_walk_done(desc, &walk, ubytes); - } + return ppc_ecb_crypt(req, true); +} - return err; +static int ppc_ecb_decrypt(struct skcipher_request *req) +{ + return ppc_ecb_crypt(req, false); } -static int ppc_cbc_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) +static int ppc_cbc_crypt(struct skcipher_request *req, bool enc) { - struct ppc_aes_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); - struct blkcipher_walk walk; - unsigned int ubytes; + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct ppc_aes_ctx *ctx = crypto_skcipher_ctx(tfm); + struct skcipher_walk walk; + unsigned int nbytes; int err; - desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; - blkcipher_walk_init(&walk, dst, src, nbytes); - err = blkcipher_walk_virt(desc, &walk); + err = skcipher_walk_virt(&walk, req, false); - while ((nbytes = walk.nbytes)) { - ubytes = nbytes > MAX_BYTES ? - nbytes - MAX_BYTES : nbytes & (AES_BLOCK_SIZE - 1); - nbytes -= ubytes; + while ((nbytes = walk.nbytes) != 0) { + nbytes = min_t(unsigned int, nbytes, MAX_BYTES); + nbytes = round_down(nbytes, AES_BLOCK_SIZE); spe_begin(); - ppc_encrypt_cbc(walk.dst.virt.addr, walk.src.virt.addr, - ctx->key_enc, ctx->rounds, nbytes, walk.iv); + if (enc) + ppc_encrypt_cbc(walk.dst.virt.addr, walk.src.virt.addr, + ctx->key_enc, ctx->rounds, nbytes, + walk.iv); + else + ppc_decrypt_cbc(walk.dst.virt.addr, walk.src.virt.addr, + ctx->key_dec, ctx->rounds, nbytes, + walk.iv); spe_end(); - err = blkcipher_walk_done(desc, &walk, ubytes); + err = skcipher_walk_done(&walk, walk.nbytes - nbytes); } return err; } -static int ppc_cbc_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) +static int ppc_cbc_encrypt(struct skcipher_request *req) { - struct ppc_aes_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); - struct blkcipher_walk walk; - unsigned int ubytes; - int err; - - desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; - blkcipher_walk_init(&walk, dst, src, nbytes); - err = blkcipher_walk_virt(desc, &walk); - - while ((nbytes = walk.nbytes)) { - ubytes = nbytes > MAX_BYTES ? 
- nbytes - MAX_BYTES : nbytes & (AES_BLOCK_SIZE - 1); - nbytes -= ubytes; - - spe_begin(); - ppc_decrypt_cbc(walk.dst.virt.addr, walk.src.virt.addr, - ctx->key_dec, ctx->rounds, nbytes, walk.iv); - spe_end(); - - err = blkcipher_walk_done(desc, &walk, ubytes); - } + return ppc_cbc_crypt(req, true); +} - return err; +static int ppc_cbc_decrypt(struct skcipher_request *req) +{ + return ppc_cbc_crypt(req, false); } -static int ppc_ctr_crypt(struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) +static int ppc_ctr_crypt(struct skcipher_request *req) { - struct ppc_aes_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); - struct blkcipher_walk walk; - unsigned int pbytes, ubytes; + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct ppc_aes_ctx *ctx = crypto_skcipher_ctx(tfm); + struct skcipher_walk walk; + unsigned int nbytes; int err; - desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; - blkcipher_walk_init(&walk, dst, src, nbytes); - err = blkcipher_walk_virt_block(desc, &walk, AES_BLOCK_SIZE); + err = skcipher_walk_virt(&walk, req, false); - while ((pbytes = walk.nbytes)) { - pbytes = pbytes > MAX_BYTES ? MAX_BYTES : pbytes; - pbytes = pbytes == nbytes ? - nbytes : pbytes & ~(AES_BLOCK_SIZE - 1); - ubytes = walk.nbytes - pbytes; + while ((nbytes = walk.nbytes) != 0) { + nbytes = min_t(unsigned int, nbytes, MAX_BYTES); + if (nbytes < walk.total) + nbytes = round_down(nbytes, AES_BLOCK_SIZE); spe_begin(); ppc_crypt_ctr(walk.dst.virt.addr, walk.src.virt.addr, - ctx->key_enc, ctx->rounds, pbytes , walk.iv); + ctx->key_enc, ctx->rounds, nbytes, walk.iv); spe_end(); - nbytes -= pbytes; - err = blkcipher_walk_done(desc, &walk, ubytes); + err = skcipher_walk_done(&walk, walk.nbytes - nbytes); } return err; } -static int ppc_xts_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) +static int ppc_xts_crypt(struct skcipher_request *req, bool enc) { - struct ppc_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); - struct blkcipher_walk walk; - unsigned int ubytes; + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct ppc_xts_ctx *ctx = crypto_skcipher_ctx(tfm); + struct skcipher_walk walk; + unsigned int nbytes; int err; u32 *twk; - desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; - blkcipher_walk_init(&walk, dst, src, nbytes); - err = blkcipher_walk_virt(desc, &walk); + err = skcipher_walk_virt(&walk, req, false); twk = ctx->key_twk; - while ((nbytes = walk.nbytes)) { - ubytes = nbytes > MAX_BYTES ? 
- nbytes - MAX_BYTES : nbytes & (AES_BLOCK_SIZE - 1); - nbytes -= ubytes; + while ((nbytes = walk.nbytes) != 0) { + nbytes = min_t(unsigned int, nbytes, MAX_BYTES); + nbytes = round_down(nbytes, AES_BLOCK_SIZE); spe_begin(); - ppc_encrypt_xts(walk.dst.virt.addr, walk.src.virt.addr, - ctx->key_enc, ctx->rounds, nbytes, walk.iv, twk); + if (enc) + ppc_encrypt_xts(walk.dst.virt.addr, walk.src.virt.addr, + ctx->key_enc, ctx->rounds, nbytes, + walk.iv, twk); + else + ppc_decrypt_xts(walk.dst.virt.addr, walk.src.virt.addr, + ctx->key_dec, ctx->rounds, nbytes, + walk.iv, twk); spe_end(); twk = NULL; - err = blkcipher_walk_done(desc, &walk, ubytes); + err = skcipher_walk_done(&walk, walk.nbytes - nbytes); } return err; } -static int ppc_xts_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) +static int ppc_xts_encrypt(struct skcipher_request *req) { - struct ppc_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); - struct blkcipher_walk walk; - unsigned int ubytes; + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct ppc_xts_ctx *ctx = crypto_skcipher_ctx(tfm); + int tail = req->cryptlen % AES_BLOCK_SIZE; + int offset = req->cryptlen - tail - AES_BLOCK_SIZE; + struct skcipher_request subreq; + u8 b[2][AES_BLOCK_SIZE]; int err; - u32 *twk; - desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; - blkcipher_walk_init(&walk, dst, src, nbytes); - err = blkcipher_walk_virt(desc, &walk); - twk = ctx->key_twk; + if (req->cryptlen < AES_BLOCK_SIZE) + return -EINVAL; + + if (tail) { + subreq = *req; + skcipher_request_set_crypt(&subreq, req->src, req->dst, + req->cryptlen - tail, req->iv); + req = &subreq; + } - while ((nbytes = walk.nbytes)) { - ubytes = nbytes > MAX_BYTES ? - nbytes - MAX_BYTES : nbytes & (AES_BLOCK_SIZE - 1); - nbytes -= ubytes; + err = ppc_xts_crypt(req, true); + if (err || !tail) + return err; - spe_begin(); - ppc_decrypt_xts(walk.dst.virt.addr, walk.src.virt.addr, - ctx->key_dec, ctx->rounds, nbytes, walk.iv, twk); - spe_end(); + scatterwalk_map_and_copy(b[0], req->dst, offset, AES_BLOCK_SIZE, 0); + memcpy(b[1], b[0], tail); + scatterwalk_map_and_copy(b[0], req->src, offset + AES_BLOCK_SIZE, tail, 0); - twk = NULL; - err = blkcipher_walk_done(desc, &walk, ubytes); + spe_begin(); + ppc_encrypt_xts(b[0], b[0], ctx->key_enc, ctx->rounds, AES_BLOCK_SIZE, + req->iv, NULL); + spe_end(); + + scatterwalk_map_and_copy(b[0], req->dst, offset, AES_BLOCK_SIZE + tail, 1); + + return 0; +} + +static int ppc_xts_decrypt(struct skcipher_request *req) +{ + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct ppc_xts_ctx *ctx = crypto_skcipher_ctx(tfm); + int tail = req->cryptlen % AES_BLOCK_SIZE; + int offset = req->cryptlen - tail - AES_BLOCK_SIZE; + struct skcipher_request subreq; + u8 b[3][AES_BLOCK_SIZE]; + le128 twk; + int err; + + if (req->cryptlen < AES_BLOCK_SIZE) + return -EINVAL; + + if (tail) { + subreq = *req; + skcipher_request_set_crypt(&subreq, req->src, req->dst, + offset, req->iv); + req = &subreq; } - return err; + err = ppc_xts_crypt(req, false); + if (err || !tail) + return err; + + scatterwalk_map_and_copy(b[1], req->src, offset, AES_BLOCK_SIZE + tail, 0); + + spe_begin(); + if (!offset) + ppc_encrypt_ecb(req->iv, req->iv, ctx->key_twk, ctx->rounds, + AES_BLOCK_SIZE); + + gf128mul_x_ble(&twk, (le128 *)req->iv); + + ppc_decrypt_xts(b[1], b[1], ctx->key_dec, ctx->rounds, AES_BLOCK_SIZE, + (u8 *)&twk, NULL); + memcpy(b[0], b[2], tail); + memcpy(b[0] + tail, b[1] + tail, AES_BLOCK_SIZE - tail); + 
ppc_decrypt_xts(b[0], b[0], ctx->key_dec, ctx->rounds, AES_BLOCK_SIZE, + req->iv, NULL); + spe_end(); + + scatterwalk_map_and_copy(b[0], req->dst, offset, AES_BLOCK_SIZE + tail, 1); + + return 0; } /* @@ -388,9 +418,9 @@ static int ppc_xts_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, * This improves IPsec thoughput by another few percent. Additionally we assume * that AES context is always aligned to at least 8 bytes because it is created * with kmalloc() in the crypto infrastructure - * */ -static struct crypto_alg aes_algs[] = { { + +static struct crypto_alg aes_cipher_alg = { .cra_name = "aes", .cra_driver_name = "aes-ppc-spe", .cra_priority = 300, @@ -408,96 +438,84 @@ static struct crypto_alg aes_algs[] = { { .cia_decrypt = ppc_aes_decrypt } } -}, { - .cra_name = "ecb(aes)", - .cra_driver_name = "ecb-ppc-spe", - .cra_priority = 300, - .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, - .cra_blocksize = AES_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct ppc_aes_ctx), - .cra_alignmask = 0, - .cra_type = &crypto_blkcipher_type, - .cra_module = THIS_MODULE, - .cra_u = { - .blkcipher = { - .min_keysize = AES_MIN_KEY_SIZE, - .max_keysize = AES_MAX_KEY_SIZE, - .ivsize = AES_BLOCK_SIZE, - .setkey = ppc_aes_setkey, - .encrypt = ppc_ecb_encrypt, - .decrypt = ppc_ecb_decrypt, - } - } -}, { - .cra_name = "cbc(aes)", - .cra_driver_name = "cbc-ppc-spe", - .cra_priority = 300, - .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, - .cra_blocksize = AES_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct ppc_aes_ctx), - .cra_alignmask = 0, - .cra_type = &crypto_blkcipher_type, - .cra_module = THIS_MODULE, - .cra_u = { - .blkcipher = { - .min_keysize = AES_MIN_KEY_SIZE, - .max_keysize = AES_MAX_KEY_SIZE, - .ivsize = AES_BLOCK_SIZE, - .setkey = ppc_aes_setkey, - .encrypt = ppc_cbc_encrypt, - .decrypt = ppc_cbc_decrypt, - } - } -}, { - .cra_name = "ctr(aes)", - .cra_driver_name = "ctr-ppc-spe", - .cra_priority = 300, - .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, - .cra_blocksize = 1, - .cra_ctxsize = sizeof(struct ppc_aes_ctx), - .cra_alignmask = 0, - .cra_type = &crypto_blkcipher_type, - .cra_module = THIS_MODULE, - .cra_u = { - .blkcipher = { - .min_keysize = AES_MIN_KEY_SIZE, - .max_keysize = AES_MAX_KEY_SIZE, - .ivsize = AES_BLOCK_SIZE, - .setkey = ppc_aes_setkey, - .encrypt = ppc_ctr_crypt, - .decrypt = ppc_ctr_crypt, - } - } -}, { - .cra_name = "xts(aes)", - .cra_driver_name = "xts-ppc-spe", - .cra_priority = 300, - .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, - .cra_blocksize = AES_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct ppc_xts_ctx), - .cra_alignmask = 0, - .cra_type = &crypto_blkcipher_type, - .cra_module = THIS_MODULE, - .cra_u = { - .blkcipher = { - .min_keysize = AES_MIN_KEY_SIZE * 2, - .max_keysize = AES_MAX_KEY_SIZE * 2, - .ivsize = AES_BLOCK_SIZE, - .setkey = ppc_xts_setkey, - .encrypt = ppc_xts_encrypt, - .decrypt = ppc_xts_decrypt, - } +}; + +static struct skcipher_alg aes_skcipher_algs[] = { + { + .base.cra_name = "ecb(aes)", + .base.cra_driver_name = "ecb-ppc-spe", + .base.cra_priority = 300, + .base.cra_blocksize = AES_BLOCK_SIZE, + .base.cra_ctxsize = sizeof(struct ppc_aes_ctx), + .base.cra_module = THIS_MODULE, + .min_keysize = AES_MIN_KEY_SIZE, + .max_keysize = AES_MAX_KEY_SIZE, + .setkey = ppc_aes_setkey_skcipher, + .encrypt = ppc_ecb_encrypt, + .decrypt = ppc_ecb_decrypt, + }, { + .base.cra_name = "cbc(aes)", + .base.cra_driver_name = "cbc-ppc-spe", + .base.cra_priority = 300, + .base.cra_blocksize = AES_BLOCK_SIZE, + .base.cra_ctxsize = sizeof(struct ppc_aes_ctx), + .base.cra_module = THIS_MODULE, + 
.min_keysize = AES_MIN_KEY_SIZE, + .max_keysize = AES_MAX_KEY_SIZE, + .ivsize = AES_BLOCK_SIZE, + .setkey = ppc_aes_setkey_skcipher, + .encrypt = ppc_cbc_encrypt, + .decrypt = ppc_cbc_decrypt, + }, { + .base.cra_name = "ctr(aes)", + .base.cra_driver_name = "ctr-ppc-spe", + .base.cra_priority = 300, + .base.cra_blocksize = 1, + .base.cra_ctxsize = sizeof(struct ppc_aes_ctx), + .base.cra_module = THIS_MODULE, + .min_keysize = AES_MIN_KEY_SIZE, + .max_keysize = AES_MAX_KEY_SIZE, + .ivsize = AES_BLOCK_SIZE, + .setkey = ppc_aes_setkey_skcipher, + .encrypt = ppc_ctr_crypt, + .decrypt = ppc_ctr_crypt, + .chunksize = AES_BLOCK_SIZE, + }, { + .base.cra_name = "xts(aes)", + .base.cra_driver_name = "xts-ppc-spe", + .base.cra_priority = 300, + .base.cra_blocksize = AES_BLOCK_SIZE, + .base.cra_ctxsize = sizeof(struct ppc_xts_ctx), + .base.cra_module = THIS_MODULE, + .min_keysize = AES_MIN_KEY_SIZE * 2, + .max_keysize = AES_MAX_KEY_SIZE * 2, + .ivsize = AES_BLOCK_SIZE, + .setkey = ppc_xts_setkey, + .encrypt = ppc_xts_encrypt, + .decrypt = ppc_xts_decrypt, } -} }; +}; static int __init ppc_aes_mod_init(void) { - return crypto_register_algs(aes_algs, ARRAY_SIZE(aes_algs)); + int err; + + err = crypto_register_alg(&aes_cipher_alg); + if (err) + return err; + + err = crypto_register_skciphers(aes_skcipher_algs, + ARRAY_SIZE(aes_skcipher_algs)); + if (err) + crypto_unregister_alg(&aes_cipher_alg); + return err; } static void __exit ppc_aes_mod_fini(void) { - crypto_unregister_algs(aes_algs, ARRAY_SIZE(aes_algs)); + crypto_unregister_alg(&aes_cipher_alg); + crypto_unregister_skciphers(aes_skcipher_algs, + ARRAY_SIZE(aes_skcipher_algs)); } module_init(ppc_aes_mod_init); diff --git a/arch/powerpc/include/asm/book3s/32/kup.h b/arch/powerpc/include/asm/book3s/32/kup.h index 677e9babef80..f9dc597b0b86 100644 --- a/arch/powerpc/include/asm/book3s/32/kup.h +++ b/arch/powerpc/include/asm/book3s/32/kup.h @@ -91,6 +91,7 @@ static inline void kuap_update_sr(u32 sr, u32 addr, u32 end) { + addr &= 0xf0000000; /* align addr to start of segment */ barrier(); /* make sure thread.kuap is updated before playing with SRs */ while (addr < end) { mtsrin(sr, addr); diff --git a/arch/powerpc/include/asm/book3s/64/tlbflush-radix.h b/arch/powerpc/include/asm/book3s/64/tlbflush-radix.h index 4ce795d30377..ca8db193ae38 100644 --- a/arch/powerpc/include/asm/book3s/64/tlbflush-radix.h +++ b/arch/powerpc/include/asm/book3s/64/tlbflush-radix.h @@ -35,6 +35,10 @@ static inline void radix__flush_all_lpid(unsigned int lpid) { WARN_ON(1); } +static inline void radix__flush_all_lpid_guest(unsigned int lpid) +{ + WARN_ON(1); +} #endif extern void radix__flush_hugetlb_tlb_range(struct vm_area_struct *vma, diff --git a/arch/powerpc/include/asm/elf.h b/arch/powerpc/include/asm/elf.h index 409c9bfb43d9..57c229a86f08 100644 --- a/arch/powerpc/include/asm/elf.h +++ b/arch/powerpc/include/asm/elf.h @@ -175,4 +175,7 @@ do { \ ARCH_DLINFO_CACHE_GEOMETRY; \ } while (0) +/* Relocate the kernel image to @final_address */ +void relocate(unsigned long final_address); + #endif /* _ASM_POWERPC_ELF_H */ diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h index 6fe6ad64cba5..4273e799203d 100644 --- a/arch/powerpc/include/asm/kvm_host.h +++ b/arch/powerpc/include/asm/kvm_host.h @@ -401,7 +401,6 @@ struct kvmppc_mmu { u32 (*mfsrin)(struct kvm_vcpu *vcpu, u32 srnum); int (*xlate)(struct kvm_vcpu *vcpu, gva_t eaddr, struct kvmppc_pte *pte, bool data, bool iswrite); - void (*reset_msr)(struct kvm_vcpu *vcpu); void (*tlbie)(struct 
kvm_vcpu *vcpu, ulong addr, bool large); int (*esid_to_vsid)(struct kvm_vcpu *vcpu, ulong esid, u64 *vsid); u64 (*ea_to_vp)(struct kvm_vcpu *vcpu, gva_t eaddr, bool data); diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h index ee62776e5433..d63f649fe713 100644 --- a/arch/powerpc/include/asm/kvm_ppc.h +++ b/arch/powerpc/include/asm/kvm_ppc.h @@ -271,6 +271,7 @@ struct kvmppc_ops { union kvmppc_one_reg *val); void (*vcpu_load)(struct kvm_vcpu *vcpu, int cpu); void (*vcpu_put)(struct kvm_vcpu *vcpu); + void (*inject_interrupt)(struct kvm_vcpu *vcpu, int vec, u64 srr1_flags); void (*set_msr)(struct kvm_vcpu *vcpu, u64 msr); int (*vcpu_run)(struct kvm_run *run, struct kvm_vcpu *vcpu); struct kvm_vcpu *(*vcpu_create)(struct kvm *kvm, unsigned int id); diff --git a/arch/powerpc/include/asm/local.h b/arch/powerpc/include/asm/local.h index fdd00939270b..bc4bd19b7fc2 100644 --- a/arch/powerpc/include/asm/local.h +++ b/arch/powerpc/include/asm/local.h @@ -17,7 +17,7 @@ typedef struct #define LOCAL_INIT(i) { (i) } -static __inline__ long local_read(local_t *l) +static __inline__ long local_read(const local_t *l) { return READ_ONCE(l->v); } diff --git a/arch/powerpc/include/asm/page.h b/arch/powerpc/include/asm/page.h index c8bb14ff4713..f6c562acc3f8 100644 --- a/arch/powerpc/include/asm/page.h +++ b/arch/powerpc/include/asm/page.h @@ -329,13 +329,4 @@ struct vm_area_struct; #endif /* __ASSEMBLY__ */ #include <asm/slice.h> -/* - * Allow 30-bit DMA for very limited Broadcom wifi chips on many powerbooks. - */ -#ifdef CONFIG_PPC32 -#define ARCH_ZONE_DMA_BITS 30 -#else -#define ARCH_ZONE_DMA_BITS 31 -#endif - #endif /* _ASM_POWERPC_PAGE_H */ diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h index b3cbb1136bce..75c7e95a321b 100644 --- a/arch/powerpc/include/asm/reg.h +++ b/arch/powerpc/include/asm/reg.h @@ -748,6 +748,18 @@ #define SPRN_USPRG7 0x107 /* SPRG7 userspace read */ #define SPRN_SRR0 0x01A /* Save/Restore Register 0 */ #define SPRN_SRR1 0x01B /* Save/Restore Register 1 */ + +#ifdef CONFIG_PPC_BOOK3S +/* + * Bits loaded from MSR upon interrupt. + * PPC (64-bit) bits 33-36,42-47 are interrupt dependent, the others are + * loaded from MSR. The exception is that SRESET and MCE do not always load + * bit 62 (RI) from MSR. Don't use PPC_BITMASK for this because 32-bit uses + * it. 
+ */ +#define SRR1_MSR_BITS (~0x783f0000UL) +#endif + #define SRR1_ISI_NOPT 0x40000000 /* ISI: Not found in hash */ #define SRR1_ISI_N_OR_G 0x10000000 /* ISI: Access is no-exec or G */ #define SRR1_ISI_PROT 0x08000000 /* ISI: Other protection fault */ diff --git a/arch/powerpc/include/uapi/asm/kvm.h b/arch/powerpc/include/uapi/asm/kvm.h index b0f72dea8b11..264e266a85bf 100644 --- a/arch/powerpc/include/uapi/asm/kvm.h +++ b/arch/powerpc/include/uapi/asm/kvm.h @@ -667,6 +667,8 @@ struct kvm_ppc_cpu_char { /* PPC64 eXternal Interrupt Controller Specification */ #define KVM_DEV_XICS_GRP_SOURCES 1 /* 64-bit source attributes */ +#define KVM_DEV_XICS_GRP_CTRL 2 +#define KVM_DEV_XICS_NR_SERVERS 1 /* Layout of 64-bit source attribute values */ #define KVM_XICS_DESTINATION_SHIFT 0 @@ -683,6 +685,7 @@ struct kvm_ppc_cpu_char { #define KVM_DEV_XIVE_GRP_CTRL 1 #define KVM_DEV_XIVE_RESET 1 #define KVM_DEV_XIVE_EQ_SYNC 2 +#define KVM_DEV_XIVE_NR_SERVERS 3 #define KVM_DEV_XIVE_GRP_SOURCE 2 /* 64-bit source identifier */ #define KVM_DEV_XIVE_GRP_SOURCE_CONFIG 3 /* 64-bit source identifier */ #define KVM_DEV_XIVE_GRP_EQ_CONFIG 4 /* 64-bit EQ identifier */ diff --git a/arch/powerpc/kernel/prom_init.c b/arch/powerpc/kernel/prom_init.c index a4e7762dd286..100f1b57ec2f 100644 --- a/arch/powerpc/kernel/prom_init.c +++ b/arch/powerpc/kernel/prom_init.c @@ -3249,7 +3249,20 @@ static void setup_secure_guest(unsigned long kbase, unsigned long fdt) /* Switch to secure mode. */ prom_printf("Switching to secure mode.\n"); + /* + * The ultravisor will do an integrity check of the kernel image but we + * relocated it so the check will fail. Restore the original image by + * relocating it back to the kernel virtual base address. + */ + if (IS_ENABLED(CONFIG_RELOCATABLE)) + relocate(KERNELBASE); + ret = enter_secure_mode(kbase, fdt); + + /* Relocate the kernel again. */ + if (IS_ENABLED(CONFIG_RELOCATABLE)) + relocate(kbase); + if (ret != U_SUCCESS) { prom_printf("Returned %d from switching to secure mode.\n", ret); prom_rtas_os_term("Switch to secure mode failed.\n"); diff --git a/arch/powerpc/kernel/prom_init_check.sh b/arch/powerpc/kernel/prom_init_check.sh index 78bab17b1396..b183ab9c5107 100644 --- a/arch/powerpc/kernel/prom_init_check.sh +++ b/arch/powerpc/kernel/prom_init_check.sh @@ -26,7 +26,8 @@ _end enter_prom $MEM_FUNCS reloc_offset __secondary_hold __secondary_hold_acknowledge __secondary_hold_spinloop __start logo_linux_clut224 btext_prepare_BAT reloc_got2 kernstart_addr memstart_addr linux_banner _stext -__prom_init_toc_start __prom_init_toc_end btext_setup_display TOC." +__prom_init_toc_start __prom_init_toc_end btext_setup_display TOC. +relocate" NM="$1" OBJ="$2" diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c index d7fcdfa7fee4..58a59ee998e2 100644 --- a/arch/powerpc/kvm/book3s.c +++ b/arch/powerpc/kvm/book3s.c @@ -36,8 +36,8 @@ #include "book3s.h" #include "trace.h" -#define VM_STAT(x) offsetof(struct kvm, stat.x), KVM_STAT_VM -#define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU +#define VM_STAT(x, ...) offsetof(struct kvm, stat.x), KVM_STAT_VM, ## __VA_ARGS__ +#define VCPU_STAT(x, ...) 
offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU, ## __VA_ARGS__ /* #define EXIT_DEBUG */ @@ -69,32 +69,11 @@ struct kvm_stats_debugfs_item debugfs_entries[] = { { "pthru_all", VCPU_STAT(pthru_all) }, { "pthru_host", VCPU_STAT(pthru_host) }, { "pthru_bad_aff", VCPU_STAT(pthru_bad_aff) }, - { "largepages_2M", VM_STAT(num_2M_pages) }, - { "largepages_1G", VM_STAT(num_1G_pages) }, + { "largepages_2M", VM_STAT(num_2M_pages, .mode = 0444) }, + { "largepages_1G", VM_STAT(num_1G_pages, .mode = 0444) }, { NULL } }; -void kvmppc_unfixup_split_real(struct kvm_vcpu *vcpu) -{ - if (vcpu->arch.hflags & BOOK3S_HFLAG_SPLIT_HACK) { - ulong pc = kvmppc_get_pc(vcpu); - ulong lr = kvmppc_get_lr(vcpu); - if ((pc & SPLIT_HACK_MASK) == SPLIT_HACK_OFFS) - kvmppc_set_pc(vcpu, pc & ~SPLIT_HACK_MASK); - if ((lr & SPLIT_HACK_MASK) == SPLIT_HACK_OFFS) - kvmppc_set_lr(vcpu, lr & ~SPLIT_HACK_MASK); - vcpu->arch.hflags &= ~BOOK3S_HFLAG_SPLIT_HACK; - } -} -EXPORT_SYMBOL_GPL(kvmppc_unfixup_split_real); - -static inline unsigned long kvmppc_interrupt_offset(struct kvm_vcpu *vcpu) -{ - if (!is_kvmppc_hv_enabled(vcpu->kvm)) - return to_book3s(vcpu)->hior; - return 0; -} - static inline void kvmppc_update_int_pending(struct kvm_vcpu *vcpu, unsigned long pending_now, unsigned long old_pending) { @@ -134,11 +113,7 @@ static inline bool kvmppc_critical_section(struct kvm_vcpu *vcpu) void kvmppc_inject_interrupt(struct kvm_vcpu *vcpu, int vec, u64 flags) { - kvmppc_unfixup_split_real(vcpu); - kvmppc_set_srr0(vcpu, kvmppc_get_pc(vcpu)); - kvmppc_set_srr1(vcpu, (kvmppc_get_msr(vcpu) & ~0x783f0000ul) | flags); - kvmppc_set_pc(vcpu, kvmppc_interrupt_offset(vcpu) + vec); - vcpu->arch.mmu.reset_msr(vcpu); + vcpu->kvm->arch.kvm_ops->inject_interrupt(vcpu, vec, flags); } static int kvmppc_book3s_vec2irqprio(unsigned int vec) diff --git a/arch/powerpc/kvm/book3s.h b/arch/powerpc/kvm/book3s.h index 2ef1311a2a13..3a4613985949 100644 --- a/arch/powerpc/kvm/book3s.h +++ b/arch/powerpc/kvm/book3s.h @@ -32,4 +32,7 @@ extern void kvmppc_emulate_tabort(struct kvm_vcpu *vcpu, int ra_val); static inline void kvmppc_emulate_tabort(struct kvm_vcpu *vcpu, int ra_val) {} #endif +extern void kvmppc_set_msr_hv(struct kvm_vcpu *vcpu, u64 msr); +extern void kvmppc_inject_interrupt_hv(struct kvm_vcpu *vcpu, int vec, u64 srr1_flags); + #endif diff --git a/arch/powerpc/kvm/book3s_32_mmu.c b/arch/powerpc/kvm/book3s_32_mmu.c index 18f244aad7aa..f21e73492ce3 100644 --- a/arch/powerpc/kvm/book3s_32_mmu.c +++ b/arch/powerpc/kvm/book3s_32_mmu.c @@ -90,11 +90,6 @@ static u64 kvmppc_mmu_book3s_32_ea_to_vp(struct kvm_vcpu *vcpu, gva_t eaddr, return (((u64)eaddr >> 12) & 0xffff) | (vsid << 16); } -static void kvmppc_mmu_book3s_32_reset_msr(struct kvm_vcpu *vcpu) -{ - kvmppc_set_msr(vcpu, 0); -} - static hva_t kvmppc_mmu_book3s_32_get_pteg(struct kvm_vcpu *vcpu, u32 sre, gva_t eaddr, bool primary) @@ -406,7 +401,6 @@ void kvmppc_mmu_book3s_32_init(struct kvm_vcpu *vcpu) mmu->mtsrin = kvmppc_mmu_book3s_32_mtsrin; mmu->mfsrin = kvmppc_mmu_book3s_32_mfsrin; mmu->xlate = kvmppc_mmu_book3s_32_xlate; - mmu->reset_msr = kvmppc_mmu_book3s_32_reset_msr; mmu->tlbie = kvmppc_mmu_book3s_32_tlbie; mmu->esid_to_vsid = kvmppc_mmu_book3s_32_esid_to_vsid; mmu->ea_to_vp = kvmppc_mmu_book3s_32_ea_to_vp; diff --git a/arch/powerpc/kvm/book3s_64_mmu.c b/arch/powerpc/kvm/book3s_64_mmu.c index 5f63a5f7f24f..599133256a95 100644 --- a/arch/powerpc/kvm/book3s_64_mmu.c +++ b/arch/powerpc/kvm/book3s_64_mmu.c @@ -24,20 +24,6 @@ #define dprintk(X...) 
do { } while(0) #endif -static void kvmppc_mmu_book3s_64_reset_msr(struct kvm_vcpu *vcpu) -{ - unsigned long msr = vcpu->arch.intr_msr; - unsigned long cur_msr = kvmppc_get_msr(vcpu); - - /* If transactional, change to suspend mode on IRQ delivery */ - if (MSR_TM_TRANSACTIONAL(cur_msr)) - msr |= MSR_TS_S; - else - msr |= cur_msr & MSR_TS_MASK; - - kvmppc_set_msr(vcpu, msr); -} - static struct kvmppc_slb *kvmppc_mmu_book3s_64_find_slbe( struct kvm_vcpu *vcpu, gva_t eaddr) @@ -676,7 +662,6 @@ void kvmppc_mmu_book3s_64_init(struct kvm_vcpu *vcpu) mmu->slbie = kvmppc_mmu_book3s_64_slbie; mmu->slbia = kvmppc_mmu_book3s_64_slbia; mmu->xlate = kvmppc_mmu_book3s_64_xlate; - mmu->reset_msr = kvmppc_mmu_book3s_64_reset_msr; mmu->tlbie = kvmppc_mmu_book3s_64_tlbie; mmu->esid_to_vsid = kvmppc_mmu_book3s_64_esid_to_vsid; mmu->ea_to_vp = kvmppc_mmu_book3s_64_ea_to_vp; diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c index 9a75f0e1933b..d381526c5c9b 100644 --- a/arch/powerpc/kvm/book3s_64_mmu_hv.c +++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c @@ -275,18 +275,6 @@ int kvmppc_mmu_hv_init(void) return 0; } -static void kvmppc_mmu_book3s_64_hv_reset_msr(struct kvm_vcpu *vcpu) -{ - unsigned long msr = vcpu->arch.intr_msr; - - /* If transactional, change to suspend mode on IRQ delivery */ - if (MSR_TM_TRANSACTIONAL(vcpu->arch.shregs.msr)) - msr |= MSR_TS_S; - else - msr |= vcpu->arch.shregs.msr & MSR_TS_MASK; - kvmppc_set_msr(vcpu, msr); -} - static long kvmppc_virtmode_do_h_enter(struct kvm *kvm, unsigned long flags, long pte_index, unsigned long pteh, unsigned long ptel, unsigned long *pte_idx_ret) @@ -508,6 +496,7 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu, struct vm_area_struct *vma; unsigned long rcbits; long mmio_update; + struct mm_struct *mm; if (kvm_is_radix(kvm)) return kvmppc_book3s_radix_page_fault(run, vcpu, ea, dsisr); @@ -584,6 +573,7 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu, is_ci = false; pfn = 0; page = NULL; + mm = current->mm; pte_size = PAGE_SIZE; writing = (dsisr & DSISR_ISSTORE) != 0; /* If writing != 0, then the HPTE must allow writing, if we get here */ @@ -592,8 +582,8 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu, npages = get_user_pages_fast(hva, 1, writing ? FOLL_WRITE : 0, pages); if (npages < 1) { /* Check if it's an I/O mapping */ - down_read(&current->mm->mmap_sem); - vma = find_vma(current->mm, hva); + down_read(&mm->mmap_sem); + vma = find_vma(mm, hva); if (vma && vma->vm_start <= hva && hva + psize <= vma->vm_end && (vma->vm_flags & VM_PFNMAP)) { pfn = vma->vm_pgoff + @@ -602,7 +592,7 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu, is_ci = pte_ci(__pte((pgprot_val(vma->vm_page_prot)))); write_ok = vma->vm_flags & VM_WRITE; } - up_read(&current->mm->mmap_sem); + up_read(&mm->mmap_sem); if (!pfn) goto out_put; } else { @@ -621,8 +611,7 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu, * hugepage split and collapse.
*/ local_irq_save(flags); - ptep = find_current_mm_pte(current->mm->pgd, - hva, NULL, NULL); + ptep = find_current_mm_pte(mm->pgd, hva, NULL, NULL); if (ptep) { pte = kvmppc_read_update_linux_pte(ptep, 1); if (__pte_write(pte)) @@ -2000,7 +1989,7 @@ int kvm_vm_ioctl_get_htab_fd(struct kvm *kvm, struct kvm_get_htab_fd *ghf) ret = anon_inode_getfd("kvm-htab", &kvm_htab_fops, ctx, rwflag | O_CLOEXEC); if (ret < 0) { kfree(ctx); - kvm_put_kvm(kvm); + kvm_put_kvm_no_destroy(kvm); return ret; } @@ -2161,7 +2150,6 @@ void kvmppc_mmu_book3s_hv_init(struct kvm_vcpu *vcpu) vcpu->arch.slb_nr = 32; /* POWER7/POWER8 */ mmu->xlate = kvmppc_mmu_book3s_64_hv_xlate; - mmu->reset_msr = kvmppc_mmu_book3s_64_hv_reset_msr; vcpu->arch.hflags |= BOOK3S_HFLAG_SLB; } diff --git a/arch/powerpc/kvm/book3s_64_vio.c b/arch/powerpc/kvm/book3s_64_vio.c index 5834db0a54c6..883a66e76638 100644 --- a/arch/powerpc/kvm/book3s_64_vio.c +++ b/arch/powerpc/kvm/book3s_64_vio.c @@ -317,7 +317,7 @@ long kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm, if (ret >= 0) list_add_rcu(&stt->list, &kvm->arch.spapr_tce_tables); else - kvm_put_kvm(kvm); + kvm_put_kvm_no_destroy(kvm); mutex_unlock(&kvm->lock); diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index 709cf1fd4cf4..ec5c0379296a 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -133,7 +133,6 @@ static inline bool nesting_enabled(struct kvm *kvm) /* If set, the threads on each CPU core have to be in the same MMU mode */ static bool no_mixing_hpt_and_radix; -static void kvmppc_end_cede(struct kvm_vcpu *vcpu); static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu); /* @@ -338,18 +337,6 @@ static void kvmppc_core_vcpu_put_hv(struct kvm_vcpu *vcpu) spin_unlock_irqrestore(&vcpu->arch.tbacct_lock, flags); } -static void kvmppc_set_msr_hv(struct kvm_vcpu *vcpu, u64 msr) -{ - /* - * Check for illegal transactional state bit combination - * and if we find it, force the TS field to a safe state. 
- */ - if ((msr & MSR_TS_MASK) == MSR_TS_MASK) - msr &= ~MSR_TS_MASK; - vcpu->arch.shregs.msr = msr; - kvmppc_end_cede(vcpu); -} - static void kvmppc_set_pvr_hv(struct kvm_vcpu *vcpu, u32 pvr) { vcpu->arch.pvr = pvr; @@ -792,6 +779,11 @@ static int kvmppc_h_set_mode(struct kvm_vcpu *vcpu, unsigned long mflags, vcpu->arch.dawr = value1; vcpu->arch.dawrx = value2; return H_SUCCESS; + case H_SET_MODE_RESOURCE_ADDR_TRANS_MODE: + /* KVM does not support mflags=2 (AIL=2) */ + if (mflags != 0 && mflags != 3) + return H_UNSUPPORTED_FLAG_START; + return H_TOO_HARD; default: return H_TOO_HARD; } @@ -2454,15 +2446,6 @@ static void kvmppc_set_timer(struct kvm_vcpu *vcpu) vcpu->arch.timer_running = 1; } -static void kvmppc_end_cede(struct kvm_vcpu *vcpu) -{ - vcpu->arch.ceded = 0; - if (vcpu->arch.timer_running) { - hrtimer_try_to_cancel(&vcpu->arch.dec_timer); - vcpu->arch.timer_running = 0; - } -} - extern int __kvmppc_vcore_entry(void); static void kvmppc_remove_runnable(struct kvmppc_vcore *vc, @@ -5401,6 +5384,7 @@ static struct kvmppc_ops kvm_ops_hv = { .set_one_reg = kvmppc_set_one_reg_hv, .vcpu_load = kvmppc_core_vcpu_load_hv, .vcpu_put = kvmppc_core_vcpu_put_hv, + .inject_interrupt = kvmppc_inject_interrupt_hv, .set_msr = kvmppc_set_msr_hv, .vcpu_run = kvmppc_vcpu_run_hv, .vcpu_create = kvmppc_core_vcpu_create_hv, diff --git a/arch/powerpc/kvm/book3s_hv_builtin.c b/arch/powerpc/kvm/book3s_hv_builtin.c index 7c1909657b55..7cd3cf3d366b 100644 --- a/arch/powerpc/kvm/book3s_hv_builtin.c +++ b/arch/powerpc/kvm/book3s_hv_builtin.c @@ -755,6 +755,71 @@ void kvmhv_p9_restore_lpcr(struct kvm_split_mode *sip) local_paca->kvm_hstate.kvm_split_mode = NULL; } +static void kvmppc_end_cede(struct kvm_vcpu *vcpu) +{ + vcpu->arch.ceded = 0; + if (vcpu->arch.timer_running) { + hrtimer_try_to_cancel(&vcpu->arch.dec_timer); + vcpu->arch.timer_running = 0; + } +} + +void kvmppc_set_msr_hv(struct kvm_vcpu *vcpu, u64 msr) +{ + /* + * Check for illegal transactional state bit combination + * and if we find it, force the TS field to a safe state. + */ + if ((msr & MSR_TS_MASK) == MSR_TS_MASK) + msr &= ~MSR_TS_MASK; + vcpu->arch.shregs.msr = msr; + kvmppc_end_cede(vcpu); +} +EXPORT_SYMBOL_GPL(kvmppc_set_msr_hv); + +static void inject_interrupt(struct kvm_vcpu *vcpu, int vec, u64 srr1_flags) +{ + unsigned long msr, pc, new_msr, new_pc; + + msr = kvmppc_get_msr(vcpu); + pc = kvmppc_get_pc(vcpu); + new_msr = vcpu->arch.intr_msr; + new_pc = vec; + + /* If transactional, change to suspend mode on IRQ delivery */ + if (MSR_TM_TRANSACTIONAL(msr)) + new_msr |= MSR_TS_S; + else + new_msr |= msr & MSR_TS_MASK; + + /* + * Perform MSR and PC adjustment for LPCR[AIL]=3 if it is set and + * applicable. AIL=2 is not supported. + * + * AIL does not apply to SRESET, MCE, or HMI (which is never + * delivered to the guest), and does not apply if IR=0 or DR=0. 
+ */ + if (vec != BOOK3S_INTERRUPT_SYSTEM_RESET && + vec != BOOK3S_INTERRUPT_MACHINE_CHECK && + (vcpu->arch.vcore->lpcr & LPCR_AIL) == LPCR_AIL_3 && + (msr & (MSR_IR|MSR_DR)) == (MSR_IR|MSR_DR) ) { + new_msr |= MSR_IR | MSR_DR; + new_pc += 0xC000000000004000ULL; + } + + kvmppc_set_srr0(vcpu, pc); + kvmppc_set_srr1(vcpu, (msr & SRR1_MSR_BITS) | srr1_flags); + kvmppc_set_pc(vcpu, new_pc); + vcpu->arch.shregs.msr = new_msr; +} + +void kvmppc_inject_interrupt_hv(struct kvm_vcpu *vcpu, int vec, u64 srr1_flags) +{ + inject_interrupt(vcpu, vec, srr1_flags); + kvmppc_end_cede(vcpu); +} +EXPORT_SYMBOL_GPL(kvmppc_inject_interrupt_hv); + /* * Is there a PRIV_DOORBELL pending for the guest (on POWER9)? * Can we inject a Decrementer or a External interrupt? @@ -762,7 +827,6 @@ void kvmhv_p9_restore_lpcr(struct kvm_split_mode *sip) void kvmppc_guest_entry_inject_int(struct kvm_vcpu *vcpu) { int ext; - unsigned long vec = 0; unsigned long lpcr; /* Insert EXTERNAL bit into LPCR at the MER bit position */ @@ -774,26 +838,16 @@ void kvmppc_guest_entry_inject_int(struct kvm_vcpu *vcpu) if (vcpu->arch.shregs.msr & MSR_EE) { if (ext) { - vec = BOOK3S_INTERRUPT_EXTERNAL; + inject_interrupt(vcpu, BOOK3S_INTERRUPT_EXTERNAL, 0); } else { long int dec = mfspr(SPRN_DEC); if (!(lpcr & LPCR_LD)) dec = (int) dec; if (dec < 0) - vec = BOOK3S_INTERRUPT_DECREMENTER; + inject_interrupt(vcpu, + BOOK3S_INTERRUPT_DECREMENTER, 0); } } - if (vec) { - unsigned long msr, old_msr = vcpu->arch.shregs.msr; - - kvmppc_set_srr0(vcpu, kvmppc_get_pc(vcpu)); - kvmppc_set_srr1(vcpu, old_msr); - kvmppc_set_pc(vcpu, vec); - msr = vcpu->arch.intr_msr; - if (MSR_TM_ACTIVE(old_msr)) - msr |= MSR_TS_S; - vcpu->arch.shregs.msr = msr; - } if (vcpu->arch.doorbell_request) { mtspr(SPRN_DPDES, 1); diff --git a/arch/powerpc/kvm/book3s_hv_nested.c b/arch/powerpc/kvm/book3s_hv_nested.c index cdf30c6eaf54..dc97e5be76f6 100644 --- a/arch/powerpc/kvm/book3s_hv_nested.c +++ b/arch/powerpc/kvm/book3s_hv_nested.c @@ -1186,7 +1186,7 @@ static int kvmhv_translate_addr_nested(struct kvm_vcpu *vcpu, forward_to_l1: vcpu->arch.fault_dsisr = flags; if (vcpu->arch.trap == BOOK3S_INTERRUPT_H_INST_STORAGE) { - vcpu->arch.shregs.msr &= ~0x783f0000ul; + vcpu->arch.shregs.msr &= SRR1_MSR_BITS; vcpu->arch.shregs.msr |= flags; } return RESUME_HOST; diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S index 74a9cfe84aee..faebcbb8c4db 100644 --- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S +++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S @@ -1921,6 +1921,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S) mtspr SPRN_PCR, r6 18: /* Signal secondary CPUs to continue */ + li r0, 0 stb r0,VCORE_IN_GUEST(r5) 19: lis r8,0x7fff /* MAX_INT@h */ mtspr SPRN_HDEC,r8 diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c index cc65af8fe6f7..ce4fcf76e53e 100644 --- a/arch/powerpc/kvm/book3s_pr.c +++ b/arch/powerpc/kvm/book3s_pr.c @@ -90,7 +90,43 @@ static void kvmppc_fixup_split_real(struct kvm_vcpu *vcpu) kvmppc_set_pc(vcpu, pc | SPLIT_HACK_OFFS); } -void kvmppc_unfixup_split_real(struct kvm_vcpu *vcpu); +static void kvmppc_unfixup_split_real(struct kvm_vcpu *vcpu) +{ + if (vcpu->arch.hflags & BOOK3S_HFLAG_SPLIT_HACK) { + ulong pc = kvmppc_get_pc(vcpu); + ulong lr = kvmppc_get_lr(vcpu); + if ((pc & SPLIT_HACK_MASK) == SPLIT_HACK_OFFS) + kvmppc_set_pc(vcpu, pc & ~SPLIT_HACK_MASK); + if ((lr & SPLIT_HACK_MASK) == SPLIT_HACK_OFFS) + kvmppc_set_lr(vcpu, lr & ~SPLIT_HACK_MASK); + vcpu->arch.hflags &= ~BOOK3S_HFLAG_SPLIT_HACK; + } +} + +static 
void kvmppc_inject_interrupt_pr(struct kvm_vcpu *vcpu, int vec, u64 srr1_flags) +{ + unsigned long msr, pc, new_msr, new_pc; + + kvmppc_unfixup_split_real(vcpu); + + msr = kvmppc_get_msr(vcpu); + pc = kvmppc_get_pc(vcpu); + new_msr = vcpu->arch.intr_msr; + new_pc = to_book3s(vcpu)->hior + vec; + +#ifdef CONFIG_PPC_BOOK3S_64 + /* If transactional, change to suspend mode on IRQ delivery */ + if (MSR_TM_TRANSACTIONAL(msr)) + new_msr |= MSR_TS_S; + else + new_msr |= msr & MSR_TS_MASK; +#endif + + kvmppc_set_srr0(vcpu, pc); + kvmppc_set_srr1(vcpu, (msr & SRR1_MSR_BITS) | srr1_flags); + kvmppc_set_pc(vcpu, new_pc); + kvmppc_set_msr(vcpu, new_msr); +} static void kvmppc_core_vcpu_load_pr(struct kvm_vcpu *vcpu, int cpu) { @@ -1761,6 +1797,7 @@ static struct kvm_vcpu *kvmppc_core_vcpu_create_pr(struct kvm *kvm, #else /* default to book3s_32 (750) */ vcpu->arch.pvr = 0x84202; + vcpu->arch.intr_msr = 0; #endif kvmppc_set_pvr_pr(vcpu, vcpu->arch.pvr); vcpu->arch.slb_nr = 64; @@ -2058,6 +2095,7 @@ static struct kvmppc_ops kvm_ops_pr = { .set_one_reg = kvmppc_set_one_reg_pr, .vcpu_load = kvmppc_core_vcpu_load_pr, .vcpu_put = kvmppc_core_vcpu_put_pr, + .inject_interrupt = kvmppc_inject_interrupt_pr, .set_msr = kvmppc_set_msr_pr, .vcpu_run = kvmppc_vcpu_run_pr, .vcpu_create = kvmppc_core_vcpu_create_pr, diff --git a/arch/powerpc/kvm/book3s_xive.c b/arch/powerpc/kvm/book3s_xive.c index 591bfb4bfd0f..66858b7d3c6b 100644 --- a/arch/powerpc/kvm/book3s_xive.c +++ b/arch/powerpc/kvm/book3s_xive.c @@ -1211,12 +1211,52 @@ void kvmppc_xive_cleanup_vcpu(struct kvm_vcpu *vcpu) vcpu->arch.xive_vcpu = NULL; } +static bool kvmppc_xive_vcpu_id_valid(struct kvmppc_xive *xive, u32 cpu) +{ + /* We have a block of xive->nr_servers VPs. We just need to check + * raw vCPU ids are below the expected limit for this guest's + * core stride ; kvmppc_pack_vcpu_id() will pack them down to an + * index that can be safely used to compute a VP id that belongs + * to the VP block. 
+ */ + return cpu < xive->nr_servers * xive->kvm->arch.emul_smt_mode; +} + +int kvmppc_xive_compute_vp_id(struct kvmppc_xive *xive, u32 cpu, u32 *vp) +{ + u32 vp_id; + + if (!kvmppc_xive_vcpu_id_valid(xive, cpu)) { + pr_devel("Out of bounds !\n"); + return -EINVAL; + } + + if (xive->vp_base == XIVE_INVALID_VP) { + xive->vp_base = xive_native_alloc_vp_block(xive->nr_servers); + pr_devel("VP_Base=%x nr_servers=%d\n", xive->vp_base, xive->nr_servers); + + if (xive->vp_base == XIVE_INVALID_VP) + return -ENOSPC; + } + + vp_id = kvmppc_xive_vp(xive, cpu); + if (kvmppc_xive_vp_in_use(xive->kvm, vp_id)) { + pr_devel("Duplicate !\n"); + return -EEXIST; + } + + *vp = vp_id; + + return 0; +} + int kvmppc_xive_connect_vcpu(struct kvm_device *dev, struct kvm_vcpu *vcpu, u32 cpu) { struct kvmppc_xive *xive = dev->private; struct kvmppc_xive_vcpu *xc; int i, r = -EBUSY; + u32 vp_id; pr_devel("connect_vcpu(cpu=%d)\n", cpu); @@ -1228,25 +1268,25 @@ int kvmppc_xive_connect_vcpu(struct kvm_device *dev, return -EPERM; if (vcpu->arch.irq_type != KVMPPC_IRQ_DEFAULT) return -EBUSY; - if (kvmppc_xive_find_server(vcpu->kvm, cpu)) { - pr_devel("Duplicate !\n"); - return -EEXIST; - } - if (cpu >= (KVM_MAX_VCPUS * vcpu->kvm->arch.emul_smt_mode)) { - pr_devel("Out of bounds !\n"); - return -EINVAL; - } - xc = kzalloc(sizeof(*xc), GFP_KERNEL); - if (!xc) - return -ENOMEM; /* We need to synchronize with queue provisioning */ mutex_lock(&xive->lock); + + r = kvmppc_xive_compute_vp_id(xive, cpu, &vp_id); + if (r) + goto bail; + + xc = kzalloc(sizeof(*xc), GFP_KERNEL); + if (!xc) { + r = -ENOMEM; + goto bail; + } + vcpu->arch.xive_vcpu = xc; xc->xive = xive; xc->vcpu = vcpu; xc->server_num = cpu; - xc->vp_id = kvmppc_xive_vp(xive, cpu); + xc->vp_id = vp_id; xc->mfrr = 0xff; xc->valid = true; @@ -1826,6 +1866,43 @@ int kvmppc_xive_set_irq(struct kvm *kvm, int irq_source_id, u32 irq, int level, return 0; } +int kvmppc_xive_set_nr_servers(struct kvmppc_xive *xive, u64 addr) +{ + u32 __user *ubufp = (u32 __user *) addr; + u32 nr_servers; + int rc = 0; + + if (get_user(nr_servers, ubufp)) + return -EFAULT; + + pr_devel("%s nr_servers=%u\n", __func__, nr_servers); + + if (!nr_servers || nr_servers > KVM_MAX_VCPU_ID) + return -EINVAL; + + mutex_lock(&xive->lock); + if (xive->vp_base != XIVE_INVALID_VP) + /* The VP block is allocated once and freed when the device + * is released. Better not allow to change its size since its + * used by connect_vcpu to validate vCPU ids are valid (eg, + * setting it back to a higher value could allow connect_vcpu + * to come up with a VP id that goes beyond the VP block, which + * is likely to cause a crash in OPAL). + */ + rc = -EBUSY; + else if (nr_servers > KVM_MAX_VCPUS) + /* We don't need more servers. Higher vCPU ids get packed + * down below KVM_MAX_VCPUS by kvmppc_pack_vcpu_id(). 
+ */ + xive->nr_servers = KVM_MAX_VCPUS; + else + xive->nr_servers = nr_servers; + + mutex_unlock(&xive->lock); + + return rc; +} + static int xive_set_attr(struct kvm_device *dev, struct kvm_device_attr *attr) { struct kvmppc_xive *xive = dev->private; @@ -1834,6 +1911,11 @@ static int xive_set_attr(struct kvm_device *dev, struct kvm_device_attr *attr) switch (attr->group) { case KVM_DEV_XICS_GRP_SOURCES: return xive_set_source(xive, attr->attr, attr->addr); + case KVM_DEV_XICS_GRP_CTRL: + switch (attr->attr) { + case KVM_DEV_XICS_NR_SERVERS: + return kvmppc_xive_set_nr_servers(xive, attr->addr); + } } return -ENXIO; } @@ -1859,6 +1941,11 @@ static int xive_has_attr(struct kvm_device *dev, struct kvm_device_attr *attr) attr->attr < KVMPPC_XICS_NR_IRQS) return 0; break; + case KVM_DEV_XICS_GRP_CTRL: + switch (attr->attr) { + case KVM_DEV_XICS_NR_SERVERS: + return 0; + } } return -ENXIO; } @@ -1993,10 +2080,13 @@ static int kvmppc_xive_create(struct kvm_device *dev, u32 type) { struct kvmppc_xive *xive; struct kvm *kvm = dev->kvm; - int ret = 0; pr_devel("Creating xive for partition\n"); + /* Already there ? */ + if (kvm->arch.xive) + return -EEXIST; + xive = kvmppc_xive_get_device(kvm, type); if (!xive) return -ENOMEM; @@ -2006,12 +2096,6 @@ static int kvmppc_xive_create(struct kvm_device *dev, u32 type) xive->kvm = kvm; mutex_init(&xive->lock); - /* Already there ? */ - if (kvm->arch.xive) - ret = -EEXIST; - else - kvm->arch.xive = xive; - /* We use the default queue size set by the host */ xive->q_order = xive_native_default_eq_shift(); if (xive->q_order < PAGE_SHIFT) @@ -2019,18 +2103,16 @@ static int kvmppc_xive_create(struct kvm_device *dev, u32 type) else xive->q_page_order = xive->q_order - PAGE_SHIFT; - /* Allocate a bunch of VPs */ - xive->vp_base = xive_native_alloc_vp_block(KVM_MAX_VCPUS); - pr_devel("VP_Base=%x\n", xive->vp_base); - - if (xive->vp_base == XIVE_INVALID_VP) - ret = -ENOMEM; + /* VP allocation is delayed to the first call to connect_vcpu */ + xive->vp_base = XIVE_INVALID_VP; + /* KVM_MAX_VCPUS limits the number of VMs to roughly 64 per sockets + * on a POWER9 system. 
+ */ + xive->nr_servers = KVM_MAX_VCPUS; xive->single_escalation = xive_native_has_single_escalation(); - if (ret) - return ret; - + kvm->arch.xive = xive; return 0; } @@ -2100,9 +2182,9 @@ static int xive_debug_show(struct seq_file *m, void *private) if (!xc) continue; - seq_printf(m, "cpu server %#x CPPR:%#x HWCPPR:%#x" + seq_printf(m, "cpu server %#x VP:%#x CPPR:%#x HWCPPR:%#x" " MFRR:%#x PEND:%#x h_xirr: R=%lld V=%lld\n", - xc->server_num, xc->cppr, xc->hw_cppr, + xc->server_num, xc->vp_id, xc->cppr, xc->hw_cppr, xc->mfrr, xc->pending, xc->stat_rm_h_xirr, xc->stat_vm_h_xirr); diff --git a/arch/powerpc/kvm/book3s_xive.h b/arch/powerpc/kvm/book3s_xive.h index 955b820ffd6d..382e3a56e789 100644 --- a/arch/powerpc/kvm/book3s_xive.h +++ b/arch/powerpc/kvm/book3s_xive.h @@ -135,6 +135,9 @@ struct kvmppc_xive { /* Flags */ u8 single_escalation; + /* Number of entries in the VP block */ + u32 nr_servers; + struct kvmppc_xive_ops *ops; struct address_space *mapping; struct mutex mapping_lock; @@ -220,6 +223,18 @@ static inline u32 kvmppc_xive_vp(struct kvmppc_xive *xive, u32 server) return xive->vp_base + kvmppc_pack_vcpu_id(xive->kvm, server); } +static inline bool kvmppc_xive_vp_in_use(struct kvm *kvm, u32 vp_id) +{ + struct kvm_vcpu *vcpu = NULL; + int i; + + kvm_for_each_vcpu(i, vcpu, kvm) { + if (vcpu->arch.xive_vcpu && vp_id == vcpu->arch.xive_vcpu->vp_id) + return true; + } + return false; +} + /* * Mapping between guest priorities and host priorities * is as follow. @@ -284,6 +299,8 @@ int kvmppc_xive_attach_escalation(struct kvm_vcpu *vcpu, u8 prio, struct kvmppc_xive *kvmppc_xive_get_device(struct kvm *kvm, u32 type); void xive_cleanup_single_escalation(struct kvm_vcpu *vcpu, struct kvmppc_xive_vcpu *xc, int irq); +int kvmppc_xive_compute_vp_id(struct kvmppc_xive *xive, u32 cpu, u32 *vp); +int kvmppc_xive_set_nr_servers(struct kvmppc_xive *xive, u64 addr); #endif /* CONFIG_KVM_XICS */ #endif /* _KVM_PPC_BOOK3S_XICS_H */ diff --git a/arch/powerpc/kvm/book3s_xive_native.c b/arch/powerpc/kvm/book3s_xive_native.c index 248c1ea9e788..d83adb1e1490 100644 --- a/arch/powerpc/kvm/book3s_xive_native.c +++ b/arch/powerpc/kvm/book3s_xive_native.c @@ -50,6 +50,24 @@ static void kvmppc_xive_native_cleanup_queue(struct kvm_vcpu *vcpu, int prio) } } +static int kvmppc_xive_native_configure_queue(u32 vp_id, struct xive_q *q, + u8 prio, __be32 *qpage, + u32 order, bool can_escalate) +{ + int rc; + __be32 *qpage_prev = q->qpage; + + rc = xive_native_configure_queue(vp_id, q, prio, qpage, order, + can_escalate); + if (rc) + return rc; + + if (qpage_prev) + put_page(virt_to_page(qpage_prev)); + + return rc; +} + void kvmppc_xive_native_cleanup_vcpu(struct kvm_vcpu *vcpu) { struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu; @@ -106,6 +124,7 @@ int kvmppc_xive_native_connect_vcpu(struct kvm_device *dev, struct kvmppc_xive *xive = dev->private; struct kvmppc_xive_vcpu *xc = NULL; int rc; + u32 vp_id; pr_devel("native_connect_vcpu(server=%d)\n", server_num); @@ -117,18 +136,12 @@ int kvmppc_xive_native_connect_vcpu(struct kvm_device *dev, return -EPERM; if (vcpu->arch.irq_type != KVMPPC_IRQ_DEFAULT) return -EBUSY; - if (server_num >= (KVM_MAX_VCPUS * vcpu->kvm->arch.emul_smt_mode)) { - pr_devel("Out of bounds !\n"); - return -EINVAL; - } mutex_lock(&xive->lock); - if (kvmppc_xive_find_server(vcpu->kvm, server_num)) { - pr_devel("Duplicate !\n"); - rc = -EEXIST; + rc = kvmppc_xive_compute_vp_id(xive, server_num, &vp_id); + if (rc) goto bail; - } xc = kzalloc(sizeof(*xc), GFP_KERNEL); if (!xc) { @@ -141,7 
+154,7 @@ int kvmppc_xive_native_connect_vcpu(struct kvm_device *dev, xc->vcpu = vcpu; xc->server_num = server_num; - xc->vp_id = kvmppc_xive_vp(xive, server_num); + xc->vp_id = vp_id; xc->valid = true; vcpu->arch.irq_type = KVMPPC_IRQ_XIVE; @@ -580,19 +593,14 @@ static int kvmppc_xive_native_set_queue_config(struct kvmppc_xive *xive, q->guest_qaddr = 0; q->guest_qshift = 0; - rc = xive_native_configure_queue(xc->vp_id, q, priority, - NULL, 0, true); + rc = kvmppc_xive_native_configure_queue(xc->vp_id, q, priority, + NULL, 0, true); if (rc) { pr_err("Failed to reset queue %d for VCPU %d: %d\n", priority, xc->server_num, rc); return rc; } - if (q->qpage) { - put_page(virt_to_page(q->qpage)); - q->qpage = NULL; - } - return 0; } @@ -622,12 +630,6 @@ static int kvmppc_xive_native_set_queue_config(struct kvmppc_xive *xive, srcu_idx = srcu_read_lock(&kvm->srcu); gfn = gpa_to_gfn(kvm_eq.qaddr); - page = gfn_to_page(kvm, gfn); - if (is_error_page(page)) { - srcu_read_unlock(&kvm->srcu, srcu_idx); - pr_err("Couldn't get queue page %llx!\n", kvm_eq.qaddr); - return -EINVAL; - } page_size = kvm_host_page_size(kvm, gfn); if (1ull << kvm_eq.qshift > page_size) { @@ -636,6 +638,13 @@ static int kvmppc_xive_native_set_queue_config(struct kvmppc_xive *xive, return -EINVAL; } + page = gfn_to_page(kvm, gfn); + if (is_error_page(page)) { + srcu_read_unlock(&kvm->srcu, srcu_idx); + pr_err("Couldn't get queue page %llx!\n", kvm_eq.qaddr); + return -EINVAL; + } + qaddr = page_to_virt(page) + (kvm_eq.qaddr & ~PAGE_MASK); srcu_read_unlock(&kvm->srcu, srcu_idx); @@ -651,8 +660,8 @@ static int kvmppc_xive_native_set_queue_config(struct kvmppc_xive *xive, * OPAL level because the use of END ESBs is not supported by * Linux. */ - rc = xive_native_configure_queue(xc->vp_id, q, priority, - (__be32 *) qaddr, kvm_eq.qshift, true); + rc = kvmppc_xive_native_configure_queue(xc->vp_id, q, priority, + (__be32 *) qaddr, kvm_eq.qshift, true); if (rc) { pr_err("Failed to configure queue %d for VCPU %d: %d\n", priority, xc->server_num, rc); @@ -926,6 +935,8 @@ static int kvmppc_xive_native_set_attr(struct kvm_device *dev, return kvmppc_xive_reset(xive); case KVM_DEV_XIVE_EQ_SYNC: return kvmppc_xive_native_eq_sync(xive); + case KVM_DEV_XIVE_NR_SERVERS: + return kvmppc_xive_set_nr_servers(xive, attr->addr); } break; case KVM_DEV_XIVE_GRP_SOURCE: @@ -965,6 +976,7 @@ static int kvmppc_xive_native_has_attr(struct kvm_device *dev, switch (attr->attr) { case KVM_DEV_XIVE_RESET: case KVM_DEV_XIVE_EQ_SYNC: + case KVM_DEV_XIVE_NR_SERVERS: return 0; } break; @@ -1065,7 +1077,6 @@ static int kvmppc_xive_native_create(struct kvm_device *dev, u32 type) { struct kvmppc_xive *xive; struct kvm *kvm = dev->kvm; - int ret = 0; pr_devel("Creating xive native device\n"); @@ -1079,27 +1090,20 @@ static int kvmppc_xive_native_create(struct kvm_device *dev, u32 type) dev->private = xive; xive->dev = dev; xive->kvm = kvm; - kvm->arch.xive = xive; mutex_init(&xive->mapping_lock); mutex_init(&xive->lock); - /* - * Allocate a bunch of VPs. KVM_MAX_VCPUS is a large value for - * a default. Getting the max number of CPUs the VM was - * configured with would improve our usage of the XIVE VP space. + /* VP allocation is delayed to the first call to connect_vcpu */ + xive->vp_base = XIVE_INVALID_VP; + /* KVM_MAX_VCPUS limits the number of VMs to roughly 64 per sockets + * on a POWER9 system. 
*/ - xive->vp_base = xive_native_alloc_vp_block(KVM_MAX_VCPUS); - pr_devel("VP_Base=%x\n", xive->vp_base); - - if (xive->vp_base == XIVE_INVALID_VP) - ret = -ENXIO; + xive->nr_servers = KVM_MAX_VCPUS; xive->single_escalation = xive_native_has_single_escalation(); xive->ops = &kvmppc_xive_native_ops; - if (ret) - return ret; - + kvm->arch.xive = xive; return 0; } @@ -1202,8 +1206,8 @@ static int xive_native_debug_show(struct seq_file *m, void *private) if (!xc) continue; - seq_printf(m, "cpu server %#x NSR=%02x CPPR=%02x IBP=%02x PIPR=%02x w01=%016llx w2=%08x\n", - xc->server_num, + seq_printf(m, "cpu server %#x VP=%#x NSR=%02x CPPR=%02x IBP=%02x PIPR=%02x w01=%016llx w2=%08x\n", + xc->server_num, xc->vp_id, vcpu->arch.xive_saved_state.nsr, vcpu->arch.xive_saved_state.cppr, vcpu->arch.xive_saved_state.ipb, diff --git a/arch/powerpc/kvm/e500_mmu_host.c b/arch/powerpc/kvm/e500_mmu_host.c index 321db0fdb9db..425d13806645 100644 --- a/arch/powerpc/kvm/e500_mmu_host.c +++ b/arch/powerpc/kvm/e500_mmu_host.c @@ -355,9 +355,9 @@ static inline int kvmppc_e500_shadow_map(struct kvmppc_vcpu_e500 *vcpu_e500, if (tlbsel == 1) { struct vm_area_struct *vma; - down_read(&current->mm->mmap_sem); + down_read(&kvm->mm->mmap_sem); - vma = find_vma(current->mm, hva); + vma = find_vma(kvm->mm, hva); if (vma && hva >= vma->vm_start && (vma->vm_flags & VM_PFNMAP)) { /* @@ -441,7 +441,7 @@ static inline int kvmppc_e500_shadow_map(struct kvmppc_vcpu_e500 *vcpu_e500, tsize = max(BOOK3E_PAGESZ_4K, tsize & ~1); } - up_read(&current->mm->mmap_sem); + up_read(&kvm->mm->mmap_sem); } if (likely(!pfnmap)) { diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index 3a77bb643452..9e085e931d74 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -522,6 +522,8 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) case KVM_CAP_IMMEDIATE_EXIT: r = 1; break; + case KVM_CAP_PPC_GUEST_DEBUG_SSTEP: + /* fall through */ case KVM_CAP_PPC_PAIRED_SINGLES: case KVM_CAP_PPC_OSI: case KVM_CAP_PPC_GET_PVINFO: diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c index be941d382c8d..c95b7fe9f298 100644 --- a/arch/powerpc/mm/mem.c +++ b/arch/powerpc/mm/mem.c @@ -31,6 +31,7 @@ #include <linux/slab.h> #include <linux/vmalloc.h> #include <linux/memremap.h> +#include <linux/dma-direct.h> #include <asm/pgalloc.h> #include <asm/prom.h> @@ -201,10 +202,10 @@ static int __init mark_nonram_nosave(void) * everything else. GFP_DMA32 page allocations automatically fall back to * ZONE_DMA. * - * By using 31-bit unconditionally, we can exploit ARCH_ZONE_DMA_BITS to - * inform the generic DMA mapping code. 32-bit only devices (if not handled - * by an IOMMU anyway) will take a first dip into ZONE_NORMAL and get - * otherwise served by ZONE_DMA. + * By using 31-bit unconditionally, we can exploit zone_dma_bits to inform the + * generic DMA mapping code. 32-bit only devices (if not handled by an IOMMU + * anyway) will take a first dip into ZONE_NORMAL and get otherwise served by + * ZONE_DMA. */ static unsigned long max_zone_pfns[MAX_NR_ZONES]; @@ -237,9 +238,18 @@ void __init paging_init(void) printk(KERN_DEBUG "Memory hole size: %ldMB\n", (long int)((top_of_ram - total_ram) >> 20)); + /* + * Allow 30-bit DMA for very limited Broadcom wifi chips on many + * powerbooks.
+ */ + if (IS_ENABLED(CONFIG_PPC32)) + zone_dma_bits = 30; + else + zone_dma_bits = 31; + #ifdef CONFIG_ZONE_DMA max_zone_pfns[ZONE_DMA] = min(max_low_pfn, - 1UL << (ARCH_ZONE_DMA_BITS - PAGE_SHIFT)); + 1UL << (zone_dma_bits - PAGE_SHIFT)); #endif max_zone_pfns[ZONE_NORMAL] = max_low_pfn; #ifdef CONFIG_HIGHMEM diff --git a/arch/powerpc/net/bpf_jit_comp64.c b/arch/powerpc/net/bpf_jit_comp64.c index 02a59946a78a..be3517ef0574 100644 --- a/arch/powerpc/net/bpf_jit_comp64.c +++ b/arch/powerpc/net/bpf_jit_comp64.c @@ -1142,6 +1142,19 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp) } /* + * If we have seen a tail call, we need a second pass. + * This is because bpf_jit_emit_common_epilogue() is called + * from bpf_jit_emit_tail_call() with a not yet stable ctx->seen. + */ + if (cgctx.seen & SEEN_TAILCALL) { + cgctx.idx = 0; + if (bpf_jit_build_body(fp, 0, &cgctx, addrs, false)) { + fp = org_fp; + goto out_addrs; + } + } + + /* * Pretend to build prologue, given the features we've seen. This will * update ctgtx.idx as it pretends to output instructions, then we can * calculate total size from idx. diff --git a/arch/powerpc/platforms/cell/spufs/inode.c b/arch/powerpc/platforms/cell/spufs/inode.c index 1d93e55a2de1..2dd452a047cd 100644 --- a/arch/powerpc/platforms/cell/spufs/inode.c +++ b/arch/powerpc/platforms/cell/spufs/inode.c @@ -761,6 +761,7 @@ static int spufs_init_fs_context(struct fs_context *fc) ctx->gid = current_gid(); ctx->mode = 0755; + fc->fs_private = ctx; fc->s_fs_info = sbi; fc->ops = &spufs_context_ops; return 0; diff --git a/arch/powerpc/platforms/powernv/eeh-powernv.c b/arch/powerpc/platforms/powernv/eeh-powernv.c index 6bc24a47e9ef..6f300ab7f0e9 100644 --- a/arch/powerpc/platforms/powernv/eeh-powernv.c +++ b/arch/powerpc/platforms/powernv/eeh-powernv.c @@ -42,7 +42,7 @@ void pnv_pcibios_bus_add_device(struct pci_dev *pdev) { struct pci_dn *pdn = pci_get_pdn(pdev); - if (eeh_has_flag(EEH_FORCE_DISABLED)) + if (!pdn || eeh_has_flag(EEH_FORCE_DISABLED)) return; dev_dbg(&pdev->dev, "EEH: Setting up device\n"); diff --git a/arch/powerpc/platforms/powernv/smp.c b/arch/powerpc/platforms/powernv/smp.c index fbd6e6b7bbf2..13e251699346 100644 --- a/arch/powerpc/platforms/powernv/smp.c +++ b/arch/powerpc/platforms/powernv/smp.c @@ -146,20 +146,25 @@ static int pnv_smp_cpu_disable(void) return 0; } +static void pnv_flush_interrupts(void) +{ + if (cpu_has_feature(CPU_FTR_ARCH_300)) { + if (xive_enabled()) + xive_flush_interrupt(); + else + icp_opal_flush_interrupt(); + } else { + icp_native_flush_interrupt(); + } +} + static void pnv_smp_cpu_kill_self(void) { + unsigned long srr1, unexpected_mask, wmask; unsigned int cpu; - unsigned long srr1, wmask; u64 lpcr_val; /* Standard hot unplug procedure */ - /* - * This hard disables local interurpts, ensuring we have no lazy - * irqs pending. - */ - WARN_ON(irqs_disabled()); - hard_irq_disable(); - WARN_ON(lazy_irq_pending()); idle_task_exit(); current->active_mm = NULL; /* for sanity */ @@ -173,6 +178,27 @@ static void pnv_smp_cpu_kill_self(void) wmask = SRR1_WAKEMASK_P8; /* + * This turns the irq soft-disabled state we're called with, into a + * hard-disabled state with pending irq_happened interrupts cleared. + * + * PACA_IRQ_DEC - Decrementer should be ignored. + * PACA_IRQ_HMI - Can be ignored, processing is done in real mode. + * PACA_IRQ_DBELL, EE, PMI - Unexpected. 
+ */ + hard_irq_disable(); + if (generic_check_cpu_restart(cpu)) + goto out; + + unexpected_mask = ~(PACA_IRQ_DEC | PACA_IRQ_HMI | PACA_IRQ_HARD_DIS); + if (local_paca->irq_happened & unexpected_mask) { + if (local_paca->irq_happened & PACA_IRQ_EE) + pnv_flush_interrupts(); + DBG("CPU%d Unexpected exit while offline irq_happened=%lx!\n", + cpu, local_paca->irq_happened); + } + local_paca->irq_happened = PACA_IRQ_HARD_DIS; + + /* * We don't want to take decrementer interrupts while we are * offline, so clear LPCR:PECE1. We keep PECE2 (and * LPCR_PECE_HVEE on P9) enabled so as to let IPIs in. @@ -197,6 +223,7 @@ static void pnv_smp_cpu_kill_self(void) srr1 = pnv_cpu_offline(cpu); + WARN_ON_ONCE(!irqs_disabled()); WARN_ON(lazy_irq_pending()); /* @@ -212,13 +239,7 @@ static void pnv_smp_cpu_kill_self(void) */ if (((srr1 & wmask) == SRR1_WAKEEE) || ((srr1 & wmask) == SRR1_WAKEHVI)) { - if (cpu_has_feature(CPU_FTR_ARCH_300)) { - if (xive_enabled()) - xive_flush_interrupt(); - else - icp_opal_flush_interrupt(); - } else - icp_native_flush_interrupt(); + pnv_flush_interrupts(); } else if ((srr1 & wmask) == SRR1_WAKEHDBELL) { unsigned long msg = PPC_DBELL_TYPE(PPC_DBELL_SERVER); asm volatile(PPC_MSGCLR(%0) : : "r" (msg)); @@ -266,7 +287,7 @@ static void pnv_smp_cpu_kill_self(void) */ lpcr_val = mfspr(SPRN_LPCR) | (u64)LPCR_PECE1; pnv_program_cpu_hotplug_lpcr(cpu, lpcr_val); - +out: DBG("CPU%d coming online...\n", cpu); } diff --git a/arch/powerpc/platforms/pseries/lpar.c b/arch/powerpc/platforms/pseries/lpar.c index b53359258d99..f87a5c64e24d 100644 --- a/arch/powerpc/platforms/pseries/lpar.c +++ b/arch/powerpc/platforms/pseries/lpar.c @@ -1419,6 +1419,9 @@ void __init pseries_lpar_read_hblkrm_characteristics(void) unsigned char local_buffer[SPLPAR_TLB_BIC_MAXLENGTH]; int call_status, len, idx, bpsize; + if (!firmware_has_feature(FW_FEATURE_BLOCK_REMOVE)) + return; + spin_lock(&rtas_data_buf_lock); memset(rtas_data_buf, 0, RTAS_DATA_BUF_SIZE); call_status = rtas_call(rtas_token("ibm,get-system-parameter"), 3, 1, diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig index 8eebbc8860bb..75a6c9117622 100644 --- a/arch/riscv/Kconfig +++ b/arch/riscv/Kconfig @@ -164,7 +164,7 @@ config ARCH_RV32I config ARCH_RV64I bool "RV64I" select 64BIT - select ARCH_SUPPORTS_INT128 if GCC_VERSION >= 50000 + select ARCH_SUPPORTS_INT128 if CC_HAS_INT128 && GCC_VERSION >= 50000 select HAVE_FUNCTION_TRACER select HAVE_FUNCTION_GRAPH_TRACER select HAVE_FTRACE_MCOUNT_RECORD diff --git a/arch/riscv/boot/dts/sifive/hifive-unleashed-a00.dts b/arch/riscv/boot/dts/sifive/hifive-unleashed-a00.dts index 104d334511cd..88cfcb96bf23 100644 --- a/arch/riscv/boot/dts/sifive/hifive-unleashed-a00.dts +++ b/arch/riscv/boot/dts/sifive/hifive-unleashed-a00.dts @@ -13,6 +13,7 @@ compatible = "sifive,hifive-unleashed-a00", "sifive,fu540-c000"; chosen { + stdout-path = "serial0"; }; cpus { diff --git a/arch/riscv/include/asm/asm.h b/arch/riscv/include/asm/asm.h index 5a02b7d50940..9c992a88d858 100644 --- a/arch/riscv/include/asm/asm.h +++ b/arch/riscv/include/asm/asm.h @@ -22,6 +22,7 @@ #define REG_L __REG_SEL(ld, lw) #define REG_S __REG_SEL(sd, sw) +#define REG_SC __REG_SEL(sc.d, sc.w) #define SZREG __REG_SEL(8, 4) #define LGREG __REG_SEL(3, 2) diff --git a/arch/riscv/include/asm/bug.h b/arch/riscv/include/asm/bug.h index 07ceee8b1747..75604fec1b1b 100644 --- a/arch/riscv/include/asm/bug.h +++ b/arch/riscv/include/asm/bug.h @@ -12,7 +12,6 @@ #include <asm/asm.h> -#ifdef CONFIG_GENERIC_BUG #define __INSN_LENGTH_MASK _UL(0x3) #define 
__INSN_LENGTH_32 _UL(0x3) #define __COMPRESSED_INSN_MASK _UL(0xffff) @@ -20,7 +19,6 @@ #define __BUG_INSN_32 _UL(0x00100073) /* ebreak */ #define __BUG_INSN_16 _UL(0x9002) /* c.ebreak */ -#ifndef __ASSEMBLY__ typedef u32 bug_insn_t; #ifdef CONFIG_GENERIC_BUG_RELATIVE_POINTERS @@ -43,6 +41,7 @@ typedef u32 bug_insn_t; RISCV_SHORT " %2" #endif +#ifdef CONFIG_GENERIC_BUG #define __BUG_FLAGS(flags) \ do { \ __asm__ __volatile__ ( \ @@ -58,14 +57,10 @@ do { \ "i" (flags), \ "i" (sizeof(struct bug_entry))); \ } while (0) - -#endif /* !__ASSEMBLY__ */ #else /* CONFIG_GENERIC_BUG */ -#ifndef __ASSEMBLY__ #define __BUG_FLAGS(flags) do { \ __asm__ __volatile__ ("ebreak\n"); \ } while (0) -#endif /* !__ASSEMBLY__ */ #endif /* CONFIG_GENERIC_BUG */ #define BUG() do { \ @@ -79,15 +74,10 @@ do { \ #include <asm-generic/bug.h> -#ifndef __ASSEMBLY__ - struct pt_regs; struct task_struct; -extern void die(struct pt_regs *regs, const char *str); -extern void do_trap(struct pt_regs *regs, int signo, int code, - unsigned long addr); - -#endif /* !__ASSEMBLY__ */ +void die(struct pt_regs *regs, const char *str); +void do_trap(struct pt_regs *regs, int signo, int code, unsigned long addr); #endif /* _ASM_RISCV_BUG_H */ diff --git a/arch/riscv/include/asm/io.h b/arch/riscv/include/asm/io.h index fc1189ad3777..3ba4d93721d3 100644 --- a/arch/riscv/include/asm/io.h +++ b/arch/riscv/include/asm/io.h @@ -13,6 +13,7 @@ #include <linux/types.h> #include <asm/mmiowb.h> +#include <asm/pgtable.h> extern void __iomem *ioremap(phys_addr_t offset, unsigned long size); @@ -162,6 +163,12 @@ static inline u64 __raw_readq(const volatile void __iomem *addr) #endif /* + * I/O port access constants. + */ +#define IO_SPACE_LIMIT (PCI_IO_SIZE - 1) +#define PCI_IOBASE ((void __iomem *)PCI_IO_START) + +/* * Emulation routines for the port-mapped IO space used by some PCI drivers. * These are defined as being "fully synchronous", but also "not guaranteed to * be fully ordered with respect to other memory and I/O operations". 
We're diff --git a/arch/riscv/include/asm/irq.h b/arch/riscv/include/asm/irq.h index 75576424c0f7..6e1b0e0325eb 100644 --- a/arch/riscv/include/asm/irq.h +++ b/arch/riscv/include/asm/irq.h @@ -7,6 +7,9 @@ #ifndef _ASM_RISCV_IRQ_H #define _ASM_RISCV_IRQ_H +#include <linux/interrupt.h> +#include <linux/linkage.h> + #define NR_IRQS 0 void riscv_timer_interrupt(void); diff --git a/arch/riscv/include/asm/pgtable.h b/arch/riscv/include/asm/pgtable.h index 7255f2d8395b..d3221017194d 100644 --- a/arch/riscv/include/asm/pgtable.h +++ b/arch/riscv/include/asm/pgtable.h @@ -7,6 +7,7 @@ #define _ASM_RISCV_PGTABLE_H #include <linux/mmzone.h> +#include <linux/sizes.h> #include <asm/pgtable-bits.h> @@ -86,14 +87,7 @@ extern pgd_t swapper_pg_dir[]; #define VMALLOC_SIZE (KERN_VIRT_SIZE >> 1) #define VMALLOC_END (PAGE_OFFSET - 1) #define VMALLOC_START (PAGE_OFFSET - VMALLOC_SIZE) - -#define FIXADDR_TOP VMALLOC_START -#ifdef CONFIG_64BIT -#define FIXADDR_SIZE PMD_SIZE -#else -#define FIXADDR_SIZE PGDIR_SIZE -#endif -#define FIXADDR_START (FIXADDR_TOP - FIXADDR_SIZE) +#define PCI_IO_SIZE SZ_16M /* * Roughly size the vmemmap space to be large enough to fit enough @@ -108,6 +102,17 @@ extern pgd_t swapper_pg_dir[]; #define vmemmap ((struct page *)VMEMMAP_START) +#define PCI_IO_END VMEMMAP_START +#define PCI_IO_START (PCI_IO_END - PCI_IO_SIZE) +#define FIXADDR_TOP PCI_IO_START + +#ifdef CONFIG_64BIT +#define FIXADDR_SIZE PMD_SIZE +#else +#define FIXADDR_SIZE PGDIR_SIZE +#endif +#define FIXADDR_START (FIXADDR_TOP - FIXADDR_SIZE) + /* * ZERO_PAGE is a global shared page that is always zero, * used for zero-mapped memory areas, etc. @@ -184,10 +189,7 @@ static inline pte_t pfn_pte(unsigned long pfn, pgprot_t prot) return __pte((pfn << _PAGE_PFN_SHIFT) | pgprot_val(prot)); } -static inline pte_t mk_pte(struct page *page, pgprot_t prot) -{ - return pfn_pte(page_to_pfn(page), prot); -} +#define mk_pte(page, prot) pfn_pte(page_to_pfn(page), prot) #define pte_index(addr) (((addr) >> PAGE_SHIFT) & (PTRS_PER_PTE - 1)) @@ -428,9 +430,7 @@ static inline int ptep_clear_flush_young(struct vm_area_struct *vma, #define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val(pte) }) #define __swp_entry_to_pte(x) ((pte_t) { (x).val }) -#ifdef CONFIG_FLATMEM #define kern_addr_valid(addr) (1) /* FIXME */ -#endif extern void *dtb_early_va; extern void setup_bootmem(void); diff --git a/arch/riscv/include/asm/switch_to.h b/arch/riscv/include/asm/switch_to.h index f0227bdce0f0..ee4f0ac62c9d 100644 --- a/arch/riscv/include/asm/switch_to.h +++ b/arch/riscv/include/asm/switch_to.h @@ -6,6 +6,7 @@ #ifndef _ASM_RISCV_SWITCH_TO_H #define _ASM_RISCV_SWITCH_TO_H +#include <linux/sched/task_stack.h> #include <asm/processor.h> #include <asm/ptrace.h> #include <asm/csr.h> diff --git a/arch/riscv/include/asm/tlbflush.h b/arch/riscv/include/asm/tlbflush.h index 37ae4e367ad2..f02188a5b0f4 100644 --- a/arch/riscv/include/asm/tlbflush.h +++ b/arch/riscv/include/asm/tlbflush.h @@ -10,10 +10,6 @@ #include <linux/mm_types.h> #include <asm/smp.h> -/* - * Flush entire local TLB. 'sfence.vma' implicitly fences with the instruction - * cache as well, so a 'fence.i' is not necessary. 
- */ static inline void local_flush_tlb_all(void) { __asm__ __volatile__ ("sfence.vma" : : : "memory"); diff --git a/arch/riscv/kernel/cpufeature.c b/arch/riscv/kernel/cpufeature.c index b1ade9a49347..a5ad00043104 100644 --- a/arch/riscv/kernel/cpufeature.c +++ b/arch/riscv/kernel/cpufeature.c @@ -10,6 +10,7 @@ #include <asm/processor.h> #include <asm/hwcap.h> #include <asm/smp.h> +#include <asm/switch_to.h> unsigned long elf_hwcap __read_mostly; #ifdef CONFIG_FPU diff --git a/arch/riscv/kernel/entry.S b/arch/riscv/kernel/entry.S index da7aa88113c2..8ca479831142 100644 --- a/arch/riscv/kernel/entry.S +++ b/arch/riscv/kernel/entry.S @@ -98,7 +98,26 @@ _save_context: */ .macro RESTORE_ALL REG_L a0, PT_SSTATUS(sp) - REG_L a2, PT_SEPC(sp) + /* + * The current load reservation is effectively part of the processor's + * state, in the sense that load reservations cannot be shared between + * different hart contexts. We can't actually save and restore a load + * reservation, so instead here we clear any existing reservation -- + * it's always legal for implementations to clear load reservations at + * any point (as long as the forward progress guarantee is kept, but + * we'll ignore that here). + * + * Dangling load reservations can be the result of taking a trap in the + * middle of an LR/SC sequence, but can also be the result of a taken + * forward branch around an SC -- which is how we implement CAS. As a + * result we need to clear reservations between the last CAS and the + * jump back to the new context. While it is unlikely the store + * completes, implementations are allowed to expand reservations to be + * arbitrarily large. + */ + REG_L a2, PT_SEPC(sp) + REG_SC x0, a2, PT_SEPC(sp) + csrw CSR_SSTATUS, a0 csrw CSR_SEPC, a2 @@ -254,12 +273,11 @@ restore_all: resume_kernel: REG_L s0, TASK_TI_PREEMPT_COUNT(tp) bnez s0, restore_all -need_resched: REG_L s0, TASK_TI_FLAGS(tp) andi s0, s0, _TIF_NEED_RESCHED beqz s0, restore_all call preempt_schedule_irq - j need_resched + j restore_all #endif work_pending: diff --git a/arch/riscv/kernel/head.h b/arch/riscv/kernel/head.h new file mode 100644 index 000000000000..105fb0496b24 --- /dev/null +++ b/arch/riscv/kernel/head.h @@ -0,0 +1,21 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2019 SiFive, Inc. 
+ */ +#ifndef __ASM_HEAD_H +#define __ASM_HEAD_H + +#include <linux/linkage.h> +#include <linux/init.h> + +extern atomic_t hart_lottery; + +asmlinkage void do_page_fault(struct pt_regs *regs); +asmlinkage void __init setup_vm(uintptr_t dtb_pa); + +extern void *__cpu_up_stack_pointer[]; +extern void *__cpu_up_task_pointer[]; + +void __init parse_dtb(void); + +#endif /* __ASM_HEAD_H */ diff --git a/arch/riscv/kernel/irq.c b/arch/riscv/kernel/irq.c index 6d8659388c49..fffac6ddb0e0 100644 --- a/arch/riscv/kernel/irq.c +++ b/arch/riscv/kernel/irq.c @@ -24,7 +24,7 @@ int arch_show_interrupts(struct seq_file *p, int prec) return 0; } -asmlinkage void __irq_entry do_IRQ(struct pt_regs *regs) +asmlinkage __visible void __irq_entry do_IRQ(struct pt_regs *regs) { struct pt_regs *old_regs = set_irq_regs(regs); diff --git a/arch/riscv/kernel/module-sections.c b/arch/riscv/kernel/module-sections.c index c9ae48333114..e264e59e596e 100644 --- a/arch/riscv/kernel/module-sections.c +++ b/arch/riscv/kernel/module-sections.c @@ -8,6 +8,7 @@ #include <linux/elf.h> #include <linux/kernel.h> #include <linux/module.h> +#include <linux/moduleloader.h> unsigned long module_emit_got_entry(struct module *mod, unsigned long val) { diff --git a/arch/riscv/kernel/module.c b/arch/riscv/kernel/module.c index 70bb94ae61c5..b7401858d872 100644 --- a/arch/riscv/kernel/module.c +++ b/arch/riscv/kernel/module.c @@ -315,8 +315,8 @@ int apply_relocate_add(Elf_Shdr *sechdrs, const char *strtab, /* Ignore unresolved weak symbol */ if (ELF_ST_BIND(sym->st_info) == STB_WEAK) continue; - pr_warning("%s: Unknown symbol %s\n", - me->name, strtab + sym->st_name); + pr_warn("%s: Unknown symbol %s\n", + me->name, strtab + sym->st_name); return -ENOENT; } diff --git a/arch/riscv/kernel/process.c b/arch/riscv/kernel/process.c index fb3a082362eb..85e3c39bb60b 100644 --- a/arch/riscv/kernel/process.c +++ b/arch/riscv/kernel/process.c @@ -7,6 +7,7 @@ * Copyright (C) 2017 SiFive */ +#include <linux/cpu.h> #include <linux/kernel.h> #include <linux/sched.h> #include <linux/sched/task_stack.h> @@ -19,6 +20,7 @@ #include <asm/csr.h> #include <asm/string.h> #include <asm/switch_to.h> +#include <asm/thread_info.h> extern asmlinkage void ret_from_fork(void); extern asmlinkage void ret_from_kernel_thread(void); diff --git a/arch/riscv/kernel/ptrace.c b/arch/riscv/kernel/ptrace.c index 368751438366..1252113ef8b2 100644 --- a/arch/riscv/kernel/ptrace.c +++ b/arch/riscv/kernel/ptrace.c @@ -148,7 +148,7 @@ long arch_ptrace(struct task_struct *child, long request, * Allows PTRACE_SYSCALL to work. These are called from entry.S in * {handle,ret_from}_syscall. 
*/ -void do_syscall_trace_enter(struct pt_regs *regs) +__visible void do_syscall_trace_enter(struct pt_regs *regs) { if (test_thread_flag(TIF_SYSCALL_TRACE)) if (tracehook_report_syscall_entry(regs)) @@ -162,7 +162,7 @@ void do_syscall_trace_enter(struct pt_regs *regs) audit_syscall_entry(regs->a7, regs->a0, regs->a1, regs->a2, regs->a3); } -void do_syscall_trace_exit(struct pt_regs *regs) +__visible void do_syscall_trace_exit(struct pt_regs *regs) { audit_syscall_exit(regs); diff --git a/arch/riscv/kernel/reset.c b/arch/riscv/kernel/reset.c index d0fe623bfb8f..aa56bb135ec4 100644 --- a/arch/riscv/kernel/reset.c +++ b/arch/riscv/kernel/reset.c @@ -4,6 +4,7 @@ */ #include <linux/reboot.h> +#include <linux/pm.h> #include <asm/sbi.h> static void default_power_off(void) diff --git a/arch/riscv/kernel/setup.c b/arch/riscv/kernel/setup.c index a990a6cb184f..845ae0e12115 100644 --- a/arch/riscv/kernel/setup.c +++ b/arch/riscv/kernel/setup.c @@ -24,6 +24,8 @@ #include <asm/tlbflush.h> #include <asm/thread_info.h> +#include "head.h" + #ifdef CONFIG_DUMMY_CONSOLE struct screen_info screen_info = { .orig_video_lines = 30, diff --git a/arch/riscv/kernel/signal.c b/arch/riscv/kernel/signal.c index b14d7647d800..d0f6f212f5df 100644 --- a/arch/riscv/kernel/signal.c +++ b/arch/riscv/kernel/signal.c @@ -26,7 +26,7 @@ struct rt_sigframe { #ifdef CONFIG_FPU static long restore_fp_state(struct pt_regs *regs, - union __riscv_fp_state *sc_fpregs) + union __riscv_fp_state __user *sc_fpregs) { long err; struct __riscv_d_ext_state __user *state = &sc_fpregs->d; @@ -53,7 +53,7 @@ static long restore_fp_state(struct pt_regs *regs, } static long save_fp_state(struct pt_regs *regs, - union __riscv_fp_state *sc_fpregs) + union __riscv_fp_state __user *sc_fpregs) { long err; struct __riscv_d_ext_state __user *state = &sc_fpregs->d; @@ -292,8 +292,8 @@ static void do_signal(struct pt_regs *regs) * notification of userspace execution resumption * - triggered by the _TIF_WORK_MASK flags */ -asmlinkage void do_notify_resume(struct pt_regs *regs, - unsigned long thread_info_flags) +asmlinkage __visible void do_notify_resume(struct pt_regs *regs, + unsigned long thread_info_flags) { /* Handle pending signal delivery */ if (thread_info_flags & _TIF_SIGPENDING) diff --git a/arch/riscv/kernel/smp.c b/arch/riscv/kernel/smp.c index b18cd6c8e8fb..5c9ec78422c2 100644 --- a/arch/riscv/kernel/smp.c +++ b/arch/riscv/kernel/smp.c @@ -8,7 +8,9 @@ * Copyright (C) 2017 SiFive */ +#include <linux/cpu.h> #include <linux/interrupt.h> +#include <linux/profile.h> #include <linux/smp.h> #include <linux/sched.h> #include <linux/seq_file.h> diff --git a/arch/riscv/kernel/smpboot.c b/arch/riscv/kernel/smpboot.c index 18ae6da5115e..261f4087cc39 100644 --- a/arch/riscv/kernel/smpboot.c +++ b/arch/riscv/kernel/smpboot.c @@ -29,6 +29,9 @@ #include <asm/tlbflush.h> #include <asm/sections.h> #include <asm/sbi.h> +#include <asm/smp.h> + +#include "head.h" void *__cpu_up_stack_pointer[NR_CPUS]; void *__cpu_up_task_pointer[NR_CPUS]; @@ -130,7 +133,7 @@ void __init smp_cpus_done(unsigned int max_cpus) /* * C entry point for a secondary processor. 
*/ -asmlinkage void __init smp_callin(void) +asmlinkage __visible void __init smp_callin(void) { struct mm_struct *mm = &init_mm; diff --git a/arch/riscv/kernel/syscall_table.c b/arch/riscv/kernel/syscall_table.c index e5dd52d8f633..f1ead9df96ca 100644 --- a/arch/riscv/kernel/syscall_table.c +++ b/arch/riscv/kernel/syscall_table.c @@ -8,6 +8,7 @@ #include <linux/syscalls.h> #include <asm-generic/syscalls.h> #include <asm/vdso.h> +#include <asm/syscall.h> #undef __SYSCALL #define __SYSCALL(nr, call) [nr] = (call), diff --git a/arch/riscv/kernel/time.c b/arch/riscv/kernel/time.c index 9dd1f2e64db1..6a53c02e9c73 100644 --- a/arch/riscv/kernel/time.c +++ b/arch/riscv/kernel/time.c @@ -7,6 +7,7 @@ #include <linux/clocksource.h> #include <linux/delay.h> #include <asm/sbi.h> +#include <asm/processor.h> unsigned long riscv_timebase; EXPORT_SYMBOL_GPL(riscv_timebase); diff --git a/arch/riscv/kernel/traps.c b/arch/riscv/kernel/traps.c index 424eb72d56b1..473de3ae8bb7 100644 --- a/arch/riscv/kernel/traps.c +++ b/arch/riscv/kernel/traps.c @@ -3,6 +3,7 @@ * Copyright (C) 2012 Regents of the University of California */ +#include <linux/cpu.h> #include <linux/kernel.h> #include <linux/init.h> #include <linux/sched.h> @@ -83,7 +84,7 @@ static void do_trap_error(struct pt_regs *regs, int signo, int code, } #define DO_ERROR_INFO(name, signo, code, str) \ -asmlinkage void name(struct pt_regs *regs) \ +asmlinkage __visible void name(struct pt_regs *regs) \ { \ do_trap_error(regs, signo, code, regs->sepc, "Oops - " str); \ } @@ -111,7 +112,6 @@ DO_ERROR_INFO(do_trap_ecall_s, DO_ERROR_INFO(do_trap_ecall_m, SIGILL, ILL_ILLTRP, "environment call from M-mode"); -#ifdef CONFIG_GENERIC_BUG static inline unsigned long get_break_insn_length(unsigned long pc) { bug_insn_t insn; @@ -120,28 +120,15 @@ static inline unsigned long get_break_insn_length(unsigned long pc) return 0; return (((insn & __INSN_LENGTH_MASK) == __INSN_LENGTH_32) ? 
4UL : 2UL); } -#endif /* CONFIG_GENERIC_BUG */ -asmlinkage void do_trap_break(struct pt_regs *regs) +asmlinkage __visible void do_trap_break(struct pt_regs *regs) { -#ifdef CONFIG_GENERIC_BUG - if (!user_mode(regs)) { - enum bug_trap_type type; - - type = report_bug(regs->sepc, regs); - switch (type) { - case BUG_TRAP_TYPE_NONE: - break; - case BUG_TRAP_TYPE_WARN: - regs->sepc += get_break_insn_length(regs->sepc); - break; - case BUG_TRAP_TYPE_BUG: - die(regs, "Kernel BUG"); - } - } -#endif /* CONFIG_GENERIC_BUG */ - - force_sig_fault(SIGTRAP, TRAP_BRKPT, (void __user *)(regs->sepc)); + if (user_mode(regs)) + force_sig_fault(SIGTRAP, TRAP_BRKPT, (void __user *)regs->sepc); + else if (report_bug(regs->sepc, regs) == BUG_TRAP_TYPE_WARN) + regs->sepc += get_break_insn_length(regs->sepc); + else + die(regs, "Kernel BUG"); } #ifdef CONFIG_GENERIC_BUG diff --git a/arch/riscv/kernel/vdso.c b/arch/riscv/kernel/vdso.c index c9c21e0d5641..484d95a70907 100644 --- a/arch/riscv/kernel/vdso.c +++ b/arch/riscv/kernel/vdso.c @@ -6,6 +6,7 @@ * Copyright (C) 2015 Regents of the University of California */ +#include <linux/elf.h> #include <linux/mm.h> #include <linux/slab.h> #include <linux/binfmts.h> @@ -25,7 +26,7 @@ static union { struct vdso_data data; u8 page[PAGE_SIZE]; } vdso_data_store __page_aligned_data; -struct vdso_data *vdso_data = &vdso_data_store.data; +static struct vdso_data *vdso_data = &vdso_data_store.data; static int __init vdso_init(void) { diff --git a/arch/riscv/mm/context.c b/arch/riscv/mm/context.c index beeb5d7f92ea..ca66d44156b6 100644 --- a/arch/riscv/mm/context.c +++ b/arch/riscv/mm/context.c @@ -7,6 +7,7 @@ #include <linux/mm.h> #include <asm/tlbflush.h> #include <asm/cacheflush.h> +#include <asm/mmu_context.h> /* * When necessary, performs a deferred icache flush for the given MM context, diff --git a/arch/riscv/mm/fault.c b/arch/riscv/mm/fault.c index 96add1427a75..247b8c859c44 100644 --- a/arch/riscv/mm/fault.c +++ b/arch/riscv/mm/fault.c @@ -18,6 +18,8 @@ #include <asm/ptrace.h> #include <asm/tlbflush.h> +#include "../kernel/head.h" + /* * This routine handles page faults. It determines the address and the * problem, and then passes it off to one of the appropriate routines. 
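For reference, the reworked do_trap_break() in arch/riscv/kernel/traps.c above reads as follows once the hunks are applied; this is reassembled from the diff for readability and is illustrative only, not an authoritative copy of the resulting file:

asmlinkage __visible void do_trap_break(struct pt_regs *regs)
{
	/* Breakpoints taken from user space are delivered as SIGTRAP. */
	if (user_mode(regs))
		force_sig_fault(SIGTRAP, TRAP_BRKPT, (void __user *)regs->sepc);
	/* Kernel-mode WARN(): step over the (possibly compressed) ebreak. */
	else if (report_bug(regs->sepc, regs) == BUG_TRAP_TYPE_WARN)
		regs->sepc += get_break_insn_length(regs->sepc);
	/* Anything else in kernel mode is a genuine BUG(). */
	else
		die(regs, "Kernel BUG");
}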
diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c index f0ba71304b6e..573463d1c799 100644 --- a/arch/riscv/mm/init.c +++ b/arch/riscv/mm/init.c @@ -11,6 +11,7 @@ #include <linux/swap.h> #include <linux/sizes.h> #include <linux/of_fdt.h> +#include <linux/libfdt.h> #include <asm/fixmap.h> #include <asm/tlbflush.h> @@ -18,6 +19,8 @@ #include <asm/pgtable.h> #include <asm/io.h> +#include "../kernel/head.h" + unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)] __page_aligned_bss; EXPORT_SYMBOL(empty_zero_page); @@ -82,6 +85,8 @@ disable: } #endif /* CONFIG_BLK_DEV_INITRD */ +static phys_addr_t dtb_early_pa __initdata; + void __init setup_bootmem(void) { struct memblock_region *reg; @@ -117,7 +122,12 @@ void __init setup_bootmem(void) setup_initrd(); #endif /* CONFIG_BLK_DEV_INITRD */ - early_init_fdt_reserve_self(); + /* + * Avoid using early_init_fdt_reserve_self() since __pa() does + * not work for DTB pointers that are fixmap addresses + */ + memblock_reserve(dtb_early_pa, fdt_totalsize(dtb_early_va)); + early_init_fdt_scan_reserved_mem(); memblock_allow_resize(); memblock_dump_all(); @@ -329,8 +339,7 @@ static uintptr_t __init best_map_size(phys_addr_t base, phys_addr_t size) */ #ifndef __riscv_cmodel_medany -#error "setup_vm() is called from head.S before relocate so it should " - "not use absolute addressing." +#error "setup_vm() is called from head.S before relocate so it should not use absolute addressing." #endif asmlinkage void __init setup_vm(uintptr_t dtb_pa) @@ -393,6 +402,8 @@ asmlinkage void __init setup_vm(uintptr_t dtb_pa) /* Save pointer to DTB for early FDT parsing */ dtb_early_va = (void *)fix_to_virt(FIX_FDT) + (dtb_pa & ~PAGE_MASK); + /* Save physical address for memblock reservation */ + dtb_early_pa = dtb_pa; } static void __init setup_vm_final(void) @@ -448,7 +459,7 @@ void __init paging_init(void) zone_sizes_init(); } -#ifdef CONFIG_SPARSEMEM +#ifdef CONFIG_SPARSEMEM_VMEMMAP int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node, struct vmem_altmap *altmap) { diff --git a/arch/riscv/mm/sifive_l2_cache.c b/arch/riscv/mm/sifive_l2_cache.c index 2e637ad71c05..a9ffff3277c7 100644 --- a/arch/riscv/mm/sifive_l2_cache.c +++ b/arch/riscv/mm/sifive_l2_cache.c @@ -142,7 +142,7 @@ static irqreturn_t l2_int_handler(int irq, void *device) return IRQ_HANDLED; } -int __init sifive_l2_init(void) +static int __init sifive_l2_init(void) { struct device_node *np; struct resource res; diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index 43a81d0ad507..f0df9e48e651 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -246,8 +246,8 @@ choice config MARCH_Z900 bool "IBM zSeries model z800 and z900" - depends on !CC_IS_CLANG select HAVE_MARCH_Z900_FEATURES + depends on $(cc-option,-march=z900) help Select this to enable optimizations for model z800/z900 (2064 and 2066 series). This will enable some optimizations that are not @@ -255,8 +255,8 @@ config MARCH_Z900 config MARCH_Z990 bool "IBM zSeries model z890 and z990" - depends on !CC_IS_CLANG select HAVE_MARCH_Z990_FEATURES + depends on $(cc-option,-march=z990) help Select this to enable optimizations for model z890/z990 (2084 and 2086 series). The kernel will be slightly faster but will not work @@ -264,8 +264,8 @@ config MARCH_Z990 config MARCH_Z9_109 bool "IBM System z9" - depends on !CC_IS_CLANG select HAVE_MARCH_Z9_109_FEATURES + depends on $(cc-option,-march=z9-109) help Select this to enable optimizations for IBM System z9 (2094 and 2096 series). 
The kernel will be slightly faster but will not work @@ -274,6 +274,7 @@ config MARCH_Z9_109 config MARCH_Z10 bool "IBM System z10" select HAVE_MARCH_Z10_FEATURES + depends on $(cc-option,-march=z10) help Select this to enable optimizations for IBM System z10 (2097 and 2098 series). The kernel will be slightly faster but will not work @@ -282,6 +283,7 @@ config MARCH_Z10 config MARCH_Z196 bool "IBM zEnterprise 114 and 196" select HAVE_MARCH_Z196_FEATURES + depends on $(cc-option,-march=z196) help Select this to enable optimizations for IBM zEnterprise 114 and 196 (2818 and 2817 series). The kernel will be slightly faster but will @@ -290,6 +292,7 @@ config MARCH_Z196 config MARCH_ZEC12 bool "IBM zBC12 and zEC12" select HAVE_MARCH_ZEC12_FEATURES + depends on $(cc-option,-march=zEC12) help Select this to enable optimizations for IBM zBC12 and zEC12 (2828 and 2827 series). The kernel will be slightly faster but will not work on @@ -298,6 +301,7 @@ config MARCH_ZEC12 config MARCH_Z13 bool "IBM z13s and z13" select HAVE_MARCH_Z13_FEATURES + depends on $(cc-option,-march=z13) help Select this to enable optimizations for IBM z13s and z13 (2965 and 2964 series). The kernel will be slightly faster but will not work on @@ -306,6 +310,7 @@ config MARCH_Z13 config MARCH_Z14 bool "IBM z14 ZR1 and z14" select HAVE_MARCH_Z14_FEATURES + depends on $(cc-option,-march=z14) help Select this to enable optimizations for IBM z14 ZR1 and z14 (3907 and 3906 series). The kernel will be slightly faster but will not @@ -314,6 +319,7 @@ config MARCH_Z14 config MARCH_Z15 bool "IBM z15" select HAVE_MARCH_Z15_FEATURES + depends on $(cc-option,-march=z15) help Select this to enable optimizations for IBM z15 (8562 and 8561 series). The kernel will be slightly faster but will not @@ -367,33 +373,39 @@ config TUNE_DEFAULT config TUNE_Z900 bool "IBM zSeries model z800 and z900" - depends on !CC_IS_CLANG + depends on $(cc-option,-mtune=z900) config TUNE_Z990 bool "IBM zSeries model z890 and z990" - depends on !CC_IS_CLANG + depends on $(cc-option,-mtune=z990) config TUNE_Z9_109 bool "IBM System z9" - depends on !CC_IS_CLANG + depends on $(cc-option,-mtune=z9-109) config TUNE_Z10 bool "IBM System z10" + depends on $(cc-option,-mtune=z10) config TUNE_Z196 bool "IBM zEnterprise 114 and 196" + depends on $(cc-option,-mtune=z196) config TUNE_ZEC12 bool "IBM zBC12 and zEC12" + depends on $(cc-option,-mtune=zEC12) config TUNE_Z13 - bool "IBM z13" + bool "IBM z13s and z13" + depends on $(cc-option,-mtune=z13) config TUNE_Z14 - bool "IBM z14" + bool "IBM z14 ZR1 and z14" + depends on $(cc-option,-mtune=z14) config TUNE_Z15 bool "IBM z15" + depends on $(cc-option,-mtune=z15) endchoice diff --git a/arch/s390/boot/startup.c b/arch/s390/boot/startup.c index 596ca7cc4d7b..fbd341ea03b8 100644 --- a/arch/s390/boot/startup.c +++ b/arch/s390/boot/startup.c @@ -46,7 +46,7 @@ struct diag_ops __bootdata_preserved(diag_dma_ops) = { .diag0c = _diag0c_dma, .diag308_reset = _diag308_reset_dma }; -static struct diag210 _diag210_tmp_dma __section(".dma.data"); +static struct diag210 _diag210_tmp_dma __section(.dma.data); struct diag210 *__bootdata_preserved(__diag210_tmp_dma) = &_diag210_tmp_dma; void _swsusp_reset_dma(void); unsigned long __bootdata_preserved(__swsusp_reset_dma) = __pa(_swsusp_reset_dma); @@ -101,10 +101,18 @@ static void handle_relocs(unsigned long offset) dynsym = (Elf64_Sym *) vmlinux.dynsym_start; for (rela = rela_start; rela < rela_end; rela++) { loc = rela->r_offset + offset; - val = rela->r_addend + offset; + val = 
rela->r_addend; r_sym = ELF64_R_SYM(rela->r_info); - if (r_sym) - val += dynsym[r_sym].st_value; + if (r_sym) { + if (dynsym[r_sym].st_shndx != SHN_UNDEF) + val += dynsym[r_sym].st_value + offset; + } else { + /* + * 0 == undefined symbol table index (STN_UNDEF), + * used for R_390_RELATIVE, only add KASLR offset + */ + val += offset; + } r_type = ELF64_R_TYPE(rela->r_info); rc = arch_kexec_do_relocs(r_type, (void *) loc, val, 0); if (rc) diff --git a/arch/s390/configs/debug_defconfig b/arch/s390/configs/debug_defconfig index 347f48702edb..38d64030aacf 100644 --- a/arch/s390/configs/debug_defconfig +++ b/arch/s390/configs/debug_defconfig @@ -44,6 +44,7 @@ CONFIG_NR_CPUS=512 CONFIG_NUMA=y CONFIG_HZ_100=y CONFIG_KEXEC_FILE=y +CONFIG_KEXEC_SIG=y CONFIG_EXPOLINE=y CONFIG_EXPOLINE_AUTO=y CONFIG_CHSC_SCH=y @@ -69,12 +70,13 @@ CONFIG_MODULE_UNLOAD=y CONFIG_MODULE_FORCE_UNLOAD=y CONFIG_MODVERSIONS=y CONFIG_MODULE_SRCVERSION_ALL=y -CONFIG_MODULE_SIG=y CONFIG_MODULE_SIG_SHA256=y +CONFIG_UNUSED_SYMBOLS=y CONFIG_BLK_DEV_INTEGRITY=y CONFIG_BLK_DEV_THROTTLING=y CONFIG_BLK_WBT=y CONFIG_BLK_CGROUP_IOLATENCY=y +CONFIG_BLK_CGROUP_IOCOST=y CONFIG_PARTITION_ADVANCED=y CONFIG_IBM_PARTITION=y CONFIG_BSD_DISKLABEL=y @@ -370,6 +372,7 @@ CONFIG_NETLINK_DIAG=m CONFIG_CGROUP_NET_PRIO=y CONFIG_BPF_JIT=y CONFIG_NET_PKTGEN=m +# CONFIG_NET_DROP_MONITOR is not set CONFIG_PCI=y CONFIG_PCI_DEBUG=y CONFIG_HOTPLUG_PCI=y @@ -424,6 +427,7 @@ CONFIG_DM_CRYPT=m CONFIG_DM_SNAPSHOT=m CONFIG_DM_THIN_PROVISIONING=m CONFIG_DM_WRITECACHE=m +CONFIG_DM_CLONE=m CONFIG_DM_MIRROR=m CONFIG_DM_LOG_USERSPACE=m CONFIG_DM_RAID=m @@ -435,6 +439,7 @@ CONFIG_DM_DELAY=m CONFIG_DM_UEVENT=y CONFIG_DM_FLAKEY=m CONFIG_DM_VERITY=m +CONFIG_DM_VERITY_VERIFY_ROOTHASH_SIG=y CONFIG_DM_SWITCH=m CONFIG_NETDEVICES=y CONFIG_BONDING=m @@ -489,6 +494,7 @@ CONFIG_MLX5_CORE_EN=y # CONFIG_NET_VENDOR_NVIDIA is not set # CONFIG_NET_VENDOR_OKI is not set # CONFIG_NET_VENDOR_PACKET_ENGINES is not set +# CONFIG_NET_VENDOR_PENSANDO is not set # CONFIG_NET_VENDOR_QLOGIC is not set # CONFIG_NET_VENDOR_QUALCOMM is not set # CONFIG_NET_VENDOR_RDC is not set @@ -538,15 +544,16 @@ CONFIG_WATCHDOG=y CONFIG_WATCHDOG_NOWAYOUT=y CONFIG_SOFT_WATCHDOG=m CONFIG_DIAG288_WATCHDOG=m -CONFIG_DRM=y -CONFIG_DRM_VIRTIO_GPU=y +CONFIG_FB=y CONFIG_FRAMEBUFFER_CONSOLE=y +CONFIG_FRAMEBUFFER_CONSOLE_DETECT_PRIMARY=y # CONFIG_HID is not set # CONFIG_USB_SUPPORT is not set CONFIG_INFINIBAND=m CONFIG_INFINIBAND_USER_ACCESS=m CONFIG_MLX4_INFINIBAND=m CONFIG_MLX5_INFINIBAND=m +CONFIG_SYNC_FILE=y CONFIG_VFIO=m CONFIG_VFIO_PCI=m CONFIG_VFIO_MDEV=m @@ -580,6 +587,8 @@ CONFIG_NILFS2_FS=m CONFIG_FS_DAX=y CONFIG_EXPORTFS_BLOCK_OPS=y CONFIG_FS_ENCRYPTION=y +CONFIG_FS_VERITY=y +CONFIG_FS_VERITY_BUILTIN_SIGNATURES=y CONFIG_FANOTIFY=y CONFIG_FANOTIFY_ACCESS_PERMISSIONS=y CONFIG_QUOTA_NETLINK_INTERFACE=y @@ -589,6 +598,7 @@ CONFIG_QFMT_V2=m CONFIG_AUTOFS4_FS=m CONFIG_FUSE_FS=y CONFIG_CUSE=m +CONFIG_VIRTIO_FS=m CONFIG_OVERLAY_FS=m CONFIG_FSCACHE=m CONFIG_CACHEFILES=m @@ -648,12 +658,15 @@ CONFIG_FORTIFY_SOURCE=y CONFIG_SECURITY_SELINUX=y CONFIG_SECURITY_SELINUX_BOOTPARAM=y CONFIG_SECURITY_SELINUX_DISABLE=y +CONFIG_SECURITY_LOCKDOWN_LSM=y +CONFIG_SECURITY_LOCKDOWN_LSM_EARLY=y CONFIG_INTEGRITY_SIGNATURE=y CONFIG_INTEGRITY_ASYMMETRIC_KEYS=y CONFIG_IMA=y CONFIG_IMA_DEFAULT_HASH_SHA256=y CONFIG_IMA_WRITE_POLICY=y CONFIG_IMA_APPRAISE=y +CONFIG_LSM="yama,loadpin,safesetid,integrity,selinux,smack,tomoyo,apparmor" CONFIG_CRYPTO_USER=m # CONFIG_CRYPTO_MANAGER_DISABLE_TESTS is not set CONFIG_CRYPTO_PCRYPT=m @@ -664,10 +677,6 
@@ CONFIG_CRYPTO_ECDH=m CONFIG_CRYPTO_ECRDSA=m CONFIG_CRYPTO_CHACHA20POLY1305=m CONFIG_CRYPTO_AEGIS128=m -CONFIG_CRYPTO_AEGIS128L=m -CONFIG_CRYPTO_AEGIS256=m -CONFIG_CRYPTO_MORUS640=m -CONFIG_CRYPTO_MORUS1280=m CONFIG_CRYPTO_CFB=m CONFIG_CRYPTO_LRW=m CONFIG_CRYPTO_PCBC=m @@ -739,7 +748,6 @@ CONFIG_DEBUG_INFO=y CONFIG_DEBUG_INFO_DWARF4=y CONFIG_GDB_SCRIPTS=y CONFIG_FRAME_WARN=1024 -CONFIG_UNUSED_SYMBOLS=y CONFIG_HEADERS_INSTALL=y CONFIG_HEADERS_CHECK=y CONFIG_DEBUG_SECTION_MISMATCH=y diff --git a/arch/s390/configs/defconfig b/arch/s390/configs/defconfig index 8514b8b9500f..25f799849582 100644 --- a/arch/s390/configs/defconfig +++ b/arch/s390/configs/defconfig @@ -44,6 +44,7 @@ CONFIG_NUMA=y # CONFIG_NUMA_EMU is not set CONFIG_HZ_100=y CONFIG_KEXEC_FILE=y +CONFIG_KEXEC_SIG=y CONFIG_EXPOLINE=y CONFIG_EXPOLINE_AUTO=y CONFIG_CHSC_SCH=y @@ -66,11 +67,12 @@ CONFIG_MODULE_UNLOAD=y CONFIG_MODULE_FORCE_UNLOAD=y CONFIG_MODVERSIONS=y CONFIG_MODULE_SRCVERSION_ALL=y -CONFIG_MODULE_SIG=y CONFIG_MODULE_SIG_SHA256=y +CONFIG_UNUSED_SYMBOLS=y CONFIG_BLK_DEV_THROTTLING=y CONFIG_BLK_WBT=y CONFIG_BLK_CGROUP_IOLATENCY=y +CONFIG_BLK_CGROUP_IOCOST=y CONFIG_PARTITION_ADVANCED=y CONFIG_IBM_PARTITION=y CONFIG_BSD_DISKLABEL=y @@ -363,6 +365,7 @@ CONFIG_NETLINK_DIAG=m CONFIG_CGROUP_NET_PRIO=y CONFIG_BPF_JIT=y CONFIG_NET_PKTGEN=m +# CONFIG_NET_DROP_MONITOR is not set CONFIG_PCI=y CONFIG_HOTPLUG_PCI=y CONFIG_HOTPLUG_PCI_S390=y @@ -418,6 +421,7 @@ CONFIG_DM_CRYPT=m CONFIG_DM_SNAPSHOT=m CONFIG_DM_THIN_PROVISIONING=m CONFIG_DM_WRITECACHE=m +CONFIG_DM_CLONE=m CONFIG_DM_MIRROR=m CONFIG_DM_LOG_USERSPACE=m CONFIG_DM_RAID=m @@ -429,6 +433,7 @@ CONFIG_DM_DELAY=m CONFIG_DM_UEVENT=y CONFIG_DM_FLAKEY=m CONFIG_DM_VERITY=m +CONFIG_DM_VERITY_VERIFY_ROOTHASH_SIG=y CONFIG_DM_SWITCH=m CONFIG_DM_INTEGRITY=m CONFIG_NETDEVICES=y @@ -484,6 +489,7 @@ CONFIG_MLX5_CORE_EN=y # CONFIG_NET_VENDOR_NVIDIA is not set # CONFIG_NET_VENDOR_OKI is not set # CONFIG_NET_VENDOR_PACKET_ENGINES is not set +# CONFIG_NET_VENDOR_PENSANDO is not set # CONFIG_NET_VENDOR_QLOGIC is not set # CONFIG_NET_VENDOR_QUALCOMM is not set # CONFIG_NET_VENDOR_RDC is not set @@ -533,16 +539,16 @@ CONFIG_WATCHDOG_CORE=y CONFIG_WATCHDOG_NOWAYOUT=y CONFIG_SOFT_WATCHDOG=m CONFIG_DIAG288_WATCHDOG=m -CONFIG_DRM=y -CONFIG_DRM_VIRTIO_GPU=y -# CONFIG_BACKLIGHT_CLASS_DEVICE is not set +CONFIG_FB=y CONFIG_FRAMEBUFFER_CONSOLE=y +CONFIG_FRAMEBUFFER_CONSOLE_DETECT_PRIMARY=y # CONFIG_HID is not set # CONFIG_USB_SUPPORT is not set CONFIG_INFINIBAND=m CONFIG_INFINIBAND_USER_ACCESS=m CONFIG_MLX4_INFINIBAND=m CONFIG_MLX5_INFINIBAND=m +CONFIG_SYNC_FILE=y CONFIG_VFIO=m CONFIG_VFIO_PCI=m CONFIG_VFIO_MDEV=m @@ -573,6 +579,8 @@ CONFIG_NILFS2_FS=m CONFIG_FS_DAX=y CONFIG_EXPORTFS_BLOCK_OPS=y CONFIG_FS_ENCRYPTION=y +CONFIG_FS_VERITY=y +CONFIG_FS_VERITY_BUILTIN_SIGNATURES=y CONFIG_FANOTIFY=y CONFIG_FANOTIFY_ACCESS_PERMISSIONS=y CONFIG_QUOTA_NETLINK_INTERFACE=y @@ -581,6 +589,7 @@ CONFIG_QFMT_V2=m CONFIG_AUTOFS4_FS=m CONFIG_FUSE_FS=y CONFIG_CUSE=m +CONFIG_VIRTIO_FS=m CONFIG_OVERLAY_FS=m CONFIG_FSCACHE=m CONFIG_CACHEFILES=m @@ -639,12 +648,15 @@ CONFIG_SECURITY_NETWORK=y CONFIG_SECURITY_SELINUX=y CONFIG_SECURITY_SELINUX_BOOTPARAM=y CONFIG_SECURITY_SELINUX_DISABLE=y +CONFIG_SECURITY_LOCKDOWN_LSM=y +CONFIG_SECURITY_LOCKDOWN_LSM_EARLY=y CONFIG_INTEGRITY_SIGNATURE=y CONFIG_INTEGRITY_ASYMMETRIC_KEYS=y CONFIG_IMA=y CONFIG_IMA_DEFAULT_HASH_SHA256=y CONFIG_IMA_WRITE_POLICY=y CONFIG_IMA_APPRAISE=y +CONFIG_LSM="yama,loadpin,safesetid,integrity,selinux,smack,tomoyo,apparmor" CONFIG_CRYPTO_FIPS=y CONFIG_CRYPTO_USER=m # 
CONFIG_CRYPTO_MANAGER_DISABLE_TESTS is not set @@ -656,10 +668,6 @@ CONFIG_CRYPTO_ECDH=m CONFIG_CRYPTO_ECRDSA=m CONFIG_CRYPTO_CHACHA20POLY1305=m CONFIG_CRYPTO_AEGIS128=m -CONFIG_CRYPTO_AEGIS128L=m -CONFIG_CRYPTO_AEGIS256=m -CONFIG_CRYPTO_MORUS640=m -CONFIG_CRYPTO_MORUS1280=m CONFIG_CRYPTO_CFB=m CONFIG_CRYPTO_LRW=m CONFIG_CRYPTO_OFB=m @@ -727,7 +735,6 @@ CONFIG_DEBUG_INFO=y CONFIG_DEBUG_INFO_DWARF4=y CONFIG_GDB_SCRIPTS=y CONFIG_FRAME_WARN=1024 -CONFIG_UNUSED_SYMBOLS=y CONFIG_DEBUG_SECTION_MISMATCH=y CONFIG_MAGIC_SYSRQ=y CONFIG_DEBUG_MEMORY_INIT=y diff --git a/arch/s390/configs/zfcpdump_defconfig b/arch/s390/configs/zfcpdump_defconfig index be09a208b608..20c51e5d9353 100644 --- a/arch/s390/configs/zfcpdump_defconfig +++ b/arch/s390/configs/zfcpdump_defconfig @@ -61,7 +61,7 @@ CONFIG_RAW_DRIVER=y CONFIG_CONFIGFS_FS=y # CONFIG_MISC_FILESYSTEMS is not set # CONFIG_NETWORK_FILESYSTEMS is not set -# CONFIG_DIMLIB is not set +CONFIG_LSM="yama,loadpin,safesetid,integrity" CONFIG_PRINTK_TIME=y CONFIG_DEBUG_INFO=y CONFIG_DEBUG_FS=y diff --git a/arch/s390/crypto/aes_s390.c b/arch/s390/crypto/aes_s390.c index 9803e96d2924..ead0b2c9881d 100644 --- a/arch/s390/crypto/aes_s390.c +++ b/arch/s390/crypto/aes_s390.c @@ -44,7 +44,7 @@ struct s390_aes_ctx { int key_len; unsigned long fc; union { - struct crypto_sync_skcipher *blk; + struct crypto_skcipher *skcipher; struct crypto_cipher *cip; } fallback; }; @@ -54,7 +54,7 @@ struct s390_xts_ctx { u8 pcc_key[32]; int key_len; unsigned long fc; - struct crypto_sync_skcipher *fallback; + struct crypto_skcipher *fallback; }; struct gcm_sg_walk { @@ -178,66 +178,41 @@ static struct crypto_alg aes_alg = { } }; -static int setkey_fallback_blk(struct crypto_tfm *tfm, const u8 *key, - unsigned int len) +static int setkey_fallback_skcipher(struct crypto_skcipher *tfm, const u8 *key, + unsigned int len) { - struct s390_aes_ctx *sctx = crypto_tfm_ctx(tfm); - unsigned int ret; - - crypto_sync_skcipher_clear_flags(sctx->fallback.blk, - CRYPTO_TFM_REQ_MASK); - crypto_sync_skcipher_set_flags(sctx->fallback.blk, tfm->crt_flags & - CRYPTO_TFM_REQ_MASK); - - ret = crypto_sync_skcipher_setkey(sctx->fallback.blk, key, len); - - tfm->crt_flags &= ~CRYPTO_TFM_RES_MASK; - tfm->crt_flags |= crypto_sync_skcipher_get_flags(sctx->fallback.blk) & - CRYPTO_TFM_RES_MASK; - - return ret; -} - -static int fallback_blk_dec(struct blkcipher_desc *desc, - struct scatterlist *dst, struct scatterlist *src, - unsigned int nbytes) -{ - unsigned int ret; - struct crypto_blkcipher *tfm = desc->tfm; - struct s390_aes_ctx *sctx = crypto_blkcipher_ctx(tfm); - SYNC_SKCIPHER_REQUEST_ON_STACK(req, sctx->fallback.blk); - - skcipher_request_set_sync_tfm(req, sctx->fallback.blk); - skcipher_request_set_callback(req, desc->flags, NULL, NULL); - skcipher_request_set_crypt(req, src, dst, nbytes, desc->info); - - ret = crypto_skcipher_decrypt(req); + struct s390_aes_ctx *sctx = crypto_skcipher_ctx(tfm); + int ret; - skcipher_request_zero(req); + crypto_skcipher_clear_flags(sctx->fallback.skcipher, + CRYPTO_TFM_REQ_MASK); + crypto_skcipher_set_flags(sctx->fallback.skcipher, + crypto_skcipher_get_flags(tfm) & + CRYPTO_TFM_REQ_MASK); + ret = crypto_skcipher_setkey(sctx->fallback.skcipher, key, len); + crypto_skcipher_set_flags(tfm, + crypto_skcipher_get_flags(sctx->fallback.skcipher) & + CRYPTO_TFM_RES_MASK); return ret; } -static int fallback_blk_enc(struct blkcipher_desc *desc, - struct scatterlist *dst, struct scatterlist *src, - unsigned int nbytes) +static int fallback_skcipher_crypt(struct s390_aes_ctx *sctx, + 
struct skcipher_request *req, + unsigned long modifier) { - unsigned int ret; - struct crypto_blkcipher *tfm = desc->tfm; - struct s390_aes_ctx *sctx = crypto_blkcipher_ctx(tfm); - SYNC_SKCIPHER_REQUEST_ON_STACK(req, sctx->fallback.blk); - - skcipher_request_set_sync_tfm(req, sctx->fallback.blk); - skcipher_request_set_callback(req, desc->flags, NULL, NULL); - skcipher_request_set_crypt(req, src, dst, nbytes, desc->info); + struct skcipher_request *subreq = skcipher_request_ctx(req); - ret = crypto_skcipher_encrypt(req); - return ret; + *subreq = *req; + skcipher_request_set_tfm(subreq, sctx->fallback.skcipher); + return (modifier & CPACF_DECRYPT) ? + crypto_skcipher_decrypt(subreq) : + crypto_skcipher_encrypt(subreq); } -static int ecb_aes_set_key(struct crypto_tfm *tfm, const u8 *in_key, +static int ecb_aes_set_key(struct crypto_skcipher *tfm, const u8 *in_key, unsigned int key_len) { - struct s390_aes_ctx *sctx = crypto_tfm_ctx(tfm); + struct s390_aes_ctx *sctx = crypto_skcipher_ctx(tfm); unsigned long fc; /* Pick the correct function code based on the key length */ @@ -248,111 +223,92 @@ static int ecb_aes_set_key(struct crypto_tfm *tfm, const u8 *in_key, /* Check if the function code is available */ sctx->fc = (fc && cpacf_test_func(&km_functions, fc)) ? fc : 0; if (!sctx->fc) - return setkey_fallback_blk(tfm, in_key, key_len); + return setkey_fallback_skcipher(tfm, in_key, key_len); sctx->key_len = key_len; memcpy(sctx->key, in_key, key_len); return 0; } -static int ecb_aes_crypt(struct blkcipher_desc *desc, unsigned long modifier, - struct blkcipher_walk *walk) +static int ecb_aes_crypt(struct skcipher_request *req, unsigned long modifier) { - struct s390_aes_ctx *sctx = crypto_blkcipher_ctx(desc->tfm); + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct s390_aes_ctx *sctx = crypto_skcipher_ctx(tfm); + struct skcipher_walk walk; unsigned int nbytes, n; int ret; - ret = blkcipher_walk_virt(desc, walk); - while ((nbytes = walk->nbytes) >= AES_BLOCK_SIZE) { + if (unlikely(!sctx->fc)) + return fallback_skcipher_crypt(sctx, req, modifier); + + ret = skcipher_walk_virt(&walk, req, false); + while ((nbytes = walk.nbytes) != 0) { /* only use complete blocks */ n = nbytes & ~(AES_BLOCK_SIZE - 1); cpacf_km(sctx->fc | modifier, sctx->key, - walk->dst.virt.addr, walk->src.virt.addr, n); - ret = blkcipher_walk_done(desc, walk, nbytes - n); + walk.dst.virt.addr, walk.src.virt.addr, n); + ret = skcipher_walk_done(&walk, nbytes - n); } - return ret; } -static int ecb_aes_encrypt(struct blkcipher_desc *desc, - struct scatterlist *dst, struct scatterlist *src, - unsigned int nbytes) +static int ecb_aes_encrypt(struct skcipher_request *req) { - struct s390_aes_ctx *sctx = crypto_blkcipher_ctx(desc->tfm); - struct blkcipher_walk walk; - - if (unlikely(!sctx->fc)) - return fallback_blk_enc(desc, dst, src, nbytes); - - blkcipher_walk_init(&walk, dst, src, nbytes); - return ecb_aes_crypt(desc, 0, &walk); + return ecb_aes_crypt(req, 0); } -static int ecb_aes_decrypt(struct blkcipher_desc *desc, - struct scatterlist *dst, struct scatterlist *src, - unsigned int nbytes) +static int ecb_aes_decrypt(struct skcipher_request *req) { - struct s390_aes_ctx *sctx = crypto_blkcipher_ctx(desc->tfm); - struct blkcipher_walk walk; - - if (unlikely(!sctx->fc)) - return fallback_blk_dec(desc, dst, src, nbytes); - - blkcipher_walk_init(&walk, dst, src, nbytes); - return ecb_aes_crypt(desc, CPACF_DECRYPT, &walk); + return ecb_aes_crypt(req, CPACF_DECRYPT); } -static int fallback_init_blk(struct crypto_tfm 
*tfm) +static int fallback_init_skcipher(struct crypto_skcipher *tfm) { - const char *name = tfm->__crt_alg->cra_name; - struct s390_aes_ctx *sctx = crypto_tfm_ctx(tfm); + const char *name = crypto_tfm_alg_name(&tfm->base); + struct s390_aes_ctx *sctx = crypto_skcipher_ctx(tfm); - sctx->fallback.blk = crypto_alloc_sync_skcipher(name, 0, - CRYPTO_ALG_NEED_FALLBACK); + sctx->fallback.skcipher = crypto_alloc_skcipher(name, 0, + CRYPTO_ALG_NEED_FALLBACK | CRYPTO_ALG_ASYNC); - if (IS_ERR(sctx->fallback.blk)) { + if (IS_ERR(sctx->fallback.skcipher)) { pr_err("Allocating AES fallback algorithm %s failed\n", name); - return PTR_ERR(sctx->fallback.blk); + return PTR_ERR(sctx->fallback.skcipher); } + crypto_skcipher_set_reqsize(tfm, sizeof(struct skcipher_request) + + crypto_skcipher_reqsize(sctx->fallback.skcipher)); return 0; } -static void fallback_exit_blk(struct crypto_tfm *tfm) +static void fallback_exit_skcipher(struct crypto_skcipher *tfm) { - struct s390_aes_ctx *sctx = crypto_tfm_ctx(tfm); + struct s390_aes_ctx *sctx = crypto_skcipher_ctx(tfm); - crypto_free_sync_skcipher(sctx->fallback.blk); + crypto_free_skcipher(sctx->fallback.skcipher); } -static struct crypto_alg ecb_aes_alg = { - .cra_name = "ecb(aes)", - .cra_driver_name = "ecb-aes-s390", - .cra_priority = 401, /* combo: aes + ecb + 1 */ - .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER | - CRYPTO_ALG_NEED_FALLBACK, - .cra_blocksize = AES_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct s390_aes_ctx), - .cra_type = &crypto_blkcipher_type, - .cra_module = THIS_MODULE, - .cra_init = fallback_init_blk, - .cra_exit = fallback_exit_blk, - .cra_u = { - .blkcipher = { - .min_keysize = AES_MIN_KEY_SIZE, - .max_keysize = AES_MAX_KEY_SIZE, - .setkey = ecb_aes_set_key, - .encrypt = ecb_aes_encrypt, - .decrypt = ecb_aes_decrypt, - } - } +static struct skcipher_alg ecb_aes_alg = { + .base.cra_name = "ecb(aes)", + .base.cra_driver_name = "ecb-aes-s390", + .base.cra_priority = 401, /* combo: aes + ecb + 1 */ + .base.cra_flags = CRYPTO_ALG_NEED_FALLBACK, + .base.cra_blocksize = AES_BLOCK_SIZE, + .base.cra_ctxsize = sizeof(struct s390_aes_ctx), + .base.cra_module = THIS_MODULE, + .init = fallback_init_skcipher, + .exit = fallback_exit_skcipher, + .min_keysize = AES_MIN_KEY_SIZE, + .max_keysize = AES_MAX_KEY_SIZE, + .setkey = ecb_aes_set_key, + .encrypt = ecb_aes_encrypt, + .decrypt = ecb_aes_decrypt, }; -static int cbc_aes_set_key(struct crypto_tfm *tfm, const u8 *in_key, +static int cbc_aes_set_key(struct crypto_skcipher *tfm, const u8 *in_key, unsigned int key_len) { - struct s390_aes_ctx *sctx = crypto_tfm_ctx(tfm); + struct s390_aes_ctx *sctx = crypto_skcipher_ctx(tfm); unsigned long fc; /* Pick the correct function code based on the key length */ @@ -363,17 +319,18 @@ static int cbc_aes_set_key(struct crypto_tfm *tfm, const u8 *in_key, /* Check if the function code is available */ sctx->fc = (fc && cpacf_test_func(&kmc_functions, fc)) ? 
fc : 0; if (!sctx->fc) - return setkey_fallback_blk(tfm, in_key, key_len); + return setkey_fallback_skcipher(tfm, in_key, key_len); sctx->key_len = key_len; memcpy(sctx->key, in_key, key_len); return 0; } -static int cbc_aes_crypt(struct blkcipher_desc *desc, unsigned long modifier, - struct blkcipher_walk *walk) +static int cbc_aes_crypt(struct skcipher_request *req, unsigned long modifier) { - struct s390_aes_ctx *sctx = crypto_blkcipher_ctx(desc->tfm); + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct s390_aes_ctx *sctx = crypto_skcipher_ctx(tfm); + struct skcipher_walk walk; unsigned int nbytes, n; int ret; struct { @@ -381,134 +338,74 @@ static int cbc_aes_crypt(struct blkcipher_desc *desc, unsigned long modifier, u8 key[AES_MAX_KEY_SIZE]; } param; - ret = blkcipher_walk_virt(desc, walk); - memcpy(param.iv, walk->iv, AES_BLOCK_SIZE); + if (unlikely(!sctx->fc)) + return fallback_skcipher_crypt(sctx, req, modifier); + + ret = skcipher_walk_virt(&walk, req, false); + if (ret) + return ret; + memcpy(param.iv, walk.iv, AES_BLOCK_SIZE); memcpy(param.key, sctx->key, sctx->key_len); - while ((nbytes = walk->nbytes) >= AES_BLOCK_SIZE) { + while ((nbytes = walk.nbytes) != 0) { /* only use complete blocks */ n = nbytes & ~(AES_BLOCK_SIZE - 1); cpacf_kmc(sctx->fc | modifier, &param, - walk->dst.virt.addr, walk->src.virt.addr, n); - ret = blkcipher_walk_done(desc, walk, nbytes - n); + walk.dst.virt.addr, walk.src.virt.addr, n); + memcpy(walk.iv, param.iv, AES_BLOCK_SIZE); + ret = skcipher_walk_done(&walk, nbytes - n); } - memcpy(walk->iv, param.iv, AES_BLOCK_SIZE); return ret; } -static int cbc_aes_encrypt(struct blkcipher_desc *desc, - struct scatterlist *dst, struct scatterlist *src, - unsigned int nbytes) +static int cbc_aes_encrypt(struct skcipher_request *req) { - struct s390_aes_ctx *sctx = crypto_blkcipher_ctx(desc->tfm); - struct blkcipher_walk walk; - - if (unlikely(!sctx->fc)) - return fallback_blk_enc(desc, dst, src, nbytes); - - blkcipher_walk_init(&walk, dst, src, nbytes); - return cbc_aes_crypt(desc, 0, &walk); + return cbc_aes_crypt(req, 0); } -static int cbc_aes_decrypt(struct blkcipher_desc *desc, - struct scatterlist *dst, struct scatterlist *src, - unsigned int nbytes) +static int cbc_aes_decrypt(struct skcipher_request *req) { - struct s390_aes_ctx *sctx = crypto_blkcipher_ctx(desc->tfm); - struct blkcipher_walk walk; - - if (unlikely(!sctx->fc)) - return fallback_blk_dec(desc, dst, src, nbytes); - - blkcipher_walk_init(&walk, dst, src, nbytes); - return cbc_aes_crypt(desc, CPACF_DECRYPT, &walk); + return cbc_aes_crypt(req, CPACF_DECRYPT); } -static struct crypto_alg cbc_aes_alg = { - .cra_name = "cbc(aes)", - .cra_driver_name = "cbc-aes-s390", - .cra_priority = 402, /* ecb-aes-s390 + 1 */ - .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER | - CRYPTO_ALG_NEED_FALLBACK, - .cra_blocksize = AES_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct s390_aes_ctx), - .cra_type = &crypto_blkcipher_type, - .cra_module = THIS_MODULE, - .cra_init = fallback_init_blk, - .cra_exit = fallback_exit_blk, - .cra_u = { - .blkcipher = { - .min_keysize = AES_MIN_KEY_SIZE, - .max_keysize = AES_MAX_KEY_SIZE, - .ivsize = AES_BLOCK_SIZE, - .setkey = cbc_aes_set_key, - .encrypt = cbc_aes_encrypt, - .decrypt = cbc_aes_decrypt, - } - } +static struct skcipher_alg cbc_aes_alg = { + .base.cra_name = "cbc(aes)", + .base.cra_driver_name = "cbc-aes-s390", + .base.cra_priority = 402, /* ecb-aes-s390 + 1 */ + .base.cra_flags = CRYPTO_ALG_NEED_FALLBACK, + .base.cra_blocksize = AES_BLOCK_SIZE, + .base.cra_ctxsize = 
sizeof(struct s390_aes_ctx), + .base.cra_module = THIS_MODULE, + .init = fallback_init_skcipher, + .exit = fallback_exit_skcipher, + .min_keysize = AES_MIN_KEY_SIZE, + .max_keysize = AES_MAX_KEY_SIZE, + .ivsize = AES_BLOCK_SIZE, + .setkey = cbc_aes_set_key, + .encrypt = cbc_aes_encrypt, + .decrypt = cbc_aes_decrypt, }; -static int xts_fallback_setkey(struct crypto_tfm *tfm, const u8 *key, - unsigned int len) -{ - struct s390_xts_ctx *xts_ctx = crypto_tfm_ctx(tfm); - unsigned int ret; - - crypto_sync_skcipher_clear_flags(xts_ctx->fallback, - CRYPTO_TFM_REQ_MASK); - crypto_sync_skcipher_set_flags(xts_ctx->fallback, tfm->crt_flags & - CRYPTO_TFM_REQ_MASK); - - ret = crypto_sync_skcipher_setkey(xts_ctx->fallback, key, len); - - tfm->crt_flags &= ~CRYPTO_TFM_RES_MASK; - tfm->crt_flags |= crypto_sync_skcipher_get_flags(xts_ctx->fallback) & - CRYPTO_TFM_RES_MASK; - - return ret; -} - -static int xts_fallback_decrypt(struct blkcipher_desc *desc, - struct scatterlist *dst, struct scatterlist *src, - unsigned int nbytes) -{ - struct crypto_blkcipher *tfm = desc->tfm; - struct s390_xts_ctx *xts_ctx = crypto_blkcipher_ctx(tfm); - SYNC_SKCIPHER_REQUEST_ON_STACK(req, xts_ctx->fallback); - unsigned int ret; - - skcipher_request_set_sync_tfm(req, xts_ctx->fallback); - skcipher_request_set_callback(req, desc->flags, NULL, NULL); - skcipher_request_set_crypt(req, src, dst, nbytes, desc->info); - - ret = crypto_skcipher_decrypt(req); - - skcipher_request_zero(req); - return ret; -} - -static int xts_fallback_encrypt(struct blkcipher_desc *desc, - struct scatterlist *dst, struct scatterlist *src, - unsigned int nbytes) +static int xts_fallback_setkey(struct crypto_skcipher *tfm, const u8 *key, + unsigned int len) { - struct crypto_blkcipher *tfm = desc->tfm; - struct s390_xts_ctx *xts_ctx = crypto_blkcipher_ctx(tfm); - SYNC_SKCIPHER_REQUEST_ON_STACK(req, xts_ctx->fallback); - unsigned int ret; - - skcipher_request_set_sync_tfm(req, xts_ctx->fallback); - skcipher_request_set_callback(req, desc->flags, NULL, NULL); - skcipher_request_set_crypt(req, src, dst, nbytes, desc->info); - - ret = crypto_skcipher_encrypt(req); + struct s390_xts_ctx *xts_ctx = crypto_skcipher_ctx(tfm); + int ret; - skcipher_request_zero(req); + crypto_skcipher_clear_flags(xts_ctx->fallback, CRYPTO_TFM_REQ_MASK); + crypto_skcipher_set_flags(xts_ctx->fallback, + crypto_skcipher_get_flags(tfm) & + CRYPTO_TFM_REQ_MASK); + ret = crypto_skcipher_setkey(xts_ctx->fallback, key, len); + crypto_skcipher_set_flags(tfm, + crypto_skcipher_get_flags(xts_ctx->fallback) & + CRYPTO_TFM_RES_MASK); return ret; } -static int xts_aes_set_key(struct crypto_tfm *tfm, const u8 *in_key, +static int xts_aes_set_key(struct crypto_skcipher *tfm, const u8 *in_key, unsigned int key_len) { - struct s390_xts_ctx *xts_ctx = crypto_tfm_ctx(tfm); + struct s390_xts_ctx *xts_ctx = crypto_skcipher_ctx(tfm); unsigned long fc; int err; @@ -518,7 +415,7 @@ static int xts_aes_set_key(struct crypto_tfm *tfm, const u8 *in_key, /* In fips mode only 128 bit or 256 bit keys are valid */ if (fips_enabled && key_len != 32 && key_len != 64) { - tfm->crt_flags |= CRYPTO_TFM_RES_BAD_KEY_LEN; + crypto_skcipher_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN); return -EINVAL; } @@ -539,10 +436,11 @@ static int xts_aes_set_key(struct crypto_tfm *tfm, const u8 *in_key, return 0; } -static int xts_aes_crypt(struct blkcipher_desc *desc, unsigned long modifier, - struct blkcipher_walk *walk) +static int xts_aes_crypt(struct skcipher_request *req, unsigned long modifier) { - struct s390_xts_ctx 
*xts_ctx = crypto_blkcipher_ctx(desc->tfm); + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct s390_xts_ctx *xts_ctx = crypto_skcipher_ctx(tfm); + struct skcipher_walk walk; unsigned int offset, nbytes, n; int ret; struct { @@ -557,113 +455,100 @@ static int xts_aes_crypt(struct blkcipher_desc *desc, unsigned long modifier, u8 init[16]; } xts_param; - ret = blkcipher_walk_virt(desc, walk); + if (req->cryptlen < AES_BLOCK_SIZE) + return -EINVAL; + + if (unlikely(!xts_ctx->fc || (req->cryptlen % AES_BLOCK_SIZE) != 0)) { + struct skcipher_request *subreq = skcipher_request_ctx(req); + + *subreq = *req; + skcipher_request_set_tfm(subreq, xts_ctx->fallback); + return (modifier & CPACF_DECRYPT) ? + crypto_skcipher_decrypt(subreq) : + crypto_skcipher_encrypt(subreq); + } + + ret = skcipher_walk_virt(&walk, req, false); + if (ret) + return ret; offset = xts_ctx->key_len & 0x10; memset(pcc_param.block, 0, sizeof(pcc_param.block)); memset(pcc_param.bit, 0, sizeof(pcc_param.bit)); memset(pcc_param.xts, 0, sizeof(pcc_param.xts)); - memcpy(pcc_param.tweak, walk->iv, sizeof(pcc_param.tweak)); + memcpy(pcc_param.tweak, walk.iv, sizeof(pcc_param.tweak)); memcpy(pcc_param.key + offset, xts_ctx->pcc_key, xts_ctx->key_len); cpacf_pcc(xts_ctx->fc, pcc_param.key + offset); memcpy(xts_param.key + offset, xts_ctx->key, xts_ctx->key_len); memcpy(xts_param.init, pcc_param.xts, 16); - while ((nbytes = walk->nbytes) >= AES_BLOCK_SIZE) { + while ((nbytes = walk.nbytes) != 0) { /* only use complete blocks */ n = nbytes & ~(AES_BLOCK_SIZE - 1); cpacf_km(xts_ctx->fc | modifier, xts_param.key + offset, - walk->dst.virt.addr, walk->src.virt.addr, n); - ret = blkcipher_walk_done(desc, walk, nbytes - n); + walk.dst.virt.addr, walk.src.virt.addr, n); + ret = skcipher_walk_done(&walk, nbytes - n); } return ret; } -static int xts_aes_encrypt(struct blkcipher_desc *desc, - struct scatterlist *dst, struct scatterlist *src, - unsigned int nbytes) +static int xts_aes_encrypt(struct skcipher_request *req) { - struct s390_xts_ctx *xts_ctx = crypto_blkcipher_ctx(desc->tfm); - struct blkcipher_walk walk; - - if (!nbytes) - return -EINVAL; - - if (unlikely(!xts_ctx->fc || (nbytes % XTS_BLOCK_SIZE) != 0)) - return xts_fallback_encrypt(desc, dst, src, nbytes); - - blkcipher_walk_init(&walk, dst, src, nbytes); - return xts_aes_crypt(desc, 0, &walk); + return xts_aes_crypt(req, 0); } -static int xts_aes_decrypt(struct blkcipher_desc *desc, - struct scatterlist *dst, struct scatterlist *src, - unsigned int nbytes) +static int xts_aes_decrypt(struct skcipher_request *req) { - struct s390_xts_ctx *xts_ctx = crypto_blkcipher_ctx(desc->tfm); - struct blkcipher_walk walk; - - if (!nbytes) - return -EINVAL; - - if (unlikely(!xts_ctx->fc || (nbytes % XTS_BLOCK_SIZE) != 0)) - return xts_fallback_decrypt(desc, dst, src, nbytes); - - blkcipher_walk_init(&walk, dst, src, nbytes); - return xts_aes_crypt(desc, CPACF_DECRYPT, &walk); + return xts_aes_crypt(req, CPACF_DECRYPT); } -static int xts_fallback_init(struct crypto_tfm *tfm) +static int xts_fallback_init(struct crypto_skcipher *tfm) { - const char *name = tfm->__crt_alg->cra_name; - struct s390_xts_ctx *xts_ctx = crypto_tfm_ctx(tfm); + const char *name = crypto_tfm_alg_name(&tfm->base); + struct s390_xts_ctx *xts_ctx = crypto_skcipher_ctx(tfm); - xts_ctx->fallback = crypto_alloc_sync_skcipher(name, 0, - CRYPTO_ALG_NEED_FALLBACK); + xts_ctx->fallback = crypto_alloc_skcipher(name, 0, + CRYPTO_ALG_NEED_FALLBACK | CRYPTO_ALG_ASYNC); if (IS_ERR(xts_ctx->fallback)) { 
pr_err("Allocating XTS fallback algorithm %s failed\n", name); return PTR_ERR(xts_ctx->fallback); } + crypto_skcipher_set_reqsize(tfm, sizeof(struct skcipher_request) + + crypto_skcipher_reqsize(xts_ctx->fallback)); return 0; } -static void xts_fallback_exit(struct crypto_tfm *tfm) +static void xts_fallback_exit(struct crypto_skcipher *tfm) { - struct s390_xts_ctx *xts_ctx = crypto_tfm_ctx(tfm); + struct s390_xts_ctx *xts_ctx = crypto_skcipher_ctx(tfm); - crypto_free_sync_skcipher(xts_ctx->fallback); + crypto_free_skcipher(xts_ctx->fallback); } -static struct crypto_alg xts_aes_alg = { - .cra_name = "xts(aes)", - .cra_driver_name = "xts-aes-s390", - .cra_priority = 402, /* ecb-aes-s390 + 1 */ - .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER | - CRYPTO_ALG_NEED_FALLBACK, - .cra_blocksize = AES_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct s390_xts_ctx), - .cra_type = &crypto_blkcipher_type, - .cra_module = THIS_MODULE, - .cra_init = xts_fallback_init, - .cra_exit = xts_fallback_exit, - .cra_u = { - .blkcipher = { - .min_keysize = 2 * AES_MIN_KEY_SIZE, - .max_keysize = 2 * AES_MAX_KEY_SIZE, - .ivsize = AES_BLOCK_SIZE, - .setkey = xts_aes_set_key, - .encrypt = xts_aes_encrypt, - .decrypt = xts_aes_decrypt, - } - } +static struct skcipher_alg xts_aes_alg = { + .base.cra_name = "xts(aes)", + .base.cra_driver_name = "xts-aes-s390", + .base.cra_priority = 402, /* ecb-aes-s390 + 1 */ + .base.cra_flags = CRYPTO_ALG_NEED_FALLBACK, + .base.cra_blocksize = AES_BLOCK_SIZE, + .base.cra_ctxsize = sizeof(struct s390_xts_ctx), + .base.cra_module = THIS_MODULE, + .init = xts_fallback_init, + .exit = xts_fallback_exit, + .min_keysize = 2 * AES_MIN_KEY_SIZE, + .max_keysize = 2 * AES_MAX_KEY_SIZE, + .ivsize = AES_BLOCK_SIZE, + .setkey = xts_aes_set_key, + .encrypt = xts_aes_encrypt, + .decrypt = xts_aes_decrypt, }; -static int ctr_aes_set_key(struct crypto_tfm *tfm, const u8 *in_key, +static int ctr_aes_set_key(struct crypto_skcipher *tfm, const u8 *in_key, unsigned int key_len) { - struct s390_aes_ctx *sctx = crypto_tfm_ctx(tfm); + struct s390_aes_ctx *sctx = crypto_skcipher_ctx(tfm); unsigned long fc; /* Pick the correct function code based on the key length */ @@ -674,7 +559,7 @@ static int ctr_aes_set_key(struct crypto_tfm *tfm, const u8 *in_key, /* Check if the function code is available */ sctx->fc = (fc && cpacf_test_func(&kmctr_functions, fc)) ? 
fc : 0; if (!sctx->fc) - return setkey_fallback_blk(tfm, in_key, key_len); + return setkey_fallback_skcipher(tfm, in_key, key_len); sctx->key_len = key_len; memcpy(sctx->key, in_key, key_len); @@ -696,30 +581,34 @@ static unsigned int __ctrblk_init(u8 *ctrptr, u8 *iv, unsigned int nbytes) return n; } -static int ctr_aes_crypt(struct blkcipher_desc *desc, unsigned long modifier, - struct blkcipher_walk *walk) +static int ctr_aes_crypt(struct skcipher_request *req) { - struct s390_aes_ctx *sctx = crypto_blkcipher_ctx(desc->tfm); + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct s390_aes_ctx *sctx = crypto_skcipher_ctx(tfm); u8 buf[AES_BLOCK_SIZE], *ctrptr; + struct skcipher_walk walk; unsigned int n, nbytes; int ret, locked; + if (unlikely(!sctx->fc)) + return fallback_skcipher_crypt(sctx, req, 0); + locked = mutex_trylock(&ctrblk_lock); - ret = blkcipher_walk_virt_block(desc, walk, AES_BLOCK_SIZE); - while ((nbytes = walk->nbytes) >= AES_BLOCK_SIZE) { + ret = skcipher_walk_virt(&walk, req, false); + while ((nbytes = walk.nbytes) >= AES_BLOCK_SIZE) { n = AES_BLOCK_SIZE; + if (nbytes >= 2*AES_BLOCK_SIZE && locked) - n = __ctrblk_init(ctrblk, walk->iv, nbytes); - ctrptr = (n > AES_BLOCK_SIZE) ? ctrblk : walk->iv; - cpacf_kmctr(sctx->fc | modifier, sctx->key, - walk->dst.virt.addr, walk->src.virt.addr, - n, ctrptr); + n = __ctrblk_init(ctrblk, walk.iv, nbytes); + ctrptr = (n > AES_BLOCK_SIZE) ? ctrblk : walk.iv; + cpacf_kmctr(sctx->fc, sctx->key, walk.dst.virt.addr, + walk.src.virt.addr, n, ctrptr); if (ctrptr == ctrblk) - memcpy(walk->iv, ctrptr + n - AES_BLOCK_SIZE, + memcpy(walk.iv, ctrptr + n - AES_BLOCK_SIZE, AES_BLOCK_SIZE); - crypto_inc(walk->iv, AES_BLOCK_SIZE); - ret = blkcipher_walk_done(desc, walk, nbytes - n); + crypto_inc(walk.iv, AES_BLOCK_SIZE); + ret = skcipher_walk_done(&walk, nbytes - n); } if (locked) mutex_unlock(&ctrblk_lock); @@ -727,67 +616,33 @@ static int ctr_aes_crypt(struct blkcipher_desc *desc, unsigned long modifier, * final block may be < AES_BLOCK_SIZE, copy only nbytes */ if (nbytes) { - cpacf_kmctr(sctx->fc | modifier, sctx->key, - buf, walk->src.virt.addr, - AES_BLOCK_SIZE, walk->iv); - memcpy(walk->dst.virt.addr, buf, nbytes); - crypto_inc(walk->iv, AES_BLOCK_SIZE); - ret = blkcipher_walk_done(desc, walk, 0); + cpacf_kmctr(sctx->fc, sctx->key, buf, walk.src.virt.addr, + AES_BLOCK_SIZE, walk.iv); + memcpy(walk.dst.virt.addr, buf, nbytes); + crypto_inc(walk.iv, AES_BLOCK_SIZE); + ret = skcipher_walk_done(&walk, 0); } return ret; } -static int ctr_aes_encrypt(struct blkcipher_desc *desc, - struct scatterlist *dst, struct scatterlist *src, - unsigned int nbytes) -{ - struct s390_aes_ctx *sctx = crypto_blkcipher_ctx(desc->tfm); - struct blkcipher_walk walk; - - if (unlikely(!sctx->fc)) - return fallback_blk_enc(desc, dst, src, nbytes); - - blkcipher_walk_init(&walk, dst, src, nbytes); - return ctr_aes_crypt(desc, 0, &walk); -} - -static int ctr_aes_decrypt(struct blkcipher_desc *desc, - struct scatterlist *dst, struct scatterlist *src, - unsigned int nbytes) -{ - struct s390_aes_ctx *sctx = crypto_blkcipher_ctx(desc->tfm); - struct blkcipher_walk walk; - - if (unlikely(!sctx->fc)) - return fallback_blk_dec(desc, dst, src, nbytes); - - blkcipher_walk_init(&walk, dst, src, nbytes); - return ctr_aes_crypt(desc, CPACF_DECRYPT, &walk); -} - -static struct crypto_alg ctr_aes_alg = { - .cra_name = "ctr(aes)", - .cra_driver_name = "ctr-aes-s390", - .cra_priority = 402, /* ecb-aes-s390 + 1 */ - .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER | - 
CRYPTO_ALG_NEED_FALLBACK, - .cra_blocksize = 1, - .cra_ctxsize = sizeof(struct s390_aes_ctx), - .cra_type = &crypto_blkcipher_type, - .cra_module = THIS_MODULE, - .cra_init = fallback_init_blk, - .cra_exit = fallback_exit_blk, - .cra_u = { - .blkcipher = { - .min_keysize = AES_MIN_KEY_SIZE, - .max_keysize = AES_MAX_KEY_SIZE, - .ivsize = AES_BLOCK_SIZE, - .setkey = ctr_aes_set_key, - .encrypt = ctr_aes_encrypt, - .decrypt = ctr_aes_decrypt, - } - } +static struct skcipher_alg ctr_aes_alg = { + .base.cra_name = "ctr(aes)", + .base.cra_driver_name = "ctr-aes-s390", + .base.cra_priority = 402, /* ecb-aes-s390 + 1 */ + .base.cra_flags = CRYPTO_ALG_NEED_FALLBACK, + .base.cra_blocksize = 1, + .base.cra_ctxsize = sizeof(struct s390_aes_ctx), + .base.cra_module = THIS_MODULE, + .init = fallback_init_skcipher, + .exit = fallback_exit_skcipher, + .min_keysize = AES_MIN_KEY_SIZE, + .max_keysize = AES_MAX_KEY_SIZE, + .ivsize = AES_BLOCK_SIZE, + .setkey = ctr_aes_set_key, + .encrypt = ctr_aes_crypt, + .decrypt = ctr_aes_crypt, + .chunksize = AES_BLOCK_SIZE, }; static int gcm_aes_setkey(struct crypto_aead *tfm, const u8 *key, @@ -1116,24 +971,27 @@ static struct aead_alg gcm_aes_aead = { }, }; -static struct crypto_alg *aes_s390_algs_ptr[5]; -static int aes_s390_algs_num; +static struct crypto_alg *aes_s390_alg; +static struct skcipher_alg *aes_s390_skcipher_algs[4]; +static int aes_s390_skciphers_num; static struct aead_alg *aes_s390_aead_alg; -static int aes_s390_register_alg(struct crypto_alg *alg) +static int aes_s390_register_skcipher(struct skcipher_alg *alg) { int ret; - ret = crypto_register_alg(alg); + ret = crypto_register_skcipher(alg); if (!ret) - aes_s390_algs_ptr[aes_s390_algs_num++] = alg; + aes_s390_skcipher_algs[aes_s390_skciphers_num++] = alg; return ret; } static void aes_s390_fini(void) { - while (aes_s390_algs_num--) - crypto_unregister_alg(aes_s390_algs_ptr[aes_s390_algs_num]); + if (aes_s390_alg) + crypto_unregister_alg(aes_s390_alg); + while (aes_s390_skciphers_num--) + crypto_unregister_skcipher(aes_s390_skcipher_algs[aes_s390_skciphers_num]); if (ctrblk) free_page((unsigned long) ctrblk); @@ -1154,10 +1012,11 @@ static int __init aes_s390_init(void) if (cpacf_test_func(&km_functions, CPACF_KM_AES_128) || cpacf_test_func(&km_functions, CPACF_KM_AES_192) || cpacf_test_func(&km_functions, CPACF_KM_AES_256)) { - ret = aes_s390_register_alg(&aes_alg); + ret = crypto_register_alg(&aes_alg); if (ret) goto out_err; - ret = aes_s390_register_alg(&ecb_aes_alg); + aes_s390_alg = &aes_alg; + ret = aes_s390_register_skcipher(&ecb_aes_alg); if (ret) goto out_err; } @@ -1165,14 +1024,14 @@ static int __init aes_s390_init(void) if (cpacf_test_func(&kmc_functions, CPACF_KMC_AES_128) || cpacf_test_func(&kmc_functions, CPACF_KMC_AES_192) || cpacf_test_func(&kmc_functions, CPACF_KMC_AES_256)) { - ret = aes_s390_register_alg(&cbc_aes_alg); + ret = aes_s390_register_skcipher(&cbc_aes_alg); if (ret) goto out_err; } if (cpacf_test_func(&km_functions, CPACF_KM_XTS_128) || cpacf_test_func(&km_functions, CPACF_KM_XTS_256)) { - ret = aes_s390_register_alg(&xts_aes_alg); + ret = aes_s390_register_skcipher(&xts_aes_alg); if (ret) goto out_err; } @@ -1185,7 +1044,7 @@ static int __init aes_s390_init(void) ret = -ENOMEM; goto out_err; } - ret = aes_s390_register_alg(&ctr_aes_alg); + ret = aes_s390_register_skcipher(&ctr_aes_alg); if (ret) goto out_err; } diff --git a/arch/s390/crypto/des_s390.c b/arch/s390/crypto/des_s390.c index 439b100c6f2e..bfbafd35bcbd 100644 --- a/arch/s390/crypto/des_s390.c +++ 
b/arch/s390/crypto/des_s390.c @@ -17,6 +17,7 @@ #include <linux/mutex.h> #include <crypto/algapi.h> #include <crypto/internal/des.h> +#include <crypto/internal/skcipher.h> #include <asm/cpacf.h> #define DES3_KEY_SIZE (3 * DES_KEY_SIZE) @@ -45,6 +46,12 @@ static int des_setkey(struct crypto_tfm *tfm, const u8 *key, return 0; } +static int des_setkey_skcipher(struct crypto_skcipher *tfm, const u8 *key, + unsigned int key_len) +{ + return des_setkey(crypto_skcipher_tfm(tfm), key, key_len); +} + static void s390_des_encrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in) { struct s390_des_ctx *ctx = crypto_tfm_ctx(tfm); @@ -79,28 +86,30 @@ static struct crypto_alg des_alg = { } }; -static int ecb_desall_crypt(struct blkcipher_desc *desc, unsigned long fc, - struct blkcipher_walk *walk) +static int ecb_desall_crypt(struct skcipher_request *req, unsigned long fc) { - struct s390_des_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct s390_des_ctx *ctx = crypto_skcipher_ctx(tfm); + struct skcipher_walk walk; unsigned int nbytes, n; int ret; - ret = blkcipher_walk_virt(desc, walk); - while ((nbytes = walk->nbytes) >= DES_BLOCK_SIZE) { + ret = skcipher_walk_virt(&walk, req, false); + while ((nbytes = walk.nbytes) != 0) { /* only use complete blocks */ n = nbytes & ~(DES_BLOCK_SIZE - 1); - cpacf_km(fc, ctx->key, walk->dst.virt.addr, - walk->src.virt.addr, n); - ret = blkcipher_walk_done(desc, walk, nbytes - n); + cpacf_km(fc, ctx->key, walk.dst.virt.addr, + walk.src.virt.addr, n); + ret = skcipher_walk_done(&walk, nbytes - n); } return ret; } -static int cbc_desall_crypt(struct blkcipher_desc *desc, unsigned long fc, - struct blkcipher_walk *walk) +static int cbc_desall_crypt(struct skcipher_request *req, unsigned long fc) { - struct s390_des_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct s390_des_ctx *ctx = crypto_skcipher_ctx(tfm); + struct skcipher_walk walk; unsigned int nbytes, n; int ret; struct { @@ -108,99 +117,69 @@ static int cbc_desall_crypt(struct blkcipher_desc *desc, unsigned long fc, u8 key[DES3_KEY_SIZE]; } param; - ret = blkcipher_walk_virt(desc, walk); - memcpy(param.iv, walk->iv, DES_BLOCK_SIZE); + ret = skcipher_walk_virt(&walk, req, false); + if (ret) + return ret; + memcpy(param.iv, walk.iv, DES_BLOCK_SIZE); memcpy(param.key, ctx->key, DES3_KEY_SIZE); - while ((nbytes = walk->nbytes) >= DES_BLOCK_SIZE) { + while ((nbytes = walk.nbytes) != 0) { /* only use complete blocks */ n = nbytes & ~(DES_BLOCK_SIZE - 1); - cpacf_kmc(fc, &param, walk->dst.virt.addr, - walk->src.virt.addr, n); - ret = blkcipher_walk_done(desc, walk, nbytes - n); + cpacf_kmc(fc, &param, walk.dst.virt.addr, + walk.src.virt.addr, n); + memcpy(walk.iv, param.iv, DES_BLOCK_SIZE); + ret = skcipher_walk_done(&walk, nbytes - n); } - memcpy(walk->iv, param.iv, DES_BLOCK_SIZE); return ret; } -static int ecb_des_encrypt(struct blkcipher_desc *desc, - struct scatterlist *dst, struct scatterlist *src, - unsigned int nbytes) +static int ecb_des_encrypt(struct skcipher_request *req) { - struct blkcipher_walk walk; - - blkcipher_walk_init(&walk, dst, src, nbytes); - return ecb_desall_crypt(desc, CPACF_KM_DEA, &walk); + return ecb_desall_crypt(req, CPACF_KM_DEA); } -static int ecb_des_decrypt(struct blkcipher_desc *desc, - struct scatterlist *dst, struct scatterlist *src, - unsigned int nbytes) +static int ecb_des_decrypt(struct skcipher_request *req) { - struct blkcipher_walk walk; - - 
blkcipher_walk_init(&walk, dst, src, nbytes); - return ecb_desall_crypt(desc, CPACF_KM_DEA | CPACF_DECRYPT, &walk); + return ecb_desall_crypt(req, CPACF_KM_DEA | CPACF_DECRYPT); } -static struct crypto_alg ecb_des_alg = { - .cra_name = "ecb(des)", - .cra_driver_name = "ecb-des-s390", - .cra_priority = 400, /* combo: des + ecb */ - .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, - .cra_blocksize = DES_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct s390_des_ctx), - .cra_type = &crypto_blkcipher_type, - .cra_module = THIS_MODULE, - .cra_u = { - .blkcipher = { - .min_keysize = DES_KEY_SIZE, - .max_keysize = DES_KEY_SIZE, - .setkey = des_setkey, - .encrypt = ecb_des_encrypt, - .decrypt = ecb_des_decrypt, - } - } +static struct skcipher_alg ecb_des_alg = { + .base.cra_name = "ecb(des)", + .base.cra_driver_name = "ecb-des-s390", + .base.cra_priority = 400, /* combo: des + ecb */ + .base.cra_blocksize = DES_BLOCK_SIZE, + .base.cra_ctxsize = sizeof(struct s390_des_ctx), + .base.cra_module = THIS_MODULE, + .min_keysize = DES_KEY_SIZE, + .max_keysize = DES_KEY_SIZE, + .setkey = des_setkey_skcipher, + .encrypt = ecb_des_encrypt, + .decrypt = ecb_des_decrypt, }; -static int cbc_des_encrypt(struct blkcipher_desc *desc, - struct scatterlist *dst, struct scatterlist *src, - unsigned int nbytes) +static int cbc_des_encrypt(struct skcipher_request *req) { - struct blkcipher_walk walk; - - blkcipher_walk_init(&walk, dst, src, nbytes); - return cbc_desall_crypt(desc, CPACF_KMC_DEA, &walk); + return cbc_desall_crypt(req, CPACF_KMC_DEA); } -static int cbc_des_decrypt(struct blkcipher_desc *desc, - struct scatterlist *dst, struct scatterlist *src, - unsigned int nbytes) +static int cbc_des_decrypt(struct skcipher_request *req) { - struct blkcipher_walk walk; - - blkcipher_walk_init(&walk, dst, src, nbytes); - return cbc_desall_crypt(desc, CPACF_KMC_DEA | CPACF_DECRYPT, &walk); + return cbc_desall_crypt(req, CPACF_KMC_DEA | CPACF_DECRYPT); } -static struct crypto_alg cbc_des_alg = { - .cra_name = "cbc(des)", - .cra_driver_name = "cbc-des-s390", - .cra_priority = 400, /* combo: des + cbc */ - .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, - .cra_blocksize = DES_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct s390_des_ctx), - .cra_type = &crypto_blkcipher_type, - .cra_module = THIS_MODULE, - .cra_u = { - .blkcipher = { - .min_keysize = DES_KEY_SIZE, - .max_keysize = DES_KEY_SIZE, - .ivsize = DES_BLOCK_SIZE, - .setkey = des_setkey, - .encrypt = cbc_des_encrypt, - .decrypt = cbc_des_decrypt, - } - } +static struct skcipher_alg cbc_des_alg = { + .base.cra_name = "cbc(des)", + .base.cra_driver_name = "cbc-des-s390", + .base.cra_priority = 400, /* combo: des + cbc */ + .base.cra_blocksize = DES_BLOCK_SIZE, + .base.cra_ctxsize = sizeof(struct s390_des_ctx), + .base.cra_module = THIS_MODULE, + .min_keysize = DES_KEY_SIZE, + .max_keysize = DES_KEY_SIZE, + .ivsize = DES_BLOCK_SIZE, + .setkey = des_setkey_skcipher, + .encrypt = cbc_des_encrypt, + .decrypt = cbc_des_decrypt, }; /* @@ -232,6 +211,12 @@ static int des3_setkey(struct crypto_tfm *tfm, const u8 *key, return 0; } +static int des3_setkey_skcipher(struct crypto_skcipher *tfm, const u8 *key, + unsigned int key_len) +{ + return des3_setkey(crypto_skcipher_tfm(tfm), key, key_len); +} + static void des3_encrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src) { struct s390_des_ctx *ctx = crypto_tfm_ctx(tfm); @@ -266,87 +251,53 @@ static struct crypto_alg des3_alg = { } }; -static int ecb_des3_encrypt(struct blkcipher_desc *desc, - struct scatterlist *dst, struct scatterlist *src, - unsigned int 
nbytes) +static int ecb_des3_encrypt(struct skcipher_request *req) { - struct blkcipher_walk walk; - - blkcipher_walk_init(&walk, dst, src, nbytes); - return ecb_desall_crypt(desc, CPACF_KM_TDEA_192, &walk); + return ecb_desall_crypt(req, CPACF_KM_TDEA_192); } -static int ecb_des3_decrypt(struct blkcipher_desc *desc, - struct scatterlist *dst, struct scatterlist *src, - unsigned int nbytes) +static int ecb_des3_decrypt(struct skcipher_request *req) { - struct blkcipher_walk walk; - - blkcipher_walk_init(&walk, dst, src, nbytes); - return ecb_desall_crypt(desc, CPACF_KM_TDEA_192 | CPACF_DECRYPT, - &walk); + return ecb_desall_crypt(req, CPACF_KM_TDEA_192 | CPACF_DECRYPT); } -static struct crypto_alg ecb_des3_alg = { - .cra_name = "ecb(des3_ede)", - .cra_driver_name = "ecb-des3_ede-s390", - .cra_priority = 400, /* combo: des3 + ecb */ - .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, - .cra_blocksize = DES_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct s390_des_ctx), - .cra_type = &crypto_blkcipher_type, - .cra_module = THIS_MODULE, - .cra_u = { - .blkcipher = { - .min_keysize = DES3_KEY_SIZE, - .max_keysize = DES3_KEY_SIZE, - .setkey = des3_setkey, - .encrypt = ecb_des3_encrypt, - .decrypt = ecb_des3_decrypt, - } - } +static struct skcipher_alg ecb_des3_alg = { + .base.cra_name = "ecb(des3_ede)", + .base.cra_driver_name = "ecb-des3_ede-s390", + .base.cra_priority = 400, /* combo: des3 + ecb */ + .base.cra_blocksize = DES_BLOCK_SIZE, + .base.cra_ctxsize = sizeof(struct s390_des_ctx), + .base.cra_module = THIS_MODULE, + .min_keysize = DES3_KEY_SIZE, + .max_keysize = DES3_KEY_SIZE, + .setkey = des3_setkey_skcipher, + .encrypt = ecb_des3_encrypt, + .decrypt = ecb_des3_decrypt, }; -static int cbc_des3_encrypt(struct blkcipher_desc *desc, - struct scatterlist *dst, struct scatterlist *src, - unsigned int nbytes) +static int cbc_des3_encrypt(struct skcipher_request *req) { - struct blkcipher_walk walk; - - blkcipher_walk_init(&walk, dst, src, nbytes); - return cbc_desall_crypt(desc, CPACF_KMC_TDEA_192, &walk); + return cbc_desall_crypt(req, CPACF_KMC_TDEA_192); } -static int cbc_des3_decrypt(struct blkcipher_desc *desc, - struct scatterlist *dst, struct scatterlist *src, - unsigned int nbytes) +static int cbc_des3_decrypt(struct skcipher_request *req) { - struct blkcipher_walk walk; - - blkcipher_walk_init(&walk, dst, src, nbytes); - return cbc_desall_crypt(desc, CPACF_KMC_TDEA_192 | CPACF_DECRYPT, - &walk); + return cbc_desall_crypt(req, CPACF_KMC_TDEA_192 | CPACF_DECRYPT); } -static struct crypto_alg cbc_des3_alg = { - .cra_name = "cbc(des3_ede)", - .cra_driver_name = "cbc-des3_ede-s390", - .cra_priority = 400, /* combo: des3 + cbc */ - .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, - .cra_blocksize = DES_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct s390_des_ctx), - .cra_type = &crypto_blkcipher_type, - .cra_module = THIS_MODULE, - .cra_u = { - .blkcipher = { - .min_keysize = DES3_KEY_SIZE, - .max_keysize = DES3_KEY_SIZE, - .ivsize = DES_BLOCK_SIZE, - .setkey = des3_setkey, - .encrypt = cbc_des3_encrypt, - .decrypt = cbc_des3_decrypt, - } - } +static struct skcipher_alg cbc_des3_alg = { + .base.cra_name = "cbc(des3_ede)", + .base.cra_driver_name = "cbc-des3_ede-s390", + .base.cra_priority = 400, /* combo: des3 + cbc */ + .base.cra_blocksize = DES_BLOCK_SIZE, + .base.cra_ctxsize = sizeof(struct s390_des_ctx), + .base.cra_module = THIS_MODULE, + .min_keysize = DES3_KEY_SIZE, + .max_keysize = DES3_KEY_SIZE, + .ivsize = DES_BLOCK_SIZE, + .setkey = des3_setkey_skcipher, + .encrypt = cbc_des3_encrypt, + .decrypt = 
cbc_des3_decrypt, }; static unsigned int __ctrblk_init(u8 *ctrptr, u8 *iv, unsigned int nbytes) @@ -364,128 +315,90 @@ static unsigned int __ctrblk_init(u8 *ctrptr, u8 *iv, unsigned int nbytes) return n; } -static int ctr_desall_crypt(struct blkcipher_desc *desc, unsigned long fc, - struct blkcipher_walk *walk) +static int ctr_desall_crypt(struct skcipher_request *req, unsigned long fc) { - struct s390_des_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct s390_des_ctx *ctx = crypto_skcipher_ctx(tfm); u8 buf[DES_BLOCK_SIZE], *ctrptr; + struct skcipher_walk walk; unsigned int n, nbytes; int ret, locked; locked = mutex_trylock(&ctrblk_lock); - ret = blkcipher_walk_virt_block(desc, walk, DES_BLOCK_SIZE); - while ((nbytes = walk->nbytes) >= DES_BLOCK_SIZE) { + ret = skcipher_walk_virt(&walk, req, false); + while ((nbytes = walk.nbytes) >= DES_BLOCK_SIZE) { n = DES_BLOCK_SIZE; if (nbytes >= 2*DES_BLOCK_SIZE && locked) - n = __ctrblk_init(ctrblk, walk->iv, nbytes); - ctrptr = (n > DES_BLOCK_SIZE) ? ctrblk : walk->iv; - cpacf_kmctr(fc, ctx->key, walk->dst.virt.addr, - walk->src.virt.addr, n, ctrptr); + n = __ctrblk_init(ctrblk, walk.iv, nbytes); + ctrptr = (n > DES_BLOCK_SIZE) ? ctrblk : walk.iv; + cpacf_kmctr(fc, ctx->key, walk.dst.virt.addr, + walk.src.virt.addr, n, ctrptr); if (ctrptr == ctrblk) - memcpy(walk->iv, ctrptr + n - DES_BLOCK_SIZE, + memcpy(walk.iv, ctrptr + n - DES_BLOCK_SIZE, DES_BLOCK_SIZE); - crypto_inc(walk->iv, DES_BLOCK_SIZE); - ret = blkcipher_walk_done(desc, walk, nbytes - n); + crypto_inc(walk.iv, DES_BLOCK_SIZE); + ret = skcipher_walk_done(&walk, nbytes - n); } if (locked) mutex_unlock(&ctrblk_lock); /* final block may be < DES_BLOCK_SIZE, copy only nbytes */ if (nbytes) { - cpacf_kmctr(fc, ctx->key, buf, walk->src.virt.addr, - DES_BLOCK_SIZE, walk->iv); - memcpy(walk->dst.virt.addr, buf, nbytes); - crypto_inc(walk->iv, DES_BLOCK_SIZE); - ret = blkcipher_walk_done(desc, walk, 0); + cpacf_kmctr(fc, ctx->key, buf, walk.src.virt.addr, + DES_BLOCK_SIZE, walk.iv); + memcpy(walk.dst.virt.addr, buf, nbytes); + crypto_inc(walk.iv, DES_BLOCK_SIZE); + ret = skcipher_walk_done(&walk, 0); } return ret; } -static int ctr_des_encrypt(struct blkcipher_desc *desc, - struct scatterlist *dst, struct scatterlist *src, - unsigned int nbytes) -{ - struct blkcipher_walk walk; - - blkcipher_walk_init(&walk, dst, src, nbytes); - return ctr_desall_crypt(desc, CPACF_KMCTR_DEA, &walk); -} - -static int ctr_des_decrypt(struct blkcipher_desc *desc, - struct scatterlist *dst, struct scatterlist *src, - unsigned int nbytes) +static int ctr_des_crypt(struct skcipher_request *req) { - struct blkcipher_walk walk; - - blkcipher_walk_init(&walk, dst, src, nbytes); - return ctr_desall_crypt(desc, CPACF_KMCTR_DEA | CPACF_DECRYPT, &walk); + return ctr_desall_crypt(req, CPACF_KMCTR_DEA); } -static struct crypto_alg ctr_des_alg = { - .cra_name = "ctr(des)", - .cra_driver_name = "ctr-des-s390", - .cra_priority = 400, /* combo: des + ctr */ - .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, - .cra_blocksize = 1, - .cra_ctxsize = sizeof(struct s390_des_ctx), - .cra_type = &crypto_blkcipher_type, - .cra_module = THIS_MODULE, - .cra_u = { - .blkcipher = { - .min_keysize = DES_KEY_SIZE, - .max_keysize = DES_KEY_SIZE, - .ivsize = DES_BLOCK_SIZE, - .setkey = des_setkey, - .encrypt = ctr_des_encrypt, - .decrypt = ctr_des_decrypt, - } - } +static struct skcipher_alg ctr_des_alg = { + .base.cra_name = "ctr(des)", + .base.cra_driver_name = "ctr-des-s390", + 
.base.cra_priority = 400, /* combo: des + ctr */ + .base.cra_blocksize = 1, + .base.cra_ctxsize = sizeof(struct s390_des_ctx), + .base.cra_module = THIS_MODULE, + .min_keysize = DES_KEY_SIZE, + .max_keysize = DES_KEY_SIZE, + .ivsize = DES_BLOCK_SIZE, + .setkey = des_setkey_skcipher, + .encrypt = ctr_des_crypt, + .decrypt = ctr_des_crypt, + .chunksize = DES_BLOCK_SIZE, }; -static int ctr_des3_encrypt(struct blkcipher_desc *desc, - struct scatterlist *dst, struct scatterlist *src, - unsigned int nbytes) -{ - struct blkcipher_walk walk; - - blkcipher_walk_init(&walk, dst, src, nbytes); - return ctr_desall_crypt(desc, CPACF_KMCTR_TDEA_192, &walk); -} - -static int ctr_des3_decrypt(struct blkcipher_desc *desc, - struct scatterlist *dst, struct scatterlist *src, - unsigned int nbytes) +static int ctr_des3_crypt(struct skcipher_request *req) { - struct blkcipher_walk walk; - - blkcipher_walk_init(&walk, dst, src, nbytes); - return ctr_desall_crypt(desc, CPACF_KMCTR_TDEA_192 | CPACF_DECRYPT, - &walk); + return ctr_desall_crypt(req, CPACF_KMCTR_TDEA_192); } -static struct crypto_alg ctr_des3_alg = { - .cra_name = "ctr(des3_ede)", - .cra_driver_name = "ctr-des3_ede-s390", - .cra_priority = 400, /* combo: des3 + ede */ - .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, - .cra_blocksize = 1, - .cra_ctxsize = sizeof(struct s390_des_ctx), - .cra_type = &crypto_blkcipher_type, - .cra_module = THIS_MODULE, - .cra_u = { - .blkcipher = { - .min_keysize = DES3_KEY_SIZE, - .max_keysize = DES3_KEY_SIZE, - .ivsize = DES_BLOCK_SIZE, - .setkey = des3_setkey, - .encrypt = ctr_des3_encrypt, - .decrypt = ctr_des3_decrypt, - } - } +static struct skcipher_alg ctr_des3_alg = { + .base.cra_name = "ctr(des3_ede)", + .base.cra_driver_name = "ctr-des3_ede-s390", + .base.cra_priority = 400, /* combo: des3 + ede */ + .base.cra_blocksize = 1, + .base.cra_ctxsize = sizeof(struct s390_des_ctx), + .base.cra_module = THIS_MODULE, + .min_keysize = DES3_KEY_SIZE, + .max_keysize = DES3_KEY_SIZE, + .ivsize = DES_BLOCK_SIZE, + .setkey = des3_setkey_skcipher, + .encrypt = ctr_des3_crypt, + .decrypt = ctr_des3_crypt, + .chunksize = DES_BLOCK_SIZE, }; -static struct crypto_alg *des_s390_algs_ptr[8]; +static struct crypto_alg *des_s390_algs_ptr[2]; static int des_s390_algs_num; +static struct skcipher_alg *des_s390_skciphers_ptr[6]; +static int des_s390_skciphers_num; static int des_s390_register_alg(struct crypto_alg *alg) { @@ -497,10 +410,22 @@ static int des_s390_register_alg(struct crypto_alg *alg) return ret; } +static int des_s390_register_skcipher(struct skcipher_alg *alg) +{ + int ret; + + ret = crypto_register_skcipher(alg); + if (!ret) + des_s390_skciphers_ptr[des_s390_skciphers_num++] = alg; + return ret; +} + static void des_s390_exit(void) { while (des_s390_algs_num--) crypto_unregister_alg(des_s390_algs_ptr[des_s390_algs_num]); + while (des_s390_skciphers_num--) + crypto_unregister_skcipher(des_s390_skciphers_ptr[des_s390_skciphers_num]); if (ctrblk) free_page((unsigned long) ctrblk); } @@ -518,12 +443,12 @@ static int __init des_s390_init(void) ret = des_s390_register_alg(&des_alg); if (ret) goto out_err; - ret = des_s390_register_alg(&ecb_des_alg); + ret = des_s390_register_skcipher(&ecb_des_alg); if (ret) goto out_err; } if (cpacf_test_func(&kmc_functions, CPACF_KMC_DEA)) { - ret = des_s390_register_alg(&cbc_des_alg); + ret = des_s390_register_skcipher(&cbc_des_alg); if (ret) goto out_err; } @@ -531,12 +456,12 @@ static int __init des_s390_init(void) ret = des_s390_register_alg(&des3_alg); if (ret) goto out_err; - ret = 
des_s390_register_alg(&ecb_des3_alg); + ret = des_s390_register_skcipher(&ecb_des3_alg); if (ret) goto out_err; } if (cpacf_test_func(&kmc_functions, CPACF_KMC_TDEA_192)) { - ret = des_s390_register_alg(&cbc_des3_alg); + ret = des_s390_register_skcipher(&cbc_des3_alg); if (ret) goto out_err; } @@ -551,12 +476,12 @@ static int __init des_s390_init(void) } if (cpacf_test_func(&kmctr_functions, CPACF_KMCTR_DEA)) { - ret = des_s390_register_alg(&ctr_des_alg); + ret = des_s390_register_skcipher(&ctr_des_alg); if (ret) goto out_err; } if (cpacf_test_func(&kmctr_functions, CPACF_KMCTR_TDEA_192)) { - ret = des_s390_register_alg(&ctr_des3_alg); + ret = des_s390_register_skcipher(&ctr_des3_alg); if (ret) goto out_err; } diff --git a/arch/s390/crypto/paes_s390.c b/arch/s390/crypto/paes_s390.c index 6184dceed340..c7119c617b6e 100644 --- a/arch/s390/crypto/paes_s390.c +++ b/arch/s390/crypto/paes_s390.c @@ -21,6 +21,7 @@ #include <linux/cpufeature.h> #include <linux/init.h> #include <linux/spinlock.h> +#include <crypto/internal/skcipher.h> #include <crypto/xts.h> #include <asm/cpacf.h> #include <asm/pkey.h> @@ -123,27 +124,27 @@ static int __paes_set_key(struct s390_paes_ctx *ctx) return ctx->fc ? 0 : -EINVAL; } -static int ecb_paes_init(struct crypto_tfm *tfm) +static int ecb_paes_init(struct crypto_skcipher *tfm) { - struct s390_paes_ctx *ctx = crypto_tfm_ctx(tfm); + struct s390_paes_ctx *ctx = crypto_skcipher_ctx(tfm); ctx->kb.key = NULL; return 0; } -static void ecb_paes_exit(struct crypto_tfm *tfm) +static void ecb_paes_exit(struct crypto_skcipher *tfm) { - struct s390_paes_ctx *ctx = crypto_tfm_ctx(tfm); + struct s390_paes_ctx *ctx = crypto_skcipher_ctx(tfm); _free_kb_keybuf(&ctx->kb); } -static int ecb_paes_set_key(struct crypto_tfm *tfm, const u8 *in_key, +static int ecb_paes_set_key(struct crypto_skcipher *tfm, const u8 *in_key, unsigned int key_len) { int rc; - struct s390_paes_ctx *ctx = crypto_tfm_ctx(tfm); + struct s390_paes_ctx *ctx = crypto_skcipher_ctx(tfm); _free_kb_keybuf(&ctx->kb); rc = _copy_key_to_kb(&ctx->kb, in_key, key_len); @@ -151,91 +152,75 @@ static int ecb_paes_set_key(struct crypto_tfm *tfm, const u8 *in_key, return rc; if (__paes_set_key(ctx)) { - tfm->crt_flags |= CRYPTO_TFM_RES_BAD_KEY_LEN; + crypto_skcipher_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN); return -EINVAL; } return 0; } -static int ecb_paes_crypt(struct blkcipher_desc *desc, - unsigned long modifier, - struct blkcipher_walk *walk) +static int ecb_paes_crypt(struct skcipher_request *req, unsigned long modifier) { - struct s390_paes_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct s390_paes_ctx *ctx = crypto_skcipher_ctx(tfm); + struct skcipher_walk walk; unsigned int nbytes, n, k; int ret; - ret = blkcipher_walk_virt(desc, walk); - while ((nbytes = walk->nbytes) >= AES_BLOCK_SIZE) { + ret = skcipher_walk_virt(&walk, req, false); + while ((nbytes = walk.nbytes) != 0) { /* only use complete blocks */ n = nbytes & ~(AES_BLOCK_SIZE - 1); k = cpacf_km(ctx->fc | modifier, ctx->pk.protkey, - walk->dst.virt.addr, walk->src.virt.addr, n); + walk.dst.virt.addr, walk.src.virt.addr, n); if (k) - ret = blkcipher_walk_done(desc, walk, nbytes - k); + ret = skcipher_walk_done(&walk, nbytes - k); if (k < n) { if (__paes_set_key(ctx) != 0) - return blkcipher_walk_done(desc, walk, -EIO); + return skcipher_walk_done(&walk, -EIO); } } return ret; } -static int ecb_paes_encrypt(struct blkcipher_desc *desc, - struct scatterlist *dst, struct scatterlist *src, - unsigned 
int nbytes) +static int ecb_paes_encrypt(struct skcipher_request *req) { - struct blkcipher_walk walk; - - blkcipher_walk_init(&walk, dst, src, nbytes); - return ecb_paes_crypt(desc, CPACF_ENCRYPT, &walk); + return ecb_paes_crypt(req, 0); } -static int ecb_paes_decrypt(struct blkcipher_desc *desc, - struct scatterlist *dst, struct scatterlist *src, - unsigned int nbytes) +static int ecb_paes_decrypt(struct skcipher_request *req) { - struct blkcipher_walk walk; - - blkcipher_walk_init(&walk, dst, src, nbytes); - return ecb_paes_crypt(desc, CPACF_DECRYPT, &walk); + return ecb_paes_crypt(req, CPACF_DECRYPT); } -static struct crypto_alg ecb_paes_alg = { - .cra_name = "ecb(paes)", - .cra_driver_name = "ecb-paes-s390", - .cra_priority = 401, /* combo: aes + ecb + 1 */ - .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, - .cra_blocksize = AES_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct s390_paes_ctx), - .cra_type = &crypto_blkcipher_type, - .cra_module = THIS_MODULE, - .cra_list = LIST_HEAD_INIT(ecb_paes_alg.cra_list), - .cra_init = ecb_paes_init, - .cra_exit = ecb_paes_exit, - .cra_u = { - .blkcipher = { - .min_keysize = PAES_MIN_KEYSIZE, - .max_keysize = PAES_MAX_KEYSIZE, - .setkey = ecb_paes_set_key, - .encrypt = ecb_paes_encrypt, - .decrypt = ecb_paes_decrypt, - } - } +static struct skcipher_alg ecb_paes_alg = { + .base.cra_name = "ecb(paes)", + .base.cra_driver_name = "ecb-paes-s390", + .base.cra_priority = 401, /* combo: aes + ecb + 1 */ + .base.cra_blocksize = AES_BLOCK_SIZE, + .base.cra_ctxsize = sizeof(struct s390_paes_ctx), + .base.cra_module = THIS_MODULE, + .base.cra_list = LIST_HEAD_INIT(ecb_paes_alg.base.cra_list), + .init = ecb_paes_init, + .exit = ecb_paes_exit, + .min_keysize = PAES_MIN_KEYSIZE, + .max_keysize = PAES_MAX_KEYSIZE, + .setkey = ecb_paes_set_key, + .encrypt = ecb_paes_encrypt, + .decrypt = ecb_paes_decrypt, }; -static int cbc_paes_init(struct crypto_tfm *tfm) +static int cbc_paes_init(struct crypto_skcipher *tfm) { - struct s390_paes_ctx *ctx = crypto_tfm_ctx(tfm); + struct s390_paes_ctx *ctx = crypto_skcipher_ctx(tfm); ctx->kb.key = NULL; return 0; } -static void cbc_paes_exit(struct crypto_tfm *tfm) +static void cbc_paes_exit(struct crypto_skcipher *tfm) { - struct s390_paes_ctx *ctx = crypto_tfm_ctx(tfm); + struct s390_paes_ctx *ctx = crypto_skcipher_ctx(tfm); _free_kb_keybuf(&ctx->kb); } @@ -258,11 +243,11 @@ static int __cbc_paes_set_key(struct s390_paes_ctx *ctx) return ctx->fc ? 
0 : -EINVAL; } -static int cbc_paes_set_key(struct crypto_tfm *tfm, const u8 *in_key, +static int cbc_paes_set_key(struct crypto_skcipher *tfm, const u8 *in_key, unsigned int key_len) { int rc; - struct s390_paes_ctx *ctx = crypto_tfm_ctx(tfm); + struct s390_paes_ctx *ctx = crypto_skcipher_ctx(tfm); _free_kb_keybuf(&ctx->kb); rc = _copy_key_to_kb(&ctx->kb, in_key, key_len); @@ -270,16 +255,17 @@ static int cbc_paes_set_key(struct crypto_tfm *tfm, const u8 *in_key, return rc; if (__cbc_paes_set_key(ctx)) { - tfm->crt_flags |= CRYPTO_TFM_RES_BAD_KEY_LEN; + crypto_skcipher_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN); return -EINVAL; } return 0; } -static int cbc_paes_crypt(struct blkcipher_desc *desc, unsigned long modifier, - struct blkcipher_walk *walk) +static int cbc_paes_crypt(struct skcipher_request *req, unsigned long modifier) { - struct s390_paes_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct s390_paes_ctx *ctx = crypto_skcipher_ctx(tfm); + struct skcipher_walk walk; unsigned int nbytes, n, k; int ret; struct { @@ -287,73 +273,60 @@ static int cbc_paes_crypt(struct blkcipher_desc *desc, unsigned long modifier, u8 key[MAXPROTKEYSIZE]; } param; - ret = blkcipher_walk_virt(desc, walk); - memcpy(param.iv, walk->iv, AES_BLOCK_SIZE); + ret = skcipher_walk_virt(&walk, req, false); + if (ret) + return ret; + memcpy(param.iv, walk.iv, AES_BLOCK_SIZE); memcpy(param.key, ctx->pk.protkey, MAXPROTKEYSIZE); - while ((nbytes = walk->nbytes) >= AES_BLOCK_SIZE) { + while ((nbytes = walk.nbytes) != 0) { /* only use complete blocks */ n = nbytes & ~(AES_BLOCK_SIZE - 1); k = cpacf_kmc(ctx->fc | modifier, &param, - walk->dst.virt.addr, walk->src.virt.addr, n); - if (k) - ret = blkcipher_walk_done(desc, walk, nbytes - k); + k = cpacf_kmc(ctx->fc | modifier, &param, + walk.dst.virt.addr, walk.src.virt.addr, n); + if (k) { + memcpy(walk.iv, param.iv, AES_BLOCK_SIZE); + ret = skcipher_walk_done(&walk, nbytes - k); + } if (k < n) { if (__cbc_paes_set_key(ctx) != 0) - return blkcipher_walk_done(desc, walk, -EIO); + return skcipher_walk_done(&walk, -EIO); memcpy(param.key, ctx->pk.protkey, MAXPROTKEYSIZE); } } - memcpy(walk->iv, param.iv, AES_BLOCK_SIZE); return ret; } -static int cbc_paes_encrypt(struct blkcipher_desc *desc, - struct scatterlist *dst, struct scatterlist *src, - unsigned int nbytes) +static int cbc_paes_encrypt(struct skcipher_request *req) { - struct blkcipher_walk walk; - - blkcipher_walk_init(&walk, dst, src, nbytes); - return cbc_paes_crypt(desc, 0, &walk); + return cbc_paes_crypt(req, 0); } -static int cbc_paes_decrypt(struct blkcipher_desc *desc, - struct scatterlist *dst, struct scatterlist *src, - unsigned int nbytes) +static int cbc_paes_decrypt(struct skcipher_request *req) { - struct blkcipher_walk walk; - - blkcipher_walk_init(&walk, dst, src, nbytes); - return cbc_paes_crypt(desc, CPACF_DECRYPT, &walk); + return cbc_paes_crypt(req, CPACF_DECRYPT); } -static struct crypto_alg cbc_paes_alg = { - .cra_name = "cbc(paes)", - .cra_driver_name = "cbc-paes-s390", - .cra_priority = 402, /* ecb-paes-s390 + 1 */ - .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, - .cra_blocksize = AES_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct s390_paes_ctx), - .cra_type = &crypto_blkcipher_type, - .cra_module = THIS_MODULE, - .cra_list = LIST_HEAD_INIT(cbc_paes_alg.cra_list), - .cra_init = cbc_paes_init, - .cra_exit = cbc_paes_exit, - .cra_u = { - .blkcipher = { - .min_keysize = PAES_MIN_KEYSIZE, - .max_keysize = PAES_MAX_KEYSIZE, - .ivsize = AES_BLOCK_SIZE, - .setkey = cbc_paes_set_key, - .encrypt 
= cbc_paes_encrypt, - .decrypt = cbc_paes_decrypt, - } - } +static struct skcipher_alg cbc_paes_alg = { + .base.cra_name = "cbc(paes)", + .base.cra_driver_name = "cbc-paes-s390", + .base.cra_priority = 402, /* ecb-paes-s390 + 1 */ + .base.cra_blocksize = AES_BLOCK_SIZE, + .base.cra_ctxsize = sizeof(struct s390_paes_ctx), + .base.cra_module = THIS_MODULE, + .base.cra_list = LIST_HEAD_INIT(cbc_paes_alg.base.cra_list), + .init = cbc_paes_init, + .exit = cbc_paes_exit, + .min_keysize = PAES_MIN_KEYSIZE, + .max_keysize = PAES_MAX_KEYSIZE, + .ivsize = AES_BLOCK_SIZE, + .setkey = cbc_paes_set_key, + .encrypt = cbc_paes_encrypt, + .decrypt = cbc_paes_decrypt, }; -static int xts_paes_init(struct crypto_tfm *tfm) +static int xts_paes_init(struct crypto_skcipher *tfm) { - struct s390_pxts_ctx *ctx = crypto_tfm_ctx(tfm); + struct s390_pxts_ctx *ctx = crypto_skcipher_ctx(tfm); ctx->kb[0].key = NULL; ctx->kb[1].key = NULL; @@ -361,9 +334,9 @@ static int xts_paes_init(struct crypto_tfm *tfm) return 0; } -static void xts_paes_exit(struct crypto_tfm *tfm) +static void xts_paes_exit(struct crypto_skcipher *tfm) { - struct s390_pxts_ctx *ctx = crypto_tfm_ctx(tfm); + struct s390_pxts_ctx *ctx = crypto_skcipher_ctx(tfm); _free_kb_keybuf(&ctx->kb[0]); _free_kb_keybuf(&ctx->kb[1]); @@ -391,11 +364,11 @@ static int __xts_paes_set_key(struct s390_pxts_ctx *ctx) return ctx->fc ? 0 : -EINVAL; } -static int xts_paes_set_key(struct crypto_tfm *tfm, const u8 *in_key, +static int xts_paes_set_key(struct crypto_skcipher *tfm, const u8 *in_key, unsigned int xts_key_len) { int rc; - struct s390_pxts_ctx *ctx = crypto_tfm_ctx(tfm); + struct s390_pxts_ctx *ctx = crypto_skcipher_ctx(tfm); u8 ckey[2 * AES_MAX_KEY_SIZE]; unsigned int ckey_len, key_len; @@ -414,7 +387,7 @@ static int xts_paes_set_key(struct crypto_tfm *tfm, const u8 *in_key, return rc; if (__xts_paes_set_key(ctx)) { - tfm->crt_flags |= CRYPTO_TFM_RES_BAD_KEY_LEN; + crypto_skcipher_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN); return -EINVAL; } @@ -427,13 +400,14 @@ static int xts_paes_set_key(struct crypto_tfm *tfm, const u8 *in_key, AES_KEYSIZE_128 : AES_KEYSIZE_256; memcpy(ckey, ctx->pk[0].protkey, ckey_len); memcpy(ckey + ckey_len, ctx->pk[1].protkey, ckey_len); - return xts_check_key(tfm, ckey, 2*ckey_len); + return xts_verify_key(tfm, ckey, 2*ckey_len); } -static int xts_paes_crypt(struct blkcipher_desc *desc, unsigned long modifier, - struct blkcipher_walk *walk) +static int xts_paes_crypt(struct skcipher_request *req, unsigned long modifier) { - struct s390_pxts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct s390_pxts_ctx *ctx = crypto_skcipher_ctx(tfm); + struct skcipher_walk walk; unsigned int keylen, offset, nbytes, n, k; int ret; struct { @@ -448,90 +422,76 @@ static int xts_paes_crypt(struct blkcipher_desc *desc, unsigned long modifier, u8 init[16]; } xts_param; - ret = blkcipher_walk_virt(desc, walk); + ret = skcipher_walk_virt(&walk, req, false); + if (ret) + return ret; keylen = (ctx->pk[0].type == PKEY_KEYTYPE_AES_128) ? 48 : 64; offset = (ctx->pk[0].type == PKEY_KEYTYPE_AES_128) ? 
16 : 0; retry: memset(&pcc_param, 0, sizeof(pcc_param)); - memcpy(pcc_param.tweak, walk->iv, sizeof(pcc_param.tweak)); + memcpy(pcc_param.tweak, walk.iv, sizeof(pcc_param.tweak)); memcpy(pcc_param.key + offset, ctx->pk[1].protkey, keylen); cpacf_pcc(ctx->fc, pcc_param.key + offset); memcpy(xts_param.key + offset, ctx->pk[0].protkey, keylen); memcpy(xts_param.init, pcc_param.xts, 16); - while ((nbytes = walk->nbytes) >= AES_BLOCK_SIZE) { + while ((nbytes = walk.nbytes) != 0) { /* only use complete blocks */ n = nbytes & ~(AES_BLOCK_SIZE - 1); k = cpacf_km(ctx->fc | modifier, xts_param.key + offset, - walk->dst.virt.addr, walk->src.virt.addr, n); + walk.dst.virt.addr, walk.src.virt.addr, n); if (k) - ret = blkcipher_walk_done(desc, walk, nbytes - k); + ret = skcipher_walk_done(&walk, nbytes - k); if (k < n) { if (__xts_paes_set_key(ctx) != 0) - return blkcipher_walk_done(desc, walk, -EIO); + return skcipher_walk_done(&walk, -EIO); goto retry; } } return ret; } -static int xts_paes_encrypt(struct blkcipher_desc *desc, - struct scatterlist *dst, struct scatterlist *src, - unsigned int nbytes) +static int xts_paes_encrypt(struct skcipher_request *req) { - struct blkcipher_walk walk; - - blkcipher_walk_init(&walk, dst, src, nbytes); - return xts_paes_crypt(desc, 0, &walk); + return xts_paes_crypt(req, 0); } -static int xts_paes_decrypt(struct blkcipher_desc *desc, - struct scatterlist *dst, struct scatterlist *src, - unsigned int nbytes) +static int xts_paes_decrypt(struct skcipher_request *req) { - struct blkcipher_walk walk; - - blkcipher_walk_init(&walk, dst, src, nbytes); - return xts_paes_crypt(desc, CPACF_DECRYPT, &walk); + return xts_paes_crypt(req, CPACF_DECRYPT); } -static struct crypto_alg xts_paes_alg = { - .cra_name = "xts(paes)", - .cra_driver_name = "xts-paes-s390", - .cra_priority = 402, /* ecb-paes-s390 + 1 */ - .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, - .cra_blocksize = AES_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct s390_pxts_ctx), - .cra_type = &crypto_blkcipher_type, - .cra_module = THIS_MODULE, - .cra_list = LIST_HEAD_INIT(xts_paes_alg.cra_list), - .cra_init = xts_paes_init, - .cra_exit = xts_paes_exit, - .cra_u = { - .blkcipher = { - .min_keysize = 2 * PAES_MIN_KEYSIZE, - .max_keysize = 2 * PAES_MAX_KEYSIZE, - .ivsize = AES_BLOCK_SIZE, - .setkey = xts_paes_set_key, - .encrypt = xts_paes_encrypt, - .decrypt = xts_paes_decrypt, - } - } +static struct skcipher_alg xts_paes_alg = { + .base.cra_name = "xts(paes)", + .base.cra_driver_name = "xts-paes-s390", + .base.cra_priority = 402, /* ecb-paes-s390 + 1 */ + .base.cra_blocksize = AES_BLOCK_SIZE, + .base.cra_ctxsize = sizeof(struct s390_pxts_ctx), + .base.cra_module = THIS_MODULE, + .base.cra_list = LIST_HEAD_INIT(xts_paes_alg.base.cra_list), + .init = xts_paes_init, + .exit = xts_paes_exit, + .min_keysize = 2 * PAES_MIN_KEYSIZE, + .max_keysize = 2 * PAES_MAX_KEYSIZE, + .ivsize = AES_BLOCK_SIZE, + .setkey = xts_paes_set_key, + .encrypt = xts_paes_encrypt, + .decrypt = xts_paes_decrypt, }; -static int ctr_paes_init(struct crypto_tfm *tfm) +static int ctr_paes_init(struct crypto_skcipher *tfm) { - struct s390_paes_ctx *ctx = crypto_tfm_ctx(tfm); + struct s390_paes_ctx *ctx = crypto_skcipher_ctx(tfm); ctx->kb.key = NULL; return 0; } -static void ctr_paes_exit(struct crypto_tfm *tfm) +static void ctr_paes_exit(struct crypto_skcipher *tfm) { - struct s390_paes_ctx *ctx = crypto_tfm_ctx(tfm); + struct s390_paes_ctx *ctx = crypto_skcipher_ctx(tfm); _free_kb_keybuf(&ctx->kb); } @@ -555,11 +515,11 @@ static int __ctr_paes_set_key(struct 
s390_paes_ctx *ctx) return ctx->fc ? 0 : -EINVAL; } -static int ctr_paes_set_key(struct crypto_tfm *tfm, const u8 *in_key, +static int ctr_paes_set_key(struct crypto_skcipher *tfm, const u8 *in_key, unsigned int key_len) { int rc; - struct s390_paes_ctx *ctx = crypto_tfm_ctx(tfm); + struct s390_paes_ctx *ctx = crypto_skcipher_ctx(tfm); _free_kb_keybuf(&ctx->kb); rc = _copy_key_to_kb(&ctx->kb, in_key, key_len); @@ -567,7 +527,7 @@ static int ctr_paes_set_key(struct crypto_tfm *tfm, const u8 *in_key, return rc; if (__ctr_paes_set_key(ctx)) { - tfm->crt_flags |= CRYPTO_TFM_RES_BAD_KEY_LEN; + crypto_skcipher_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN); return -EINVAL; } return 0; @@ -588,37 +548,37 @@ static unsigned int __ctrblk_init(u8 *ctrptr, u8 *iv, unsigned int nbytes) return n; } -static int ctr_paes_crypt(struct blkcipher_desc *desc, unsigned long modifier, - struct blkcipher_walk *walk) +static int ctr_paes_crypt(struct skcipher_request *req) { - struct s390_paes_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct s390_paes_ctx *ctx = crypto_skcipher_ctx(tfm); u8 buf[AES_BLOCK_SIZE], *ctrptr; + struct skcipher_walk walk; unsigned int nbytes, n, k; int ret, locked; locked = spin_trylock(&ctrblk_lock); - ret = blkcipher_walk_virt_block(desc, walk, AES_BLOCK_SIZE); - while ((nbytes = walk->nbytes) >= AES_BLOCK_SIZE) { + ret = skcipher_walk_virt(&walk, req, false); + while ((nbytes = walk.nbytes) >= AES_BLOCK_SIZE) { n = AES_BLOCK_SIZE; if (nbytes >= 2*AES_BLOCK_SIZE && locked) - n = __ctrblk_init(ctrblk, walk->iv, nbytes); - ctrptr = (n > AES_BLOCK_SIZE) ? ctrblk : walk->iv; - k = cpacf_kmctr(ctx->fc | modifier, ctx->pk.protkey, - walk->dst.virt.addr, walk->src.virt.addr, - n, ctrptr); + n = __ctrblk_init(ctrblk, walk.iv, nbytes); + ctrptr = (n > AES_BLOCK_SIZE) ? 
ctrblk : walk.iv; + k = cpacf_kmctr(ctx->fc, ctx->pk.protkey, walk.dst.virt.addr, + walk.src.virt.addr, n, ctrptr); if (k) { if (ctrptr == ctrblk) - memcpy(walk->iv, ctrptr + k - AES_BLOCK_SIZE, + memcpy(walk.iv, ctrptr + k - AES_BLOCK_SIZE, AES_BLOCK_SIZE); - crypto_inc(walk->iv, AES_BLOCK_SIZE); - ret = blkcipher_walk_done(desc, walk, nbytes - n); + crypto_inc(walk.iv, AES_BLOCK_SIZE); + ret = skcipher_walk_done(&walk, nbytes - n); } if (k < n) { if (__ctr_paes_set_key(ctx) != 0) { if (locked) spin_unlock(&ctrblk_lock); - return blkcipher_walk_done(desc, walk, -EIO); + return skcipher_walk_done(&walk, -EIO); } } } @@ -629,80 +589,54 @@ static int ctr_paes_crypt(struct blkcipher_desc *desc, unsigned long modifier, */ if (nbytes) { while (1) { - if (cpacf_kmctr(ctx->fc | modifier, - ctx->pk.protkey, buf, - walk->src.virt.addr, AES_BLOCK_SIZE, - walk->iv) == AES_BLOCK_SIZE) + if (cpacf_kmctr(ctx->fc, ctx->pk.protkey, buf, + walk.src.virt.addr, AES_BLOCK_SIZE, + walk.iv) == AES_BLOCK_SIZE) break; if (__ctr_paes_set_key(ctx) != 0) - return blkcipher_walk_done(desc, walk, -EIO); + return skcipher_walk_done(&walk, -EIO); } - memcpy(walk->dst.virt.addr, buf, nbytes); - crypto_inc(walk->iv, AES_BLOCK_SIZE); - ret = blkcipher_walk_done(desc, walk, 0); + memcpy(walk.dst.virt.addr, buf, nbytes); + crypto_inc(walk.iv, AES_BLOCK_SIZE); + ret = skcipher_walk_done(&walk, 0); } return ret; } -static int ctr_paes_encrypt(struct blkcipher_desc *desc, - struct scatterlist *dst, struct scatterlist *src, - unsigned int nbytes) -{ - struct blkcipher_walk walk; - - blkcipher_walk_init(&walk, dst, src, nbytes); - return ctr_paes_crypt(desc, 0, &walk); -} - -static int ctr_paes_decrypt(struct blkcipher_desc *desc, - struct scatterlist *dst, struct scatterlist *src, - unsigned int nbytes) -{ - struct blkcipher_walk walk; - - blkcipher_walk_init(&walk, dst, src, nbytes); - return ctr_paes_crypt(desc, CPACF_DECRYPT, &walk); -} - -static struct crypto_alg ctr_paes_alg = { - .cra_name = "ctr(paes)", - .cra_driver_name = "ctr-paes-s390", - .cra_priority = 402, /* ecb-paes-s390 + 1 */ - .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, - .cra_blocksize = 1, - .cra_ctxsize = sizeof(struct s390_paes_ctx), - .cra_type = &crypto_blkcipher_type, - .cra_module = THIS_MODULE, - .cra_list = LIST_HEAD_INIT(ctr_paes_alg.cra_list), - .cra_init = ctr_paes_init, - .cra_exit = ctr_paes_exit, - .cra_u = { - .blkcipher = { - .min_keysize = PAES_MIN_KEYSIZE, - .max_keysize = PAES_MAX_KEYSIZE, - .ivsize = AES_BLOCK_SIZE, - .setkey = ctr_paes_set_key, - .encrypt = ctr_paes_encrypt, - .decrypt = ctr_paes_decrypt, - } - } +static struct skcipher_alg ctr_paes_alg = { + .base.cra_name = "ctr(paes)", + .base.cra_driver_name = "ctr-paes-s390", + .base.cra_priority = 402, /* ecb-paes-s390 + 1 */ + .base.cra_blocksize = 1, + .base.cra_ctxsize = sizeof(struct s390_paes_ctx), + .base.cra_module = THIS_MODULE, + .base.cra_list = LIST_HEAD_INIT(ctr_paes_alg.base.cra_list), + .init = ctr_paes_init, + .exit = ctr_paes_exit, + .min_keysize = PAES_MIN_KEYSIZE, + .max_keysize = PAES_MAX_KEYSIZE, + .ivsize = AES_BLOCK_SIZE, + .setkey = ctr_paes_set_key, + .encrypt = ctr_paes_crypt, + .decrypt = ctr_paes_crypt, + .chunksize = AES_BLOCK_SIZE, }; -static inline void __crypto_unregister_alg(struct crypto_alg *alg) +static inline void __crypto_unregister_skcipher(struct skcipher_alg *alg) { - if (!list_empty(&alg->cra_list)) - crypto_unregister_alg(alg); + if (!list_empty(&alg->base.cra_list)) + crypto_unregister_skcipher(alg); } static void paes_s390_fini(void) { if 
(ctrblk) free_page((unsigned long) ctrblk); - __crypto_unregister_alg(&ctr_paes_alg); - __crypto_unregister_alg(&xts_paes_alg); - __crypto_unregister_alg(&cbc_paes_alg); - __crypto_unregister_alg(&ecb_paes_alg); + __crypto_unregister_skcipher(&ctr_paes_alg); + __crypto_unregister_skcipher(&xts_paes_alg); + __crypto_unregister_skcipher(&cbc_paes_alg); + __crypto_unregister_skcipher(&ecb_paes_alg); } static int __init paes_s390_init(void) @@ -717,7 +651,7 @@ static int __init paes_s390_init(void) if (cpacf_test_func(&km_functions, CPACF_KM_PAES_128) || cpacf_test_func(&km_functions, CPACF_KM_PAES_192) || cpacf_test_func(&km_functions, CPACF_KM_PAES_256)) { - ret = crypto_register_alg(&ecb_paes_alg); + ret = crypto_register_skcipher(&ecb_paes_alg); if (ret) goto out_err; } @@ -725,14 +659,14 @@ static int __init paes_s390_init(void) if (cpacf_test_func(&kmc_functions, CPACF_KMC_PAES_128) || cpacf_test_func(&kmc_functions, CPACF_KMC_PAES_192) || cpacf_test_func(&kmc_functions, CPACF_KMC_PAES_256)) { - ret = crypto_register_alg(&cbc_paes_alg); + ret = crypto_register_skcipher(&cbc_paes_alg); if (ret) goto out_err; } if (cpacf_test_func(&km_functions, CPACF_KM_PXTS_128) || cpacf_test_func(&km_functions, CPACF_KM_PXTS_256)) { - ret = crypto_register_alg(&xts_paes_alg); + ret = crypto_register_skcipher(&xts_paes_alg); if (ret) goto out_err; } @@ -740,7 +674,7 @@ static int __init paes_s390_init(void) if (cpacf_test_func(&kmctr_functions, CPACF_KMCTR_PAES_128) || cpacf_test_func(&kmctr_functions, CPACF_KMCTR_PAES_192) || cpacf_test_func(&kmctr_functions, CPACF_KMCTR_PAES_256)) { - ret = crypto_register_alg(&ctr_paes_alg); + ret = crypto_register_skcipher(&ctr_paes_alg); if (ret) goto out_err; ctrblk = (u8 *) __get_free_page(GFP_KERNEL); diff --git a/arch/s390/crypto/sha_common.c b/arch/s390/crypto/sha_common.c index d39e0f079217..686fe7aa192f 100644 --- a/arch/s390/crypto/sha_common.c +++ b/arch/s390/crypto/sha_common.c @@ -74,14 +74,17 @@ int s390_sha_final(struct shash_desc *desc, u8 *out) struct s390_sha_ctx *ctx = shash_desc_ctx(desc); unsigned int bsize = crypto_shash_blocksize(desc->tfm); u64 bits; - unsigned int n, mbl_offset; + unsigned int n; + int mbl_offset; n = ctx->count % bsize; bits = ctx->count * 8; - mbl_offset = s390_crypto_shash_parmsize(ctx->func) / sizeof(u32); + mbl_offset = s390_crypto_shash_parmsize(ctx->func); if (mbl_offset < 0) return -EINVAL; + mbl_offset = mbl_offset / sizeof(u32); + /* set total msg bit length (mbl) in CPACF parmblock */ switch (ctx->func) { case CPACF_KLMD_SHA_1: diff --git a/arch/s390/include/asm/alternative.h b/arch/s390/include/asm/alternative.h index c2cf7bcdef9b..1c8a38f762a3 100644 --- a/arch/s390/include/asm/alternative.h +++ b/arch/s390/include/asm/alternative.h @@ -139,10 +139,10 @@ void apply_alternatives(struct alt_instr *start, struct alt_instr *end); * without volatile and memory clobber. 
*/ #define alternative(oldinstr, altinstr, facility) \ - asm volatile(ALTERNATIVE(oldinstr, altinstr, facility) : : : "memory") + asm_inline volatile(ALTERNATIVE(oldinstr, altinstr, facility) : : : "memory") #define alternative_2(oldinstr, altinstr1, facility1, altinstr2, facility2) \ - asm volatile(ALTERNATIVE_2(oldinstr, altinstr1, facility1, \ + asm_inline volatile(ALTERNATIVE_2(oldinstr, altinstr1, facility1, \ altinstr2, facility2) ::: "memory") #endif /* __ASSEMBLY__ */ diff --git a/arch/s390/include/asm/atomic_ops.h b/arch/s390/include/asm/atomic_ops.h index d3f09526ee19..61467b9eecc7 100644 --- a/arch/s390/include/asm/atomic_ops.h +++ b/arch/s390/include/asm/atomic_ops.h @@ -41,7 +41,7 @@ __ATOMIC_OPS(__atomic64_xor, long, "laxg") #undef __ATOMIC_OP #define __ATOMIC_CONST_OP(op_name, op_type, op_string, op_barrier) \ -static inline void op_name(op_type val, op_type *ptr) \ +static __always_inline void op_name(op_type val, op_type *ptr) \ { \ asm volatile( \ op_string " %[ptr],%[val]\n" \ diff --git a/arch/s390/include/asm/bitops.h b/arch/s390/include/asm/bitops.h index b8833ac983fa..eb7eed43e780 100644 --- a/arch/s390/include/asm/bitops.h +++ b/arch/s390/include/asm/bitops.h @@ -56,7 +56,7 @@ __bitops_byte(unsigned long nr, volatile unsigned long *ptr) return ((unsigned char *)ptr) + ((nr ^ (BITS_PER_LONG - 8)) >> 3); } -static inline void arch_set_bit(unsigned long nr, volatile unsigned long *ptr) +static __always_inline void arch_set_bit(unsigned long nr, volatile unsigned long *ptr) { unsigned long *addr = __bitops_word(nr, ptr); unsigned long mask; @@ -77,7 +77,7 @@ static inline void arch_set_bit(unsigned long nr, volatile unsigned long *ptr) __atomic64_or(mask, (long *)addr); } -static inline void arch_clear_bit(unsigned long nr, volatile unsigned long *ptr) +static __always_inline void arch_clear_bit(unsigned long nr, volatile unsigned long *ptr) { unsigned long *addr = __bitops_word(nr, ptr); unsigned long mask; @@ -98,8 +98,8 @@ static inline void arch_clear_bit(unsigned long nr, volatile unsigned long *ptr) __atomic64_and(mask, (long *)addr); } -static inline void arch_change_bit(unsigned long nr, - volatile unsigned long *ptr) +static __always_inline void arch_change_bit(unsigned long nr, + volatile unsigned long *ptr) { unsigned long *addr = __bitops_word(nr, ptr); unsigned long mask; diff --git a/arch/s390/include/asm/bug.h b/arch/s390/include/asm/bug.h index 713fc9735ffb..a2b11ac00f60 100644 --- a/arch/s390/include/asm/bug.h +++ b/arch/s390/include/asm/bug.h @@ -9,7 +9,7 @@ #ifdef CONFIG_DEBUG_BUGVERBOSE #define __EMIT_BUG(x) do { \ - asm volatile( \ + asm_inline volatile( \ "0: j 0b+2\n" \ "1:\n" \ ".section .rodata.str,\"aMS\",@progbits,1\n" \ @@ -28,7 +28,7 @@ #else /* CONFIG_DEBUG_BUGVERBOSE */ #define __EMIT_BUG(x) do { \ - asm volatile( \ + asm_inline volatile( \ "0: j 0b+2\n" \ "1:\n" \ ".section __bug_table,\"awM\",@progbits,%1\n" \ diff --git a/arch/s390/include/asm/cpacf.h b/arch/s390/include/asm/cpacf.h index a092f63aac6a..c0f3bfeddcbe 100644 --- a/arch/s390/include/asm/cpacf.h +++ b/arch/s390/include/asm/cpacf.h @@ -171,7 +171,7 @@ typedef struct { unsigned char bytes[16]; } cpacf_mask_t; * * Returns 1 if @func is available for @opcode, 0 otherwise */ -static inline void __cpacf_query(unsigned int opcode, cpacf_mask_t *mask) +static __always_inline void __cpacf_query(unsigned int opcode, cpacf_mask_t *mask) { register unsigned long r0 asm("0") = 0; /* query function */ register unsigned long r1 asm("1") = (unsigned long) mask; diff --git 
a/arch/s390/include/asm/cpu_mf.h b/arch/s390/include/asm/cpu_mf.h index ceeb552d3472..819803a97c2b 100644 --- a/arch/s390/include/asm/cpu_mf.h +++ b/arch/s390/include/asm/cpu_mf.h @@ -28,6 +28,8 @@ asm(".include \"asm/cpu_mf-insn.h\"\n"); CPU_MF_INT_SF_PRA|CPU_MF_INT_SF_SACA| \ CPU_MF_INT_SF_LSDA) +#define CPU_MF_SF_RIBM_NOTAV 0x1 /* Sampling unavailable */ + /* CPU measurement facility support */ static inline int cpum_cf_avail(void) { @@ -69,7 +71,8 @@ struct hws_qsi_info_block { /* Bit(s) */ unsigned long max_sampl_rate; /* 16-23: maximum sampling interval*/ unsigned long tear; /* 24-31: TEAR contents */ unsigned long dear; /* 32-39: DEAR contents */ - unsigned int rsvrd0; /* 40-43: reserved */ + unsigned int rsvrd0:24; /* 40-42: reserved */ + unsigned int ribm:8; /* 43: Reserved by IBM */ unsigned int cpu_speed; /* 44-47: CPU speed */ unsigned long long rsvrd1; /* 48-55: reserved */ unsigned long long rsvrd2; /* 56-63: reserved */ @@ -220,7 +223,8 @@ enum stcctm_ctr_set { MT_DIAG = 5, MT_DIAG_CLEARING = 9, /* clears loss-of-MT-ctr-data alert */ }; -static inline int stcctm(enum stcctm_ctr_set set, u64 range, u64 *dest) + +static __always_inline int stcctm(enum stcctm_ctr_set set, u64 range, u64 *dest) { int cc; diff --git a/arch/s390/include/asm/ctl_reg.h b/arch/s390/include/asm/ctl_reg.h index 60f907516335..ed5efbb531c4 100644 --- a/arch/s390/include/asm/ctl_reg.h +++ b/arch/s390/include/asm/ctl_reg.h @@ -11,6 +11,7 @@ #include <linux/bits.h> #define CR0_CLOCK_COMPARATOR_SIGN BIT(63 - 10) +#define CR0_LOW_ADDRESS_PROTECTION BIT(63 - 35) #define CR0_EMERGENCY_SIGNAL_SUBMASK BIT(63 - 49) #define CR0_EXTERNAL_CALL_SUBMASK BIT(63 - 50) #define CR0_CLOCK_COMPARATOR_SUBMASK BIT(63 - 52) diff --git a/arch/s390/include/asm/hugetlb.h b/arch/s390/include/asm/hugetlb.h index bb59dd964590..de8f0bf5f238 100644 --- a/arch/s390/include/asm/hugetlb.h +++ b/arch/s390/include/asm/hugetlb.h @@ -12,8 +12,6 @@ #include <asm/page.h> #include <asm/pgtable.h> - -#define is_hugepage_only_range(mm, addr, len) 0 #define hugetlb_free_pgd_range free_pgd_range #define hugepages_supported() (MACHINE_HAS_EDAT1) @@ -23,6 +21,13 @@ pte_t huge_ptep_get(pte_t *ptep); pte_t huge_ptep_get_and_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep); +static inline bool is_hugepage_only_range(struct mm_struct *mm, + unsigned long addr, + unsigned long len) +{ + return false; +} + /* * If the arch doesn't supply something else, assume that hugepage * size aligned regions are ok without further preparation. diff --git a/arch/s390/include/asm/jump_label.h b/arch/s390/include/asm/jump_label.h index e548ec1ec12c..39f747d63758 100644 --- a/arch/s390/include/asm/jump_label.h +++ b/arch/s390/include/asm/jump_label.h @@ -20,7 +20,7 @@ * We use a brcl 0,2 instruction for jump labels at compile time so it * can be easily distinguished from a hotpatch generated instruction. 
*/ -static inline bool arch_static_branch(struct static_key *key, bool branch) +static __always_inline bool arch_static_branch(struct static_key *key, bool branch) { asm_volatile_goto("0: brcl 0,"__stringify(JUMP_LABEL_NOP_OFFSET)"\n" ".pushsection __jump_table,\"aw\"\n" @@ -34,7 +34,7 @@ label: return true; } -static inline bool arch_static_branch_jump(struct static_key *key, bool branch) +static __always_inline bool arch_static_branch_jump(struct static_key *key, bool branch) { asm_volatile_goto("0: brcl 15,%l[label]\n" ".pushsection __jump_table,\"aw\"\n" diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h index abe60268335d..02f4c21c57f6 100644 --- a/arch/s390/include/asm/kvm_host.h +++ b/arch/s390/include/asm/kvm_host.h @@ -392,6 +392,7 @@ struct kvm_vcpu_stat { u64 diagnose_10; u64 diagnose_44; u64 diagnose_9c; + u64 diagnose_9c_ignored; u64 diagnose_258; u64 diagnose_308; u64 diagnose_500; diff --git a/arch/s390/include/asm/page.h b/arch/s390/include/asm/page.h index 823578c6b9e2..a4d38092530a 100644 --- a/arch/s390/include/asm/page.h +++ b/arch/s390/include/asm/page.h @@ -177,8 +177,6 @@ static inline int devmem_is_allowed(unsigned long pfn) #define VM_DATA_DEFAULT_FLAGS (VM_READ | VM_WRITE | \ VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC) -#define ARCH_ZONE_DMA_BITS 31 - #include <asm-generic/memory_model.h> #include <asm-generic/getorder.h> diff --git a/arch/s390/include/asm/pgalloc.h b/arch/s390/include/asm/pgalloc.h index bccb8f4a63e2..77606c4acd58 100644 --- a/arch/s390/include/asm/pgalloc.h +++ b/arch/s390/include/asm/pgalloc.h @@ -56,7 +56,12 @@ static inline p4d_t *p4d_alloc_one(struct mm_struct *mm, unsigned long address) crst_table_init(table, _REGION2_ENTRY_EMPTY); return (p4d_t *) table; } -#define p4d_free(mm, p4d) crst_table_free(mm, (unsigned long *) p4d) + +static inline void p4d_free(struct mm_struct *mm, p4d_t *p4d) +{ + if (!mm_p4d_folded(mm)) + crst_table_free(mm, (unsigned long *) p4d); +} static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long address) { @@ -65,7 +70,12 @@ static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long address) crst_table_init(table, _REGION3_ENTRY_EMPTY); return (pud_t *) table; } -#define pud_free(mm, pud) crst_table_free(mm, (unsigned long *) pud) + +static inline void pud_free(struct mm_struct *mm, pud_t *pud) +{ + if (!mm_pud_folded(mm)) + crst_table_free(mm, (unsigned long *) pud); +} static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long vmaddr) { @@ -83,6 +93,8 @@ static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long vmaddr) static inline void pmd_free(struct mm_struct *mm, pmd_t *pmd) { + if (mm_pmd_folded(mm)) + return; pgtable_pmd_page_dtor(virt_to_page(pmd)); crst_table_free(mm, (unsigned long *) pmd); } diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h index 36c578c0ff96..7b03037a8475 100644 --- a/arch/s390/include/asm/pgtable.h +++ b/arch/s390/include/asm/pgtable.h @@ -266,11 +266,9 @@ static inline int is_module_addr(void *addr) #endif #define _REGION_ENTRY_BITS 0xfffffffffffff22fUL -#define _REGION_ENTRY_BITS_LARGE 0xffffffff8000fe2fUL /* Bits in the segment table entry */ #define _SEGMENT_ENTRY_BITS 0xfffffffffffffe33UL -#define _SEGMENT_ENTRY_BITS_LARGE 0xfffffffffff0ff33UL #define _SEGMENT_ENTRY_HARDWARE_BITS 0xfffffffffffffe30UL #define _SEGMENT_ENTRY_HARDWARE_BITS_LARGE 0xfffffffffff00730UL #define _SEGMENT_ENTRY_ORIGIN_LARGE ~0xfffffUL /* large page address */ @@ -699,10 +697,8 @@ static inline int 
pmd_large(pmd_t pmd) static inline int pmd_bad(pmd_t pmd) { - if ((pmd_val(pmd) & _SEGMENT_ENTRY_TYPE_MASK) > 0) + if ((pmd_val(pmd) & _SEGMENT_ENTRY_TYPE_MASK) > 0 || pmd_large(pmd)) return 1; - if (pmd_large(pmd)) - return (pmd_val(pmd) & ~_SEGMENT_ENTRY_BITS_LARGE) != 0; return (pmd_val(pmd) & ~_SEGMENT_ENTRY_BITS) != 0; } @@ -710,12 +706,10 @@ static inline int pud_bad(pud_t pud) { unsigned long type = pud_val(pud) & _REGION_ENTRY_TYPE_MASK; - if (type > _REGION_ENTRY_TYPE_R3) + if (type > _REGION_ENTRY_TYPE_R3 || pud_large(pud)) return 1; if (type < _REGION_ENTRY_TYPE_R3) return 0; - if (pud_large(pud)) - return (pud_val(pud) & ~_REGION_ENTRY_BITS_LARGE) != 0; return (pud_val(pud) & ~_REGION_ENTRY_BITS) != 0; } @@ -758,18 +752,12 @@ static inline int pmd_write(pmd_t pmd) static inline int pmd_dirty(pmd_t pmd) { - int dirty = 1; - if (pmd_large(pmd)) - dirty = (pmd_val(pmd) & _SEGMENT_ENTRY_DIRTY) != 0; - return dirty; + return (pmd_val(pmd) & _SEGMENT_ENTRY_DIRTY) != 0; } static inline int pmd_young(pmd_t pmd) { - int young = 1; - if (pmd_large(pmd)) - young = (pmd_val(pmd) & _SEGMENT_ENTRY_YOUNG) != 0; - return young; + return (pmd_val(pmd) & _SEGMENT_ENTRY_YOUNG) != 0; } static inline int pte_present(pte_t pte) @@ -997,9 +985,9 @@ static inline pte_t pte_mkhuge(pte_t pte) #define IPTE_NODAT 0x400 #define IPTE_GUEST_ASCE 0x800 -static inline void __ptep_ipte(unsigned long address, pte_t *ptep, - unsigned long opt, unsigned long asce, - int local) +static __always_inline void __ptep_ipte(unsigned long address, pte_t *ptep, + unsigned long opt, unsigned long asce, + int local) { unsigned long pto = (unsigned long) ptep; @@ -1020,8 +1008,8 @@ static inline void __ptep_ipte(unsigned long address, pte_t *ptep, : [r1] "a" (pto), [m4] "i" (local) : "memory"); } -static inline void __ptep_ipte_range(unsigned long address, int nr, - pte_t *ptep, int local) +static __always_inline void __ptep_ipte_range(unsigned long address, int nr, + pte_t *ptep, int local) { unsigned long pto = (unsigned long) ptep; @@ -1173,8 +1161,6 @@ void gmap_pmdp_idte_global(struct mm_struct *mm, unsigned long vmaddr); static inline void set_pte_at(struct mm_struct *mm, unsigned long addr, pte_t *ptep, pte_t entry) { - if (!MACHINE_HAS_NX) - pte_val(entry) &= ~_PAGE_NOEXEC; if (pte_present(entry)) pte_val(entry) &= ~_PAGE_UNUSED; if (mm_has_pgste(mm)) @@ -1191,6 +1177,8 @@ static inline pte_t mk_pte_phys(unsigned long physpage, pgprot_t pgprot) { pte_t __pte; pte_val(__pte) = physpage + pgprot_val(pgprot); + if (!MACHINE_HAS_NX) + pte_val(__pte) &= ~_PAGE_NOEXEC; return pte_mkyoung(__pte); } @@ -1269,7 +1257,8 @@ static inline pte_t *pte_offset(pmd_t *pmd, unsigned long address) #define pte_offset_kernel(pmd, address) pte_offset(pmd, address) #define pte_offset_map(pmd, address) pte_offset_kernel(pmd, address) -#define pte_unmap(pte) do { } while (0) + +static inline void pte_unmap(pte_t *pte) { } static inline bool gup_fast_permitted(unsigned long start, unsigned long end) { @@ -1296,29 +1285,23 @@ static inline pmd_t pmd_wrprotect(pmd_t pmd) static inline pmd_t pmd_mkwrite(pmd_t pmd) { pmd_val(pmd) |= _SEGMENT_ENTRY_WRITE; - if (pmd_large(pmd) && !(pmd_val(pmd) & _SEGMENT_ENTRY_DIRTY)) - return pmd; - pmd_val(pmd) &= ~_SEGMENT_ENTRY_PROTECT; + if (pmd_val(pmd) & _SEGMENT_ENTRY_DIRTY) + pmd_val(pmd) &= ~_SEGMENT_ENTRY_PROTECT; return pmd; } static inline pmd_t pmd_mkclean(pmd_t pmd) { - if (pmd_large(pmd)) { - pmd_val(pmd) &= ~_SEGMENT_ENTRY_DIRTY; - pmd_val(pmd) |= _SEGMENT_ENTRY_PROTECT; - } + pmd_val(pmd) &= 
~_SEGMENT_ENTRY_DIRTY; + pmd_val(pmd) |= _SEGMENT_ENTRY_PROTECT; return pmd; } static inline pmd_t pmd_mkdirty(pmd_t pmd) { - if (pmd_large(pmd)) { - pmd_val(pmd) |= _SEGMENT_ENTRY_DIRTY | - _SEGMENT_ENTRY_SOFT_DIRTY; - if (pmd_val(pmd) & _SEGMENT_ENTRY_WRITE) - pmd_val(pmd) &= ~_SEGMENT_ENTRY_PROTECT; - } + pmd_val(pmd) |= _SEGMENT_ENTRY_DIRTY | _SEGMENT_ENTRY_SOFT_DIRTY; + if (pmd_val(pmd) & _SEGMENT_ENTRY_WRITE) + pmd_val(pmd) &= ~_SEGMENT_ENTRY_PROTECT; return pmd; } @@ -1332,29 +1315,23 @@ static inline pud_t pud_wrprotect(pud_t pud) static inline pud_t pud_mkwrite(pud_t pud) { pud_val(pud) |= _REGION3_ENTRY_WRITE; - if (pud_large(pud) && !(pud_val(pud) & _REGION3_ENTRY_DIRTY)) - return pud; - pud_val(pud) &= ~_REGION_ENTRY_PROTECT; + if (pud_val(pud) & _REGION3_ENTRY_DIRTY) + pud_val(pud) &= ~_REGION_ENTRY_PROTECT; return pud; } static inline pud_t pud_mkclean(pud_t pud) { - if (pud_large(pud)) { - pud_val(pud) &= ~_REGION3_ENTRY_DIRTY; - pud_val(pud) |= _REGION_ENTRY_PROTECT; - } + pud_val(pud) &= ~_REGION3_ENTRY_DIRTY; + pud_val(pud) |= _REGION_ENTRY_PROTECT; return pud; } static inline pud_t pud_mkdirty(pud_t pud) { - if (pud_large(pud)) { - pud_val(pud) |= _REGION3_ENTRY_DIRTY | - _REGION3_ENTRY_SOFT_DIRTY; - if (pud_val(pud) & _REGION3_ENTRY_WRITE) - pud_val(pud) &= ~_REGION_ENTRY_PROTECT; - } + pud_val(pud) |= _REGION3_ENTRY_DIRTY | _REGION3_ENTRY_SOFT_DIRTY; + if (pud_val(pud) & _REGION3_ENTRY_WRITE) + pud_val(pud) &= ~_REGION_ENTRY_PROTECT; return pud; } @@ -1378,38 +1355,29 @@ static inline unsigned long massage_pgprot_pmd(pgprot_t pgprot) static inline pmd_t pmd_mkyoung(pmd_t pmd) { - if (pmd_large(pmd)) { - pmd_val(pmd) |= _SEGMENT_ENTRY_YOUNG; - if (pmd_val(pmd) & _SEGMENT_ENTRY_READ) - pmd_val(pmd) &= ~_SEGMENT_ENTRY_INVALID; - } + pmd_val(pmd) |= _SEGMENT_ENTRY_YOUNG; + if (pmd_val(pmd) & _SEGMENT_ENTRY_READ) + pmd_val(pmd) &= ~_SEGMENT_ENTRY_INVALID; return pmd; } static inline pmd_t pmd_mkold(pmd_t pmd) { - if (pmd_large(pmd)) { - pmd_val(pmd) &= ~_SEGMENT_ENTRY_YOUNG; - pmd_val(pmd) |= _SEGMENT_ENTRY_INVALID; - } + pmd_val(pmd) &= ~_SEGMENT_ENTRY_YOUNG; + pmd_val(pmd) |= _SEGMENT_ENTRY_INVALID; return pmd; } static inline pmd_t pmd_modify(pmd_t pmd, pgprot_t newprot) { - if (pmd_large(pmd)) { - pmd_val(pmd) &= _SEGMENT_ENTRY_ORIGIN_LARGE | - _SEGMENT_ENTRY_DIRTY | _SEGMENT_ENTRY_YOUNG | - _SEGMENT_ENTRY_LARGE | _SEGMENT_ENTRY_SOFT_DIRTY; - pmd_val(pmd) |= massage_pgprot_pmd(newprot); - if (!(pmd_val(pmd) & _SEGMENT_ENTRY_DIRTY)) - pmd_val(pmd) |= _SEGMENT_ENTRY_PROTECT; - if (!(pmd_val(pmd) & _SEGMENT_ENTRY_YOUNG)) - pmd_val(pmd) |= _SEGMENT_ENTRY_INVALID; - return pmd; - } - pmd_val(pmd) &= _SEGMENT_ENTRY_ORIGIN; + pmd_val(pmd) &= _SEGMENT_ENTRY_ORIGIN_LARGE | + _SEGMENT_ENTRY_DIRTY | _SEGMENT_ENTRY_YOUNG | + _SEGMENT_ENTRY_LARGE | _SEGMENT_ENTRY_SOFT_DIRTY; pmd_val(pmd) |= massage_pgprot_pmd(newprot); + if (!(pmd_val(pmd) & _SEGMENT_ENTRY_DIRTY)) + pmd_val(pmd) |= _SEGMENT_ENTRY_PROTECT; + if (!(pmd_val(pmd) & _SEGMENT_ENTRY_YOUNG)) + pmd_val(pmd) |= _SEGMENT_ENTRY_INVALID; return pmd; } @@ -1435,9 +1403,9 @@ static inline void __pmdp_csp(pmd_t *pmdp) #define IDTE_NODAT 0x1000 #define IDTE_GUEST_ASCE 0x2000 -static inline void __pmdp_idte(unsigned long addr, pmd_t *pmdp, - unsigned long opt, unsigned long asce, - int local) +static __always_inline void __pmdp_idte(unsigned long addr, pmd_t *pmdp, + unsigned long opt, unsigned long asce, + int local) { unsigned long sto; @@ -1461,9 +1429,9 @@ static inline void __pmdp_idte(unsigned long addr, pmd_t *pmdp, } } 
-static inline void __pudp_idte(unsigned long addr, pud_t *pudp, - unsigned long opt, unsigned long asce, - int local) +static __always_inline void __pudp_idte(unsigned long addr, pud_t *pudp, + unsigned long opt, unsigned long asce, + int local) { unsigned long r3o; diff --git a/arch/s390/include/asm/processor.h b/arch/s390/include/asm/processor.h index 51a0e4a2dc96..881fc37c11c6 100644 --- a/arch/s390/include/asm/processor.h +++ b/arch/s390/include/asm/processor.h @@ -206,7 +206,7 @@ unsigned long get_wchan(struct task_struct *p); /* Has task runtime instrumentation enabled ? */ #define is_ri_task(tsk) (!!(tsk)->thread.ri_cb) -static inline unsigned long current_stack_pointer(void) +static __always_inline unsigned long current_stack_pointer(void) { unsigned long sp; diff --git a/arch/s390/include/asm/qdio.h b/arch/s390/include/asm/qdio.h index 78e8a888306d..71e3f0146cda 100644 --- a/arch/s390/include/asm/qdio.h +++ b/arch/s390/include/asm/qdio.h @@ -111,7 +111,7 @@ struct qib { /* private: */ u8 res[88]; /* public: */ - u8 parm[QDIO_MAX_BUFFERS_PER_Q]; + u8 parm[128]; } __attribute__ ((packed, aligned(256))); /** @@ -276,6 +276,7 @@ struct qdio_outbuf_state { #define CHSC_AC2_MULTI_BUFFER_AVAILABLE 0x0080 #define CHSC_AC2_MULTI_BUFFER_ENABLED 0x0040 #define CHSC_AC2_DATA_DIV_AVAILABLE 0x0010 +#define CHSC_AC2_SNIFFER_AVAILABLE 0x0008 #define CHSC_AC2_DATA_DIV_ENABLED 0x0002 #define CHSC_AC3_FORMAT2_CQ_AVAILABLE 0x8000 diff --git a/arch/s390/include/asm/spinlock.h b/arch/s390/include/asm/spinlock.h index c02bff33f6c7..3a37172d5398 100644 --- a/arch/s390/include/asm/spinlock.h +++ b/arch/s390/include/asm/spinlock.h @@ -85,7 +85,7 @@ static inline int arch_spin_trylock(arch_spinlock_t *lp) static inline void arch_spin_unlock(arch_spinlock_t *lp) { typecheck(int, lp->lock); - asm volatile( + asm_inline volatile( ALTERNATIVE("", ".long 0xb2fa0070", 49) /* NIAI 7 */ " sth %1,%0\n" : "=Q" (((unsigned short *) &lp->lock)[1]) diff --git a/arch/s390/include/asm/stacktrace.h b/arch/s390/include/asm/stacktrace.h index 0ae4bbf7779c..fee40212af11 100644 --- a/arch/s390/include/asm/stacktrace.h +++ b/arch/s390/include/asm/stacktrace.h @@ -38,7 +38,7 @@ static inline unsigned long get_stack_pointer(struct task_struct *task, { if (regs) return (unsigned long) kernel_stack_pointer(regs); - if (task == current) + if (!task || task == current) return current_stack_pointer(); return (unsigned long) task->thread.ksp; } diff --git a/arch/s390/include/asm/timex.h b/arch/s390/include/asm/timex.h index 64539c221672..6da8885251d6 100644 --- a/arch/s390/include/asm/timex.h +++ b/arch/s390/include/asm/timex.h @@ -10,8 +10,9 @@ #ifndef _ASM_S390_TIMEX_H #define _ASM_S390_TIMEX_H -#include <asm/lowcore.h> +#include <linux/preempt.h> #include <linux/time64.h> +#include <asm/lowcore.h> /* The value of the TOD clock for 1.1.1970. */ #define TOD_UNIX_EPOCH 0x7d91048bca000000ULL @@ -179,22 +180,24 @@ static inline cycles_t get_cycles(void) int get_phys_clock(unsigned long *clock); void init_cpu_timer(void); -unsigned long long monotonic_clock(void); extern unsigned char tod_clock_base[16] __aligned(8); /** * get_clock_monotonic - returns current time in clock rate units * - * The caller must ensure that preemption is disabled. * The clock and tod_clock_base get changed via stop_machine. - * Therefore preemption must be disabled when calling this - * function, otherwise the returned value is not guaranteed to - * be monotonic. 
+ * Therefore preemption must be disabled, otherwise the returned + * value is not guaranteed to be monotonic. */ static inline unsigned long long get_tod_clock_monotonic(void) { - return get_tod_clock() - *(unsigned long long *) &tod_clock_base[1]; + unsigned long long tod; + + preempt_disable(); + tod = get_tod_clock() - *(unsigned long long *) &tod_clock_base[1]; + preempt_enable(); + return tod; } /** diff --git a/arch/s390/include/asm/uaccess.h b/arch/s390/include/asm/uaccess.h index bd2fd9a7821d..a470f1fa9f2a 100644 --- a/arch/s390/include/asm/uaccess.h +++ b/arch/s390/include/asm/uaccess.h @@ -83,7 +83,7 @@ raw_copy_to_user(void __user *to, const void *from, unsigned long n); __rc; \ }) -static inline int __put_user_fn(void *x, void __user *ptr, unsigned long size) +static __always_inline int __put_user_fn(void *x, void __user *ptr, unsigned long size) { unsigned long spec = 0x010000UL; int rc; @@ -113,7 +113,7 @@ static inline int __put_user_fn(void *x, void __user *ptr, unsigned long size) return rc; } -static inline int __get_user_fn(void *x, const void __user *ptr, unsigned long size) +static __always_inline int __get_user_fn(void *x, const void __user *ptr, unsigned long size) { unsigned long spec = 0x01UL; int rc; diff --git a/arch/s390/include/asm/unwind.h b/arch/s390/include/asm/unwind.h index d827b5b9a32c..eaaefeceef6f 100644 --- a/arch/s390/include/asm/unwind.h +++ b/arch/s390/include/asm/unwind.h @@ -35,6 +35,7 @@ struct unwind_state { struct task_struct *task; struct pt_regs *regs; unsigned long sp, ip; + bool reuse_sp; int graph_idx; bool reliable; bool error; diff --git a/arch/s390/kernel/dis.c b/arch/s390/kernel/dis.c index 7abe6ae261b4..f304802ecf7b 100644 --- a/arch/s390/kernel/dis.c +++ b/arch/s390/kernel/dis.c @@ -461,10 +461,11 @@ static int print_insn(char *buffer, unsigned char *code, unsigned long addr) ptr += sprintf(ptr, "%%c%i", value); else if (operand->flags & OPERAND_VR) ptr += sprintf(ptr, "%%v%i", value); - else if (operand->flags & OPERAND_PCREL) - ptr += sprintf(ptr, "%lx", (signed int) value - + addr); - else if (operand->flags & OPERAND_SIGNED) + else if (operand->flags & OPERAND_PCREL) { + void *pcrel = (void *)((int)value + addr); + + ptr += sprintf(ptr, "%px", pcrel); + } else if (operand->flags & OPERAND_SIGNED) ptr += sprintf(ptr, "%i", value); else ptr += sprintf(ptr, "%u", value); @@ -536,7 +537,7 @@ void show_code(struct pt_regs *regs) else *ptr++ = ' '; addr = regs->psw.addr + start - 32; - ptr += sprintf(ptr, "%016lx: ", addr); + ptr += sprintf(ptr, "%px: ", (void *)addr); if (start + opsize >= end) break; for (i = 0; i < opsize; i++) @@ -564,7 +565,7 @@ void print_fn_code(unsigned char *code, unsigned long len) opsize = insn_length(*code); if (opsize > len) break; - ptr += sprintf(ptr, "%p: ", code); + ptr += sprintf(ptr, "%px: ", code); for (i = 0; i < opsize; i++) ptr += sprintf(ptr, "%02x", code[i]); *ptr++ = '\t'; diff --git a/arch/s390/kernel/early.c b/arch/s390/kernel/early.c index b432d63d0b37..db32a55daaec 100644 --- a/arch/s390/kernel/early.c +++ b/arch/s390/kernel/early.c @@ -30,6 +30,7 @@ #include <asm/sclp.h> #include <asm/facility.h> #include <asm/boot_data.h> +#include <asm/switch_to.h> #include "entry.h" static void __init reset_tod_clock(void) @@ -238,7 +239,7 @@ static __init void detect_machine_facilities(void) S390_lowcore.machine_flags |= MACHINE_FLAG_VX; __ctl_set_bit(0, 17); } - if (test_facility(130)) { + if (test_facility(130) && !noexec_disabled) { S390_lowcore.machine_flags |= MACHINE_FLAG_NX; __ctl_set_bit(0, 
20); } @@ -260,6 +261,24 @@ static inline void save_vector_registers(void) #endif } +static inline void setup_control_registers(void) +{ + unsigned long reg; + + __ctl_store(reg, 0, 0); + reg |= CR0_LOW_ADDRESS_PROTECTION; + reg |= CR0_EMERGENCY_SIGNAL_SUBMASK; + reg |= CR0_EXTERNAL_CALL_SUBMASK; + __ctl_load(reg, 0, 0); +} + +static inline void setup_access_registers(void) +{ + unsigned int acrs[NUM_ACRS] = { 0 }; + + restore_access_regs(acrs); +} + static int __init disable_vector_extension(char *str) { S390_lowcore.machine_flags &= ~MACHINE_FLAG_VX; @@ -268,21 +287,6 @@ static int __init disable_vector_extension(char *str) } early_param("novx", disable_vector_extension); -static int __init noexec_setup(char *str) -{ - bool enabled; - int rc; - - rc = kstrtobool(str, &enabled); - if (!rc && !enabled) { - /* Disable no-execute support */ - S390_lowcore.machine_flags &= ~MACHINE_FLAG_NX; - __ctl_clear_bit(0, 20); - } - return rc; -} -early_param("noexec", noexec_setup); - static int __init cad_setup(char *str) { bool enabled; @@ -332,5 +336,7 @@ void __init startup_init(void) save_vector_registers(); setup_topology(); sclp_early_detect(); + setup_control_registers(); + setup_access_registers(); lockdep_on(); } diff --git a/arch/s390/kernel/head64.S b/arch/s390/kernel/head64.S index 0d9ee198f4eb..b9e585f528a6 100644 --- a/arch/s390/kernel/head64.S +++ b/arch/s390/kernel/head64.S @@ -26,8 +26,6 @@ ENTRY(startup_continue) 0: larl %r1,tod_clock_base mvc 0(16,%r1),__LC_BOOT_CLOCK larl %r13,.LPG1 # get base - larl %r0,boot_vdso_data - stg %r0,__LC_VDSO_PER_CPU # # Setup stack # @@ -37,19 +35,8 @@ ENTRY(startup_continue) #ifdef CONFIG_KASAN brasl %r14,kasan_early_init #endif -# -# Early machine initialization and detection functions. -# - brasl %r14,startup_init - -# check control registers - stctg %c0,%c15,0(%r15) - oi 6(%r15),0x60 # enable sigp emergency & external call - oi 4(%r15),0x10 # switch on low address proctection - lctlg %c0,%c15,0(%r15) - - lam 0,15,.Laregs-.LPG1(%r13) # load acrs needed by uaccess - brasl %r14,start_kernel # go to C code + brasl %r14,startup_init # s390 specific early init + brasl %r14,start_kernel # common init code # # We returned from start_kernel ?!? PANIK # @@ -59,4 +46,3 @@ ENTRY(startup_continue) .align 16 .LPG1: .Ldw: .quad 0x0002000180000000,0x0000000000000000 -.Laregs:.long 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 diff --git a/arch/s390/kernel/idle.c b/arch/s390/kernel/idle.c index b9d8fe45737a..8f8456816d83 100644 --- a/arch/s390/kernel/idle.c +++ b/arch/s390/kernel/idle.c @@ -69,18 +69,26 @@ DEVICE_ATTR(idle_count, 0444, show_idle_count, NULL); static ssize_t show_idle_time(struct device *dev, struct device_attribute *attr, char *buf) { + unsigned long long now, idle_time, idle_enter, idle_exit, in_idle; struct s390_idle_data *idle = &per_cpu(s390_idle, dev->id); - unsigned long long now, idle_time, idle_enter, idle_exit; unsigned int seq; do { - now = get_tod_clock(); seq = read_seqcount_begin(&idle->seqcount); idle_time = READ_ONCE(idle->idle_time); idle_enter = READ_ONCE(idle->clock_idle_enter); idle_exit = READ_ONCE(idle->clock_idle_exit); } while (read_seqcount_retry(&idle->seqcount, seq)); - idle_time += idle_enter ? ((idle_exit ? 
: now) - idle_enter) : 0; + in_idle = 0; + now = get_tod_clock(); + if (idle_enter) { + if (idle_exit) { + in_idle = idle_exit - idle_enter; + } else if (now > idle_enter) { + in_idle = now - idle_enter; + } + } + idle_time += in_idle; return sprintf(buf, "%llu\n", idle_time >> 12); } DEVICE_ATTR(idle_time_us, 0444, show_idle_time, NULL); @@ -88,17 +96,24 @@ DEVICE_ATTR(idle_time_us, 0444, show_idle_time, NULL); u64 arch_cpu_idle_time(int cpu) { struct s390_idle_data *idle = &per_cpu(s390_idle, cpu); - unsigned long long now, idle_enter, idle_exit; + unsigned long long now, idle_enter, idle_exit, in_idle; unsigned int seq; do { - now = get_tod_clock(); seq = read_seqcount_begin(&idle->seqcount); idle_enter = READ_ONCE(idle->clock_idle_enter); idle_exit = READ_ONCE(idle->clock_idle_exit); } while (read_seqcount_retry(&idle->seqcount, seq)); - - return cputime_to_nsecs(idle_enter ? ((idle_exit ?: now) - idle_enter) : 0); + in_idle = 0; + now = get_tod_clock(); + if (idle_enter) { + if (idle_exit) { + in_idle = idle_exit - idle_enter; + } else if (now > idle_enter) { + in_idle = now - idle_enter; + } + } + return cputime_to_nsecs(in_idle); } void arch_cpu_idle_enter(void) diff --git a/arch/s390/kernel/machine_kexec_reloc.c b/arch/s390/kernel/machine_kexec_reloc.c index 3b664cb3ec4d..d5035de9020e 100644 --- a/arch/s390/kernel/machine_kexec_reloc.c +++ b/arch/s390/kernel/machine_kexec_reloc.c @@ -27,6 +27,7 @@ int arch_kexec_do_relocs(int r_type, void *loc, unsigned long val, *(u32 *)loc = val; break; case R_390_64: /* Direct 64 bit. */ + case R_390_GLOB_DAT: *(u64 *)loc = val; break; case R_390_PC16: /* PC relative 16 bit. */ diff --git a/arch/s390/kernel/perf_cpum_cf.c b/arch/s390/kernel/perf_cpum_cf.c index 48d48b6187c0..0eb1d1cc53a8 100644 --- a/arch/s390/kernel/perf_cpum_cf.c +++ b/arch/s390/kernel/perf_cpum_cf.c @@ -199,7 +199,7 @@ static const int cpumf_generic_events_user[] = { [PERF_COUNT_HW_BUS_CYCLES] = -1, }; -static int __hw_perf_event_init(struct perf_event *event) +static int __hw_perf_event_init(struct perf_event *event, unsigned int type) { struct perf_event_attr *attr = &event->attr; struct hw_perf_event *hwc = &event->hw; @@ -207,7 +207,7 @@ static int __hw_perf_event_init(struct perf_event *event) int err = 0; u64 ev; - switch (attr->type) { + switch (type) { case PERF_TYPE_RAW: /* Raw events are used to access counters directly, * hence do not permit excludes */ @@ -294,17 +294,16 @@ static int __hw_perf_event_init(struct perf_event *event) static int cpumf_pmu_event_init(struct perf_event *event) { + unsigned int type = event->attr.type; int err; - switch (event->attr.type) { - case PERF_TYPE_HARDWARE: - case PERF_TYPE_HW_CACHE: - case PERF_TYPE_RAW: - err = __hw_perf_event_init(event); - break; - default: + if (type == PERF_TYPE_HARDWARE || type == PERF_TYPE_RAW) + err = __hw_perf_event_init(event, type); + else if (event->pmu->type == type) + /* Registered as unknown PMU */ + err = __hw_perf_event_init(event, PERF_TYPE_RAW); + else return -ENOENT; - } if (unlikely(err) && event->destroy) event->destroy(event); @@ -553,7 +552,7 @@ static int __init cpumf_pmu_init(void) return -ENODEV; cpumf_pmu.attr_groups = cpumf_cf_event_group(); - rc = perf_pmu_register(&cpumf_pmu, "cpum_cf", PERF_TYPE_RAW); + rc = perf_pmu_register(&cpumf_pmu, "cpum_cf", -1); if (rc) pr_err("Registering the cpum_cf PMU failed with rc=%i\n", rc); return rc; diff --git a/arch/s390/kernel/perf_cpum_cf_diag.c b/arch/s390/kernel/perf_cpum_cf_diag.c index 5f1fd1581330..e949ab832ed7 100644 --- 
a/arch/s390/kernel/perf_cpum_cf_diag.c +++ b/arch/s390/kernel/perf_cpum_cf_diag.c @@ -243,13 +243,13 @@ static int cf_diag_event_init(struct perf_event *event) int err = -ENOENT; debug_sprintf_event(cf_diag_dbg, 5, - "%s event %p cpu %d config %#llx " + "%s event %p cpu %d config %#llx type:%u " "sample_type %#llx cf_diag_events %d\n", __func__, - event, event->cpu, attr->config, attr->sample_type, - atomic_read(&cf_diag_events)); + event, event->cpu, attr->config, event->pmu->type, + attr->sample_type, atomic_read(&cf_diag_events)); if (event->attr.config != PERF_EVENT_CPUM_CF_DIAG || - event->attr.type != PERF_TYPE_RAW) + event->attr.type != event->pmu->type) goto out; /* Raw events are used to access counters directly, @@ -390,7 +390,7 @@ static size_t cf_diag_getctrset(struct cf_ctrset_entry *ctrdata, int ctrset, debug_sprintf_event(cf_diag_dbg, 6, "%s ctrset %d ctrset_size %zu cfvn %d csvn %d" - " need %zd rc:%d\n", + " need %zd rc %d\n", __func__, ctrset, ctrset_size, cpuhw->info.cfvn, cpuhw->info.csvn, need, rc); return need; @@ -567,7 +567,7 @@ static int cf_diag_add(struct perf_event *event, int flags) int err = 0; debug_sprintf_event(cf_diag_dbg, 5, - "%s event %p cpu %d flags %#x cpuhw:%p\n", + "%s event %p cpu %d flags %#x cpuhw %p\n", __func__, event, event->cpu, flags, cpuhw); if (cpuhw->flags & PMU_F_IN_USE) { @@ -693,7 +693,7 @@ static int __init cf_diag_init(void) } debug_register_view(cf_diag_dbg, &debug_sprintf_view); - rc = perf_pmu_register(&cf_diag, "cpum_cf_diag", PERF_TYPE_RAW); + rc = perf_pmu_register(&cf_diag, "cpum_cf_diag", -1); if (rc) { debug_unregister_view(cf_diag_dbg, &debug_sprintf_view); debug_unregister(cf_diag_dbg); diff --git a/arch/s390/kernel/perf_cpum_sf.c b/arch/s390/kernel/perf_cpum_sf.c index 544a02e944c6..69506fdbd9a1 100644 --- a/arch/s390/kernel/perf_cpum_sf.c +++ b/arch/s390/kernel/perf_cpum_sf.c @@ -156,8 +156,8 @@ static void free_sampling_buffer(struct sf_buffer *sfb) } } - debug_sprintf_event(sfdbg, 5, - "free_sampling_buffer: freed sdbt=%p\n", sfb->sdbt); + debug_sprintf_event(sfdbg, 5, "%s freed sdbt %p\n", __func__, + sfb->sdbt); memset(sfb, 0, sizeof(*sfb)); } @@ -212,10 +212,10 @@ static int realloc_sampling_buffer(struct sf_buffer *sfb, * the sampling buffer origin. 
*/ if (sfb->sdbt != get_next_sdbt(tail)) { - debug_sprintf_event(sfdbg, 3, "realloc_sampling_buffer: " - "sampling buffer is not linked: origin=%p" - "tail=%p\n", - (void *) sfb->sdbt, (void *) tail); + debug_sprintf_event(sfdbg, 3, "%s: " + "sampling buffer is not linked: origin %p" + " tail %p\n", __func__, + (void *)sfb->sdbt, (void *)tail); return -EINVAL; } @@ -252,7 +252,7 @@ static int realloc_sampling_buffer(struct sf_buffer *sfb, sfb->tail = tail; debug_sprintf_event(sfdbg, 4, "realloc_sampling_buffer: new buffer" - " settings: sdbt=%lu sdb=%lu\n", + " settings: sdbt %lu sdb %lu\n", sfb->num_sdbt, sfb->num_sdb); return rc; } @@ -293,11 +293,11 @@ static int alloc_sampling_buffer(struct sf_buffer *sfb, unsigned long num_sdb) if (rc) { free_sampling_buffer(sfb); debug_sprintf_event(sfdbg, 4, "alloc_sampling_buffer: " - "realloc_sampling_buffer failed with rc=%i\n", rc); + "realloc_sampling_buffer failed with rc %i\n", rc); } else debug_sprintf_event(sfdbg, 4, - "alloc_sampling_buffer: tear=%p dear=%p\n", - sfb->sdbt, (void *) *sfb->sdbt); + "alloc_sampling_buffer: tear %p dear %p\n", + sfb->sdbt, (void *)*sfb->sdbt); return rc; } @@ -404,8 +404,8 @@ static int allocate_buffers(struct cpu_hw_sf *cpuhw, struct hw_perf_event *hwc) return 0; debug_sprintf_event(sfdbg, 3, - "allocate_buffers: rate=%lu f=%lu sdb=%lu/%lu" - " sample_size=%lu cpuhw=%p\n", + "%s: rate %lu f %lu sdb %lu/%lu" + " sample_size %lu cpuhw %p\n", __func__, SAMPL_RATE(hwc), freq, n_sdb, sfb_max_limit(hwc), sample_size, cpuhw); @@ -465,8 +465,8 @@ static void sfb_account_overflows(struct cpu_hw_sf *cpuhw, if (num) sfb_account_allocs(num, hwc); - debug_sprintf_event(sfdbg, 5, "sfb: overflow: overflow=%llu ratio=%lu" - " num=%lu\n", OVERFLOW_REG(hwc), ratio, num); + debug_sprintf_event(sfdbg, 5, "sfb: overflow: overflow %llu ratio %lu" + " num %lu\n", OVERFLOW_REG(hwc), ratio, num); OVERFLOW_REG(hwc) = 0; } @@ -505,11 +505,11 @@ static void extend_sampling_buffer(struct sf_buffer *sfb, rc = realloc_sampling_buffer(sfb, num, GFP_ATOMIC); if (rc) debug_sprintf_event(sfdbg, 5, "sfb: extend: realloc " - "failed with rc=%i\n", rc); + "failed with rc %i\n", rc); if (sfb_has_pending_allocs(sfb, hwc)) debug_sprintf_event(sfdbg, 5, "sfb: extend: " - "req=%lu alloc=%lu remaining=%lu\n", + "req %lu alloc %lu remaining %lu\n", num, sfb->num_sdb - num_old, sfb_pending_allocs(sfb, hwc)); } @@ -538,20 +538,22 @@ static void setup_pmc_cpu(void *flags) err = sf_disable(); if (err) pr_err("Switching off the sampling facility failed " - "with rc=%i\n", err); + "with rc %i\n", err); debug_sprintf_event(sfdbg, 5, - "setup_pmc_cpu: initialized: cpuhw=%p\n", cpusf); + "%s: initialized: cpuhw %p\n", __func__, + cpusf); break; case PMC_RELEASE: cpusf->flags &= ~PMU_F_RESERVED; err = sf_disable(); if (err) { pr_err("Switching off the sampling facility failed " - "with rc=%i\n", err); + "with rc %i\n", err); } else deallocate_buffers(cpusf); debug_sprintf_event(sfdbg, 5, - "setup_pmc_cpu: released: cpuhw=%p\n", cpusf); + "%s: released: cpuhw %p\n", __func__, + cpusf); break; } if (err) @@ -744,7 +746,7 @@ static int __hw_perf_event_init_rate(struct perf_event *event, SAMPL_RATE(hwc) = rate; hw_init_period(hwc, SAMPL_RATE(hwc)); debug_sprintf_event(sfdbg, 4, "__hw_perf_event_init_rate:" - "cpu:%d period:%llx freq:%d,%#lx\n", event->cpu, + "cpu:%d period:%#llx freq:%d,%#lx\n", event->cpu, event->attr.sample_period, event->attr.freq, SAMPLE_FREQ_MODE(hwc)); return 0; @@ -803,6 +805,12 @@ static int __hw_perf_event_init(struct perf_event *event) 
goto out; } + if (si.ribm & CPU_MF_SF_RIBM_NOTAV) { + pr_warn("CPU Measurement Facility sampling is temporarily not available\n"); + err = -EBUSY; + goto out; + } + /* Always enable basic sampling */ SAMPL_FLAGS(hwc) = PERF_CPUM_SF_BASIC_MODE; @@ -895,7 +903,7 @@ static int cpumsf_pmu_event_init(struct perf_event *event) /* Check online status of the CPU to which the event is pinned */ if (event->cpu >= 0 && !cpu_online(event->cpu)) - return -ENODEV; + return -ENODEV; /* Force reset of idle/hv excludes regardless of what the * user requested. @@ -957,7 +965,7 @@ static void cpumsf_pmu_enable(struct pmu *pmu) err = lsctl(&cpuhw->lsctl); if (err) { cpuhw->flags &= ~PMU_F_ENABLED; - pr_err("Loading sampling controls failed: op=%i err=%i\n", + pr_err("Loading sampling controls failed: op %i err %i\n", 1, err); return; } @@ -965,8 +973,8 @@ static void cpumsf_pmu_enable(struct pmu *pmu) /* Load current program parameter */ lpp(&S390_lowcore.lpp); - debug_sprintf_event(sfdbg, 6, "pmu_enable: es=%i cs=%i ed=%i cd=%i " - "interval:%lx tear=%p dear=%p\n", + debug_sprintf_event(sfdbg, 6, "pmu_enable: es %i cs %i ed %i cd %i " + "interval %#lx tear %p dear %p\n", cpuhw->lsctl.es, cpuhw->lsctl.cs, cpuhw->lsctl.ed, cpuhw->lsctl.cd, cpuhw->lsctl.interval, (void *) cpuhw->lsctl.tear, @@ -993,13 +1001,14 @@ static void cpumsf_pmu_disable(struct pmu *pmu) err = lsctl(&inactive); if (err) { - pr_err("Loading sampling controls failed: op=%i err=%i\n", + pr_err("Loading sampling controls failed: op %i err %i\n", 2, err); return; } /* Save state of TEAR and DEAR register contents */ - if (!qsi(&si)) { + err = qsi(&si); + if (!err) { /* TEAR/DEAR values are valid only if the sampling facility is * enabled. Note that cpumsf_pmu_disable() might be called even * for a disabled sampling facility because cpumsf_pmu_enable() @@ -1011,7 +1020,7 @@ static void cpumsf_pmu_disable(struct pmu *pmu) } } else debug_sprintf_event(sfdbg, 3, "cpumsf_pmu_disable: " - "qsi() failed with err=%i\n", err); + "qsi() failed with err %i\n", err); cpuhw->flags &= ~PMU_F_ENABLED; } @@ -1124,15 +1133,6 @@ static void perf_event_count_update(struct perf_event *event, u64 count) local64_add(count, &event->count); } -static void debug_sample_entry(struct hws_basic_entry *sample, - struct hws_trailer_entry *te) -{ - debug_sprintf_event(sfdbg, 4, "hw_collect_samples: Found unknown " - "sampling data entry: te->f=%i basic.def=%04x " - "(%p)\n", - te->f, sample->def, sample); -} - /* hw_collect_samples() - Walk through a sample-data-block and collect samples * @event: The perf event * @sdbt: Sample-data-block table @@ -1186,7 +1186,11 @@ static void hw_collect_samples(struct perf_event *event, unsigned long *sdbt, /* Count discarded samples */ *overflow += 1; } else { - debug_sample_entry(sample, te); + debug_sprintf_event(sfdbg, 4, + "%s: Found unknown" + " sampling data entry: te->f %i" + " basic.def %#4x (%p)\n", __func__, + te->f, sample->def, sample); /* Sample slot is not yet written or other record. * * This condition can occur if the buffer was reused @@ -1261,9 +1265,9 @@ static void hw_perf_event_update(struct perf_event *event, int flush_all) sampl_overflow += te->overflow; /* Timestamps are valid for full sample-data-blocks only */ - debug_sprintf_event(sfdbg, 6, "hw_perf_event_update: sdbt=%p " - "overflow=%llu timestamp=%#llx\n", - sdbt, te->overflow, + debug_sprintf_event(sfdbg, 6, "%s: sdbt %p " + "overflow %llu timestamp %#llx\n", + __func__, sdbt, te->overflow, (te->f) ? 
trailer_timestamp(te) : 0ULL); /* Collect all samples from a single sample-data-block and @@ -1307,9 +1311,9 @@ static void hw_perf_event_update(struct perf_event *event, int flush_all) OVERFLOW_REG(hwc) = DIV_ROUND_UP(OVERFLOW_REG(hwc) + sampl_overflow, 1 + num_sdb); if (sampl_overflow || event_overflow) - debug_sprintf_event(sfdbg, 4, "hw_perf_event_update: " - "overflow stats: sample=%llu event=%llu\n", - sampl_overflow, event_overflow); + debug_sprintf_event(sfdbg, 4, "%s: " + "overflow stats: sample %llu event %llu\n", + __func__, sampl_overflow, event_overflow); } #define AUX_SDB_INDEX(aux, i) ((i) % aux->sfb.num_sdb) @@ -1362,7 +1366,7 @@ static void aux_output_end(struct perf_output_handle *handle) te = aux_sdb_trailer(aux, aux->alert_mark); te->flags &= ~SDB_TE_ALERT_REQ_MASK; - debug_sprintf_event(sfdbg, 6, "aux_output_end: collect %lx SDBs\n", i); + debug_sprintf_event(sfdbg, 6, "%s: collect %#lx SDBs\n", __func__, i); } /* @@ -1422,8 +1426,8 @@ static int aux_output_begin(struct perf_output_handle *handle, debug_sprintf_event(sfdbg, 6, "aux_output_begin: " "head->alert_mark->empty_mark (num_alert, range)" - "[%lx -> %lx -> %lx] (%lx, %lx) " - "tear index %lx, tear %lx dear %lx\n", + "[%#lx -> %#lx -> %#lx] (%#lx, %#lx) " + "tear index %#lx, tear %#lx dear %#lx\n", aux->head, aux->alert_mark, aux->empty_mark, AUX_SDB_NUM_ALERT(aux), range, head / CPUM_SF_SDB_PER_TABLE, @@ -1590,13 +1594,13 @@ static void hw_collect_aux(struct cpu_hw_sf *cpuhw) perf_aux_output_end(&cpuhw->handle, size); pr_err("Sample data caused the AUX buffer with %lu " "pages to overflow\n", num_sdb); - debug_sprintf_event(sfdbg, 1, "head %lx range %lx " - "overflow %llx\n", + debug_sprintf_event(sfdbg, 1, "head %#lx range %#lx " + "overflow %#llx\n", aux->head, range, overflow); } else { size = AUX_SDB_NUM_ALERT(aux) << PAGE_SHIFT; perf_aux_output_end(&cpuhw->handle, size); - debug_sprintf_event(sfdbg, 6, "head %lx alert %lx " + debug_sprintf_event(sfdbg, 6, "head %#lx alert %#lx " "already full, try another\n", aux->head, aux->alert_mark); } @@ -1604,7 +1608,7 @@ static void hw_collect_aux(struct cpu_hw_sf *cpuhw) if (done) debug_sprintf_event(sfdbg, 6, "aux_reset_buffer: " - "[%lx -> %lx -> %lx] (%lx, %lx)\n", + "[%#lx -> %#lx -> %#lx] (%#lx, %#lx)\n", aux->head, aux->alert_mark, aux->empty_mark, AUX_SDB_NUM_ALERT(aux), range); } @@ -1794,7 +1798,7 @@ static int cpumsf_pmu_check_period(struct perf_event *event, u64 value) SAMPL_RATE(&event->hw) = rate; hw_init_period(&event->hw, SAMPL_RATE(&event->hw)); debug_sprintf_event(sfdbg, 4, "cpumsf_pmu_check_period:" - "cpu:%d value:%llx period:%llx freq:%d\n", + "cpu:%d value:%#llx period:%#llx freq:%d\n", event->cpu, value, event->attr.sample_period, do_freq); return 0; @@ -2105,7 +2109,7 @@ static int param_set_sfb_size(const char *val, const struct kernel_param *kp) sfb_set_limits(min, max); pr_info("The sampling buffer limits have changed to: " - "min=%lu max=%lu (diag=x%lu)\n", + "min %lu max %lu (diag %lu)\n", CPUM_SF_MIN_SDB, CPUM_SF_MAX_SDB, CPUM_SF_SDB_DIAG_FACTOR); return 0; } @@ -2123,7 +2127,7 @@ static const struct kernel_param_ops param_ops_sfb_size = { static void __init pr_cpumsf_err(unsigned int reason) { pr_err("Sampling facility support for perf is not available: " - "reason=%04x\n", reason); + "reason %#x\n", reason); } static int __init init_cpum_sampling_pmu(void) diff --git a/arch/s390/kernel/perf_event.c b/arch/s390/kernel/perf_event.c index fcb6c2e92b07..1e75cc983546 100644 --- a/arch/s390/kernel/perf_event.c +++ 
b/arch/s390/kernel/perf_event.c @@ -224,9 +224,13 @@ void perf_callchain_kernel(struct perf_callchain_entry_ctx *entry, struct pt_regs *regs) { struct unwind_state state; + unsigned long addr; - unwind_for_each_frame(&state, current, regs, 0) - perf_callchain_store(entry, state.ip); + unwind_for_each_frame(&state, current, regs, 0) { + addr = unwind_get_return_address(&state); + if (!addr || perf_callchain_store(entry, addr)) + return; + } } /* Perf definitions for PMU event attributes in sysfs */ diff --git a/arch/s390/kernel/process.c b/arch/s390/kernel/process.c index b0afec673f77..6ccef5f29761 100644 --- a/arch/s390/kernel/process.c +++ b/arch/s390/kernel/process.c @@ -40,6 +40,7 @@ #include <asm/stacktrace.h> #include <asm/switch_to.h> #include <asm/runtime_instr.h> +#include <asm/unwind.h> #include "entry.h" asmlinkage void ret_from_fork(void) asm ("ret_from_fork"); @@ -178,9 +179,8 @@ EXPORT_SYMBOL(dump_fpu); unsigned long get_wchan(struct task_struct *p) { - struct stack_frame *sf, *low, *high; - unsigned long return_address; - int count; + struct unwind_state state; + unsigned long ip = 0; if (!p || p == current || p->state == TASK_RUNNING || !task_stack_page(p)) return 0; @@ -188,26 +188,22 @@ unsigned long get_wchan(struct task_struct *p) if (!try_get_task_stack(p)) return 0; - low = task_stack_page(p); - high = (struct stack_frame *) task_pt_regs(p); - sf = (struct stack_frame *) p->thread.ksp; - if (sf <= low || sf > high) { - return_address = 0; - goto out; - } - for (count = 0; count < 16; count++) { - sf = (struct stack_frame *)READ_ONCE_NOCHECK(sf->back_chain); - if (sf <= low || sf > high) { - return_address = 0; - goto out; + unwind_for_each_frame(&state, p, NULL, 0) { + if (state.stack_info.type != STACK_TYPE_TASK) { + ip = 0; + break; } - return_address = READ_ONCE_NOCHECK(sf->gprs[8]); - if (!in_sched_functions(return_address)) - goto out; + + ip = unwind_get_return_address(&state); + if (!ip) + break; + + if (!in_sched_functions(ip)) + break; } -out: + put_task_stack(p); - return return_address; + return ip; } unsigned long arch_align_stack(unsigned long sp) diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c index 44974654cbd0..6acdcf1d4074 100644 --- a/arch/s390/kernel/smp.c +++ b/arch/s390/kernel/smp.c @@ -724,39 +724,67 @@ static void __ref smp_get_core_info(struct sclp_core_info *info, int early) static int smp_add_present_cpu(int cpu); -static int __smp_rescan_cpus(struct sclp_core_info *info, int sysfs_add) +static int smp_add_core(struct sclp_core_entry *core, cpumask_t *avail, + bool configured, bool early) { struct pcpu *pcpu; - cpumask_t avail; - int cpu, nr, i, j; + int cpu, nr, i; u16 address; nr = 0; - cpumask_xor(&avail, cpu_possible_mask, cpu_present_mask); - cpu = cpumask_first(&avail); - for (i = 0; (i < info->combined) && (cpu < nr_cpu_ids); i++) { - if (sclp.has_core_type && info->core[i].type != boot_core_type) + if (sclp.has_core_type && core->type != boot_core_type) + return nr; + cpu = cpumask_first(avail); + address = core->core_id << smp_cpu_mt_shift; + for (i = 0; (i <= smp_cpu_mtid) && (cpu < nr_cpu_ids); i++) { + if (pcpu_find_address(cpu_present_mask, address + i)) continue; - address = info->core[i].core_id << smp_cpu_mt_shift; - for (j = 0; j <= smp_cpu_mtid; j++) { - if (pcpu_find_address(cpu_present_mask, address + j)) - continue; - pcpu = pcpu_devices + cpu; - pcpu->address = address + j; - pcpu->state = - (cpu >= info->configured*(smp_cpu_mtid + 1)) ? 
- CPU_STATE_STANDBY : CPU_STATE_CONFIGURED; - smp_cpu_set_polarization(cpu, POLARIZATION_UNKNOWN); - set_cpu_present(cpu, true); - if (sysfs_add && smp_add_present_cpu(cpu) != 0) - set_cpu_present(cpu, false); - else - nr++; - cpu = cpumask_next(cpu, &avail); - if (cpu >= nr_cpu_ids) + pcpu = pcpu_devices + cpu; + pcpu->address = address + i; + if (configured) + pcpu->state = CPU_STATE_CONFIGURED; + else + pcpu->state = CPU_STATE_STANDBY; + smp_cpu_set_polarization(cpu, POLARIZATION_UNKNOWN); + set_cpu_present(cpu, true); + if (!early && smp_add_present_cpu(cpu) != 0) + set_cpu_present(cpu, false); + else + nr++; + cpumask_clear_cpu(cpu, avail); + cpu = cpumask_next(cpu, avail); + } + return nr; +} + +static int __smp_rescan_cpus(struct sclp_core_info *info, bool early) +{ + struct sclp_core_entry *core; + cpumask_t avail; + bool configured; + u16 core_id; + int nr, i; + + nr = 0; + cpumask_xor(&avail, cpu_possible_mask, cpu_present_mask); + /* + * Add IPL core first (which got logical CPU number 0) to make sure + * that all SMT threads get subsequent logical CPU numbers. + */ + if (early) { + core_id = pcpu_devices[0].address >> smp_cpu_mt_shift; + for (i = 0; i < info->configured; i++) { + core = &info->core[i]; + if (core->core_id == core_id) { + nr += smp_add_core(core, &avail, true, early); break; + } } } + for (i = 0; i < info->combined; i++) { + configured = i < info->configured; + nr += smp_add_core(&info->core[i], &avail, configured, early); + } return nr; } @@ -805,7 +833,7 @@ void __init smp_detect_cpus(void) /* Add CPUs present at boot */ get_online_cpus(); - __smp_rescan_cpus(info, 0); + __smp_rescan_cpus(info, true); put_online_cpus(); memblock_free_early((unsigned long)info, sizeof(*info)); } @@ -1148,7 +1176,7 @@ int __ref smp_rescan_cpus(void) smp_get_core_info(info, 0); get_online_cpus(); mutex_lock(&smp_cpu_state_mutex); - nr = __smp_rescan_cpus(info, 1); + nr = __smp_rescan_cpus(info, false); mutex_unlock(&smp_cpu_state_mutex); put_online_cpus(); kfree(info); diff --git a/arch/s390/kernel/time.c b/arch/s390/kernel/time.c index e8766beee5ad..f9d070d016e3 100644 --- a/arch/s390/kernel/time.c +++ b/arch/s390/kernel/time.c @@ -110,15 +110,6 @@ unsigned long long notrace sched_clock(void) } NOKPROBE_SYMBOL(sched_clock); -/* - * Monotonic_clock - returns # of nanoseconds passed since time_init() - */ -unsigned long long monotonic_clock(void) -{ - return sched_clock(); -} -EXPORT_SYMBOL(monotonic_clock); - static void ext_to_timespec64(unsigned char *clk, struct timespec64 *xt) { unsigned long long high, low, rem, sec, nsec; diff --git a/arch/s390/kernel/unwind_bc.c b/arch/s390/kernel/unwind_bc.c index 8fc9daae47a2..fa111d3d378f 100644 --- a/arch/s390/kernel/unwind_bc.c +++ b/arch/s390/kernel/unwind_bc.c @@ -46,10 +46,15 @@ bool unwind_next_frame(struct unwind_state *state) regs = state->regs; if (unlikely(regs)) { - sp = READ_ONCE_NOCHECK(regs->gprs[15]); - if (unlikely(outside_of_stack(state, sp))) { - if (!update_stack_info(state, sp)) - goto out_err; + if (state->reuse_sp) { + sp = state->sp; + state->reuse_sp = false; + } else { + sp = READ_ONCE_NOCHECK(regs->gprs[15]); + if (unlikely(outside_of_stack(state, sp))) { + if (!update_stack_info(state, sp)) + goto out_err; + } } sf = (struct stack_frame *) sp; ip = READ_ONCE_NOCHECK(sf->gprs[8]); @@ -80,12 +85,7 @@ bool unwind_next_frame(struct unwind_state *state) } } -#ifdef CONFIG_FUNCTION_GRAPH_TRACER - /* Decode any ftrace redirection */ - if (ip == (unsigned long) return_to_handler) - ip = 
ftrace_graph_ret_addr(state->task, &state->graph_idx, - ip, (void *) sp); -#endif + ip = ftrace_graph_ret_addr(state->task, &state->graph_idx, ip, (void *) sp); /* Update unwind state */ state->sp = sp; @@ -107,9 +107,9 @@ void __unwind_start(struct unwind_state *state, struct task_struct *task, { struct stack_info *info = &state->stack_info; unsigned long *mask = &state->stack_mask; + bool reliable, reuse_sp; struct stack_frame *sf; unsigned long ip; - bool reliable; memset(state, 0, sizeof(*state)); state->task = task; @@ -134,22 +134,20 @@ void __unwind_start(struct unwind_state *state, struct task_struct *task, if (regs) { ip = READ_ONCE_NOCHECK(regs->psw.addr); reliable = true; + reuse_sp = true; } else { sf = (struct stack_frame *) sp; ip = READ_ONCE_NOCHECK(sf->gprs[8]); reliable = false; + reuse_sp = false; } -#ifdef CONFIG_FUNCTION_GRAPH_TRACER - /* Decode any ftrace redirection */ - if (ip == (unsigned long) return_to_handler) - ip = ftrace_graph_ret_addr(state->task, &state->graph_idx, - ip, NULL); -#endif + ip = ftrace_graph_ret_addr(state->task, &state->graph_idx, ip, NULL); /* Update unwind state */ state->sp = sp; state->ip = ip; state->reliable = reliable; + state->reuse_sp = reuse_sp; } EXPORT_SYMBOL_GPL(__unwind_start); diff --git a/arch/s390/kvm/diag.c b/arch/s390/kvm/diag.c index 45634b3d2e0a..3fb54ec2cf3e 100644 --- a/arch/s390/kvm/diag.c +++ b/arch/s390/kvm/diag.c @@ -158,14 +158,28 @@ static int __diag_time_slice_end_directed(struct kvm_vcpu *vcpu) tid = vcpu->run->s.regs.gprs[(vcpu->arch.sie_block->ipa & 0xf0) >> 4]; vcpu->stat.diagnose_9c++; - VCPU_EVENT(vcpu, 5, "diag time slice end directed to %d", tid); + /* yield to self */ if (tid == vcpu->vcpu_id) - return 0; + goto no_yield; + /* yield to invalid */ tcpu = kvm_get_vcpu_by_id(vcpu->kvm, tid); - if (tcpu) - kvm_vcpu_yield_to(tcpu); + if (!tcpu) + goto no_yield; + + /* target already running */ + if (READ_ONCE(tcpu->cpu) >= 0) + goto no_yield; + + if (kvm_vcpu_yield_to(tcpu) <= 0) + goto no_yield; + + VCPU_EVENT(vcpu, 5, "diag time slice end directed to %d: done", tid); + return 0; +no_yield: + VCPU_EVENT(vcpu, 5, "diag time slice end directed to %d: ignored", tid); + vcpu->stat.diagnose_9c_ignored++; return 0; } diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c index d1ccc168c071..165dea4c7f19 100644 --- a/arch/s390/kvm/interrupt.c +++ b/arch/s390/kvm/interrupt.c @@ -1477,8 +1477,7 @@ static int __inject_sigp_stop(struct kvm_vcpu *vcpu, struct kvm_s390_irq *irq) return 0; } -static int __inject_sigp_restart(struct kvm_vcpu *vcpu, - struct kvm_s390_irq *irq) +static int __inject_sigp_restart(struct kvm_vcpu *vcpu) { struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int; @@ -2007,7 +2006,7 @@ static int do_inject_vcpu(struct kvm_vcpu *vcpu, struct kvm_s390_irq *irq) rc = __inject_sigp_stop(vcpu, irq); break; case KVM_S390_RESTART: - rc = __inject_sigp_restart(vcpu, irq); + rc = __inject_sigp_restart(vcpu); break; case KVM_S390_INT_CLOCK_COMP: rc = __inject_ckc(vcpu); diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index f6db0f1bc867..d9e6bf3d54f0 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -155,6 +155,7 @@ struct kvm_stats_debugfs_item debugfs_entries[] = { { "instruction_diag_10", VCPU_STAT(diagnose_10) }, { "instruction_diag_44", VCPU_STAT(diagnose_44) }, { "instruction_diag_9c", VCPU_STAT(diagnose_9c) }, + { "diag_9c_ignored", VCPU_STAT(diagnose_9c_ignored) }, { "instruction_diag_258", VCPU_STAT(diagnose_258) }, { "instruction_diag_308", 
VCPU_STAT(diagnose_308) }, { "instruction_diag_500", VCPU_STAT(diagnose_500) }, @@ -332,7 +333,7 @@ static inline int plo_test_bit(unsigned char nr) return cc == 0; } -static inline void __insn32_query(unsigned int opcode, u8 query[32]) +static __always_inline void __insn32_query(unsigned int opcode, u8 *query) { register unsigned long r0 asm("0") = 0; /* query function */ register unsigned long r1 asm("1") = (unsigned long) query; @@ -340,9 +341,9 @@ static inline void __insn32_query(unsigned int opcode, u8 query[32]) asm volatile( /* Parameter regs are ignored */ " .insn rrf,%[opc] << 16,2,4,6,0\n" - : "=m" (*query) + : : "d" (r0), "a" (r1), [opc] "i" (opcode) - : "cc"); + : "cc", "memory"); } #define INSN_SORTL 0xb938 @@ -453,16 +454,14 @@ static void kvm_s390_cpu_feat_init(void) int kvm_arch_init(void *opaque) { - int rc; + int rc = -ENOMEM; kvm_s390_dbf = debug_register("kvm-trace", 32, 1, 7 * sizeof(long)); if (!kvm_s390_dbf) return -ENOMEM; - if (debug_register_view(kvm_s390_dbf, &debug_sprintf_view)) { - rc = -ENOMEM; - goto out_debug_unreg; - } + if (debug_register_view(kvm_s390_dbf, &debug_sprintf_view)) + goto out; kvm_s390_cpu_feat_init(); @@ -470,19 +469,17 @@ int kvm_arch_init(void *opaque) rc = kvm_register_device_ops(&kvm_flic_ops, KVM_DEV_TYPE_FLIC); if (rc) { pr_err("A FLIC registration call failed with rc=%d\n", rc); - goto out_debug_unreg; + goto out; } rc = kvm_s390_gib_init(GAL_ISC); if (rc) - goto out_gib_destroy; + goto out; return 0; -out_gib_destroy: - kvm_s390_gib_destroy(); -out_debug_unreg: - debug_unregister(kvm_s390_dbf); +out: + kvm_arch_exit(); return rc; } diff --git a/arch/s390/lib/spinlock.c b/arch/s390/lib/spinlock.c index 30a7c8c29964..ce1e4bbe53aa 100644 --- a/arch/s390/lib/spinlock.c +++ b/arch/s390/lib/spinlock.c @@ -74,7 +74,7 @@ static inline int arch_load_niai4(int *lock) { int owner; - asm volatile( + asm_inline volatile( ALTERNATIVE("", ".long 0xb2fa0040", 49) /* NIAI 4 */ " l %0,%1\n" : "=d" (owner) : "Q" (*lock) : "memory"); @@ -85,7 +85,7 @@ static inline int arch_cmpxchg_niai8(int *lock, int old, int new) { int expected = old; - asm volatile( + asm_inline volatile( ALTERNATIVE("", ".long 0xb2fa0080", 49) /* NIAI 8 */ " cs %0,%3,%1\n" : "=d" (old), "=Q" (*lock) diff --git a/arch/s390/mm/cmm.c b/arch/s390/mm/cmm.c index 510a18299196..a51c892f14f3 100644 --- a/arch/s390/mm/cmm.c +++ b/arch/s390/mm/cmm.c @@ -298,16 +298,16 @@ static int cmm_timeout_handler(struct ctl_table *ctl, int write, } if (write) { - len = *lenp; - if (copy_from_user(buf, buffer, - len > sizeof(buf) ? 
sizeof(buf) : len)) + len = min(*lenp, sizeof(buf)); + if (copy_from_user(buf, buffer, len)) return -EFAULT; - buf[sizeof(buf) - 1] = '\0'; + buf[len - 1] = '\0'; cmm_skip_blanks(buf, &p); nr = simple_strtoul(p, &p, 0); cmm_skip_blanks(p, &p); seconds = simple_strtoul(p, &p, 0); cmm_set_timeout(nr, seconds); + *ppos += *lenp; } else { len = sprintf(buf, "%ld %ld\n", cmm_timeout_pages, cmm_timeout_seconds); @@ -315,9 +315,9 @@ static int cmm_timeout_handler(struct ctl_table *ctl, int write, len = *lenp; if (copy_to_user(buffer, buf, len)) return -EFAULT; + *lenp = len; + *ppos += len; } - *lenp = len; - *ppos += len; return 0; } diff --git a/arch/s390/mm/init.c b/arch/s390/mm/init.c index a124f19f7b3c..f0ce22220565 100644 --- a/arch/s390/mm/init.c +++ b/arch/s390/mm/init.c @@ -118,6 +118,7 @@ void __init paging_init(void) sparse_memory_present_with_active_regions(MAX_NUMNODES); sparse_init(); + zone_dma_bits = 31; memset(max_zone_pfns, 0, sizeof(max_zone_pfns)); max_zone_pfns[ZONE_DMA] = PFN_DOWN(MAX_DMA_ADDRESS); max_zone_pfns[ZONE_NORMAL] = max_low_pfn; diff --git a/arch/s390/mm/maccess.c b/arch/s390/mm/maccess.c index 1864a8bb9622..59ad7997fed1 100644 --- a/arch/s390/mm/maccess.c +++ b/arch/s390/mm/maccess.c @@ -70,7 +70,7 @@ void notrace s390_kernel_write(void *dst, const void *src, size_t size) spin_unlock_irqrestore(&s390_kernel_write_lock, flags); } -static int __memcpy_real(void *dest, void *src, size_t count) +static int __no_sanitize_address __memcpy_real(void *dest, void *src, size_t count) { register unsigned long _dest asm("2") = (unsigned long) dest; register unsigned long _len1 asm("3") = (unsigned long) count; @@ -91,19 +91,23 @@ static int __memcpy_real(void *dest, void *src, size_t count) return rc; } -static unsigned long _memcpy_real(unsigned long dest, unsigned long src, - unsigned long count) +static unsigned long __no_sanitize_address _memcpy_real(unsigned long dest, + unsigned long src, + unsigned long count) { int irqs_disabled, rc; unsigned long flags; if (!count) return 0; - flags = __arch_local_irq_stnsm(0xf8UL); + flags = arch_local_irq_save(); irqs_disabled = arch_irqs_disabled_flags(flags); if (!irqs_disabled) trace_hardirqs_off(); + __arch_local_irq_stnsm(0xf8); // disable DAT rc = __memcpy_real((void *) dest, (void *) src, (size_t) count); + if (flags & PSW_MASK_DAT) + __arch_local_irq_stosm(0x04); // enable DAT if (!irqs_disabled) trace_hardirqs_on(); __arch_local_irq_ssm(flags); diff --git a/arch/s390/net/bpf_jit_comp.c b/arch/s390/net/bpf_jit_comp.c index ce88211b9c6c..8d2134136290 100644 --- a/arch/s390/net/bpf_jit_comp.c +++ b/arch/s390/net/bpf_jit_comp.c @@ -23,6 +23,8 @@ #include <linux/filter.h> #include <linux/init.h> #include <linux/bpf.h> +#include <linux/mm.h> +#include <linux/kernel.h> #include <asm/cacheflush.h> #include <asm/dis.h> #include <asm/facility.h> @@ -38,10 +40,11 @@ struct bpf_jit { int size; /* Size of program and literal pool */ int size_prg; /* Size of program */ int prg; /* Current position in program */ - int lit_start; /* Start of literal pool */ - int lit; /* Current position in literal pool */ + int lit32_start; /* Start of 32-bit literal pool */ + int lit32; /* Current position in 32-bit literal pool */ + int lit64_start; /* Start of 64-bit literal pool */ + int lit64; /* Current position in 64-bit literal pool */ int base_ip; /* Base address for literal pool */ - int ret0_ip; /* Address of return 0 */ int exit_ip; /* Address of exit */ int r1_thunk_ip; /* Address of expoline thunk for 'br %r1' */ int r14_thunk_ip; /* 
Address of expoline thunk for 'br %r14' */ @@ -49,14 +52,10 @@ struct bpf_jit { int labels[1]; /* Labels for local jumps */ }; -#define BPF_SIZE_MAX 0xffff /* Max size for program (16 bit branches) */ - -#define SEEN_MEM (1 << 0) /* use mem[] for temporary storage */ -#define SEEN_RET0 (1 << 1) /* ret0_ip points to a valid return 0 */ -#define SEEN_LITERAL (1 << 2) /* code uses literals */ -#define SEEN_FUNC (1 << 3) /* calls C functions */ -#define SEEN_TAIL_CALL (1 << 4) /* code uses tail calls */ -#define SEEN_REG_AX (1 << 5) /* code uses constant blinding */ +#define SEEN_MEM BIT(0) /* use mem[] for temporary storage */ +#define SEEN_LITERAL BIT(1) /* code uses literals */ +#define SEEN_FUNC BIT(2) /* calls C functions */ +#define SEEN_TAIL_CALL BIT(3) /* code uses tail calls */ #define SEEN_STACK (SEEN_FUNC | SEEN_MEM) /* @@ -131,13 +130,13 @@ static inline void reg_set_seen(struct bpf_jit *jit, u32 b1) #define _EMIT2(op) \ ({ \ if (jit->prg_buf) \ - *(u16 *) (jit->prg_buf + jit->prg) = op; \ + *(u16 *) (jit->prg_buf + jit->prg) = (op); \ jit->prg += 2; \ }) #define EMIT2(op, b1, b2) \ ({ \ - _EMIT2(op | reg(b1, b2)); \ + _EMIT2((op) | reg(b1, b2)); \ REG_SET_SEEN(b1); \ REG_SET_SEEN(b2); \ }) @@ -145,20 +144,20 @@ static inline void reg_set_seen(struct bpf_jit *jit, u32 b1) #define _EMIT4(op) \ ({ \ if (jit->prg_buf) \ - *(u32 *) (jit->prg_buf + jit->prg) = op; \ + *(u32 *) (jit->prg_buf + jit->prg) = (op); \ jit->prg += 4; \ }) #define EMIT4(op, b1, b2) \ ({ \ - _EMIT4(op | reg(b1, b2)); \ + _EMIT4((op) | reg(b1, b2)); \ REG_SET_SEEN(b1); \ REG_SET_SEEN(b2); \ }) #define EMIT4_RRF(op, b1, b2, b3) \ ({ \ - _EMIT4(op | reg_high(b3) << 8 | reg(b1, b2)); \ + _EMIT4((op) | reg_high(b3) << 8 | reg(b1, b2)); \ REG_SET_SEEN(b1); \ REG_SET_SEEN(b2); \ REG_SET_SEEN(b3); \ @@ -167,13 +166,13 @@ static inline void reg_set_seen(struct bpf_jit *jit, u32 b1) #define _EMIT4_DISP(op, disp) \ ({ \ unsigned int __disp = (disp) & 0xfff; \ - _EMIT4(op | __disp); \ + _EMIT4((op) | __disp); \ }) #define EMIT4_DISP(op, b1, b2, disp) \ ({ \ - _EMIT4_DISP(op | reg_high(b1) << 16 | \ - reg_high(b2) << 8, disp); \ + _EMIT4_DISP((op) | reg_high(b1) << 16 | \ + reg_high(b2) << 8, (disp)); \ REG_SET_SEEN(b1); \ REG_SET_SEEN(b2); \ }) @@ -181,21 +180,27 @@ static inline void reg_set_seen(struct bpf_jit *jit, u32 b1) #define EMIT4_IMM(op, b1, imm) \ ({ \ unsigned int __imm = (imm) & 0xffff; \ - _EMIT4(op | reg_high(b1) << 16 | __imm); \ + _EMIT4((op) | reg_high(b1) << 16 | __imm); \ REG_SET_SEEN(b1); \ }) #define EMIT4_PCREL(op, pcrel) \ ({ \ long __pcrel = ((pcrel) >> 1) & 0xffff; \ - _EMIT4(op | __pcrel); \ + _EMIT4((op) | __pcrel); \ +}) + +#define EMIT4_PCREL_RIC(op, mask, target) \ +({ \ + int __rel = ((target) - jit->prg) / 2; \ + _EMIT4((op) | (mask) << 20 | (__rel & 0xffff)); \ }) #define _EMIT6(op1, op2) \ ({ \ if (jit->prg_buf) { \ - *(u32 *) (jit->prg_buf + jit->prg) = op1; \ - *(u16 *) (jit->prg_buf + jit->prg + 4) = op2; \ + *(u32 *) (jit->prg_buf + jit->prg) = (op1); \ + *(u16 *) (jit->prg_buf + jit->prg + 4) = (op2); \ } \ jit->prg += 6; \ }) @@ -203,20 +208,20 @@ static inline void reg_set_seen(struct bpf_jit *jit, u32 b1) #define _EMIT6_DISP(op1, op2, disp) \ ({ \ unsigned int __disp = (disp) & 0xfff; \ - _EMIT6(op1 | __disp, op2); \ + _EMIT6((op1) | __disp, op2); \ }) #define _EMIT6_DISP_LH(op1, op2, disp) \ ({ \ - u32 _disp = (u32) disp; \ + u32 _disp = (u32) (disp); \ unsigned int __disp_h = _disp & 0xff000; \ unsigned int __disp_l = _disp & 0x00fff; \ - _EMIT6(op1 | __disp_l, op2 | __disp_h >> 
4); \ + _EMIT6((op1) | __disp_l, (op2) | __disp_h >> 4); \ }) #define EMIT6_DISP_LH(op1, op2, b1, b2, b3, disp) \ ({ \ - _EMIT6_DISP_LH(op1 | reg(b1, b2) << 16 | \ + _EMIT6_DISP_LH((op1) | reg(b1, b2) << 16 | \ reg_high(b3) << 8, op2, disp); \ REG_SET_SEEN(b1); \ REG_SET_SEEN(b2); \ @@ -226,8 +231,8 @@ static inline void reg_set_seen(struct bpf_jit *jit, u32 b1) #define EMIT6_PCREL_LABEL(op1, op2, b1, b2, label, mask) \ ({ \ int rel = (jit->labels[label] - jit->prg) >> 1; \ - _EMIT6(op1 | reg(b1, b2) << 16 | (rel & 0xffff), \ - op2 | mask << 12); \ + _EMIT6((op1) | reg(b1, b2) << 16 | (rel & 0xffff), \ + (op2) | (mask) << 12); \ REG_SET_SEEN(b1); \ REG_SET_SEEN(b2); \ }) @@ -235,68 +240,83 @@ static inline void reg_set_seen(struct bpf_jit *jit, u32 b1) #define EMIT6_PCREL_IMM_LABEL(op1, op2, b1, imm, label, mask) \ ({ \ int rel = (jit->labels[label] - jit->prg) >> 1; \ - _EMIT6(op1 | (reg_high(b1) | mask) << 16 | \ - (rel & 0xffff), op2 | (imm & 0xff) << 8); \ + _EMIT6((op1) | (reg_high(b1) | (mask)) << 16 | \ + (rel & 0xffff), (op2) | ((imm) & 0xff) << 8); \ REG_SET_SEEN(b1); \ - BUILD_BUG_ON(((unsigned long) imm) > 0xff); \ + BUILD_BUG_ON(((unsigned long) (imm)) > 0xff); \ }) #define EMIT6_PCREL(op1, op2, b1, b2, i, off, mask) \ ({ \ /* Branch instruction needs 6 bytes */ \ - int rel = (addrs[i + off + 1] - (addrs[i + 1] - 6)) / 2;\ - _EMIT6(op1 | reg(b1, b2) << 16 | (rel & 0xffff), op2 | mask); \ + int rel = (addrs[(i) + (off) + 1] - (addrs[(i) + 1] - 6)) / 2;\ + _EMIT6((op1) | reg(b1, b2) << 16 | (rel & 0xffff), (op2) | (mask));\ REG_SET_SEEN(b1); \ REG_SET_SEEN(b2); \ }) #define EMIT6_PCREL_RILB(op, b, target) \ ({ \ - int rel = (target - jit->prg) / 2; \ - _EMIT6(op | reg_high(b) << 16 | rel >> 16, rel & 0xffff); \ + unsigned int rel = (int)((target) - jit->prg) / 2; \ + _EMIT6((op) | reg_high(b) << 16 | rel >> 16, rel & 0xffff);\ REG_SET_SEEN(b); \ }) #define EMIT6_PCREL_RIL(op, target) \ ({ \ - int rel = (target - jit->prg) / 2; \ - _EMIT6(op | rel >> 16, rel & 0xffff); \ + unsigned int rel = (int)((target) - jit->prg) / 2; \ + _EMIT6((op) | rel >> 16, rel & 0xffff); \ +}) + +#define EMIT6_PCREL_RILC(op, mask, target) \ +({ \ + EMIT6_PCREL_RIL((op) | (mask) << 20, (target)); \ }) #define _EMIT6_IMM(op, imm) \ ({ \ unsigned int __imm = (imm); \ - _EMIT6(op | (__imm >> 16), __imm & 0xffff); \ + _EMIT6((op) | (__imm >> 16), __imm & 0xffff); \ }) #define EMIT6_IMM(op, b1, imm) \ ({ \ - _EMIT6_IMM(op | reg_high(b1) << 16, imm); \ + _EMIT6_IMM((op) | reg_high(b1) << 16, imm); \ REG_SET_SEEN(b1); \ }) -#define EMIT_CONST_U32(val) \ +#define _EMIT_CONST_U32(val) \ ({ \ unsigned int ret; \ - ret = jit->lit - jit->base_ip; \ - jit->seen |= SEEN_LITERAL; \ + ret = jit->lit32; \ if (jit->prg_buf) \ - *(u32 *) (jit->prg_buf + jit->lit) = (u32) val; \ - jit->lit += 4; \ + *(u32 *)(jit->prg_buf + jit->lit32) = (u32)(val);\ + jit->lit32 += 4; \ ret; \ }) -#define EMIT_CONST_U64(val) \ +#define EMIT_CONST_U32(val) \ ({ \ - unsigned int ret; \ - ret = jit->lit - jit->base_ip; \ jit->seen |= SEEN_LITERAL; \ + _EMIT_CONST_U32(val) - jit->base_ip; \ +}) + +#define _EMIT_CONST_U64(val) \ +({ \ + unsigned int ret; \ + ret = jit->lit64; \ if (jit->prg_buf) \ - *(u64 *) (jit->prg_buf + jit->lit) = (u64) val; \ - jit->lit += 8; \ + *(u64 *)(jit->prg_buf + jit->lit64) = (u64)(val);\ + jit->lit64 += 8; \ ret; \ }) +#define EMIT_CONST_U64(val) \ +({ \ + jit->seen |= SEEN_LITERAL; \ + _EMIT_CONST_U64(val) - jit->base_ip; \ +}) + #define EMIT_ZERO(b1) \ ({ \ if (!fp->aux->verifier_zext) { \ @@ -307,6 +327,67 @@ 
static inline void reg_set_seen(struct bpf_jit *jit, u32 b1) }) /* + * Return whether this is the first pass. The first pass is special, since we + * don't know any sizes yet, and thus must be conservative. + */ +static bool is_first_pass(struct bpf_jit *jit) +{ + return jit->size == 0; +} + +/* + * Return whether this is the code generation pass. The code generation pass is + * special, since we should change as little as possible. + */ +static bool is_codegen_pass(struct bpf_jit *jit) +{ + return jit->prg_buf; +} + +/* + * Return whether "rel" can be encoded as a short PC-relative offset + */ +static bool is_valid_rel(int rel) +{ + return rel >= -65536 && rel <= 65534; +} + +/* + * Return whether "off" can be reached using a short PC-relative offset + */ +static bool can_use_rel(struct bpf_jit *jit, int off) +{ + return is_valid_rel(off - jit->prg); +} + +/* + * Return whether given displacement can be encoded using + * Long-Displacement Facility + */ +static bool is_valid_ldisp(int disp) +{ + return disp >= -524288 && disp <= 524287; +} + +/* + * Return whether the next 32-bit literal pool entry can be referenced using + * Long-Displacement Facility + */ +static bool can_use_ldisp_for_lit32(struct bpf_jit *jit) +{ + return is_valid_ldisp(jit->lit32 - jit->base_ip); +} + +/* + * Return whether the next 64-bit literal pool entry can be referenced using + * Long-Displacement Facility + */ +static bool can_use_ldisp_for_lit64(struct bpf_jit *jit) +{ + return is_valid_ldisp(jit->lit64 - jit->base_ip); +} + +/* * Fill whole space with illegal instructions */ static void jit_fill_hole(void *area, unsigned int size) @@ -383,9 +464,18 @@ static int get_end(struct bpf_jit *jit, int start) */ static void save_restore_regs(struct bpf_jit *jit, int op, u32 stack_depth) { - + const int last = 15, save_restore_size = 6; int re = 6, rs; + if (is_first_pass(jit)) { + /* + * We don't know yet which registers are used. Reserve space + * conservatively. 
+ */ + jit->prg += (last - re + 1) * save_restore_size; + return; + } + do { rs = get_start(jit, re); if (!rs) @@ -396,7 +486,7 @@ static void save_restore_regs(struct bpf_jit *jit, int op, u32 stack_depth) else restore_regs(jit, rs, re, stack_depth); re++; - } while (re <= 15); + } while (re <= last); } /* @@ -420,21 +510,28 @@ static void bpf_jit_prologue(struct bpf_jit *jit, u32 stack_depth) /* Save registers */ save_restore_regs(jit, REGS_SAVE, stack_depth); /* Setup literal pool */ - if (jit->seen & SEEN_LITERAL) { - /* basr %r13,0 */ - EMIT2(0x0d00, REG_L, REG_0); - jit->base_ip = jit->prg; + if (is_first_pass(jit) || (jit->seen & SEEN_LITERAL)) { + if (!is_first_pass(jit) && + is_valid_ldisp(jit->size - (jit->prg + 2))) { + /* basr %l,0 */ + EMIT2(0x0d00, REG_L, REG_0); + jit->base_ip = jit->prg; + } else { + /* larl %l,lit32_start */ + EMIT6_PCREL_RILB(0xc0000000, REG_L, jit->lit32_start); + jit->base_ip = jit->lit32_start; + } } /* Setup stack and backchain */ - if (jit->seen & SEEN_STACK) { - if (jit->seen & SEEN_FUNC) + if (is_first_pass(jit) || (jit->seen & SEEN_STACK)) { + if (is_first_pass(jit) || (jit->seen & SEEN_FUNC)) /* lgr %w1,%r15 (backchain) */ EMIT4(0xb9040000, REG_W1, REG_15); /* la %bfp,STK_160_UNUSED(%r15) (BPF frame pointer) */ EMIT4_DISP(0x41000000, BPF_REG_FP, REG_15, STK_160_UNUSED); /* aghi %r15,-STK_OFF */ EMIT4_IMM(0xa70b0000, REG_15, -(STK_OFF + stack_depth)); - if (jit->seen & SEEN_FUNC) + if (is_first_pass(jit) || (jit->seen & SEEN_FUNC)) /* stg %w1,152(%r15) (backchain) */ EMIT6_DISP_LH(0xe3000000, 0x0024, REG_W1, REG_0, REG_15, 152); @@ -446,12 +543,6 @@ static void bpf_jit_prologue(struct bpf_jit *jit, u32 stack_depth) */ static void bpf_jit_epilogue(struct bpf_jit *jit, u32 stack_depth) { - /* Return 0 */ - if (jit->seen & SEEN_RET0) { - jit->ret0_ip = jit->prg; - /* lghi %b0,0 */ - EMIT4_IMM(0xa7090000, BPF_REG_0, 0); - } jit->exit_ip = jit->prg; /* Load exit code: lgr %r2,%b0 */ EMIT4(0xb9040000, REG_2, BPF_REG_0); @@ -476,7 +567,7 @@ static void bpf_jit_epilogue(struct bpf_jit *jit, u32 stack_depth) _EMIT2(0x07fe); if (__is_defined(CC_USING_EXPOLINE) && !nospec_disable && - (jit->seen & SEEN_FUNC)) { + (is_first_pass(jit) || (jit->seen & SEEN_FUNC))) { jit->r1_thunk_ip = jit->prg; /* Generate __s390_indirect_jump_r1 thunk */ if (test_facility(35)) { @@ -506,16 +597,14 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp, int i, bool extra_pass) { struct bpf_insn *insn = &fp->insnsi[i]; - int jmp_off, last, insn_count = 1; u32 dst_reg = insn->dst_reg; u32 src_reg = insn->src_reg; + int last, insn_count = 1; u32 *addrs = jit->addrs; s32 imm = insn->imm; s16 off = insn->off; unsigned int mask; - if (dst_reg == BPF_REG_AX || src_reg == BPF_REG_AX) - jit->seen |= SEEN_REG_AX; switch (insn->code) { /* * BPF_MOV @@ -549,9 +638,8 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp, u64 imm64; imm64 = (u64)(u32) insn[0].imm | ((u64)(u32) insn[1].imm) << 32; - /* lg %dst,<d(imm)>(%l) */ - EMIT6_DISP_LH(0xe3000000, 0x0004, dst_reg, REG_0, REG_L, - EMIT_CONST_U64(imm64)); + /* lgrl %dst,imm */ + EMIT6_PCREL_RILB(0xc4080000, dst_reg, _EMIT_CONST_U64(imm64)); insn_count = 2; break; } @@ -680,9 +768,18 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp, EMIT4_IMM(0xa7080000, REG_W0, 0); /* lr %w1,%dst */ EMIT2(0x1800, REG_W1, dst_reg); - /* dl %w0,<d(imm)>(%l) */ - EMIT6_DISP_LH(0xe3000000, 0x0097, REG_W0, REG_0, REG_L, - EMIT_CONST_U32(imm)); + if (!is_first_pass(jit) && 
can_use_ldisp_for_lit32(jit)) { + /* dl %w0,<d(imm)>(%l) */ + EMIT6_DISP_LH(0xe3000000, 0x0097, REG_W0, REG_0, REG_L, + EMIT_CONST_U32(imm)); + } else { + /* lgfrl %dst,imm */ + EMIT6_PCREL_RILB(0xc40c0000, dst_reg, + _EMIT_CONST_U32(imm)); + jit->seen |= SEEN_LITERAL; + /* dlr %w0,%dst */ + EMIT4(0xb9970000, REG_W0, dst_reg); + } /* llgfr %dst,%rc */ EMIT4(0xb9160000, dst_reg, rc_reg); if (insn_is_zext(&insn[1])) @@ -704,9 +801,18 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp, EMIT4_IMM(0xa7090000, REG_W0, 0); /* lgr %w1,%dst */ EMIT4(0xb9040000, REG_W1, dst_reg); - /* dlg %w0,<d(imm)>(%l) */ - EMIT6_DISP_LH(0xe3000000, 0x0087, REG_W0, REG_0, REG_L, - EMIT_CONST_U64(imm)); + if (!is_first_pass(jit) && can_use_ldisp_for_lit64(jit)) { + /* dlg %w0,<d(imm)>(%l) */ + EMIT6_DISP_LH(0xe3000000, 0x0087, REG_W0, REG_0, REG_L, + EMIT_CONST_U64(imm)); + } else { + /* lgrl %dst,imm */ + EMIT6_PCREL_RILB(0xc4080000, dst_reg, + _EMIT_CONST_U64(imm)); + jit->seen |= SEEN_LITERAL; + /* dlgr %w0,%dst */ + EMIT4(0xb9870000, REG_W0, dst_reg); + } /* lgr %dst,%rc */ EMIT4(0xb9040000, dst_reg, rc_reg); break; @@ -729,9 +835,19 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp, EMIT_ZERO(dst_reg); break; case BPF_ALU64 | BPF_AND | BPF_K: /* dst = dst & imm */ - /* ng %dst,<d(imm)>(%l) */ - EMIT6_DISP_LH(0xe3000000, 0x0080, dst_reg, REG_0, REG_L, - EMIT_CONST_U64(imm)); + if (!is_first_pass(jit) && can_use_ldisp_for_lit64(jit)) { + /* ng %dst,<d(imm)>(%l) */ + EMIT6_DISP_LH(0xe3000000, 0x0080, + dst_reg, REG_0, REG_L, + EMIT_CONST_U64(imm)); + } else { + /* lgrl %w0,imm */ + EMIT6_PCREL_RILB(0xc4080000, REG_W0, + _EMIT_CONST_U64(imm)); + jit->seen |= SEEN_LITERAL; + /* ngr %dst,%w0 */ + EMIT4(0xb9800000, dst_reg, REG_W0); + } break; /* * BPF_OR @@ -751,9 +867,19 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp, EMIT_ZERO(dst_reg); break; case BPF_ALU64 | BPF_OR | BPF_K: /* dst = dst | imm */ - /* og %dst,<d(imm)>(%l) */ - EMIT6_DISP_LH(0xe3000000, 0x0081, dst_reg, REG_0, REG_L, - EMIT_CONST_U64(imm)); + if (!is_first_pass(jit) && can_use_ldisp_for_lit64(jit)) { + /* og %dst,<d(imm)>(%l) */ + EMIT6_DISP_LH(0xe3000000, 0x0081, + dst_reg, REG_0, REG_L, + EMIT_CONST_U64(imm)); + } else { + /* lgrl %w0,imm */ + EMIT6_PCREL_RILB(0xc4080000, REG_W0, + _EMIT_CONST_U64(imm)); + jit->seen |= SEEN_LITERAL; + /* ogr %dst,%w0 */ + EMIT4(0xb9810000, dst_reg, REG_W0); + } break; /* * BPF_XOR @@ -775,9 +901,19 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp, EMIT_ZERO(dst_reg); break; case BPF_ALU64 | BPF_XOR | BPF_K: /* dst = dst ^ imm */ - /* xg %dst,<d(imm)>(%l) */ - EMIT6_DISP_LH(0xe3000000, 0x0082, dst_reg, REG_0, REG_L, - EMIT_CONST_U64(imm)); + if (!is_first_pass(jit) && can_use_ldisp_for_lit64(jit)) { + /* xg %dst,<d(imm)>(%l) */ + EMIT6_DISP_LH(0xe3000000, 0x0082, + dst_reg, REG_0, REG_L, + EMIT_CONST_U64(imm)); + } else { + /* lgrl %w0,imm */ + EMIT6_PCREL_RILB(0xc4080000, REG_W0, + _EMIT_CONST_U64(imm)); + jit->seen |= SEEN_LITERAL; + /* xgr %dst,%w0 */ + EMIT4(0xb9820000, dst_reg, REG_W0); + } break; /* * BPF_LSH @@ -1023,9 +1159,8 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp, REG_SET_SEEN(BPF_REG_5); jit->seen |= SEEN_FUNC; - /* lg %w1,<d(imm)>(%l) */ - EMIT6_DISP_LH(0xe3000000, 0x0004, REG_W1, REG_0, REG_L, - EMIT_CONST_U64(func)); + /* lgrl %w1,func */ + EMIT6_PCREL_RILB(0xc4080000, REG_W1, _EMIT_CONST_U64(func)); if (__is_defined(CC_USING_EXPOLINE) && 
!nospec_disable) { /* brasl %r14,__s390_indirect_jump_r1 */ EMIT6_PCREL_RILB(0xc0050000, REG_14, jit->r1_thunk_ip); @@ -1054,9 +1189,17 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp, /* llgf %w1,map.max_entries(%b2) */ EMIT6_DISP_LH(0xe3000000, 0x0016, REG_W1, REG_0, BPF_REG_2, offsetof(struct bpf_array, map.max_entries)); - /* clrj %b3,%w1,0xa,label0: if (u32)%b3 >= (u32)%w1 goto out */ - EMIT6_PCREL_LABEL(0xec000000, 0x0077, BPF_REG_3, - REG_W1, 0, 0xa); + /* if ((u32)%b3 >= (u32)%w1) goto out; */ + if (!is_first_pass(jit) && can_use_rel(jit, jit->labels[0])) { + /* clrj %b3,%w1,0xa,label0 */ + EMIT6_PCREL_LABEL(0xec000000, 0x0077, BPF_REG_3, + REG_W1, 0, 0xa); + } else { + /* clr %b3,%w1 */ + EMIT2(0x1500, BPF_REG_3, REG_W1); + /* brcl 0xa,label0 */ + EMIT6_PCREL_RILC(0xc0040000, 0xa, jit->labels[0]); + } /* * if (tail_call_cnt++ > MAX_TAIL_CALL_CNT) @@ -1071,9 +1214,16 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp, EMIT4_IMM(0xa7080000, REG_W0, 1); /* laal %w1,%w0,off(%r15) */ EMIT6_DISP_LH(0xeb000000, 0x00fa, REG_W1, REG_W0, REG_15, off); - /* clij %w1,MAX_TAIL_CALL_CNT,0x2,label0 */ - EMIT6_PCREL_IMM_LABEL(0xec000000, 0x007f, REG_W1, - MAX_TAIL_CALL_CNT, 0, 0x2); + if (!is_first_pass(jit) && can_use_rel(jit, jit->labels[0])) { + /* clij %w1,MAX_TAIL_CALL_CNT,0x2,label0 */ + EMIT6_PCREL_IMM_LABEL(0xec000000, 0x007f, REG_W1, + MAX_TAIL_CALL_CNT, 0, 0x2); + } else { + /* clfi %w1,MAX_TAIL_CALL_CNT */ + EMIT6_IMM(0xc20f0000, REG_W1, MAX_TAIL_CALL_CNT); + /* brcl 0x2,label0 */ + EMIT6_PCREL_RILC(0xc0040000, 0x2, jit->labels[0]); + } /* * prog = array->ptrs[index]; @@ -1085,11 +1235,16 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp, EMIT4(0xb9160000, REG_1, BPF_REG_3); /* sllg %r1,%r1,3: %r1 *= 8 */ EMIT6_DISP_LH(0xeb000000, 0x000d, REG_1, REG_1, REG_0, 3); - /* lg %r1,prog(%b2,%r1) */ - EMIT6_DISP_LH(0xe3000000, 0x0004, REG_1, BPF_REG_2, + /* ltg %r1,prog(%b2,%r1) */ + EMIT6_DISP_LH(0xe3000000, 0x0002, REG_1, BPF_REG_2, REG_1, offsetof(struct bpf_array, ptrs)); - /* clgij %r1,0,0x8,label0 */ - EMIT6_PCREL_IMM_LABEL(0xec000000, 0x007d, REG_1, 0, 0, 0x8); + if (!is_first_pass(jit) && can_use_rel(jit, jit->labels[0])) { + /* brc 0x8,label0 */ + EMIT4_PCREL_RIC(0xa7040000, 0x8, jit->labels[0]); + } else { + /* brcl 0x8,label0 */ + EMIT6_PCREL_RILC(0xc0040000, 0x8, jit->labels[0]); + } /* * Restore registers before calling function @@ -1110,7 +1265,7 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp, break; case BPF_JMP | BPF_EXIT: /* return b0 */ last = (i == fp->len - 1) ? 1 : 0; - if (last && !(jit->seen & SEEN_RET0)) + if (last) break; /* j <exit> */ EMIT4_PCREL(0xa7f40000, jit->exit_ip - jit->prg); @@ -1246,36 +1401,83 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp, goto branch_oc; branch_ks: is_jmp32 = BPF_CLASS(insn->code) == BPF_JMP32; - /* lgfi %w1,imm (load sign extend imm) */ - EMIT6_IMM(0xc0010000, REG_W1, imm); - /* crj or cgrj %dst,%w1,mask,off */ - EMIT6_PCREL(0xec000000, (is_jmp32 ? 0x0076 : 0x0064), - dst_reg, REG_W1, i, off, mask); + /* cfi or cgfi %dst,imm */ + EMIT6_IMM(is_jmp32 ? 
0xc20d0000 : 0xc20c0000, + dst_reg, imm); + if (!is_first_pass(jit) && + can_use_rel(jit, addrs[i + off + 1])) { + /* brc mask,off */ + EMIT4_PCREL_RIC(0xa7040000, + mask >> 12, addrs[i + off + 1]); + } else { + /* brcl mask,off */ + EMIT6_PCREL_RILC(0xc0040000, + mask >> 12, addrs[i + off + 1]); + } break; branch_ku: is_jmp32 = BPF_CLASS(insn->code) == BPF_JMP32; - /* lgfi %w1,imm (load sign extend imm) */ - EMIT6_IMM(0xc0010000, REG_W1, imm); - /* clrj or clgrj %dst,%w1,mask,off */ - EMIT6_PCREL(0xec000000, (is_jmp32 ? 0x0077 : 0x0065), - dst_reg, REG_W1, i, off, mask); + /* clfi or clgfi %dst,imm */ + EMIT6_IMM(is_jmp32 ? 0xc20f0000 : 0xc20e0000, + dst_reg, imm); + if (!is_first_pass(jit) && + can_use_rel(jit, addrs[i + off + 1])) { + /* brc mask,off */ + EMIT4_PCREL_RIC(0xa7040000, + mask >> 12, addrs[i + off + 1]); + } else { + /* brcl mask,off */ + EMIT6_PCREL_RILC(0xc0040000, + mask >> 12, addrs[i + off + 1]); + } break; branch_xs: is_jmp32 = BPF_CLASS(insn->code) == BPF_JMP32; - /* crj or cgrj %dst,%src,mask,off */ - EMIT6_PCREL(0xec000000, (is_jmp32 ? 0x0076 : 0x0064), - dst_reg, src_reg, i, off, mask); + if (!is_first_pass(jit) && + can_use_rel(jit, addrs[i + off + 1])) { + /* crj or cgrj %dst,%src,mask,off */ + EMIT6_PCREL(0xec000000, (is_jmp32 ? 0x0076 : 0x0064), + dst_reg, src_reg, i, off, mask); + } else { + /* cr or cgr %dst,%src */ + if (is_jmp32) + EMIT2(0x1900, dst_reg, src_reg); + else + EMIT4(0xb9200000, dst_reg, src_reg); + /* brcl mask,off */ + EMIT6_PCREL_RILC(0xc0040000, + mask >> 12, addrs[i + off + 1]); + } break; branch_xu: is_jmp32 = BPF_CLASS(insn->code) == BPF_JMP32; - /* clrj or clgrj %dst,%src,mask,off */ - EMIT6_PCREL(0xec000000, (is_jmp32 ? 0x0077 : 0x0065), - dst_reg, src_reg, i, off, mask); + if (!is_first_pass(jit) && + can_use_rel(jit, addrs[i + off + 1])) { + /* clrj or clgrj %dst,%src,mask,off */ + EMIT6_PCREL(0xec000000, (is_jmp32 ? 
0x0077 : 0x0065), + dst_reg, src_reg, i, off, mask); + } else { + /* clr or clgr %dst,%src */ + if (is_jmp32) + EMIT2(0x1500, dst_reg, src_reg); + else + EMIT4(0xb9210000, dst_reg, src_reg); + /* brcl mask,off */ + EMIT6_PCREL_RILC(0xc0040000, + mask >> 12, addrs[i + off + 1]); + } break; branch_oc: - /* brc mask,jmp_off (branch instruction needs 4 bytes) */ - jmp_off = addrs[i + off + 1] - (addrs[i + 1] - 4); - EMIT4_PCREL(0xa7040000 | mask << 8, jmp_off); + if (!is_first_pass(jit) && + can_use_rel(jit, addrs[i + off + 1])) { + /* brc mask,off */ + EMIT4_PCREL_RIC(0xa7040000, + mask >> 12, addrs[i + off + 1]); + } else { + /* brcl mask,off */ + EMIT6_PCREL_RILC(0xc0040000, + mask >> 12, addrs[i + off + 1]); + } break; } default: /* too complex, give up */ @@ -1286,28 +1488,67 @@ branch_oc: } /* + * Return whether new i-th instruction address does not violate any invariant + */ +static bool bpf_is_new_addr_sane(struct bpf_jit *jit, int i) +{ + /* On the first pass anything goes */ + if (is_first_pass(jit)) + return true; + + /* The codegen pass must not change anything */ + if (is_codegen_pass(jit)) + return jit->addrs[i] == jit->prg; + + /* Passes in between must not increase code size */ + return jit->addrs[i] >= jit->prg; +} + +/* + * Update the address of i-th instruction + */ +static int bpf_set_addr(struct bpf_jit *jit, int i) +{ + if (!bpf_is_new_addr_sane(jit, i)) + return -1; + jit->addrs[i] = jit->prg; + return 0; +} + +/* * Compile eBPF program into s390x code */ static int bpf_jit_prog(struct bpf_jit *jit, struct bpf_prog *fp, bool extra_pass) { - int i, insn_count; + int i, insn_count, lit32_size, lit64_size; - jit->lit = jit->lit_start; + jit->lit32 = jit->lit32_start; + jit->lit64 = jit->lit64_start; jit->prg = 0; bpf_jit_prologue(jit, fp->aux->stack_depth); + if (bpf_set_addr(jit, 0) < 0) + return -1; for (i = 0; i < fp->len; i += insn_count) { insn_count = bpf_jit_insn(jit, fp, i, extra_pass); if (insn_count < 0) return -1; /* Next instruction address */ - jit->addrs[i + insn_count] = jit->prg; + if (bpf_set_addr(jit, i + insn_count) < 0) + return -1; } bpf_jit_epilogue(jit, fp->aux->stack_depth); - jit->lit_start = jit->prg; - jit->size = jit->lit; + lit32_size = jit->lit32 - jit->lit32_start; + lit64_size = jit->lit64 - jit->lit64_start; + jit->lit32_start = jit->prg; + if (lit32_size) + jit->lit32_start = ALIGN(jit->lit32_start, 4); + jit->lit64_start = jit->lit32_start + lit32_size; + if (lit64_size) + jit->lit64_start = ALIGN(jit->lit64_start, 8); + jit->size = jit->lit64_start + lit64_size; jit->size_prg = jit->prg; return 0; } @@ -1369,7 +1610,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp) } memset(&jit, 0, sizeof(jit)); - jit.addrs = kcalloc(fp->len + 1, sizeof(*jit.addrs), GFP_KERNEL); + jit.addrs = kvcalloc(fp->len + 1, sizeof(*jit.addrs), GFP_KERNEL); if (jit.addrs == NULL) { fp = orig_fp; goto out; @@ -1388,12 +1629,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp) /* * Final pass: Allocate and generate program */ - if (jit.size >= BPF_SIZE_MAX) { - fp = orig_fp; - goto free_addrs; - } - - header = bpf_jit_binary_alloc(jit.size, &jit.prg_buf, 2, jit_fill_hole); + header = bpf_jit_binary_alloc(jit.size, &jit.prg_buf, 8, jit_fill_hole); if (!header) { fp = orig_fp; goto free_addrs; @@ -1422,7 +1658,7 @@ skip_init_ctx: if (!fp->is_func || extra_pass) { bpf_prog_fill_jited_linfo(fp, jit.addrs + 1); free_addrs: - kfree(jit.addrs); + kvfree(jit.addrs); kfree(jit_data); fp->aux->jit_data = NULL; } diff --git a/arch/s390/pci/pci_clp.c 
b/arch/s390/pci/pci_clp.c index 9bdff4defef1..e585a62d6530 100644 --- a/arch/s390/pci/pci_clp.c +++ b/arch/s390/pci/pci_clp.c @@ -66,7 +66,7 @@ static inline int clp_get_ilp(unsigned long *ilp) /* * Call Logical Processor with c=0, the give constant lps and an lpcb request. */ -static inline int clp_req(void *data, unsigned int lps) +static __always_inline int clp_req(void *data, unsigned int lps) { struct { u8 _[CLP_BLK_SIZE]; } *req = data; u64 ignored; diff --git a/arch/sh/boards/mach-sdk7786/nmi.c b/arch/sh/boards/mach-sdk7786/nmi.c index c2e09d798537..afba49679a12 100644 --- a/arch/sh/boards/mach-sdk7786/nmi.c +++ b/arch/sh/boards/mach-sdk7786/nmi.c @@ -37,7 +37,7 @@ static int __init nmi_mode_setup(char *str) nmi_mode = NMI_MODE_ANY; else { nmi_mode = NMI_MODE_UNKNOWN; - pr_warning("Unknown NMI mode %s\n", str); + pr_warn("Unknown NMI mode %s\n", str); } printk("Set NMI mode to %d\n", nmi_mode); diff --git a/arch/sh/drivers/pci/fixups-sdk7786.c b/arch/sh/drivers/pci/fixups-sdk7786.c index 8cbfa5310a4b..6972af7b4e93 100644 --- a/arch/sh/drivers/pci/fixups-sdk7786.c +++ b/arch/sh/drivers/pci/fixups-sdk7786.c @@ -53,7 +53,7 @@ static int __init sdk7786_pci_init(void) /* Warn about forced rerouting if slot#3 is occupied */ if ((data & PCIECR_PRST3) == 0) { - pr_warning("Unreachable card detected in slot#3\n"); + pr_warn("Unreachable card detected in slot#3\n"); return -EBUSY; } } else diff --git a/arch/sh/kernel/io_trapped.c b/arch/sh/kernel/io_trapped.c index bacad6da4fe4..60c828a2b8a2 100644 --- a/arch/sh/kernel/io_trapped.c +++ b/arch/sh/kernel/io_trapped.c @@ -99,7 +99,7 @@ int register_trapped_io(struct trapped_io *tiop) return 0; bad: - pr_warning("unable to install trapped io filter\n"); + pr_warn("unable to install trapped io filter\n"); return -1; } EXPORT_SYMBOL_GPL(register_trapped_io); diff --git a/arch/sh/kernel/setup.c b/arch/sh/kernel/setup.c index 2c0e0f37a318..6ef341f6cfee 100644 --- a/arch/sh/kernel/setup.c +++ b/arch/sh/kernel/setup.c @@ -354,7 +354,7 @@ void __init setup_arch(char **cmdline_p) /* processor boot mode configuration */ int generic_mode_pins(void) { - pr_warning("generic_mode_pins(): missing mode pin configuration\n"); + pr_warn("generic_mode_pins(): missing mode pin configuration\n"); return 0; } diff --git a/arch/sh/mm/consistent.c b/arch/sh/mm/consistent.c index 792f36129062..3169a343a5ab 100644 --- a/arch/sh/mm/consistent.c +++ b/arch/sh/mm/consistent.c @@ -43,8 +43,7 @@ int __init platform_resource_setup_memory(struct platform_device *pdev, r = pdev->resource + pdev->num_resources - 1; if (r->flags) { - pr_warning("%s: unable to find empty space for resource\n", - name); + pr_warn("%s: unable to find empty space for resource\n", name); return -EINVAL; } @@ -54,7 +53,7 @@ int __init platform_resource_setup_memory(struct platform_device *pdev, buf = dma_alloc_coherent(&pdev->dev, memsize, &dma_handle, GFP_KERNEL); if (!buf) { - pr_warning("%s: unable to allocate memory\n", name); + pr_warn("%s: unable to allocate memory\n", name); return -ENOMEM; } diff --git a/arch/sparc/Kconfig b/arch/sparc/Kconfig index fbc1aecf0f94..eb24cb1afc11 100644 --- a/arch/sparc/Kconfig +++ b/arch/sparc/Kconfig @@ -29,7 +29,6 @@ config SPARC select RTC_DRV_M48T59 select RTC_SYSTOHC select HAVE_ARCH_JUMP_LABEL if SPARC64 - select HAVE_FAST_GUP if SPARC64 select GENERIC_IRQ_SHOW select ARCH_WANT_IPC_PARSE_VERSION select GENERIC_PCI_IOMAP diff --git a/arch/sparc/crypto/aes_glue.c b/arch/sparc/crypto/aes_glue.c index 7b946b3dee9d..0f5a501c95a9 100644 --- 
a/arch/sparc/crypto/aes_glue.c +++ b/arch/sparc/crypto/aes_glue.c @@ -24,6 +24,7 @@ #include <linux/types.h> #include <crypto/algapi.h> #include <crypto/aes.h> +#include <crypto/internal/skcipher.h> #include <asm/fpumacro.h> #include <asm/pstate.h> @@ -197,6 +198,12 @@ static int aes_set_key(struct crypto_tfm *tfm, const u8 *in_key, return 0; } +static int aes_set_key_skcipher(struct crypto_skcipher *tfm, const u8 *in_key, + unsigned int key_len) +{ + return aes_set_key(crypto_skcipher_tfm(tfm), in_key, key_len); +} + static void crypto_aes_encrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src) { struct crypto_sparc64_aes_ctx *ctx = crypto_tfm_ctx(tfm); @@ -211,131 +218,108 @@ static void crypto_aes_decrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src) ctx->ops->decrypt(&ctx->key[0], (const u32 *) src, (u32 *) dst); } -#define AES_BLOCK_MASK (~(AES_BLOCK_SIZE-1)) - -static int ecb_encrypt(struct blkcipher_desc *desc, - struct scatterlist *dst, struct scatterlist *src, - unsigned int nbytes) +static int ecb_encrypt(struct skcipher_request *req) { - struct crypto_sparc64_aes_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); - struct blkcipher_walk walk; + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + const struct crypto_sparc64_aes_ctx *ctx = crypto_skcipher_ctx(tfm); + struct skcipher_walk walk; + unsigned int nbytes; int err; - blkcipher_walk_init(&walk, dst, src, nbytes); - err = blkcipher_walk_virt(desc, &walk); - desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; + err = skcipher_walk_virt(&walk, req, true); + if (err) + return err; ctx->ops->load_encrypt_keys(&ctx->key[0]); - while ((nbytes = walk.nbytes)) { - unsigned int block_len = nbytes & AES_BLOCK_MASK; - - if (likely(block_len)) { - ctx->ops->ecb_encrypt(&ctx->key[0], - (const u64 *)walk.src.virt.addr, - (u64 *) walk.dst.virt.addr, - block_len); - } - nbytes &= AES_BLOCK_SIZE - 1; - err = blkcipher_walk_done(desc, &walk, nbytes); + while ((nbytes = walk.nbytes) != 0) { + ctx->ops->ecb_encrypt(&ctx->key[0], walk.src.virt.addr, + walk.dst.virt.addr, + round_down(nbytes, AES_BLOCK_SIZE)); + err = skcipher_walk_done(&walk, nbytes % AES_BLOCK_SIZE); } fprs_write(0); return err; } -static int ecb_decrypt(struct blkcipher_desc *desc, - struct scatterlist *dst, struct scatterlist *src, - unsigned int nbytes) +static int ecb_decrypt(struct skcipher_request *req) { - struct crypto_sparc64_aes_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); - struct blkcipher_walk walk; - u64 *key_end; + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + const struct crypto_sparc64_aes_ctx *ctx = crypto_skcipher_ctx(tfm); + const u64 *key_end; + struct skcipher_walk walk; + unsigned int nbytes; int err; - blkcipher_walk_init(&walk, dst, src, nbytes); - err = blkcipher_walk_virt(desc, &walk); - desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; + err = skcipher_walk_virt(&walk, req, true); + if (err) + return err; ctx->ops->load_decrypt_keys(&ctx->key[0]); key_end = &ctx->key[ctx->expanded_key_length / sizeof(u64)]; - while ((nbytes = walk.nbytes)) { - unsigned int block_len = nbytes & AES_BLOCK_MASK; - - if (likely(block_len)) { - ctx->ops->ecb_decrypt(key_end, - (const u64 *) walk.src.virt.addr, - (u64 *) walk.dst.virt.addr, block_len); - } - nbytes &= AES_BLOCK_SIZE - 1; - err = blkcipher_walk_done(desc, &walk, nbytes); + while ((nbytes = walk.nbytes) != 0) { + ctx->ops->ecb_decrypt(key_end, walk.src.virt.addr, + walk.dst.virt.addr, + round_down(nbytes, AES_BLOCK_SIZE)); + err = skcipher_walk_done(&walk, nbytes % AES_BLOCK_SIZE); } fprs_write(0); return 
err; } -static int cbc_encrypt(struct blkcipher_desc *desc, - struct scatterlist *dst, struct scatterlist *src, - unsigned int nbytes) +static int cbc_encrypt(struct skcipher_request *req) { - struct crypto_sparc64_aes_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); - struct blkcipher_walk walk; + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + const struct crypto_sparc64_aes_ctx *ctx = crypto_skcipher_ctx(tfm); + struct skcipher_walk walk; + unsigned int nbytes; int err; - blkcipher_walk_init(&walk, dst, src, nbytes); - err = blkcipher_walk_virt(desc, &walk); - desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; + err = skcipher_walk_virt(&walk, req, true); + if (err) + return err; ctx->ops->load_encrypt_keys(&ctx->key[0]); - while ((nbytes = walk.nbytes)) { - unsigned int block_len = nbytes & AES_BLOCK_MASK; - - if (likely(block_len)) { - ctx->ops->cbc_encrypt(&ctx->key[0], - (const u64 *)walk.src.virt.addr, - (u64 *) walk.dst.virt.addr, - block_len, (u64 *) walk.iv); - } - nbytes &= AES_BLOCK_SIZE - 1; - err = blkcipher_walk_done(desc, &walk, nbytes); + while ((nbytes = walk.nbytes) != 0) { + ctx->ops->cbc_encrypt(&ctx->key[0], walk.src.virt.addr, + walk.dst.virt.addr, + round_down(nbytes, AES_BLOCK_SIZE), + walk.iv); + err = skcipher_walk_done(&walk, nbytes % AES_BLOCK_SIZE); } fprs_write(0); return err; } -static int cbc_decrypt(struct blkcipher_desc *desc, - struct scatterlist *dst, struct scatterlist *src, - unsigned int nbytes) +static int cbc_decrypt(struct skcipher_request *req) { - struct crypto_sparc64_aes_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); - struct blkcipher_walk walk; - u64 *key_end; + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + const struct crypto_sparc64_aes_ctx *ctx = crypto_skcipher_ctx(tfm); + const u64 *key_end; + struct skcipher_walk walk; + unsigned int nbytes; int err; - blkcipher_walk_init(&walk, dst, src, nbytes); - err = blkcipher_walk_virt(desc, &walk); - desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; + err = skcipher_walk_virt(&walk, req, true); + if (err) + return err; ctx->ops->load_decrypt_keys(&ctx->key[0]); key_end = &ctx->key[ctx->expanded_key_length / sizeof(u64)]; - while ((nbytes = walk.nbytes)) { - unsigned int block_len = nbytes & AES_BLOCK_MASK; - - if (likely(block_len)) { - ctx->ops->cbc_decrypt(key_end, - (const u64 *) walk.src.virt.addr, - (u64 *) walk.dst.virt.addr, - block_len, (u64 *) walk.iv); - } - nbytes &= AES_BLOCK_SIZE - 1; - err = blkcipher_walk_done(desc, &walk, nbytes); + while ((nbytes = walk.nbytes) != 0) { + ctx->ops->cbc_decrypt(key_end, walk.src.virt.addr, + walk.dst.virt.addr, + round_down(nbytes, AES_BLOCK_SIZE), + walk.iv); + err = skcipher_walk_done(&walk, nbytes % AES_BLOCK_SIZE); } fprs_write(0); return err; } -static void ctr_crypt_final(struct crypto_sparc64_aes_ctx *ctx, - struct blkcipher_walk *walk) +static void ctr_crypt_final(const struct crypto_sparc64_aes_ctx *ctx, + struct skcipher_walk *walk) { u8 *ctrblk = walk->iv; u64 keystream[AES_BLOCK_SIZE / sizeof(u64)]; @@ -349,40 +333,35 @@ static void ctr_crypt_final(struct crypto_sparc64_aes_ctx *ctx, crypto_inc(ctrblk, AES_BLOCK_SIZE); } -static int ctr_crypt(struct blkcipher_desc *desc, - struct scatterlist *dst, struct scatterlist *src, - unsigned int nbytes) +static int ctr_crypt(struct skcipher_request *req) { - struct crypto_sparc64_aes_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); - struct blkcipher_walk walk; + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + const struct crypto_sparc64_aes_ctx *ctx = crypto_skcipher_ctx(tfm); + 
struct skcipher_walk walk; + unsigned int nbytes; int err; - blkcipher_walk_init(&walk, dst, src, nbytes); - err = blkcipher_walk_virt_block(desc, &walk, AES_BLOCK_SIZE); - desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; + err = skcipher_walk_virt(&walk, req, true); + if (err) + return err; ctx->ops->load_encrypt_keys(&ctx->key[0]); while ((nbytes = walk.nbytes) >= AES_BLOCK_SIZE) { - unsigned int block_len = nbytes & AES_BLOCK_MASK; - - if (likely(block_len)) { - ctx->ops->ctr_crypt(&ctx->key[0], - (const u64 *)walk.src.virt.addr, - (u64 *) walk.dst.virt.addr, - block_len, (u64 *) walk.iv); - } - nbytes &= AES_BLOCK_SIZE - 1; - err = blkcipher_walk_done(desc, &walk, nbytes); + ctx->ops->ctr_crypt(&ctx->key[0], walk.src.virt.addr, + walk.dst.virt.addr, + round_down(nbytes, AES_BLOCK_SIZE), + walk.iv); + err = skcipher_walk_done(&walk, nbytes % AES_BLOCK_SIZE); } if (walk.nbytes) { ctr_crypt_final(ctx, &walk); - err = blkcipher_walk_done(desc, &walk, 0); + err = skcipher_walk_done(&walk, 0); } fprs_write(0); return err; } -static struct crypto_alg algs[] = { { +static struct crypto_alg cipher_alg = { .cra_name = "aes", .cra_driver_name = "aes-sparc64", .cra_priority = SPARC_CR_OPCODE_PRIORITY, @@ -400,66 +379,53 @@ static struct crypto_alg algs[] = { { .cia_decrypt = crypto_aes_decrypt } } -}, { - .cra_name = "ecb(aes)", - .cra_driver_name = "ecb-aes-sparc64", - .cra_priority = SPARC_CR_OPCODE_PRIORITY, - .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, - .cra_blocksize = AES_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct crypto_sparc64_aes_ctx), - .cra_alignmask = 7, - .cra_type = &crypto_blkcipher_type, - .cra_module = THIS_MODULE, - .cra_u = { - .blkcipher = { - .min_keysize = AES_MIN_KEY_SIZE, - .max_keysize = AES_MAX_KEY_SIZE, - .setkey = aes_set_key, - .encrypt = ecb_encrypt, - .decrypt = ecb_decrypt, - }, - }, -}, { - .cra_name = "cbc(aes)", - .cra_driver_name = "cbc-aes-sparc64", - .cra_priority = SPARC_CR_OPCODE_PRIORITY, - .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, - .cra_blocksize = AES_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct crypto_sparc64_aes_ctx), - .cra_alignmask = 7, - .cra_type = &crypto_blkcipher_type, - .cra_module = THIS_MODULE, - .cra_u = { - .blkcipher = { - .min_keysize = AES_MIN_KEY_SIZE, - .max_keysize = AES_MAX_KEY_SIZE, - .ivsize = AES_BLOCK_SIZE, - .setkey = aes_set_key, - .encrypt = cbc_encrypt, - .decrypt = cbc_decrypt, - }, - }, -}, { - .cra_name = "ctr(aes)", - .cra_driver_name = "ctr-aes-sparc64", - .cra_priority = SPARC_CR_OPCODE_PRIORITY, - .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, - .cra_blocksize = 1, - .cra_ctxsize = sizeof(struct crypto_sparc64_aes_ctx), - .cra_alignmask = 7, - .cra_type = &crypto_blkcipher_type, - .cra_module = THIS_MODULE, - .cra_u = { - .blkcipher = { - .min_keysize = AES_MIN_KEY_SIZE, - .max_keysize = AES_MAX_KEY_SIZE, - .ivsize = AES_BLOCK_SIZE, - .setkey = aes_set_key, - .encrypt = ctr_crypt, - .decrypt = ctr_crypt, - }, - }, -} }; +}; + +static struct skcipher_alg skcipher_algs[] = { + { + .base.cra_name = "ecb(aes)", + .base.cra_driver_name = "ecb-aes-sparc64", + .base.cra_priority = SPARC_CR_OPCODE_PRIORITY, + .base.cra_blocksize = AES_BLOCK_SIZE, + .base.cra_ctxsize = sizeof(struct crypto_sparc64_aes_ctx), + .base.cra_alignmask = 7, + .base.cra_module = THIS_MODULE, + .min_keysize = AES_MIN_KEY_SIZE, + .max_keysize = AES_MAX_KEY_SIZE, + .setkey = aes_set_key_skcipher, + .encrypt = ecb_encrypt, + .decrypt = ecb_decrypt, + }, { + .base.cra_name = "cbc(aes)", + .base.cra_driver_name = "cbc-aes-sparc64", + .base.cra_priority = 
SPARC_CR_OPCODE_PRIORITY, + .base.cra_blocksize = AES_BLOCK_SIZE, + .base.cra_ctxsize = sizeof(struct crypto_sparc64_aes_ctx), + .base.cra_alignmask = 7, + .base.cra_module = THIS_MODULE, + .min_keysize = AES_MIN_KEY_SIZE, + .max_keysize = AES_MAX_KEY_SIZE, + .ivsize = AES_BLOCK_SIZE, + .setkey = aes_set_key_skcipher, + .encrypt = cbc_encrypt, + .decrypt = cbc_decrypt, + }, { + .base.cra_name = "ctr(aes)", + .base.cra_driver_name = "ctr-aes-sparc64", + .base.cra_priority = SPARC_CR_OPCODE_PRIORITY, + .base.cra_blocksize = 1, + .base.cra_ctxsize = sizeof(struct crypto_sparc64_aes_ctx), + .base.cra_alignmask = 7, + .base.cra_module = THIS_MODULE, + .min_keysize = AES_MIN_KEY_SIZE, + .max_keysize = AES_MAX_KEY_SIZE, + .ivsize = AES_BLOCK_SIZE, + .setkey = aes_set_key_skcipher, + .encrypt = ctr_crypt, + .decrypt = ctr_crypt, + .chunksize = AES_BLOCK_SIZE, + } +}; static bool __init sparc64_has_aes_opcode(void) { @@ -477,17 +443,27 @@ static bool __init sparc64_has_aes_opcode(void) static int __init aes_sparc64_mod_init(void) { - if (sparc64_has_aes_opcode()) { - pr_info("Using sparc64 aes opcodes optimized AES implementation\n"); - return crypto_register_algs(algs, ARRAY_SIZE(algs)); + int err; + + if (!sparc64_has_aes_opcode()) { + pr_info("sparc64 aes opcodes not available.\n"); + return -ENODEV; } - pr_info("sparc64 aes opcodes not available.\n"); - return -ENODEV; + pr_info("Using sparc64 aes opcodes optimized AES implementation\n"); + err = crypto_register_alg(&cipher_alg); + if (err) + return err; + err = crypto_register_skciphers(skcipher_algs, + ARRAY_SIZE(skcipher_algs)); + if (err) + crypto_unregister_alg(&cipher_alg); + return err; } static void __exit aes_sparc64_mod_fini(void) { - crypto_unregister_algs(algs, ARRAY_SIZE(algs)); + crypto_unregister_alg(&cipher_alg); + crypto_unregister_skciphers(skcipher_algs, ARRAY_SIZE(skcipher_algs)); } module_init(aes_sparc64_mod_init); diff --git a/arch/sparc/crypto/camellia_glue.c b/arch/sparc/crypto/camellia_glue.c index 3823f9491a72..1700f863748c 100644 --- a/arch/sparc/crypto/camellia_glue.c +++ b/arch/sparc/crypto/camellia_glue.c @@ -12,6 +12,7 @@ #include <linux/mm.h> #include <linux/types.h> #include <crypto/algapi.h> +#include <crypto/internal/skcipher.h> #include <asm/fpumacro.h> #include <asm/pstate.h> @@ -52,6 +53,12 @@ static int camellia_set_key(struct crypto_tfm *tfm, const u8 *_in_key, return 0; } +static int camellia_set_key_skcipher(struct crypto_skcipher *tfm, + const u8 *in_key, unsigned int key_len) +{ + return camellia_set_key(crypto_skcipher_tfm(tfm), in_key, key_len); +} + extern void camellia_sparc64_crypt(const u64 *key, const u32 *input, u32 *output, unsigned int key_len); @@ -81,61 +88,46 @@ typedef void ecb_crypt_op(const u64 *input, u64 *output, unsigned int len, extern ecb_crypt_op camellia_sparc64_ecb_crypt_3_grand_rounds; extern ecb_crypt_op camellia_sparc64_ecb_crypt_4_grand_rounds; -#define CAMELLIA_BLOCK_MASK (~(CAMELLIA_BLOCK_SIZE - 1)) - -static int __ecb_crypt(struct blkcipher_desc *desc, - struct scatterlist *dst, struct scatterlist *src, - unsigned int nbytes, bool encrypt) +static int __ecb_crypt(struct skcipher_request *req, bool encrypt) { - struct camellia_sparc64_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); - struct blkcipher_walk walk; + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + const struct camellia_sparc64_ctx *ctx = crypto_skcipher_ctx(tfm); + struct skcipher_walk walk; ecb_crypt_op *op; const u64 *key; + unsigned int nbytes; int err; op = 
camellia_sparc64_ecb_crypt_3_grand_rounds; if (ctx->key_len != 16) op = camellia_sparc64_ecb_crypt_4_grand_rounds; - blkcipher_walk_init(&walk, dst, src, nbytes); - err = blkcipher_walk_virt(desc, &walk); - desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; + err = skcipher_walk_virt(&walk, req, true); + if (err) + return err; if (encrypt) key = &ctx->encrypt_key[0]; else key = &ctx->decrypt_key[0]; camellia_sparc64_load_keys(key, ctx->key_len); - while ((nbytes = walk.nbytes)) { - unsigned int block_len = nbytes & CAMELLIA_BLOCK_MASK; - - if (likely(block_len)) { - const u64 *src64; - u64 *dst64; - - src64 = (const u64 *)walk.src.virt.addr; - dst64 = (u64 *) walk.dst.virt.addr; - op(src64, dst64, block_len, key); - } - nbytes &= CAMELLIA_BLOCK_SIZE - 1; - err = blkcipher_walk_done(desc, &walk, nbytes); + while ((nbytes = walk.nbytes) != 0) { + op(walk.src.virt.addr, walk.dst.virt.addr, + round_down(nbytes, CAMELLIA_BLOCK_SIZE), key); + err = skcipher_walk_done(&walk, nbytes % CAMELLIA_BLOCK_SIZE); } fprs_write(0); return err; } -static int ecb_encrypt(struct blkcipher_desc *desc, - struct scatterlist *dst, struct scatterlist *src, - unsigned int nbytes) +static int ecb_encrypt(struct skcipher_request *req) { - return __ecb_crypt(desc, dst, src, nbytes, true); + return __ecb_crypt(req, true); } -static int ecb_decrypt(struct blkcipher_desc *desc, - struct scatterlist *dst, struct scatterlist *src, - unsigned int nbytes) +static int ecb_decrypt(struct skcipher_request *req) { - return __ecb_crypt(desc, dst, src, nbytes, false); + return __ecb_crypt(req, false); } typedef void cbc_crypt_op(const u64 *input, u64 *output, unsigned int len, @@ -146,85 +138,65 @@ extern cbc_crypt_op camellia_sparc64_cbc_encrypt_4_grand_rounds; extern cbc_crypt_op camellia_sparc64_cbc_decrypt_3_grand_rounds; extern cbc_crypt_op camellia_sparc64_cbc_decrypt_4_grand_rounds; -static int cbc_encrypt(struct blkcipher_desc *desc, - struct scatterlist *dst, struct scatterlist *src, - unsigned int nbytes) +static int cbc_encrypt(struct skcipher_request *req) { - struct camellia_sparc64_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); - struct blkcipher_walk walk; + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + const struct camellia_sparc64_ctx *ctx = crypto_skcipher_ctx(tfm); + struct skcipher_walk walk; cbc_crypt_op *op; const u64 *key; + unsigned int nbytes; int err; op = camellia_sparc64_cbc_encrypt_3_grand_rounds; if (ctx->key_len != 16) op = camellia_sparc64_cbc_encrypt_4_grand_rounds; - blkcipher_walk_init(&walk, dst, src, nbytes); - err = blkcipher_walk_virt(desc, &walk); - desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; + err = skcipher_walk_virt(&walk, req, true); + if (err) + return err; key = &ctx->encrypt_key[0]; camellia_sparc64_load_keys(key, ctx->key_len); - while ((nbytes = walk.nbytes)) { - unsigned int block_len = nbytes & CAMELLIA_BLOCK_MASK; - - if (likely(block_len)) { - const u64 *src64; - u64 *dst64; - - src64 = (const u64 *)walk.src.virt.addr; - dst64 = (u64 *) walk.dst.virt.addr; - op(src64, dst64, block_len, key, - (u64 *) walk.iv); - } - nbytes &= CAMELLIA_BLOCK_SIZE - 1; - err = blkcipher_walk_done(desc, &walk, nbytes); + while ((nbytes = walk.nbytes) != 0) { + op(walk.src.virt.addr, walk.dst.virt.addr, + round_down(nbytes, CAMELLIA_BLOCK_SIZE), key, walk.iv); + err = skcipher_walk_done(&walk, nbytes % CAMELLIA_BLOCK_SIZE); } fprs_write(0); return err; } -static int cbc_decrypt(struct blkcipher_desc *desc, - struct scatterlist *dst, struct scatterlist *src, - unsigned int nbytes) +static int 
cbc_decrypt(struct skcipher_request *req) { - struct camellia_sparc64_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); - struct blkcipher_walk walk; + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + const struct camellia_sparc64_ctx *ctx = crypto_skcipher_ctx(tfm); + struct skcipher_walk walk; cbc_crypt_op *op; const u64 *key; + unsigned int nbytes; int err; op = camellia_sparc64_cbc_decrypt_3_grand_rounds; if (ctx->key_len != 16) op = camellia_sparc64_cbc_decrypt_4_grand_rounds; - blkcipher_walk_init(&walk, dst, src, nbytes); - err = blkcipher_walk_virt(desc, &walk); - desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; + err = skcipher_walk_virt(&walk, req, true); + if (err) + return err; key = &ctx->decrypt_key[0]; camellia_sparc64_load_keys(key, ctx->key_len); - while ((nbytes = walk.nbytes)) { - unsigned int block_len = nbytes & CAMELLIA_BLOCK_MASK; - - if (likely(block_len)) { - const u64 *src64; - u64 *dst64; - - src64 = (const u64 *)walk.src.virt.addr; - dst64 = (u64 *) walk.dst.virt.addr; - op(src64, dst64, block_len, key, - (u64 *) walk.iv); - } - nbytes &= CAMELLIA_BLOCK_SIZE - 1; - err = blkcipher_walk_done(desc, &walk, nbytes); + while ((nbytes = walk.nbytes) != 0) { + op(walk.src.virt.addr, walk.dst.virt.addr, + round_down(nbytes, CAMELLIA_BLOCK_SIZE), key, walk.iv); + err = skcipher_walk_done(&walk, nbytes % CAMELLIA_BLOCK_SIZE); } fprs_write(0); return err; } -static struct crypto_alg algs[] = { { +static struct crypto_alg cipher_alg = { .cra_name = "camellia", .cra_driver_name = "camellia-sparc64", .cra_priority = SPARC_CR_OPCODE_PRIORITY, @@ -242,46 +214,37 @@ static struct crypto_alg algs[] = { { .cia_decrypt = camellia_decrypt } } -}, { - .cra_name = "ecb(camellia)", - .cra_driver_name = "ecb-camellia-sparc64", - .cra_priority = SPARC_CR_OPCODE_PRIORITY, - .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, - .cra_blocksize = CAMELLIA_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct camellia_sparc64_ctx), - .cra_alignmask = 7, - .cra_type = &crypto_blkcipher_type, - .cra_module = THIS_MODULE, - .cra_u = { - .blkcipher = { - .min_keysize = CAMELLIA_MIN_KEY_SIZE, - .max_keysize = CAMELLIA_MAX_KEY_SIZE, - .setkey = camellia_set_key, - .encrypt = ecb_encrypt, - .decrypt = ecb_decrypt, - }, - }, -}, { - .cra_name = "cbc(camellia)", - .cra_driver_name = "cbc-camellia-sparc64", - .cra_priority = SPARC_CR_OPCODE_PRIORITY, - .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, - .cra_blocksize = CAMELLIA_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct camellia_sparc64_ctx), - .cra_alignmask = 7, - .cra_type = &crypto_blkcipher_type, - .cra_module = THIS_MODULE, - .cra_u = { - .blkcipher = { - .min_keysize = CAMELLIA_MIN_KEY_SIZE, - .max_keysize = CAMELLIA_MAX_KEY_SIZE, - .ivsize = CAMELLIA_BLOCK_SIZE, - .setkey = camellia_set_key, - .encrypt = cbc_encrypt, - .decrypt = cbc_decrypt, - }, - }, -} +}; + +static struct skcipher_alg skcipher_algs[] = { + { + .base.cra_name = "ecb(camellia)", + .base.cra_driver_name = "ecb-camellia-sparc64", + .base.cra_priority = SPARC_CR_OPCODE_PRIORITY, + .base.cra_blocksize = CAMELLIA_BLOCK_SIZE, + .base.cra_ctxsize = sizeof(struct camellia_sparc64_ctx), + .base.cra_alignmask = 7, + .base.cra_module = THIS_MODULE, + .min_keysize = CAMELLIA_MIN_KEY_SIZE, + .max_keysize = CAMELLIA_MAX_KEY_SIZE, + .setkey = camellia_set_key_skcipher, + .encrypt = ecb_encrypt, + .decrypt = ecb_decrypt, + }, { + .base.cra_name = "cbc(camellia)", + .base.cra_driver_name = "cbc-camellia-sparc64", + .base.cra_priority = SPARC_CR_OPCODE_PRIORITY, + .base.cra_blocksize = CAMELLIA_BLOCK_SIZE, + .base.cra_ctxsize 
= sizeof(struct camellia_sparc64_ctx), + .base.cra_alignmask = 7, + .base.cra_module = THIS_MODULE, + .min_keysize = CAMELLIA_MIN_KEY_SIZE, + .max_keysize = CAMELLIA_MAX_KEY_SIZE, + .ivsize = CAMELLIA_BLOCK_SIZE, + .setkey = camellia_set_key_skcipher, + .encrypt = cbc_encrypt, + .decrypt = cbc_decrypt, + } }; static bool __init sparc64_has_camellia_opcode(void) @@ -300,17 +263,27 @@ static bool __init sparc64_has_camellia_opcode(void) static int __init camellia_sparc64_mod_init(void) { - if (sparc64_has_camellia_opcode()) { - pr_info("Using sparc64 camellia opcodes optimized CAMELLIA implementation\n"); - return crypto_register_algs(algs, ARRAY_SIZE(algs)); + int err; + + if (!sparc64_has_camellia_opcode()) { + pr_info("sparc64 camellia opcodes not available.\n"); + return -ENODEV; } - pr_info("sparc64 camellia opcodes not available.\n"); - return -ENODEV; + pr_info("Using sparc64 camellia opcodes optimized CAMELLIA implementation\n"); + err = crypto_register_alg(&cipher_alg); + if (err) + return err; + err = crypto_register_skciphers(skcipher_algs, + ARRAY_SIZE(skcipher_algs)); + if (err) + crypto_unregister_alg(&cipher_alg); + return err; } static void __exit camellia_sparc64_mod_fini(void) { - crypto_unregister_algs(algs, ARRAY_SIZE(algs)); + crypto_unregister_alg(&cipher_alg); + crypto_unregister_skciphers(skcipher_algs, ARRAY_SIZE(skcipher_algs)); } module_init(camellia_sparc64_mod_init); diff --git a/arch/sparc/crypto/des_glue.c b/arch/sparc/crypto/des_glue.c index db6010b4e52e..a499102bf706 100644 --- a/arch/sparc/crypto/des_glue.c +++ b/arch/sparc/crypto/des_glue.c @@ -13,6 +13,7 @@ #include <linux/types.h> #include <crypto/algapi.h> #include <crypto/internal/des.h> +#include <crypto/internal/skcipher.h> #include <asm/fpumacro.h> #include <asm/pstate.h> @@ -61,6 +62,12 @@ static int des_set_key(struct crypto_tfm *tfm, const u8 *key, return 0; } +static int des_set_key_skcipher(struct crypto_skcipher *tfm, const u8 *key, + unsigned int keylen) +{ + return des_set_key(crypto_skcipher_tfm(tfm), key, keylen); +} + extern void des_sparc64_crypt(const u64 *key, const u64 *input, u64 *output); @@ -85,113 +92,90 @@ extern void des_sparc64_load_keys(const u64 *key); extern void des_sparc64_ecb_crypt(const u64 *input, u64 *output, unsigned int len); -#define DES_BLOCK_MASK (~(DES_BLOCK_SIZE - 1)) - -static int __ecb_crypt(struct blkcipher_desc *desc, - struct scatterlist *dst, struct scatterlist *src, - unsigned int nbytes, bool encrypt) +static int __ecb_crypt(struct skcipher_request *req, bool encrypt) { - struct des_sparc64_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); - struct blkcipher_walk walk; + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + const struct des_sparc64_ctx *ctx = crypto_skcipher_ctx(tfm); + struct skcipher_walk walk; + unsigned int nbytes; int err; - blkcipher_walk_init(&walk, dst, src, nbytes); - err = blkcipher_walk_virt(desc, &walk); - desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; + err = skcipher_walk_virt(&walk, req, true); + if (err) + return err; if (encrypt) des_sparc64_load_keys(&ctx->encrypt_expkey[0]); else des_sparc64_load_keys(&ctx->decrypt_expkey[0]); - while ((nbytes = walk.nbytes)) { - unsigned int block_len = nbytes & DES_BLOCK_MASK; - - if (likely(block_len)) { - des_sparc64_ecb_crypt((const u64 *)walk.src.virt.addr, - (u64 *) walk.dst.virt.addr, - block_len); - } - nbytes &= DES_BLOCK_SIZE - 1; - err = blkcipher_walk_done(desc, &walk, nbytes); + while ((nbytes = walk.nbytes) != 0) { + des_sparc64_ecb_crypt(walk.src.virt.addr, 
walk.dst.virt.addr, + round_down(nbytes, DES_BLOCK_SIZE)); + err = skcipher_walk_done(&walk, nbytes % DES_BLOCK_SIZE); } fprs_write(0); return err; } -static int ecb_encrypt(struct blkcipher_desc *desc, - struct scatterlist *dst, struct scatterlist *src, - unsigned int nbytes) +static int ecb_encrypt(struct skcipher_request *req) { - return __ecb_crypt(desc, dst, src, nbytes, true); + return __ecb_crypt(req, true); } -static int ecb_decrypt(struct blkcipher_desc *desc, - struct scatterlist *dst, struct scatterlist *src, - unsigned int nbytes) +static int ecb_decrypt(struct skcipher_request *req) { - return __ecb_crypt(desc, dst, src, nbytes, false); + return __ecb_crypt(req, false); } extern void des_sparc64_cbc_encrypt(const u64 *input, u64 *output, unsigned int len, u64 *iv); -static int cbc_encrypt(struct blkcipher_desc *desc, - struct scatterlist *dst, struct scatterlist *src, - unsigned int nbytes) +extern void des_sparc64_cbc_decrypt(const u64 *input, u64 *output, + unsigned int len, u64 *iv); + +static int __cbc_crypt(struct skcipher_request *req, bool encrypt) { - struct des_sparc64_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); - struct blkcipher_walk walk; + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + const struct des_sparc64_ctx *ctx = crypto_skcipher_ctx(tfm); + struct skcipher_walk walk; + unsigned int nbytes; int err; - blkcipher_walk_init(&walk, dst, src, nbytes); - err = blkcipher_walk_virt(desc, &walk); - desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; - - des_sparc64_load_keys(&ctx->encrypt_expkey[0]); - while ((nbytes = walk.nbytes)) { - unsigned int block_len = nbytes & DES_BLOCK_MASK; + err = skcipher_walk_virt(&walk, req, true); + if (err) + return err; - if (likely(block_len)) { - des_sparc64_cbc_encrypt((const u64 *)walk.src.virt.addr, - (u64 *) walk.dst.virt.addr, - block_len, (u64 *) walk.iv); - } - nbytes &= DES_BLOCK_SIZE - 1; - err = blkcipher_walk_done(desc, &walk, nbytes); + if (encrypt) + des_sparc64_load_keys(&ctx->encrypt_expkey[0]); + else + des_sparc64_load_keys(&ctx->decrypt_expkey[0]); + while ((nbytes = walk.nbytes) != 0) { + if (encrypt) + des_sparc64_cbc_encrypt(walk.src.virt.addr, + walk.dst.virt.addr, + round_down(nbytes, + DES_BLOCK_SIZE), + walk.iv); + else + des_sparc64_cbc_decrypt(walk.src.virt.addr, + walk.dst.virt.addr, + round_down(nbytes, + DES_BLOCK_SIZE), + walk.iv); + err = skcipher_walk_done(&walk, nbytes % DES_BLOCK_SIZE); } fprs_write(0); return err; } -extern void des_sparc64_cbc_decrypt(const u64 *input, u64 *output, - unsigned int len, u64 *iv); - -static int cbc_decrypt(struct blkcipher_desc *desc, - struct scatterlist *dst, struct scatterlist *src, - unsigned int nbytes) +static int cbc_encrypt(struct skcipher_request *req) { - struct des_sparc64_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); - struct blkcipher_walk walk; - int err; - - blkcipher_walk_init(&walk, dst, src, nbytes); - err = blkcipher_walk_virt(desc, &walk); - desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; - - des_sparc64_load_keys(&ctx->decrypt_expkey[0]); - while ((nbytes = walk.nbytes)) { - unsigned int block_len = nbytes & DES_BLOCK_MASK; + return __cbc_crypt(req, true); +} - if (likely(block_len)) { - des_sparc64_cbc_decrypt((const u64 *)walk.src.virt.addr, - (u64 *) walk.dst.virt.addr, - block_len, (u64 *) walk.iv); - } - nbytes &= DES_BLOCK_SIZE - 1; - err = blkcipher_walk_done(desc, &walk, nbytes); - } - fprs_write(0); - return err; +static int cbc_decrypt(struct skcipher_request *req) +{ + return __cbc_crypt(req, false); } static int 
des3_ede_set_key(struct crypto_tfm *tfm, const u8 *key, @@ -227,6 +211,12 @@ static int des3_ede_set_key(struct crypto_tfm *tfm, const u8 *key, return 0; } +static int des3_ede_set_key_skcipher(struct crypto_skcipher *tfm, const u8 *key, + unsigned int keylen) +{ + return des3_ede_set_key(crypto_skcipher_tfm(tfm), key, keylen); +} + extern void des3_ede_sparc64_crypt(const u64 *key, const u64 *input, u64 *output); @@ -251,241 +241,196 @@ extern void des3_ede_sparc64_load_keys(const u64 *key); extern void des3_ede_sparc64_ecb_crypt(const u64 *expkey, const u64 *input, u64 *output, unsigned int len); -static int __ecb3_crypt(struct blkcipher_desc *desc, - struct scatterlist *dst, struct scatterlist *src, - unsigned int nbytes, bool encrypt) +static int __ecb3_crypt(struct skcipher_request *req, bool encrypt) { - struct des3_ede_sparc64_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); - struct blkcipher_walk walk; + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + const struct des3_ede_sparc64_ctx *ctx = crypto_skcipher_ctx(tfm); + struct skcipher_walk walk; const u64 *K; + unsigned int nbytes; int err; - blkcipher_walk_init(&walk, dst, src, nbytes); - err = blkcipher_walk_virt(desc, &walk); - desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; + err = skcipher_walk_virt(&walk, req, true); + if (err) + return err; if (encrypt) K = &ctx->encrypt_expkey[0]; else K = &ctx->decrypt_expkey[0]; des3_ede_sparc64_load_keys(K); - while ((nbytes = walk.nbytes)) { - unsigned int block_len = nbytes & DES_BLOCK_MASK; - - if (likely(block_len)) { - const u64 *src64 = (const u64 *)walk.src.virt.addr; - des3_ede_sparc64_ecb_crypt(K, src64, - (u64 *) walk.dst.virt.addr, - block_len); - } - nbytes &= DES_BLOCK_SIZE - 1; - err = blkcipher_walk_done(desc, &walk, nbytes); + while ((nbytes = walk.nbytes) != 0) { + des3_ede_sparc64_ecb_crypt(K, walk.src.virt.addr, + walk.dst.virt.addr, + round_down(nbytes, DES_BLOCK_SIZE)); + err = skcipher_walk_done(&walk, nbytes % DES_BLOCK_SIZE); } fprs_write(0); return err; } -static int ecb3_encrypt(struct blkcipher_desc *desc, - struct scatterlist *dst, struct scatterlist *src, - unsigned int nbytes) +static int ecb3_encrypt(struct skcipher_request *req) { - return __ecb3_crypt(desc, dst, src, nbytes, true); + return __ecb3_crypt(req, true); } -static int ecb3_decrypt(struct blkcipher_desc *desc, - struct scatterlist *dst, struct scatterlist *src, - unsigned int nbytes) +static int ecb3_decrypt(struct skcipher_request *req) { - return __ecb3_crypt(desc, dst, src, nbytes, false); + return __ecb3_crypt(req, false); } extern void des3_ede_sparc64_cbc_encrypt(const u64 *expkey, const u64 *input, u64 *output, unsigned int len, u64 *iv); -static int cbc3_encrypt(struct blkcipher_desc *desc, - struct scatterlist *dst, struct scatterlist *src, - unsigned int nbytes) -{ - struct des3_ede_sparc64_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); - struct blkcipher_walk walk; - const u64 *K; - int err; - - blkcipher_walk_init(&walk, dst, src, nbytes); - err = blkcipher_walk_virt(desc, &walk); - desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; - - K = &ctx->encrypt_expkey[0]; - des3_ede_sparc64_load_keys(K); - while ((nbytes = walk.nbytes)) { - unsigned int block_len = nbytes & DES_BLOCK_MASK; - - if (likely(block_len)) { - const u64 *src64 = (const u64 *)walk.src.virt.addr; - des3_ede_sparc64_cbc_encrypt(K, src64, - (u64 *) walk.dst.virt.addr, - block_len, - (u64 *) walk.iv); - } - nbytes &= DES_BLOCK_SIZE - 1; - err = blkcipher_walk_done(desc, &walk, nbytes); - } - fprs_write(0); - return err; -} - 
extern void des3_ede_sparc64_cbc_decrypt(const u64 *expkey, const u64 *input, u64 *output, unsigned int len, u64 *iv); -static int cbc3_decrypt(struct blkcipher_desc *desc, - struct scatterlist *dst, struct scatterlist *src, - unsigned int nbytes) +static int __cbc3_crypt(struct skcipher_request *req, bool encrypt) { - struct des3_ede_sparc64_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); - struct blkcipher_walk walk; + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + const struct des3_ede_sparc64_ctx *ctx = crypto_skcipher_ctx(tfm); + struct skcipher_walk walk; const u64 *K; + unsigned int nbytes; int err; - blkcipher_walk_init(&walk, dst, src, nbytes); - err = blkcipher_walk_virt(desc, &walk); - desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; + err = skcipher_walk_virt(&walk, req, true); + if (err) + return err; - K = &ctx->decrypt_expkey[0]; + if (encrypt) + K = &ctx->encrypt_expkey[0]; + else + K = &ctx->decrypt_expkey[0]; des3_ede_sparc64_load_keys(K); - while ((nbytes = walk.nbytes)) { - unsigned int block_len = nbytes & DES_BLOCK_MASK; - - if (likely(block_len)) { - const u64 *src64 = (const u64 *)walk.src.virt.addr; - des3_ede_sparc64_cbc_decrypt(K, src64, - (u64 *) walk.dst.virt.addr, - block_len, - (u64 *) walk.iv); - } - nbytes &= DES_BLOCK_SIZE - 1; - err = blkcipher_walk_done(desc, &walk, nbytes); + while ((nbytes = walk.nbytes) != 0) { + if (encrypt) + des3_ede_sparc64_cbc_encrypt(K, walk.src.virt.addr, + walk.dst.virt.addr, + round_down(nbytes, + DES_BLOCK_SIZE), + walk.iv); + else + des3_ede_sparc64_cbc_decrypt(K, walk.src.virt.addr, + walk.dst.virt.addr, + round_down(nbytes, + DES_BLOCK_SIZE), + walk.iv); + err = skcipher_walk_done(&walk, nbytes % DES_BLOCK_SIZE); } fprs_write(0); return err; } -static struct crypto_alg algs[] = { { - .cra_name = "des", - .cra_driver_name = "des-sparc64", - .cra_priority = SPARC_CR_OPCODE_PRIORITY, - .cra_flags = CRYPTO_ALG_TYPE_CIPHER, - .cra_blocksize = DES_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct des_sparc64_ctx), - .cra_alignmask = 7, - .cra_module = THIS_MODULE, - .cra_u = { - .cipher = { - .cia_min_keysize = DES_KEY_SIZE, - .cia_max_keysize = DES_KEY_SIZE, - .cia_setkey = des_set_key, - .cia_encrypt = sparc_des_encrypt, - .cia_decrypt = sparc_des_decrypt +static int cbc3_encrypt(struct skcipher_request *req) +{ + return __cbc3_crypt(req, true); +} + +static int cbc3_decrypt(struct skcipher_request *req) +{ + return __cbc3_crypt(req, false); +} + +static struct crypto_alg cipher_algs[] = { + { + .cra_name = "des", + .cra_driver_name = "des-sparc64", + .cra_priority = SPARC_CR_OPCODE_PRIORITY, + .cra_flags = CRYPTO_ALG_TYPE_CIPHER, + .cra_blocksize = DES_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct des_sparc64_ctx), + .cra_alignmask = 7, + .cra_module = THIS_MODULE, + .cra_u = { + .cipher = { + .cia_min_keysize = DES_KEY_SIZE, + .cia_max_keysize = DES_KEY_SIZE, + .cia_setkey = des_set_key, + .cia_encrypt = sparc_des_encrypt, + .cia_decrypt = sparc_des_decrypt + } } - } -}, { - .cra_name = "ecb(des)", - .cra_driver_name = "ecb-des-sparc64", - .cra_priority = SPARC_CR_OPCODE_PRIORITY, - .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, - .cra_blocksize = DES_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct des_sparc64_ctx), - .cra_alignmask = 7, - .cra_type = &crypto_blkcipher_type, - .cra_module = THIS_MODULE, - .cra_u = { - .blkcipher = { - .min_keysize = DES_KEY_SIZE, - .max_keysize = DES_KEY_SIZE, - .setkey = des_set_key, - .encrypt = ecb_encrypt, - .decrypt = ecb_decrypt, - }, - }, -}, { - .cra_name = "cbc(des)", - .cra_driver_name = 
"cbc-des-sparc64", - .cra_priority = SPARC_CR_OPCODE_PRIORITY, - .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, - .cra_blocksize = DES_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct des_sparc64_ctx), - .cra_alignmask = 7, - .cra_type = &crypto_blkcipher_type, - .cra_module = THIS_MODULE, - .cra_u = { - .blkcipher = { - .min_keysize = DES_KEY_SIZE, - .max_keysize = DES_KEY_SIZE, - .ivsize = DES_BLOCK_SIZE, - .setkey = des_set_key, - .encrypt = cbc_encrypt, - .decrypt = cbc_decrypt, - }, - }, -}, { - .cra_name = "des3_ede", - .cra_driver_name = "des3_ede-sparc64", - .cra_priority = SPARC_CR_OPCODE_PRIORITY, - .cra_flags = CRYPTO_ALG_TYPE_CIPHER, - .cra_blocksize = DES3_EDE_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct des3_ede_sparc64_ctx), - .cra_alignmask = 7, - .cra_module = THIS_MODULE, - .cra_u = { - .cipher = { - .cia_min_keysize = DES3_EDE_KEY_SIZE, - .cia_max_keysize = DES3_EDE_KEY_SIZE, - .cia_setkey = des3_ede_set_key, - .cia_encrypt = sparc_des3_ede_encrypt, - .cia_decrypt = sparc_des3_ede_decrypt + }, { + .cra_name = "des3_ede", + .cra_driver_name = "des3_ede-sparc64", + .cra_priority = SPARC_CR_OPCODE_PRIORITY, + .cra_flags = CRYPTO_ALG_TYPE_CIPHER, + .cra_blocksize = DES3_EDE_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct des3_ede_sparc64_ctx), + .cra_alignmask = 7, + .cra_module = THIS_MODULE, + .cra_u = { + .cipher = { + .cia_min_keysize = DES3_EDE_KEY_SIZE, + .cia_max_keysize = DES3_EDE_KEY_SIZE, + .cia_setkey = des3_ede_set_key, + .cia_encrypt = sparc_des3_ede_encrypt, + .cia_decrypt = sparc_des3_ede_decrypt + } } } -}, { - .cra_name = "ecb(des3_ede)", - .cra_driver_name = "ecb-des3_ede-sparc64", - .cra_priority = SPARC_CR_OPCODE_PRIORITY, - .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, - .cra_blocksize = DES3_EDE_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct des3_ede_sparc64_ctx), - .cra_alignmask = 7, - .cra_type = &crypto_blkcipher_type, - .cra_module = THIS_MODULE, - .cra_u = { - .blkcipher = { - .min_keysize = DES3_EDE_KEY_SIZE, - .max_keysize = DES3_EDE_KEY_SIZE, - .setkey = des3_ede_set_key, - .encrypt = ecb3_encrypt, - .decrypt = ecb3_decrypt, - }, - }, -}, { - .cra_name = "cbc(des3_ede)", - .cra_driver_name = "cbc-des3_ede-sparc64", - .cra_priority = SPARC_CR_OPCODE_PRIORITY, - .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, - .cra_blocksize = DES3_EDE_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct des3_ede_sparc64_ctx), - .cra_alignmask = 7, - .cra_type = &crypto_blkcipher_type, - .cra_module = THIS_MODULE, - .cra_u = { - .blkcipher = { - .min_keysize = DES3_EDE_KEY_SIZE, - .max_keysize = DES3_EDE_KEY_SIZE, - .ivsize = DES3_EDE_BLOCK_SIZE, - .setkey = des3_ede_set_key, - .encrypt = cbc3_encrypt, - .decrypt = cbc3_decrypt, - }, - }, -} }; +}; + +static struct skcipher_alg skcipher_algs[] = { + { + .base.cra_name = "ecb(des)", + .base.cra_driver_name = "ecb-des-sparc64", + .base.cra_priority = SPARC_CR_OPCODE_PRIORITY, + .base.cra_blocksize = DES_BLOCK_SIZE, + .base.cra_ctxsize = sizeof(struct des_sparc64_ctx), + .base.cra_alignmask = 7, + .base.cra_module = THIS_MODULE, + .min_keysize = DES_KEY_SIZE, + .max_keysize = DES_KEY_SIZE, + .setkey = des_set_key_skcipher, + .encrypt = ecb_encrypt, + .decrypt = ecb_decrypt, + }, { + .base.cra_name = "cbc(des)", + .base.cra_driver_name = "cbc-des-sparc64", + .base.cra_priority = SPARC_CR_OPCODE_PRIORITY, + .base.cra_blocksize = DES_BLOCK_SIZE, + .base.cra_ctxsize = sizeof(struct des_sparc64_ctx), + .base.cra_alignmask = 7, + .base.cra_module = THIS_MODULE, + .min_keysize = DES_KEY_SIZE, + .max_keysize = DES_KEY_SIZE, + .ivsize = DES_BLOCK_SIZE, + .setkey = 
des_set_key_skcipher, + .encrypt = cbc_encrypt, + .decrypt = cbc_decrypt, + }, { + .base.cra_name = "ecb(des3_ede)", + .base.cra_driver_name = "ecb-des3_ede-sparc64", + .base.cra_priority = SPARC_CR_OPCODE_PRIORITY, + .base.cra_blocksize = DES3_EDE_BLOCK_SIZE, + .base.cra_ctxsize = sizeof(struct des3_ede_sparc64_ctx), + .base.cra_alignmask = 7, + .base.cra_module = THIS_MODULE, + .min_keysize = DES3_EDE_KEY_SIZE, + .max_keysize = DES3_EDE_KEY_SIZE, + .setkey = des3_ede_set_key_skcipher, + .encrypt = ecb3_encrypt, + .decrypt = ecb3_decrypt, + }, { + .base.cra_name = "cbc(des3_ede)", + .base.cra_driver_name = "cbc-des3_ede-sparc64", + .base.cra_priority = SPARC_CR_OPCODE_PRIORITY, + .base.cra_blocksize = DES3_EDE_BLOCK_SIZE, + .base.cra_ctxsize = sizeof(struct des3_ede_sparc64_ctx), + .base.cra_alignmask = 7, + .base.cra_module = THIS_MODULE, + .min_keysize = DES3_EDE_KEY_SIZE, + .max_keysize = DES3_EDE_KEY_SIZE, + .ivsize = DES3_EDE_BLOCK_SIZE, + .setkey = des3_ede_set_key_skcipher, + .encrypt = cbc3_encrypt, + .decrypt = cbc3_decrypt, + } +}; static bool __init sparc64_has_des_opcode(void) { @@ -503,17 +448,27 @@ static bool __init sparc64_has_des_opcode(void) static int __init des_sparc64_mod_init(void) { - if (sparc64_has_des_opcode()) { - pr_info("Using sparc64 des opcodes optimized DES implementation\n"); - return crypto_register_algs(algs, ARRAY_SIZE(algs)); + int err; + + if (!sparc64_has_des_opcode()) { + pr_info("sparc64 des opcodes not available.\n"); + return -ENODEV; } - pr_info("sparc64 des opcodes not available.\n"); - return -ENODEV; + pr_info("Using sparc64 des opcodes optimized DES implementation\n"); + err = crypto_register_algs(cipher_algs, ARRAY_SIZE(cipher_algs)); + if (err) + return err; + err = crypto_register_skciphers(skcipher_algs, + ARRAY_SIZE(skcipher_algs)); + if (err) + crypto_unregister_algs(cipher_algs, ARRAY_SIZE(cipher_algs)); + return err; } static void __exit des_sparc64_mod_fini(void) { - crypto_unregister_algs(algs, ARRAY_SIZE(algs)); + crypto_unregister_algs(cipher_algs, ARRAY_SIZE(cipher_algs)); + crypto_unregister_skciphers(skcipher_algs, ARRAY_SIZE(skcipher_algs)); } module_init(des_sparc64_mod_init); diff --git a/arch/sparc/kernel/smp_64.c b/arch/sparc/kernel/smp_64.c index a8275fea4b70..9b4506373353 100644 --- a/arch/sparc/kernel/smp_64.c +++ b/arch/sparc/kernel/smp_64.c @@ -1673,9 +1673,9 @@ void __init setup_per_cpu_areas(void) pcpu_alloc_bootmem, pcpu_free_bootmem); if (rc) - pr_warning("PERCPU: %s allocator failed (%d), " - "falling back to page size\n", - pcpu_fc_names[pcpu_chosen_fc], rc); + pr_warn("PERCPU: %s allocator failed (%d), " + "falling back to page size\n", + pcpu_fc_names[pcpu_chosen_fc], rc); } if (rc < 0) rc = pcpu_page_first_chunk(PERCPU_MODULE_RESERVE, diff --git a/arch/sparc/vdso/Makefile b/arch/sparc/vdso/Makefile index 324a23947585..997ffe46e953 100644 --- a/arch/sparc/vdso/Makefile +++ b/arch/sparc/vdso/Makefile @@ -65,14 +65,14 @@ $(vobjs): KBUILD_CFLAGS := $(filter-out $(GCC_PLUGINS_CFLAGS) $(SPARC_REG_CFLAGS # # vDSO code runs in userspace and -pg doesn't help with profiling anyway. 
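# An illustrative aside, inferred from the hunk below rather than stated in it:
# kbuild keys per-object overrides by the object path relative to $(obj), so a
# copy of vclock_gettime.o built under vdso32/ needs the subdirectory spelled
# out, e.g.
#	CFLAGS_REMOVE_vdso32/vclock_gettime.o = -pg
# and likewise CPPFLAGS_vdso32/vdso32.lds for the 32-bit linker script.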
# -CFLAGS_REMOVE_vdso-note.o = -pg CFLAGS_REMOVE_vclock_gettime.o = -pg +CFLAGS_REMOVE_vdso32/vclock_gettime.o = -pg $(obj)/%.so: OBJCOPYFLAGS := -S $(obj)/%.so: $(obj)/%.so.dbg FORCE $(call if_changed,objcopy) -CPPFLAGS_vdso32.lds = $(CPPFLAGS_vdso.lds) +CPPFLAGS_vdso32/vdso32.lds = $(CPPFLAGS_vdso.lds) VDSO_LDFLAGS_vdso32.lds = -m elf32_sparc -soname linux-gate.so.1 #This makes sure the $(obj) subdirectory exists even though vdso32/ diff --git a/arch/um/configs/kunit_defconfig b/arch/um/configs/kunit_defconfig new file mode 100644 index 000000000000..9235b7d42d38 --- /dev/null +++ b/arch/um/configs/kunit_defconfig @@ -0,0 +1,3 @@ +CONFIG_KUNIT=y +CONFIG_KUNIT_TEST=y +CONFIG_KUNIT_EXAMPLE_TEST=y diff --git a/arch/um/drivers/ubd_kern.c b/arch/um/drivers/ubd_kern.c index 612535cd9706..6627d7c30f37 100644 --- a/arch/um/drivers/ubd_kern.c +++ b/arch/um/drivers/ubd_kern.c @@ -1403,8 +1403,12 @@ static blk_status_t ubd_queue_rq(struct blk_mq_hw_ctx *hctx, spin_unlock_irq(&ubd_dev->lock); - if (ret < 0) - blk_mq_requeue_request(req, true); + if (ret < 0) { + if (ret == -ENOMEM) + res = BLK_STS_RESOURCE; + else + res = BLK_STS_DEV_RESOURCE; + } return res; } diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 4d27bdc85bf2..99c3ddd007ce 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -24,7 +24,7 @@ config X86_64 depends on 64BIT # Options that are inherently 64-bit kernel only: select ARCH_HAS_GIGANTIC_PAGE - select ARCH_SUPPORTS_INT128 + select ARCH_SUPPORTS_INT128 if CC_HAS_INT128 select ARCH_USE_CMPXCHG_LOCKREF select HAVE_ARCH_SOFT_DIRTY select MODULES_USE_ELF_RELA @@ -1910,6 +1910,51 @@ config X86_INTEL_MEMORY_PROTECTION_KEYS If unsure, say y. +choice + prompt "TSX enable mode" + depends on CPU_SUP_INTEL + default X86_INTEL_TSX_MODE_OFF + help + Intel's TSX (Transactional Synchronization Extensions) feature + allows to optimize locking protocols through lock elision which + can lead to a noticeable performance boost. + + On the other hand it has been shown that TSX can be exploited + to form side channel attacks (e.g. TAA) and chances are there + will be more of those attacks discovered in the future. + + Therefore TSX is not enabled by default (aka tsx=off). An admin + might override this decision by tsx=on the command line parameter. + Even with TSX enabled, the kernel will attempt to enable the best + possible TAA mitigation setting depending on the microcode available + for the particular machine. + + This option allows to set the default tsx mode between tsx=on, =off + and =auto. See Documentation/admin-guide/kernel-parameters.txt for more + details. + + Say off if not sure, auto if TSX is in use but it should be used on safe + platforms or on if TSX is in use and the security aspect of tsx is not + relevant. + +config X86_INTEL_TSX_MODE_OFF + bool "off" + help + TSX is disabled if possible - equals to tsx=off command line parameter. + +config X86_INTEL_TSX_MODE_ON + bool "on" + help + TSX is always enabled on TSX capable HW - equals the tsx=on command + line parameter. + +config X86_INTEL_TSX_MODE_AUTO + bool "auto" + help + TSX is enabled on TSX capable HW that is believed to be safe against + side channel attacks- equals the tsx=auto command line parameter. 
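A hypothetical defconfig fragment selecting the auto default (the symbols and the tsx= parameter are the ones defined above; which value to pick is only an example):

CONFIG_X86_INTEL_TSX_MODE_AUTO=y
# CONFIG_X86_INTEL_TSX_MODE_OFF is not set
# CONFIG_X86_INTEL_TSX_MODE_ON is not set

Whichever build-time default is chosen, an administrator can still override it on a given machine by booting with tsx=off, tsx=on or tsx=auto.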
+endchoice + config EFI bool "EFI runtime service support" depends on ACPI diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug index bf9cd83de777..409c00f74e60 100644 --- a/arch/x86/Kconfig.debug +++ b/arch/x86/Kconfig.debug @@ -316,10 +316,6 @@ config UNWINDER_FRAME_POINTER unwinder, but the kernel text size will grow by ~3% and the kernel's overall performance will degrade by roughly 5-10%. - This option is recommended if you want to use the livepatch - consistency model, as this is currently the only way to get a - reliable stack trace (CONFIG_HAVE_RELIABLE_STACKTRACE). - config UNWINDER_GUESS bool "Guess unwinder" depends on EXPERT diff --git a/arch/x86/boot/compressed/acpi.c b/arch/x86/boot/compressed/acpi.c index 149795c369f2..25019d42ae93 100644 --- a/arch/x86/boot/compressed/acpi.c +++ b/arch/x86/boot/compressed/acpi.c @@ -21,30 +21,6 @@ struct mem_vector immovable_mem[MAX_NUMNODES*2]; /* - * Max length of 64-bit hex address string is 19, prefix "0x" + 16 hex - * digits, and '\0' for termination. - */ -#define MAX_ADDR_LEN 19 - -static acpi_physical_address get_cmdline_acpi_rsdp(void) -{ - acpi_physical_address addr = 0; - -#ifdef CONFIG_KEXEC - char val[MAX_ADDR_LEN] = { }; - int ret; - - ret = cmdline_find_option("acpi_rsdp", val, MAX_ADDR_LEN); - if (ret < 0) - return 0; - - if (kstrtoull(val, 16, &addr)) - return 0; -#endif - return addr; -} - -/* * Search EFI system tables for RSDP. If both ACPI_20_TABLE_GUID and * ACPI_TABLE_GUID are found, take the former, which has more features. */ @@ -298,6 +274,30 @@ acpi_physical_address get_rsdp_addr(void) } #if defined(CONFIG_RANDOMIZE_BASE) && defined(CONFIG_MEMORY_HOTREMOVE) +/* + * Max length of 64-bit hex address string is 19, prefix "0x" + 16 hex + * digits, and '\0' for termination. + */ +#define MAX_ADDR_LEN 19 + +static acpi_physical_address get_cmdline_acpi_rsdp(void) +{ + acpi_physical_address addr = 0; + +#ifdef CONFIG_KEXEC + char val[MAX_ADDR_LEN] = { }; + int ret; + + ret = cmdline_find_option("acpi_rsdp", val, MAX_ADDR_LEN); + if (ret < 0) + return 0; + + if (kstrtoull(val, 16, &addr)) + return 0; +#endif + return addr; +} + /* Compute SRAT address from RSDP. */ static unsigned long get_acpi_srat_table(void) { diff --git a/arch/x86/boot/compressed/eboot.c b/arch/x86/boot/compressed/eboot.c index d6662fdef300..82bc60c8acb2 100644 --- a/arch/x86/boot/compressed/eboot.c +++ b/arch/x86/boot/compressed/eboot.c @@ -13,6 +13,7 @@ #include <asm/e820/types.h> #include <asm/setup.h> #include <asm/desc.h> +#include <asm/boot.h> #include "../string.h" #include "eboot.h" @@ -813,7 +814,8 @@ efi_main(struct efi_config *c, struct boot_params *boot_params) status = efi_relocate_kernel(sys_table, &bzimage_addr, hdr->init_size, hdr->init_size, hdr->pref_address, - hdr->kernel_alignment); + hdr->kernel_alignment, + LOAD_PHYSICAL_ADDR); if (status != EFI_SUCCESS) { efi_printk(sys_table, "efi_relocate_kernel() failed!\n"); goto fail; diff --git a/arch/x86/boot/compressed/misc.c b/arch/x86/boot/compressed/misc.c index 53ac0cb2396d..9652d5c2afda 100644 --- a/arch/x86/boot/compressed/misc.c +++ b/arch/x86/boot/compressed/misc.c @@ -345,6 +345,7 @@ asmlinkage __visible void *extract_kernel(void *rmode, memptr heap, { const unsigned long kernel_total_size = VO__end - VO__text; unsigned long virt_addr = LOAD_PHYSICAL_ADDR; + unsigned long needed_size; /* Retain x86 boot parameters pointer passed from startup_32/64. 
*/ boot_params = rmode; @@ -379,26 +380,38 @@ asmlinkage __visible void *extract_kernel(void *rmode, memptr heap, free_mem_ptr = heap; /* Heap */ free_mem_end_ptr = heap + BOOT_HEAP_SIZE; + /* + * The memory hole needed for the kernel is the larger of either + * the entire decompressed kernel plus relocation table, or the + * entire decompressed kernel plus .bss and .brk sections. + * + * On X86_64, the memory is mapped with PMD pages. Round the + * size up so that the full extent of PMD pages mapped is + * included in the check against the valid memory table + * entries. This ensures the full mapped area is usable RAM + * and doesn't include any reserved areas. + */ + needed_size = max(output_len, kernel_total_size); +#ifdef CONFIG_X86_64 + needed_size = ALIGN(needed_size, MIN_KERNEL_ALIGN); +#endif + /* Report initial kernel position details. */ debug_putaddr(input_data); debug_putaddr(input_len); debug_putaddr(output); debug_putaddr(output_len); debug_putaddr(kernel_total_size); + debug_putaddr(needed_size); #ifdef CONFIG_X86_64 /* Report address of 32-bit trampoline */ debug_putaddr(trampoline_32bit); #endif - /* - * The memory hole needed for the kernel is the larger of either - * the entire decompressed kernel plus relocation table, or the - * entire decompressed kernel plus .bss and .brk sections. - */ choose_random_location((unsigned long)input_data, input_len, (unsigned long *)&output, - max(output_len, kernel_total_size), + needed_size, &virt_addr); /* Validate memory location choices. */ diff --git a/arch/x86/crypto/Makefile b/arch/x86/crypto/Makefile index 759b1a927826..958440eae27e 100644 --- a/arch/x86/crypto/Makefile +++ b/arch/x86/crypto/Makefile @@ -39,6 +39,7 @@ obj-$(CONFIG_CRYPTO_AEGIS128_AESNI_SSE2) += aegis128-aesni.o obj-$(CONFIG_CRYPTO_NHPOLY1305_SSE2) += nhpoly1305-sse2.o obj-$(CONFIG_CRYPTO_NHPOLY1305_AVX2) += nhpoly1305-avx2.o +obj-$(CONFIG_CRYPTO_CURVE25519_X86) += curve25519-x86_64.o # These modules require assembler to support AVX. ifeq ($(avx_supported),yes) @@ -48,6 +49,7 @@ ifeq ($(avx_supported),yes) obj-$(CONFIG_CRYPTO_CAST6_AVX_X86_64) += cast6-avx-x86_64.o obj-$(CONFIG_CRYPTO_TWOFISH_AVX_X86_64) += twofish-avx-x86_64.o obj-$(CONFIG_CRYPTO_SERPENT_AVX_X86_64) += serpent-avx-x86_64.o + obj-$(CONFIG_CRYPTO_BLAKE2S_X86) += blake2s-x86_64.o endif # These modules require assembler to support AVX2. @@ -70,6 +72,7 @@ serpent-sse2-x86_64-y := serpent-sse2-x86_64-asm_64.o serpent_sse2_glue.o aegis128-aesni-y := aegis128-aesni-asm.o aegis128-aesni-glue.o nhpoly1305-sse2-y := nh-sse2-x86_64.o nhpoly1305-sse2-glue.o +blake2s-x86_64-y := blake2s-core.o blake2s-glue.o ifeq ($(avx_supported),yes) camellia-aesni-avx-x86_64-y := camellia-aesni-avx-asm_64.o \ diff --git a/arch/x86/crypto/blake2s-core.S b/arch/x86/crypto/blake2s-core.S new file mode 100644 index 000000000000..8591938eee26 --- /dev/null +++ b/arch/x86/crypto/blake2s-core.S @@ -0,0 +1,258 @@ +/* SPDX-License-Identifier: GPL-2.0 OR MIT */ +/* + * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved. + * Copyright (C) 2017-2019 Samuel Neves <sneves@dei.uc.pt>. All Rights Reserved. 
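 *
 * A note on the calling convention, inferred from the register usage below
 * rather than documented in this file: both entry points appear to follow the
 * System V x86-64 ABI with
 *   %rdi = BLAKE2s state (h[0..7] at offset 0, counter/flag words at 0x20),
 *   %rsi = message data, consumed in 64-byte blocks,
 *   %rdx = number of 64-byte blocks,
 *   %rcx = amount added to the state's block counter before each block.
 * SIGMA (bytes) and SIGMA2 (dwords, AVX-512 path only) are the per-round
 * message-scheduling tables walked by the round loops.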
+ */ + +#include <linux/linkage.h> + +.section .rodata.cst32.BLAKE2S_IV, "aM", @progbits, 32 +.align 32 +IV: .octa 0xA54FF53A3C6EF372BB67AE856A09E667 + .octa 0x5BE0CD191F83D9AB9B05688C510E527F +.section .rodata.cst16.ROT16, "aM", @progbits, 16 +.align 16 +ROT16: .octa 0x0D0C0F0E09080B0A0504070601000302 +.section .rodata.cst16.ROR328, "aM", @progbits, 16 +.align 16 +ROR328: .octa 0x0C0F0E0D080B0A090407060500030201 +.section .rodata.cst64.BLAKE2S_SIGMA, "aM", @progbits, 160 +.align 64 +SIGMA: +.byte 0, 2, 4, 6, 1, 3, 5, 7, 14, 8, 10, 12, 15, 9, 11, 13 +.byte 14, 4, 9, 13, 10, 8, 15, 6, 5, 1, 0, 11, 3, 12, 2, 7 +.byte 11, 12, 5, 15, 8, 0, 2, 13, 9, 10, 3, 7, 4, 14, 6, 1 +.byte 7, 3, 13, 11, 9, 1, 12, 14, 15, 2, 5, 4, 8, 6, 10, 0 +.byte 9, 5, 2, 10, 0, 7, 4, 15, 3, 14, 11, 6, 13, 1, 12, 8 +.byte 2, 6, 0, 8, 12, 10, 11, 3, 1, 4, 7, 15, 9, 13, 5, 14 +.byte 12, 1, 14, 4, 5, 15, 13, 10, 8, 0, 6, 9, 11, 7, 3, 2 +.byte 13, 7, 12, 3, 11, 14, 1, 9, 2, 5, 15, 8, 10, 0, 4, 6 +.byte 6, 14, 11, 0, 15, 9, 3, 8, 10, 12, 13, 1, 5, 2, 7, 4 +.byte 10, 8, 7, 1, 2, 4, 6, 5, 13, 15, 9, 3, 0, 11, 14, 12 +#ifdef CONFIG_AS_AVX512 +.section .rodata.cst64.BLAKE2S_SIGMA2, "aM", @progbits, 640 +.align 64 +SIGMA2: +.long 0, 2, 4, 6, 1, 3, 5, 7, 14, 8, 10, 12, 15, 9, 11, 13 +.long 8, 2, 13, 15, 10, 9, 12, 3, 6, 4, 0, 14, 5, 11, 1, 7 +.long 11, 13, 8, 6, 5, 10, 14, 3, 2, 4, 12, 15, 1, 0, 7, 9 +.long 11, 10, 7, 0, 8, 15, 1, 13, 3, 6, 2, 12, 4, 14, 9, 5 +.long 4, 10, 9, 14, 15, 0, 11, 8, 1, 7, 3, 13, 2, 5, 6, 12 +.long 2, 11, 4, 15, 14, 3, 10, 8, 13, 6, 5, 7, 0, 12, 1, 9 +.long 4, 8, 15, 9, 14, 11, 13, 5, 3, 2, 1, 12, 6, 10, 7, 0 +.long 6, 13, 0, 14, 12, 2, 1, 11, 15, 4, 5, 8, 7, 9, 3, 10 +.long 15, 5, 4, 13, 10, 7, 3, 11, 12, 2, 0, 6, 9, 8, 1, 14 +.long 8, 7, 14, 11, 13, 15, 0, 12, 10, 4, 5, 6, 3, 2, 1, 9 +#endif /* CONFIG_AS_AVX512 */ + +.text +#ifdef CONFIG_AS_SSSE3 +ENTRY(blake2s_compress_ssse3) + testq %rdx,%rdx + je .Lendofloop + movdqu (%rdi),%xmm0 + movdqu 0x10(%rdi),%xmm1 + movdqa ROT16(%rip),%xmm12 + movdqa ROR328(%rip),%xmm13 + movdqu 0x20(%rdi),%xmm14 + movq %rcx,%xmm15 + leaq SIGMA+0xa0(%rip),%r8 + jmp .Lbeginofloop + .align 32 +.Lbeginofloop: + movdqa %xmm0,%xmm10 + movdqa %xmm1,%xmm11 + paddq %xmm15,%xmm14 + movdqa IV(%rip),%xmm2 + movdqa %xmm14,%xmm3 + pxor IV+0x10(%rip),%xmm3 + leaq SIGMA(%rip),%rcx +.Lroundloop: + movzbl (%rcx),%eax + movd (%rsi,%rax,4),%xmm4 + movzbl 0x1(%rcx),%eax + movd (%rsi,%rax,4),%xmm5 + movzbl 0x2(%rcx),%eax + movd (%rsi,%rax,4),%xmm6 + movzbl 0x3(%rcx),%eax + movd (%rsi,%rax,4),%xmm7 + punpckldq %xmm5,%xmm4 + punpckldq %xmm7,%xmm6 + punpcklqdq %xmm6,%xmm4 + paddd %xmm4,%xmm0 + paddd %xmm1,%xmm0 + pxor %xmm0,%xmm3 + pshufb %xmm12,%xmm3 + paddd %xmm3,%xmm2 + pxor %xmm2,%xmm1 + movdqa %xmm1,%xmm8 + psrld $0xc,%xmm1 + pslld $0x14,%xmm8 + por %xmm8,%xmm1 + movzbl 0x4(%rcx),%eax + movd (%rsi,%rax,4),%xmm5 + movzbl 0x5(%rcx),%eax + movd (%rsi,%rax,4),%xmm6 + movzbl 0x6(%rcx),%eax + movd (%rsi,%rax,4),%xmm7 + movzbl 0x7(%rcx),%eax + movd (%rsi,%rax,4),%xmm4 + punpckldq %xmm6,%xmm5 + punpckldq %xmm4,%xmm7 + punpcklqdq %xmm7,%xmm5 + paddd %xmm5,%xmm0 + paddd %xmm1,%xmm0 + pxor %xmm0,%xmm3 + pshufb %xmm13,%xmm3 + paddd %xmm3,%xmm2 + pxor %xmm2,%xmm1 + movdqa %xmm1,%xmm8 + psrld $0x7,%xmm1 + pslld $0x19,%xmm8 + por %xmm8,%xmm1 + pshufd $0x93,%xmm0,%xmm0 + pshufd $0x4e,%xmm3,%xmm3 + pshufd $0x39,%xmm2,%xmm2 + movzbl 0x8(%rcx),%eax + movd (%rsi,%rax,4),%xmm6 + movzbl 0x9(%rcx),%eax + movd (%rsi,%rax,4),%xmm7 + movzbl 0xa(%rcx),%eax + movd (%rsi,%rax,4),%xmm4 + movzbl 0xb(%rcx),%eax + movd 
(%rsi,%rax,4),%xmm5 + punpckldq %xmm7,%xmm6 + punpckldq %xmm5,%xmm4 + punpcklqdq %xmm4,%xmm6 + paddd %xmm6,%xmm0 + paddd %xmm1,%xmm0 + pxor %xmm0,%xmm3 + pshufb %xmm12,%xmm3 + paddd %xmm3,%xmm2 + pxor %xmm2,%xmm1 + movdqa %xmm1,%xmm8 + psrld $0xc,%xmm1 + pslld $0x14,%xmm8 + por %xmm8,%xmm1 + movzbl 0xc(%rcx),%eax + movd (%rsi,%rax,4),%xmm7 + movzbl 0xd(%rcx),%eax + movd (%rsi,%rax,4),%xmm4 + movzbl 0xe(%rcx),%eax + movd (%rsi,%rax,4),%xmm5 + movzbl 0xf(%rcx),%eax + movd (%rsi,%rax,4),%xmm6 + punpckldq %xmm4,%xmm7 + punpckldq %xmm6,%xmm5 + punpcklqdq %xmm5,%xmm7 + paddd %xmm7,%xmm0 + paddd %xmm1,%xmm0 + pxor %xmm0,%xmm3 + pshufb %xmm13,%xmm3 + paddd %xmm3,%xmm2 + pxor %xmm2,%xmm1 + movdqa %xmm1,%xmm8 + psrld $0x7,%xmm1 + pslld $0x19,%xmm8 + por %xmm8,%xmm1 + pshufd $0x39,%xmm0,%xmm0 + pshufd $0x4e,%xmm3,%xmm3 + pshufd $0x93,%xmm2,%xmm2 + addq $0x10,%rcx + cmpq %r8,%rcx + jnz .Lroundloop + pxor %xmm2,%xmm0 + pxor %xmm3,%xmm1 + pxor %xmm10,%xmm0 + pxor %xmm11,%xmm1 + addq $0x40,%rsi + decq %rdx + jnz .Lbeginofloop + movdqu %xmm0,(%rdi) + movdqu %xmm1,0x10(%rdi) + movdqu %xmm14,0x20(%rdi) +.Lendofloop: + ret +ENDPROC(blake2s_compress_ssse3) +#endif /* CONFIG_AS_SSSE3 */ + +#ifdef CONFIG_AS_AVX512 +ENTRY(blake2s_compress_avx512) + vmovdqu (%rdi),%xmm0 + vmovdqu 0x10(%rdi),%xmm1 + vmovdqu 0x20(%rdi),%xmm4 + vmovq %rcx,%xmm5 + vmovdqa IV(%rip),%xmm14 + vmovdqa IV+16(%rip),%xmm15 + jmp .Lblake2s_compress_avx512_mainloop +.align 32 +.Lblake2s_compress_avx512_mainloop: + vmovdqa %xmm0,%xmm10 + vmovdqa %xmm1,%xmm11 + vpaddq %xmm5,%xmm4,%xmm4 + vmovdqa %xmm14,%xmm2 + vpxor %xmm15,%xmm4,%xmm3 + vmovdqu (%rsi),%ymm6 + vmovdqu 0x20(%rsi),%ymm7 + addq $0x40,%rsi + leaq SIGMA2(%rip),%rax + movb $0xa,%cl +.Lblake2s_compress_avx512_roundloop: + addq $0x40,%rax + vmovdqa -0x40(%rax),%ymm8 + vmovdqa -0x20(%rax),%ymm9 + vpermi2d %ymm7,%ymm6,%ymm8 + vpermi2d %ymm7,%ymm6,%ymm9 + vmovdqa %ymm8,%ymm6 + vmovdqa %ymm9,%ymm7 + vpaddd %xmm8,%xmm0,%xmm0 + vpaddd %xmm1,%xmm0,%xmm0 + vpxor %xmm0,%xmm3,%xmm3 + vprord $0x10,%xmm3,%xmm3 + vpaddd %xmm3,%xmm2,%xmm2 + vpxor %xmm2,%xmm1,%xmm1 + vprord $0xc,%xmm1,%xmm1 + vextracti128 $0x1,%ymm8,%xmm8 + vpaddd %xmm8,%xmm0,%xmm0 + vpaddd %xmm1,%xmm0,%xmm0 + vpxor %xmm0,%xmm3,%xmm3 + vprord $0x8,%xmm3,%xmm3 + vpaddd %xmm3,%xmm2,%xmm2 + vpxor %xmm2,%xmm1,%xmm1 + vprord $0x7,%xmm1,%xmm1 + vpshufd $0x93,%xmm0,%xmm0 + vpshufd $0x4e,%xmm3,%xmm3 + vpshufd $0x39,%xmm2,%xmm2 + vpaddd %xmm9,%xmm0,%xmm0 + vpaddd %xmm1,%xmm0,%xmm0 + vpxor %xmm0,%xmm3,%xmm3 + vprord $0x10,%xmm3,%xmm3 + vpaddd %xmm3,%xmm2,%xmm2 + vpxor %xmm2,%xmm1,%xmm1 + vprord $0xc,%xmm1,%xmm1 + vextracti128 $0x1,%ymm9,%xmm9 + vpaddd %xmm9,%xmm0,%xmm0 + vpaddd %xmm1,%xmm0,%xmm0 + vpxor %xmm0,%xmm3,%xmm3 + vprord $0x8,%xmm3,%xmm3 + vpaddd %xmm3,%xmm2,%xmm2 + vpxor %xmm2,%xmm1,%xmm1 + vprord $0x7,%xmm1,%xmm1 + vpshufd $0x39,%xmm0,%xmm0 + vpshufd $0x4e,%xmm3,%xmm3 + vpshufd $0x93,%xmm2,%xmm2 + decb %cl + jne .Lblake2s_compress_avx512_roundloop + vpxor %xmm10,%xmm0,%xmm0 + vpxor %xmm11,%xmm1,%xmm1 + vpxor %xmm2,%xmm0,%xmm0 + vpxor %xmm3,%xmm1,%xmm1 + decq %rdx + jne .Lblake2s_compress_avx512_mainloop + vmovdqu %xmm0,(%rdi) + vmovdqu %xmm1,0x10(%rdi) + vmovdqu %xmm4,0x20(%rdi) + vzeroupper + retq +ENDPROC(blake2s_compress_avx512) +#endif /* CONFIG_AS_AVX512 */ diff --git a/arch/x86/crypto/blake2s-glue.c b/arch/x86/crypto/blake2s-glue.c new file mode 100644 index 000000000000..4a37ba7cdbe5 --- /dev/null +++ b/arch/x86/crypto/blake2s-glue.c @@ -0,0 +1,233 @@ +// SPDX-License-Identifier: GPL-2.0 OR MIT +/* + * Copyright (C) 2015-2019 
Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved. + */ + +#include <crypto/internal/blake2s.h> +#include <crypto/internal/simd.h> +#include <crypto/internal/hash.h> + +#include <linux/types.h> +#include <linux/jump_label.h> +#include <linux/kernel.h> +#include <linux/module.h> + +#include <asm/cpufeature.h> +#include <asm/fpu/api.h> +#include <asm/processor.h> +#include <asm/simd.h> + +asmlinkage void blake2s_compress_ssse3(struct blake2s_state *state, + const u8 *block, const size_t nblocks, + const u32 inc); +asmlinkage void blake2s_compress_avx512(struct blake2s_state *state, + const u8 *block, const size_t nblocks, + const u32 inc); + +static __ro_after_init DEFINE_STATIC_KEY_FALSE(blake2s_use_ssse3); +static __ro_after_init DEFINE_STATIC_KEY_FALSE(blake2s_use_avx512); + +void blake2s_compress_arch(struct blake2s_state *state, + const u8 *block, size_t nblocks, + const u32 inc) +{ + /* SIMD disables preemption, so relax after processing each page. */ + BUILD_BUG_ON(PAGE_SIZE / BLAKE2S_BLOCK_SIZE < 8); + + if (!static_branch_likely(&blake2s_use_ssse3) || !crypto_simd_usable()) { + blake2s_compress_generic(state, block, nblocks, inc); + return; + } + + for (;;) { + const size_t blocks = min_t(size_t, nblocks, + PAGE_SIZE / BLAKE2S_BLOCK_SIZE); + + kernel_fpu_begin(); + if (IS_ENABLED(CONFIG_AS_AVX512) && + static_branch_likely(&blake2s_use_avx512)) + blake2s_compress_avx512(state, block, blocks, inc); + else + blake2s_compress_ssse3(state, block, blocks, inc); + kernel_fpu_end(); + + nblocks -= blocks; + if (!nblocks) + break; + block += blocks * BLAKE2S_BLOCK_SIZE; + } +} +EXPORT_SYMBOL(blake2s_compress_arch); + +static int crypto_blake2s_setkey(struct crypto_shash *tfm, const u8 *key, + unsigned int keylen) +{ + struct blake2s_tfm_ctx *tctx = crypto_shash_ctx(tfm); + + if (keylen == 0 || keylen > BLAKE2S_KEY_SIZE) { + crypto_shash_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN); + return -EINVAL; + } + + memcpy(tctx->key, key, keylen); + tctx->keylen = keylen; + + return 0; +} + +static int crypto_blake2s_init(struct shash_desc *desc) +{ + struct blake2s_tfm_ctx *tctx = crypto_shash_ctx(desc->tfm); + struct blake2s_state *state = shash_desc_ctx(desc); + const int outlen = crypto_shash_digestsize(desc->tfm); + + if (tctx->keylen) + blake2s_init_key(state, outlen, tctx->key, tctx->keylen); + else + blake2s_init(state, outlen); + + return 0; +} + +static int crypto_blake2s_update(struct shash_desc *desc, const u8 *in, + unsigned int inlen) +{ + struct blake2s_state *state = shash_desc_ctx(desc); + const size_t fill = BLAKE2S_BLOCK_SIZE - state->buflen; + + if (unlikely(!inlen)) + return 0; + if (inlen > fill) { + memcpy(state->buf + state->buflen, in, fill); + blake2s_compress_arch(state, state->buf, 1, BLAKE2S_BLOCK_SIZE); + state->buflen = 0; + in += fill; + inlen -= fill; + } + if (inlen > BLAKE2S_BLOCK_SIZE) { + const size_t nblocks = DIV_ROUND_UP(inlen, BLAKE2S_BLOCK_SIZE); + /* Hash one less (full) block than strictly possible */ + blake2s_compress_arch(state, in, nblocks - 1, BLAKE2S_BLOCK_SIZE); + in += BLAKE2S_BLOCK_SIZE * (nblocks - 1); + inlen -= BLAKE2S_BLOCK_SIZE * (nblocks - 1); + } + memcpy(state->buf + state->buflen, in, inlen); + state->buflen += inlen; + + return 0; +} + +static int crypto_blake2s_final(struct shash_desc *desc, u8 *out) +{ + struct blake2s_state *state = shash_desc_ctx(desc); + + blake2s_set_lastblock(state); + memset(state->buf + state->buflen, 0, + BLAKE2S_BLOCK_SIZE - state->buflen); /* Padding */ + blake2s_compress_arch(state, state->buf, 1, 
state->buflen); + cpu_to_le32_array(state->h, ARRAY_SIZE(state->h)); + memcpy(out, state->h, state->outlen); + memzero_explicit(state, sizeof(*state)); + + return 0; +} + +static struct shash_alg blake2s_algs[] = {{ + .base.cra_name = "blake2s-128", + .base.cra_driver_name = "blake2s-128-x86", + .base.cra_flags = CRYPTO_ALG_OPTIONAL_KEY, + .base.cra_ctxsize = sizeof(struct blake2s_tfm_ctx), + .base.cra_priority = 200, + .base.cra_blocksize = BLAKE2S_BLOCK_SIZE, + .base.cra_module = THIS_MODULE, + + .digestsize = BLAKE2S_128_HASH_SIZE, + .setkey = crypto_blake2s_setkey, + .init = crypto_blake2s_init, + .update = crypto_blake2s_update, + .final = crypto_blake2s_final, + .descsize = sizeof(struct blake2s_state), +}, { + .base.cra_name = "blake2s-160", + .base.cra_driver_name = "blake2s-160-x86", + .base.cra_flags = CRYPTO_ALG_OPTIONAL_KEY, + .base.cra_ctxsize = sizeof(struct blake2s_tfm_ctx), + .base.cra_priority = 200, + .base.cra_blocksize = BLAKE2S_BLOCK_SIZE, + .base.cra_module = THIS_MODULE, + + .digestsize = BLAKE2S_160_HASH_SIZE, + .setkey = crypto_blake2s_setkey, + .init = crypto_blake2s_init, + .update = crypto_blake2s_update, + .final = crypto_blake2s_final, + .descsize = sizeof(struct blake2s_state), +}, { + .base.cra_name = "blake2s-224", + .base.cra_driver_name = "blake2s-224-x86", + .base.cra_flags = CRYPTO_ALG_OPTIONAL_KEY, + .base.cra_ctxsize = sizeof(struct blake2s_tfm_ctx), + .base.cra_priority = 200, + .base.cra_blocksize = BLAKE2S_BLOCK_SIZE, + .base.cra_module = THIS_MODULE, + + .digestsize = BLAKE2S_224_HASH_SIZE, + .setkey = crypto_blake2s_setkey, + .init = crypto_blake2s_init, + .update = crypto_blake2s_update, + .final = crypto_blake2s_final, + .descsize = sizeof(struct blake2s_state), +}, { + .base.cra_name = "blake2s-256", + .base.cra_driver_name = "blake2s-256-x86", + .base.cra_flags = CRYPTO_ALG_OPTIONAL_KEY, + .base.cra_ctxsize = sizeof(struct blake2s_tfm_ctx), + .base.cra_priority = 200, + .base.cra_blocksize = BLAKE2S_BLOCK_SIZE, + .base.cra_module = THIS_MODULE, + + .digestsize = BLAKE2S_256_HASH_SIZE, + .setkey = crypto_blake2s_setkey, + .init = crypto_blake2s_init, + .update = crypto_blake2s_update, + .final = crypto_blake2s_final, + .descsize = sizeof(struct blake2s_state), +}}; + +static int __init blake2s_mod_init(void) +{ + if (!boot_cpu_has(X86_FEATURE_SSSE3)) + return 0; + + static_branch_enable(&blake2s_use_ssse3); + + if (IS_ENABLED(CONFIG_AS_AVX512) && + boot_cpu_has(X86_FEATURE_AVX) && + boot_cpu_has(X86_FEATURE_AVX2) && + boot_cpu_has(X86_FEATURE_AVX512F) && + boot_cpu_has(X86_FEATURE_AVX512VL) && + cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM | + XFEATURE_MASK_AVX512, NULL)) + static_branch_enable(&blake2s_use_avx512); + + return crypto_register_shashes(blake2s_algs, ARRAY_SIZE(blake2s_algs)); +} + +static void __exit blake2s_mod_exit(void) +{ + if (boot_cpu_has(X86_FEATURE_SSSE3)) + crypto_unregister_shashes(blake2s_algs, ARRAY_SIZE(blake2s_algs)); +} + +module_init(blake2s_mod_init); +module_exit(blake2s_mod_exit); + +MODULE_ALIAS_CRYPTO("blake2s-128"); +MODULE_ALIAS_CRYPTO("blake2s-128-x86"); +MODULE_ALIAS_CRYPTO("blake2s-160"); +MODULE_ALIAS_CRYPTO("blake2s-160-x86"); +MODULE_ALIAS_CRYPTO("blake2s-224"); +MODULE_ALIAS_CRYPTO("blake2s-224-x86"); +MODULE_ALIAS_CRYPTO("blake2s-256"); +MODULE_ALIAS_CRYPTO("blake2s-256-x86"); +MODULE_LICENSE("GPL v2"); diff --git a/arch/x86/crypto/chacha_glue.c b/arch/x86/crypto/chacha_glue.c index 388f95a4ec24..a94e30b6f941 100644 --- a/arch/x86/crypto/chacha_glue.c +++ b/arch/x86/crypto/chacha_glue.c 
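The new blake2s glue above and the reworked chacha20 glue below share the same dispatch idiom: a static key is flipped at module init if the CPU supports the SIMD code, every call falls back to the generic C implementation when the FPU is not usable in the current context, and otherwise the input is processed in page-sized chunks between kernel_fpu_begin()/kernel_fpu_end() so that preemption is never held off for more than one page of data (with 4 KiB pages and 64-byte BLAKE2s blocks, that is 64 blocks per FPU section). A condensed sketch of that pattern, paraphrasing the blake2s_compress_arch() code above with only the SSSE3 path shown (not part of the patch itself), is:

	void blake2s_compress_arch(struct blake2s_state *state, const u8 *block,
				   size_t nblocks, const u32 inc)
	{
		/* No SSSE3, or FPU not usable here: take the generic C path. */
		if (!static_branch_likely(&blake2s_use_ssse3) || !crypto_simd_usable()) {
			blake2s_compress_generic(state, block, nblocks, inc);
			return;
		}

		do {
			/* Never keep the FPU section open for more than a page. */
			const size_t blocks = min_t(size_t, nblocks,
						    PAGE_SIZE / BLAKE2S_BLOCK_SIZE);

			kernel_fpu_begin();
			blake2s_compress_ssse3(state, block, blocks, inc);
			kernel_fpu_end();

			nblocks -= blocks;
			block += blocks * BLAKE2S_BLOCK_SIZE;
		} while (nblocks);
	}

The chacha20 conversion that follows applies the same structure, replacing the old chacha_use_avx2/chacha_use_avx512vl booleans and ifdef nesting with static keys and IS_ENABLED() checks.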
@@ -7,7 +7,7 @@ */ #include <crypto/algapi.h> -#include <crypto/chacha.h> +#include <crypto/internal/chacha.h> #include <crypto/internal/simd.h> #include <crypto/internal/skcipher.h> #include <linux/kernel.h> @@ -21,24 +21,24 @@ asmlinkage void chacha_block_xor_ssse3(u32 *state, u8 *dst, const u8 *src, asmlinkage void chacha_4block_xor_ssse3(u32 *state, u8 *dst, const u8 *src, unsigned int len, int nrounds); asmlinkage void hchacha_block_ssse3(const u32 *state, u32 *out, int nrounds); -#ifdef CONFIG_AS_AVX2 + asmlinkage void chacha_2block_xor_avx2(u32 *state, u8 *dst, const u8 *src, unsigned int len, int nrounds); asmlinkage void chacha_4block_xor_avx2(u32 *state, u8 *dst, const u8 *src, unsigned int len, int nrounds); asmlinkage void chacha_8block_xor_avx2(u32 *state, u8 *dst, const u8 *src, unsigned int len, int nrounds); -static bool chacha_use_avx2; -#ifdef CONFIG_AS_AVX512 + asmlinkage void chacha_2block_xor_avx512vl(u32 *state, u8 *dst, const u8 *src, unsigned int len, int nrounds); asmlinkage void chacha_4block_xor_avx512vl(u32 *state, u8 *dst, const u8 *src, unsigned int len, int nrounds); asmlinkage void chacha_8block_xor_avx512vl(u32 *state, u8 *dst, const u8 *src, unsigned int len, int nrounds); -static bool chacha_use_avx512vl; -#endif -#endif + +static __ro_after_init DEFINE_STATIC_KEY_FALSE(chacha_use_simd); +static __ro_after_init DEFINE_STATIC_KEY_FALSE(chacha_use_avx2); +static __ro_after_init DEFINE_STATIC_KEY_FALSE(chacha_use_avx512vl); static unsigned int chacha_advance(unsigned int len, unsigned int maxblocks) { @@ -49,9 +49,8 @@ static unsigned int chacha_advance(unsigned int len, unsigned int maxblocks) static void chacha_dosimd(u32 *state, u8 *dst, const u8 *src, unsigned int bytes, int nrounds) { -#ifdef CONFIG_AS_AVX2 -#ifdef CONFIG_AS_AVX512 - if (chacha_use_avx512vl) { + if (IS_ENABLED(CONFIG_AS_AVX512) && + static_branch_likely(&chacha_use_avx512vl)) { while (bytes >= CHACHA_BLOCK_SIZE * 8) { chacha_8block_xor_avx512vl(state, dst, src, bytes, nrounds); @@ -79,8 +78,9 @@ static void chacha_dosimd(u32 *state, u8 *dst, const u8 *src, return; } } -#endif - if (chacha_use_avx2) { + + if (IS_ENABLED(CONFIG_AS_AVX2) && + static_branch_likely(&chacha_use_avx2)) { while (bytes >= CHACHA_BLOCK_SIZE * 8) { chacha_8block_xor_avx2(state, dst, src, bytes, nrounds); bytes -= CHACHA_BLOCK_SIZE * 8; @@ -104,7 +104,7 @@ static void chacha_dosimd(u32 *state, u8 *dst, const u8 *src, return; } } -#endif + while (bytes >= CHACHA_BLOCK_SIZE * 4) { chacha_4block_xor_ssse3(state, dst, src, bytes, nrounds); bytes -= CHACHA_BLOCK_SIZE * 4; @@ -123,37 +123,76 @@ static void chacha_dosimd(u32 *state, u8 *dst, const u8 *src, } } -static int chacha_simd_stream_xor(struct skcipher_walk *walk, +void hchacha_block_arch(const u32 *state, u32 *stream, int nrounds) +{ + state = PTR_ALIGN(state, CHACHA_STATE_ALIGN); + + if (!static_branch_likely(&chacha_use_simd) || !crypto_simd_usable()) { + hchacha_block_generic(state, stream, nrounds); + } else { + kernel_fpu_begin(); + hchacha_block_ssse3(state, stream, nrounds); + kernel_fpu_end(); + } +} +EXPORT_SYMBOL(hchacha_block_arch); + +void chacha_init_arch(u32 *state, const u32 *key, const u8 *iv) +{ + state = PTR_ALIGN(state, CHACHA_STATE_ALIGN); + + chacha_init_generic(state, key, iv); +} +EXPORT_SYMBOL(chacha_init_arch); + +void chacha_crypt_arch(u32 *state, u8 *dst, const u8 *src, unsigned int bytes, + int nrounds) +{ + state = PTR_ALIGN(state, CHACHA_STATE_ALIGN); + + if (!static_branch_likely(&chacha_use_simd) || !crypto_simd_usable() || + bytes 
<= CHACHA_BLOCK_SIZE) + return chacha_crypt_generic(state, dst, src, bytes, nrounds); + + kernel_fpu_begin(); + chacha_dosimd(state, dst, src, bytes, nrounds); + kernel_fpu_end(); +} +EXPORT_SYMBOL(chacha_crypt_arch); + +static int chacha_simd_stream_xor(struct skcipher_request *req, const struct chacha_ctx *ctx, const u8 *iv) { u32 *state, state_buf[16 + 2] __aligned(8); - int next_yield = 4096; /* bytes until next FPU yield */ - int err = 0; + struct skcipher_walk walk; + int err; + + err = skcipher_walk_virt(&walk, req, false); BUILD_BUG_ON(CHACHA_STATE_ALIGN != 16); state = PTR_ALIGN(state_buf + 0, CHACHA_STATE_ALIGN); - crypto_chacha_init(state, ctx, iv); - - while (walk->nbytes > 0) { - unsigned int nbytes = walk->nbytes; + chacha_init_generic(state, ctx->key, iv); - if (nbytes < walk->total) { - nbytes = round_down(nbytes, walk->stride); - next_yield -= nbytes; - } + while (walk.nbytes > 0) { + unsigned int nbytes = walk.nbytes; - chacha_dosimd(state, walk->dst.virt.addr, walk->src.virt.addr, - nbytes, ctx->nrounds); + if (nbytes < walk.total) + nbytes = round_down(nbytes, walk.stride); - if (next_yield <= 0) { - /* temporarily allow preemption */ - kernel_fpu_end(); + if (!static_branch_likely(&chacha_use_simd) || + !crypto_simd_usable()) { + chacha_crypt_generic(state, walk.dst.virt.addr, + walk.src.virt.addr, nbytes, + ctx->nrounds); + } else { kernel_fpu_begin(); - next_yield = 4096; + chacha_dosimd(state, walk.dst.virt.addr, + walk.src.virt.addr, nbytes, + ctx->nrounds); + kernel_fpu_end(); } - - err = skcipher_walk_done(walk, walk->nbytes - nbytes); + err = skcipher_walk_done(&walk, walk.nbytes - nbytes); } return err; @@ -163,55 +202,34 @@ static int chacha_simd(struct skcipher_request *req) { struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); struct chacha_ctx *ctx = crypto_skcipher_ctx(tfm); - struct skcipher_walk walk; - int err; - - if (req->cryptlen <= CHACHA_BLOCK_SIZE || !crypto_simd_usable()) - return crypto_chacha_crypt(req); - err = skcipher_walk_virt(&walk, req, true); - if (err) - return err; - - kernel_fpu_begin(); - err = chacha_simd_stream_xor(&walk, ctx, req->iv); - kernel_fpu_end(); - return err; + return chacha_simd_stream_xor(req, ctx, req->iv); } static int xchacha_simd(struct skcipher_request *req) { struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); struct chacha_ctx *ctx = crypto_skcipher_ctx(tfm); - struct skcipher_walk walk; - struct chacha_ctx subctx; u32 *state, state_buf[16 + 2] __aligned(8); + struct chacha_ctx subctx; u8 real_iv[16]; - int err; - - if (req->cryptlen <= CHACHA_BLOCK_SIZE || !crypto_simd_usable()) - return crypto_xchacha_crypt(req); - - err = skcipher_walk_virt(&walk, req, true); - if (err) - return err; BUILD_BUG_ON(CHACHA_STATE_ALIGN != 16); state = PTR_ALIGN(state_buf + 0, CHACHA_STATE_ALIGN); - crypto_chacha_init(state, ctx, req->iv); - - kernel_fpu_begin(); - - hchacha_block_ssse3(state, subctx.key, ctx->nrounds); + chacha_init_generic(state, ctx->key, req->iv); + + if (req->cryptlen > CHACHA_BLOCK_SIZE && crypto_simd_usable()) { + kernel_fpu_begin(); + hchacha_block_ssse3(state, subctx.key, ctx->nrounds); + kernel_fpu_end(); + } else { + hchacha_block_generic(state, subctx.key, ctx->nrounds); + } subctx.nrounds = ctx->nrounds; memcpy(&real_iv[0], req->iv + 24, 8); memcpy(&real_iv[8], req->iv + 16, 8); - err = chacha_simd_stream_xor(&walk, &subctx, real_iv); - - kernel_fpu_end(); - - return err; + return chacha_simd_stream_xor(req, &subctx, real_iv); } static struct skcipher_alg algs[] = { @@ -227,7 +245,7 @@ 
static struct skcipher_alg algs[] = { .max_keysize = CHACHA_KEY_SIZE, .ivsize = CHACHA_IV_SIZE, .chunksize = CHACHA_BLOCK_SIZE, - .setkey = crypto_chacha20_setkey, + .setkey = chacha20_setkey, .encrypt = chacha_simd, .decrypt = chacha_simd, }, { @@ -242,7 +260,7 @@ static struct skcipher_alg algs[] = { .max_keysize = CHACHA_KEY_SIZE, .ivsize = XCHACHA_IV_SIZE, .chunksize = CHACHA_BLOCK_SIZE, - .setkey = crypto_chacha20_setkey, + .setkey = chacha20_setkey, .encrypt = xchacha_simd, .decrypt = xchacha_simd, }, { @@ -257,7 +275,7 @@ static struct skcipher_alg algs[] = { .max_keysize = CHACHA_KEY_SIZE, .ivsize = XCHACHA_IV_SIZE, .chunksize = CHACHA_BLOCK_SIZE, - .setkey = crypto_chacha12_setkey, + .setkey = chacha12_setkey, .encrypt = xchacha_simd, .decrypt = xchacha_simd, }, @@ -266,24 +284,28 @@ static struct skcipher_alg algs[] = { static int __init chacha_simd_mod_init(void) { if (!boot_cpu_has(X86_FEATURE_SSSE3)) - return -ENODEV; - -#ifdef CONFIG_AS_AVX2 - chacha_use_avx2 = boot_cpu_has(X86_FEATURE_AVX) && - boot_cpu_has(X86_FEATURE_AVX2) && - cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM, NULL); -#ifdef CONFIG_AS_AVX512 - chacha_use_avx512vl = chacha_use_avx2 && - boot_cpu_has(X86_FEATURE_AVX512VL) && - boot_cpu_has(X86_FEATURE_AVX512BW); /* kmovq */ -#endif -#endif + return 0; + + static_branch_enable(&chacha_use_simd); + + if (IS_ENABLED(CONFIG_AS_AVX2) && + boot_cpu_has(X86_FEATURE_AVX) && + boot_cpu_has(X86_FEATURE_AVX2) && + cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM, NULL)) { + static_branch_enable(&chacha_use_avx2); + + if (IS_ENABLED(CONFIG_AS_AVX512) && + boot_cpu_has(X86_FEATURE_AVX512VL) && + boot_cpu_has(X86_FEATURE_AVX512BW)) /* kmovq */ + static_branch_enable(&chacha_use_avx512vl); + } return crypto_register_skciphers(algs, ARRAY_SIZE(algs)); } static void __exit chacha_simd_mod_fini(void) { - crypto_unregister_skciphers(algs, ARRAY_SIZE(algs)); + if (boot_cpu_has(X86_FEATURE_SSSE3)) + crypto_unregister_skciphers(algs, ARRAY_SIZE(algs)); } module_init(chacha_simd_mod_init); diff --git a/arch/x86/crypto/curve25519-x86_64.c b/arch/x86/crypto/curve25519-x86_64.c new file mode 100644 index 000000000000..a52a3fb15727 --- /dev/null +++ b/arch/x86/crypto/curve25519-x86_64.c @@ -0,0 +1,2475 @@ +// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause +/* + * Copyright (c) 2017 Armando Faz <armfazh@ic.unicamp.br>. All Rights Reserved. + * Copyright (C) 2018-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved. + * Copyright (C) 2018 Samuel Neves <sneves@dei.uc.pt>. All Rights Reserved. 
+ */ + +#include <crypto/curve25519.h> +#include <crypto/internal/kpp.h> + +#include <linux/types.h> +#include <linux/jump_label.h> +#include <linux/kernel.h> +#include <linux/module.h> + +#include <asm/cpufeature.h> +#include <asm/processor.h> + +static __ro_after_init DEFINE_STATIC_KEY_FALSE(curve25519_use_bmi2); +static __ro_after_init DEFINE_STATIC_KEY_FALSE(curve25519_use_adx); + +enum { NUM_WORDS_ELTFP25519 = 4 }; +typedef __aligned(32) u64 eltfp25519_1w[NUM_WORDS_ELTFP25519]; +typedef __aligned(32) u64 eltfp25519_1w_buffer[2 * NUM_WORDS_ELTFP25519]; + +#define mul_eltfp25519_1w_adx(c, a, b) do { \ + mul_256x256_integer_adx(m.buffer, a, b); \ + red_eltfp25519_1w_adx(c, m.buffer); \ +} while (0) + +#define mul_eltfp25519_1w_bmi2(c, a, b) do { \ + mul_256x256_integer_bmi2(m.buffer, a, b); \ + red_eltfp25519_1w_bmi2(c, m.buffer); \ +} while (0) + +#define sqr_eltfp25519_1w_adx(a) do { \ + sqr_256x256_integer_adx(m.buffer, a); \ + red_eltfp25519_1w_adx(a, m.buffer); \ +} while (0) + +#define sqr_eltfp25519_1w_bmi2(a) do { \ + sqr_256x256_integer_bmi2(m.buffer, a); \ + red_eltfp25519_1w_bmi2(a, m.buffer); \ +} while (0) + +#define mul_eltfp25519_2w_adx(c, a, b) do { \ + mul2_256x256_integer_adx(m.buffer, a, b); \ + red_eltfp25519_2w_adx(c, m.buffer); \ +} while (0) + +#define mul_eltfp25519_2w_bmi2(c, a, b) do { \ + mul2_256x256_integer_bmi2(m.buffer, a, b); \ + red_eltfp25519_2w_bmi2(c, m.buffer); \ +} while (0) + +#define sqr_eltfp25519_2w_adx(a) do { \ + sqr2_256x256_integer_adx(m.buffer, a); \ + red_eltfp25519_2w_adx(a, m.buffer); \ +} while (0) + +#define sqr_eltfp25519_2w_bmi2(a) do { \ + sqr2_256x256_integer_bmi2(m.buffer, a); \ + red_eltfp25519_2w_bmi2(a, m.buffer); \ +} while (0) + +#define sqrn_eltfp25519_1w_adx(a, times) do { \ + int ____counter = (times); \ + while (____counter-- > 0) \ + sqr_eltfp25519_1w_adx(a); \ +} while (0) + +#define sqrn_eltfp25519_1w_bmi2(a, times) do { \ + int ____counter = (times); \ + while (____counter-- > 0) \ + sqr_eltfp25519_1w_bmi2(a); \ +} while (0) + +#define copy_eltfp25519_1w(C, A) do { \ + (C)[0] = (A)[0]; \ + (C)[1] = (A)[1]; \ + (C)[2] = (A)[2]; \ + (C)[3] = (A)[3]; \ +} while (0) + +#define setzero_eltfp25519_1w(C) do { \ + (C)[0] = 0; \ + (C)[1] = 0; \ + (C)[2] = 0; \ + (C)[3] = 0; \ +} while (0) + +__aligned(32) static const u64 table_ladder_8k[252 * NUM_WORDS_ELTFP25519] = { + /* 1 */ 0xfffffffffffffff3UL, 0xffffffffffffffffUL, + 0xffffffffffffffffUL, 0x5fffffffffffffffUL, + /* 2 */ 0x6b8220f416aafe96UL, 0x82ebeb2b4f566a34UL, + 0xd5a9a5b075a5950fUL, 0x5142b2cf4b2488f4UL, + /* 3 */ 0x6aaebc750069680cUL, 0x89cf7820a0f99c41UL, + 0x2a58d9183b56d0f4UL, 0x4b5aca80e36011a4UL, + /* 4 */ 0x329132348c29745dUL, 0xf4a2e616e1642fd7UL, + 0x1e45bb03ff67bc34UL, 0x306912d0f42a9b4aUL, + /* 5 */ 0xff886507e6af7154UL, 0x04f50e13dfeec82fUL, + 0xaa512fe82abab5ceUL, 0x174e251a68d5f222UL, + /* 6 */ 0xcf96700d82028898UL, 0x1743e3370a2c02c5UL, + 0x379eec98b4e86eaaUL, 0x0c59888a51e0482eUL, + /* 7 */ 0xfbcbf1d699b5d189UL, 0xacaef0d58e9fdc84UL, + 0xc1c20d06231f7614UL, 0x2938218da274f972UL, + /* 8 */ 0xf6af49beff1d7f18UL, 0xcc541c22387ac9c2UL, + 0x96fcc9ef4015c56bUL, 0x69c1627c690913a9UL, + /* 9 */ 0x7a86fd2f4733db0eUL, 0xfdb8c4f29e087de9UL, + 0x095e4b1a8ea2a229UL, 0x1ad7a7c829b37a79UL, + /* 10 */ 0x342d89cad17ea0c0UL, 0x67bedda6cced2051UL, + 0x19ca31bf2bb42f74UL, 0x3df7b4c84980acbbUL, + /* 11 */ 0xa8c6444dc80ad883UL, 0xb91e440366e3ab85UL, + 0xc215cda00164f6d8UL, 0x3d867c6ef247e668UL, + /* 12 */ 0xc7dd582bcc3e658cUL, 0xfd2c4748ee0e5528UL, + 0xa0fd9b95cc9f4f71UL, 
0x7529d871b0675ddfUL, + /* 13 */ 0xb8f568b42d3cbd78UL, 0x1233011b91f3da82UL, + 0x2dce6ccd4a7c3b62UL, 0x75e7fc8e9e498603UL, + /* 14 */ 0x2f4f13f1fcd0b6ecUL, 0xf1a8ca1f29ff7a45UL, + 0xc249c1a72981e29bUL, 0x6ebe0dbb8c83b56aUL, + /* 15 */ 0x7114fa8d170bb222UL, 0x65a2dcd5bf93935fUL, + 0xbdc41f68b59c979aUL, 0x2f0eef79a2ce9289UL, + /* 16 */ 0x42ecbf0c083c37ceUL, 0x2930bc09ec496322UL, + 0xf294b0c19cfeac0dUL, 0x3780aa4bedfabb80UL, + /* 17 */ 0x56c17d3e7cead929UL, 0xe7cb4beb2e5722c5UL, + 0x0ce931732dbfe15aUL, 0x41b883c7621052f8UL, + /* 18 */ 0xdbf75ca0c3d25350UL, 0x2936be086eb1e351UL, + 0xc936e03cb4a9b212UL, 0x1d45bf82322225aaUL, + /* 19 */ 0xe81ab1036a024cc5UL, 0xe212201c304c9a72UL, + 0xc5d73fba6832b1fcUL, 0x20ffdb5a4d839581UL, + /* 20 */ 0xa283d367be5d0fadUL, 0x6c2b25ca8b164475UL, + 0x9d4935467caaf22eUL, 0x5166408eee85ff49UL, + /* 21 */ 0x3c67baa2fab4e361UL, 0xb3e433c67ef35cefUL, + 0x5259729241159b1cUL, 0x6a621892d5b0ab33UL, + /* 22 */ 0x20b74a387555cdcbUL, 0x532aa10e1208923fUL, + 0xeaa17b7762281dd1UL, 0x61ab3443f05c44bfUL, + /* 23 */ 0x257a6c422324def8UL, 0x131c6c1017e3cf7fUL, + 0x23758739f630a257UL, 0x295a407a01a78580UL, + /* 24 */ 0xf8c443246d5da8d9UL, 0x19d775450c52fa5dUL, + 0x2afcfc92731bf83dUL, 0x7d10c8e81b2b4700UL, + /* 25 */ 0xc8e0271f70baa20bUL, 0x993748867ca63957UL, + 0x5412efb3cb7ed4bbUL, 0x3196d36173e62975UL, + /* 26 */ 0xde5bcad141c7dffcUL, 0x47cc8cd2b395c848UL, + 0xa34cd942e11af3cbUL, 0x0256dbf2d04ecec2UL, + /* 27 */ 0x875ab7e94b0e667fUL, 0xcad4dd83c0850d10UL, + 0x47f12e8f4e72c79fUL, 0x5f1a87bb8c85b19bUL, + /* 28 */ 0x7ae9d0b6437f51b8UL, 0x12c7ce5518879065UL, + 0x2ade09fe5cf77aeeUL, 0x23a05a2f7d2c5627UL, + /* 29 */ 0x5908e128f17c169aUL, 0xf77498dd8ad0852dUL, + 0x74b4c4ceab102f64UL, 0x183abadd10139845UL, + /* 30 */ 0xb165ba8daa92aaacUL, 0xd5c5ef9599386705UL, + 0xbe2f8f0cf8fc40d1UL, 0x2701e635ee204514UL, + /* 31 */ 0x629fa80020156514UL, 0xf223868764a8c1ceUL, + 0x5b894fff0b3f060eUL, 0x60d9944cf708a3faUL, + /* 32 */ 0xaeea001a1c7a201fUL, 0xebf16a633ee2ce63UL, + 0x6f7709594c7a07e1UL, 0x79b958150d0208cbUL, + /* 33 */ 0x24b55e5301d410e7UL, 0xe3a34edff3fdc84dUL, + 0xd88768e4904032d8UL, 0x131384427b3aaeecUL, + /* 34 */ 0x8405e51286234f14UL, 0x14dc4739adb4c529UL, + 0xb8a2b5b250634ffdUL, 0x2fe2a94ad8a7ff93UL, + /* 35 */ 0xec5c57efe843faddUL, 0x2843ce40f0bb9918UL, + 0xa4b561d6cf3d6305UL, 0x743629bde8fb777eUL, + /* 36 */ 0x343edd46bbaf738fUL, 0xed981828b101a651UL, + 0xa401760b882c797aUL, 0x1fc223e28dc88730UL, + /* 37 */ 0x48604e91fc0fba0eUL, 0xb637f78f052c6fa4UL, + 0x91ccac3d09e9239cUL, 0x23f7eed4437a687cUL, + /* 38 */ 0x5173b1118d9bd800UL, 0x29d641b63189d4a7UL, + 0xfdbf177988bbc586UL, 0x2959894fcad81df5UL, + /* 39 */ 0xaebc8ef3b4bbc899UL, 0x4148995ab26992b9UL, + 0x24e20b0134f92cfbUL, 0x40d158894a05dee8UL, + /* 40 */ 0x46b00b1185af76f6UL, 0x26bac77873187a79UL, + 0x3dc0bf95ab8fff5fUL, 0x2a608bd8945524d7UL, + /* 41 */ 0x26449588bd446302UL, 0x7c4bc21c0388439cUL, + 0x8e98a4f383bd11b2UL, 0x26218d7bc9d876b9UL, + /* 42 */ 0xe3081542997c178aUL, 0x3c2d29a86fb6606fUL, + 0x5c217736fa279374UL, 0x7dde05734afeb1faUL, + /* 43 */ 0x3bf10e3906d42babUL, 0xe4f7803e1980649cUL, + 0xe6053bf89595bf7aUL, 0x394faf38da245530UL, + /* 44 */ 0x7a8efb58896928f4UL, 0xfbc778e9cc6a113cUL, + 0x72670ce330af596fUL, 0x48f222a81d3d6cf7UL, + /* 45 */ 0xf01fce410d72caa7UL, 0x5a20ecc7213b5595UL, + 0x7bc21165c1fa1483UL, 0x07f89ae31da8a741UL, + /* 46 */ 0x05d2c2b4c6830ff9UL, 0xd43e330fc6316293UL, + 0xa5a5590a96d3a904UL, 0x705edb91a65333b6UL, + /* 47 */ 0x048ee15e0bb9a5f7UL, 0x3240cfca9e0aaf5dUL, + 0x8f4b71ceedc4a40bUL, 
0x621c0da3de544a6dUL, + /* 48 */ 0x92872836a08c4091UL, 0xce8375b010c91445UL, + 0x8a72eb524f276394UL, 0x2667fcfa7ec83635UL, + /* 49 */ 0x7f4c173345e8752aUL, 0x061b47feee7079a5UL, + 0x25dd9afa9f86ff34UL, 0x3780cef5425dc89cUL, + /* 50 */ 0x1a46035a513bb4e9UL, 0x3e1ef379ac575adaUL, + 0xc78c5f1c5fa24b50UL, 0x321a967634fd9f22UL, + /* 51 */ 0x946707b8826e27faUL, 0x3dca84d64c506fd0UL, + 0xc189218075e91436UL, 0x6d9284169b3b8484UL, + /* 52 */ 0x3a67e840383f2ddfUL, 0x33eec9a30c4f9b75UL, + 0x3ec7c86fa783ef47UL, 0x26ec449fbac9fbc4UL, + /* 53 */ 0x5c0f38cba09b9e7dUL, 0x81168cc762a3478cUL, + 0x3e23b0d306fc121cUL, 0x5a238aa0a5efdcddUL, + /* 54 */ 0x1ba26121c4ea43ffUL, 0x36f8c77f7c8832b5UL, + 0x88fbea0b0adcf99aUL, 0x5ca9938ec25bebf9UL, + /* 55 */ 0xd5436a5e51fccda0UL, 0x1dbc4797c2cd893bUL, + 0x19346a65d3224a08UL, 0x0f5034e49b9af466UL, + /* 56 */ 0xf23c3967a1e0b96eUL, 0xe58b08fa867a4d88UL, + 0xfb2fabc6a7341679UL, 0x2a75381eb6026946UL, + /* 57 */ 0xc80a3be4c19420acUL, 0x66b1f6c681f2b6dcUL, + 0x7cf7036761e93388UL, 0x25abbbd8a660a4c4UL, + /* 58 */ 0x91ea12ba14fd5198UL, 0x684950fc4a3cffa9UL, + 0xf826842130f5ad28UL, 0x3ea988f75301a441UL, + /* 59 */ 0xc978109a695f8c6fUL, 0x1746eb4a0530c3f3UL, + 0x444d6d77b4459995UL, 0x75952b8c054e5cc7UL, + /* 60 */ 0xa3703f7915f4d6aaUL, 0x66c346202f2647d8UL, + 0xd01469df811d644bUL, 0x77fea47d81a5d71fUL, + /* 61 */ 0xc5e9529ef57ca381UL, 0x6eeeb4b9ce2f881aUL, + 0xb6e91a28e8009bd6UL, 0x4b80be3e9afc3fecUL, + /* 62 */ 0x7e3773c526aed2c5UL, 0x1b4afcb453c9a49dUL, + 0xa920bdd7baffb24dUL, 0x7c54699f122d400eUL, + /* 63 */ 0xef46c8e14fa94bc8UL, 0xe0b074ce2952ed5eUL, + 0xbea450e1dbd885d5UL, 0x61b68649320f712cUL, + /* 64 */ 0x8a485f7309ccbdd1UL, 0xbd06320d7d4d1a2dUL, + 0x25232973322dbef4UL, 0x445dc4758c17f770UL, + /* 65 */ 0xdb0434177cc8933cUL, 0xed6fe82175ea059fUL, + 0x1efebefdc053db34UL, 0x4adbe867c65daf99UL, + /* 66 */ 0x3acd71a2a90609dfUL, 0xe5e991856dd04050UL, + 0x1ec69b688157c23cUL, 0x697427f6885cfe4dUL, + /* 67 */ 0xd7be7b9b65e1a851UL, 0xa03d28d522c536ddUL, + 0x28399d658fd2b645UL, 0x49e5b7e17c2641e1UL, + /* 68 */ 0x6f8c3a98700457a4UL, 0x5078f0a25ebb6778UL, + 0xd13c3ccbc382960fUL, 0x2e003258a7df84b1UL, + /* 69 */ 0x8ad1f39be6296a1cUL, 0xc1eeaa652a5fbfb2UL, + 0x33ee0673fd26f3cbUL, 0x59256173a69d2cccUL, + /* 70 */ 0x41ea07aa4e18fc41UL, 0xd9fc19527c87a51eUL, + 0xbdaacb805831ca6fUL, 0x445b652dc916694fUL, + /* 71 */ 0xce92a3a7f2172315UL, 0x1edc282de11b9964UL, + 0xa1823aafe04c314aUL, 0x790a2d94437cf586UL, + /* 72 */ 0x71c447fb93f6e009UL, 0x8922a56722845276UL, + 0xbf70903b204f5169UL, 0x2f7a89891ba319feUL, + /* 73 */ 0x02a08eb577e2140cUL, 0xed9a4ed4427bdcf4UL, + 0x5253ec44e4323cd1UL, 0x3e88363c14e9355bUL, + /* 74 */ 0xaa66c14277110b8cUL, 0x1ae0391610a23390UL, + 0x2030bd12c93fc2a2UL, 0x3ee141579555c7abUL, + /* 75 */ 0x9214de3a6d6e7d41UL, 0x3ccdd88607f17efeUL, + 0x674f1288f8e11217UL, 0x5682250f329f93d0UL, + /* 76 */ 0x6cf00b136d2e396eUL, 0x6e4cf86f1014debfUL, + 0x5930b1b5bfcc4e83UL, 0x047069b48aba16b6UL, + /* 77 */ 0x0d4ce4ab69b20793UL, 0xb24db91a97d0fb9eUL, + 0xcdfa50f54e00d01dUL, 0x221b1085368bddb5UL, + /* 78 */ 0xe7e59468b1e3d8d2UL, 0x53c56563bd122f93UL, + 0xeee8a903e0663f09UL, 0x61efa662cbbe3d42UL, + /* 79 */ 0x2cf8ddddde6eab2aUL, 0x9bf80ad51435f231UL, + 0x5deadacec9f04973UL, 0x29275b5d41d29b27UL, + /* 80 */ 0xcfde0f0895ebf14fUL, 0xb9aab96b054905a7UL, + 0xcae80dd9a1c420fdUL, 0x0a63bf2f1673bbc7UL, + /* 81 */ 0x092f6e11958fbc8cUL, 0x672a81e804822fadUL, + 0xcac8351560d52517UL, 0x6f3f7722c8f192f8UL, + /* 82 */ 0xf8ba90ccc2e894b7UL, 0x2c7557a438ff9f0dUL, + 0x894d1d855ae52359UL, 
0x68e122157b743d69UL, + /* 83 */ 0xd87e5570cfb919f3UL, 0x3f2cdecd95798db9UL, + 0x2121154710c0a2ceUL, 0x3c66a115246dc5b2UL, + /* 84 */ 0xcbedc562294ecb72UL, 0xba7143c36a280b16UL, + 0x9610c2efd4078b67UL, 0x6144735d946a4b1eUL, + /* 85 */ 0x536f111ed75b3350UL, 0x0211db8c2041d81bUL, + 0xf93cb1000e10413cUL, 0x149dfd3c039e8876UL, + /* 86 */ 0xd479dde46b63155bUL, 0xb66e15e93c837976UL, + 0xdafde43b1f13e038UL, 0x5fafda1a2e4b0b35UL, + /* 87 */ 0x3600bbdf17197581UL, 0x3972050bbe3cd2c2UL, + 0x5938906dbdd5be86UL, 0x34fce5e43f9b860fUL, + /* 88 */ 0x75a8a4cd42d14d02UL, 0x828dabc53441df65UL, + 0x33dcabedd2e131d3UL, 0x3ebad76fb814d25fUL, + /* 89 */ 0xd4906f566f70e10fUL, 0x5d12f7aa51690f5aUL, + 0x45adb16e76cefcf2UL, 0x01f768aead232999UL, + /* 90 */ 0x2b6cc77b6248febdUL, 0x3cd30628ec3aaffdUL, + 0xce1c0b80d4ef486aUL, 0x4c3bff2ea6f66c23UL, + /* 91 */ 0x3f2ec4094aeaeb5fUL, 0x61b19b286e372ca7UL, + 0x5eefa966de2a701dUL, 0x23b20565de55e3efUL, + /* 92 */ 0xe301ca5279d58557UL, 0x07b2d4ce27c2874fUL, + 0xa532cd8a9dcf1d67UL, 0x2a52fee23f2bff56UL, + /* 93 */ 0x8624efb37cd8663dUL, 0xbbc7ac20ffbd7594UL, + 0x57b85e9c82d37445UL, 0x7b3052cb86a6ec66UL, + /* 94 */ 0x3482f0ad2525e91eUL, 0x2cb68043d28edca0UL, + 0xaf4f6d052e1b003aUL, 0x185f8c2529781b0aUL, + /* 95 */ 0xaa41de5bd80ce0d6UL, 0x9407b2416853e9d6UL, + 0x563ec36e357f4c3aUL, 0x4cc4b8dd0e297bceUL, + /* 96 */ 0xa2fc1a52ffb8730eUL, 0x1811f16e67058e37UL, + 0x10f9a366cddf4ee1UL, 0x72f4a0c4a0b9f099UL, + /* 97 */ 0x8c16c06f663f4ea7UL, 0x693b3af74e970fbaUL, + 0x2102e7f1d69ec345UL, 0x0ba53cbc968a8089UL, + /* 98 */ 0xca3d9dc7fea15537UL, 0x4c6824bb51536493UL, + 0xb9886314844006b1UL, 0x40d2a72ab454cc60UL, + /* 99 */ 0x5936a1b712570975UL, 0x91b9d648debda657UL, + 0x3344094bb64330eaUL, 0x006ba10d12ee51d0UL, + /* 100 */ 0x19228468f5de5d58UL, 0x0eb12f4c38cc05b0UL, + 0xa1039f9dd5601990UL, 0x4502d4ce4fff0e0bUL, + /* 101 */ 0xeb2054106837c189UL, 0xd0f6544c6dd3b93cUL, + 0x40727064c416d74fUL, 0x6e15c6114b502ef0UL, + /* 102 */ 0x4df2a398cfb1a76bUL, 0x11256c7419f2f6b1UL, + 0x4a497962066e6043UL, 0x705b3aab41355b44UL, + /* 103 */ 0x365ef536d797b1d8UL, 0x00076bd622ddf0dbUL, + 0x3bbf33b0e0575a88UL, 0x3777aa05c8e4ca4dUL, + /* 104 */ 0x392745c85578db5fUL, 0x6fda4149dbae5ae2UL, + 0xb1f0b00b8adc9867UL, 0x09963437d36f1da3UL, + /* 105 */ 0x7e824e90a5dc3853UL, 0xccb5f6641f135cbdUL, + 0x6736d86c87ce8fccUL, 0x625f3ce26604249fUL, + /* 106 */ 0xaf8ac8059502f63fUL, 0x0c05e70a2e351469UL, + 0x35292e9c764b6305UL, 0x1a394360c7e23ac3UL, + /* 107 */ 0xd5c6d53251183264UL, 0x62065abd43c2b74fUL, + 0xb5fbf5d03b973f9bUL, 0x13a3da3661206e5eUL, + /* 108 */ 0xc6bd5837725d94e5UL, 0x18e30912205016c5UL, + 0x2088ce1570033c68UL, 0x7fba1f495c837987UL, + /* 109 */ 0x5a8c7423f2f9079dUL, 0x1735157b34023fc5UL, + 0xe4f9b49ad2fab351UL, 0x6691ff72c878e33cUL, + /* 110 */ 0x122c2adedc5eff3eUL, 0xf8dd4bf1d8956cf4UL, + 0xeb86205d9e9e5bdaUL, 0x049b92b9d975c743UL, + /* 111 */ 0xa5379730b0f6c05aUL, 0x72a0ffacc6f3a553UL, + 0xb0032c34b20dcd6dUL, 0x470e9dbc88d5164aUL, + /* 112 */ 0xb19cf10ca237c047UL, 0xb65466711f6c81a2UL, + 0xb3321bd16dd80b43UL, 0x48c14f600c5fbe8eUL, + /* 113 */ 0x66451c264aa6c803UL, 0xb66e3904a4fa7da6UL, + 0xd45f19b0b3128395UL, 0x31602627c3c9bc10UL, + /* 114 */ 0x3120dc4832e4e10dUL, 0xeb20c46756c717f7UL, + 0x00f52e3f67280294UL, 0x566d4fc14730c509UL, + /* 115 */ 0x7e3a5d40fd837206UL, 0xc1e926dc7159547aUL, + 0x216730fba68d6095UL, 0x22e8c3843f69cea7UL, + /* 116 */ 0x33d074e8930e4b2bUL, 0xb6e4350e84d15816UL, + 0x5534c26ad6ba2365UL, 0x7773c12f89f1f3f3UL, + /* 117 */ 0x8cba404da57962aaUL, 0x5b9897a81999ce56UL, + 0x508e862f121692fcUL, 
0x3a81907fa093c291UL, + /* 118 */ 0x0dded0ff4725a510UL, 0x10d8cc10673fc503UL, + 0x5b9d151c9f1f4e89UL, 0x32a5c1d5cb09a44cUL, + /* 119 */ 0x1e0aa442b90541fbUL, 0x5f85eb7cc1b485dbUL, + 0xbee595ce8a9df2e5UL, 0x25e496c722422236UL, + /* 120 */ 0x5edf3c46cd0fe5b9UL, 0x34e75a7ed2a43388UL, + 0xe488de11d761e352UL, 0x0e878a01a085545cUL, + /* 121 */ 0xba493c77e021bb04UL, 0x2b4d1843c7df899aUL, + 0x9ea37a487ae80d67UL, 0x67a9958011e41794UL, + /* 122 */ 0x4b58051a6697b065UL, 0x47e33f7d8d6ba6d4UL, + 0xbb4da8d483ca46c1UL, 0x68becaa181c2db0dUL, + /* 123 */ 0x8d8980e90b989aa5UL, 0xf95eb14a2c93c99bUL, + 0x51c6c7c4796e73a2UL, 0x6e228363b5efb569UL, + /* 124 */ 0xc6bbc0b02dd624c8UL, 0x777eb47dec8170eeUL, + 0x3cde15a004cfafa9UL, 0x1dc6bc087160bf9bUL, + /* 125 */ 0x2e07e043eec34002UL, 0x18e9fc677a68dc7fUL, + 0xd8da03188bd15b9aUL, 0x48fbc3bb00568253UL, + /* 126 */ 0x57547d4cfb654ce1UL, 0xd3565b82a058e2adUL, + 0xf63eaf0bbf154478UL, 0x47531ef114dfbb18UL, + /* 127 */ 0xe1ec630a4278c587UL, 0x5507d546ca8e83f3UL, + 0x85e135c63adc0c2bUL, 0x0aa7efa85682844eUL, + /* 128 */ 0x72691ba8b3e1f615UL, 0x32b4e9701fbe3ffaUL, + 0x97b6d92e39bb7868UL, 0x2cfe53dea02e39e8UL, + /* 129 */ 0x687392cd85cd52b0UL, 0x27ff66c910e29831UL, + 0x97134556a9832d06UL, 0x269bb0360a84f8a0UL, + /* 130 */ 0x706e55457643f85cUL, 0x3734a48c9b597d1bUL, + 0x7aee91e8c6efa472UL, 0x5cd6abc198a9d9e0UL, + /* 131 */ 0x0e04de06cb3ce41aUL, 0xd8c6eb893402e138UL, + 0x904659bb686e3772UL, 0x7215c371746ba8c8UL, + /* 132 */ 0xfd12a97eeae4a2d9UL, 0x9514b7516394f2c5UL, + 0x266fd5809208f294UL, 0x5c847085619a26b9UL, + /* 133 */ 0x52985410fed694eaUL, 0x3c905b934a2ed254UL, + 0x10bb47692d3be467UL, 0x063b3d2d69e5e9e1UL, + /* 134 */ 0x472726eedda57debUL, 0xefb6c4ae10f41891UL, + 0x2b1641917b307614UL, 0x117c554fc4f45b7cUL, + /* 135 */ 0xc07cf3118f9d8812UL, 0x01dbd82050017939UL, + 0xd7e803f4171b2827UL, 0x1015e87487d225eaUL, + /* 136 */ 0xc58de3fed23acc4dUL, 0x50db91c294a7be2dUL, + 0x0b94d43d1c9cf457UL, 0x6b1640fa6e37524aUL, + /* 137 */ 0x692f346c5fda0d09UL, 0x200b1c59fa4d3151UL, + 0xb8c46f760777a296UL, 0x4b38395f3ffdfbcfUL, + /* 138 */ 0x18d25e00be54d671UL, 0x60d50582bec8aba6UL, + 0x87ad8f263b78b982UL, 0x50fdf64e9cda0432UL, + /* 139 */ 0x90f567aac578dcf0UL, 0xef1e9b0ef2a3133bUL, + 0x0eebba9242d9de71UL, 0x15473c9bf03101c7UL, + /* 140 */ 0x7c77e8ae56b78095UL, 0xb678e7666e6f078eUL, + 0x2da0b9615348ba1fUL, 0x7cf931c1ff733f0bUL, + /* 141 */ 0x26b357f50a0a366cUL, 0xe9708cf42b87d732UL, + 0xc13aeea5f91cb2c0UL, 0x35d90c991143bb4cUL, + /* 142 */ 0x47c1c404a9a0d9dcUL, 0x659e58451972d251UL, + 0x3875a8c473b38c31UL, 0x1fbd9ed379561f24UL, + /* 143 */ 0x11fabc6fd41ec28dUL, 0x7ef8dfe3cd2a2dcaUL, + 0x72e73b5d8c404595UL, 0x6135fa4954b72f27UL, + /* 144 */ 0xccfc32a2de24b69cUL, 0x3f55698c1f095d88UL, + 0xbe3350ed5ac3f929UL, 0x5e9bf806ca477eebUL, + /* 145 */ 0xe9ce8fb63c309f68UL, 0x5376f63565e1f9f4UL, + 0xd1afcfb35a6393f1UL, 0x6632a1ede5623506UL, + /* 146 */ 0x0b7d6c390c2ded4cUL, 0x56cb3281df04cb1fUL, + 0x66305a1249ecc3c7UL, 0x5d588b60a38ca72aUL, + /* 147 */ 0xa6ecbf78e8e5f42dUL, 0x86eeb44b3c8a3eecUL, + 0xec219c48fbd21604UL, 0x1aaf1af517c36731UL, + /* 148 */ 0xc306a2836769bde7UL, 0x208280622b1e2adbUL, + 0x8027f51ffbff94a6UL, 0x76cfa1ce1124f26bUL, + /* 149 */ 0x18eb00562422abb6UL, 0xf377c4d58f8c29c3UL, + 0x4dbbc207f531561aUL, 0x0253b7f082128a27UL, + /* 150 */ 0x3d1f091cb62c17e0UL, 0x4860e1abd64628a9UL, + 0x52d17436309d4253UL, 0x356f97e13efae576UL, + /* 151 */ 0xd351e11aa150535bUL, 0x3e6b45bb1dd878ccUL, + 0x0c776128bed92c98UL, 0x1d34ae93032885b8UL, + /* 152 */ 0x4ba0488ca85ba4c3UL, 0x985348c33c9ce6ceUL, + 
0x66124c6f97bda770UL, 0x0f81a0290654124aUL, + /* 153 */ 0x9ed09ca6569b86fdUL, 0x811009fd18af9a2dUL, + 0xff08d03f93d8c20aUL, 0x52a148199faef26bUL, + /* 154 */ 0x3e03f9dc2d8d1b73UL, 0x4205801873961a70UL, + 0xc0d987f041a35970UL, 0x07aa1f15a1c0d549UL, + /* 155 */ 0xdfd46ce08cd27224UL, 0x6d0a024f934e4239UL, + 0x808a7a6399897b59UL, 0x0a4556e9e13d95a2UL, + /* 156 */ 0xd21a991fe9c13045UL, 0x9b0e8548fe7751b8UL, + 0x5da643cb4bf30035UL, 0x77db28d63940f721UL, + /* 157 */ 0xfc5eeb614adc9011UL, 0x5229419ae8c411ebUL, + 0x9ec3e7787d1dcf74UL, 0x340d053e216e4cb5UL, + /* 158 */ 0xcac7af39b48df2b4UL, 0xc0faec2871a10a94UL, + 0x140a69245ca575edUL, 0x0cf1c37134273a4cUL, + /* 159 */ 0xc8ee306ac224b8a5UL, 0x57eaee7ccb4930b0UL, + 0xa1e806bdaacbe74fUL, 0x7d9a62742eeb657dUL, + /* 160 */ 0x9eb6b6ef546c4830UL, 0x885cca1fddb36e2eUL, + 0xe6b9f383ef0d7105UL, 0x58654fef9d2e0412UL, + /* 161 */ 0xa905c4ffbe0e8e26UL, 0x942de5df9b31816eUL, + 0x497d723f802e88e1UL, 0x30684dea602f408dUL, + /* 162 */ 0x21e5a278a3e6cb34UL, 0xaefb6e6f5b151dc4UL, + 0xb30b8e049d77ca15UL, 0x28c3c9cf53b98981UL, + /* 163 */ 0x287fb721556cdd2aUL, 0x0d317ca897022274UL, + 0x7468c7423a543258UL, 0x4a7f11464eb5642fUL, + /* 164 */ 0xa237a4774d193aa6UL, 0xd865986ea92129a1UL, + 0x24c515ecf87c1a88UL, 0x604003575f39f5ebUL, + /* 165 */ 0x47b9f189570a9b27UL, 0x2b98cede465e4b78UL, + 0x026df551dbb85c20UL, 0x74fcd91047e21901UL, + /* 166 */ 0x13e2a90a23c1bfa3UL, 0x0cb0074e478519f6UL, + 0x5ff1cbbe3af6cf44UL, 0x67fe5438be812dbeUL, + /* 167 */ 0xd13cf64fa40f05b0UL, 0x054dfb2f32283787UL, + 0x4173915b7f0d2aeaUL, 0x482f144f1f610d4eUL, + /* 168 */ 0xf6210201b47f8234UL, 0x5d0ae1929e70b990UL, + 0xdcd7f455b049567cUL, 0x7e93d0f1f0916f01UL, + /* 169 */ 0xdd79cbf18a7db4faUL, 0xbe8391bf6f74c62fUL, + 0x027145d14b8291bdUL, 0x585a73ea2cbf1705UL, + /* 170 */ 0x485ca03e928a0db2UL, 0x10fc01a5742857e7UL, + 0x2f482edbd6d551a7UL, 0x0f0433b5048fdb8aUL, + /* 171 */ 0x60da2e8dd7dc6247UL, 0x88b4c9d38cd4819aUL, + 0x13033ac001f66697UL, 0x273b24fe3b367d75UL, + /* 172 */ 0xc6e8f66a31b3b9d4UL, 0x281514a494df49d5UL, + 0xd1726fdfc8b23da7UL, 0x4b3ae7d103dee548UL, + /* 173 */ 0xc6256e19ce4b9d7eUL, 0xff5c5cf186e3c61cUL, + 0xacc63ca34b8ec145UL, 0x74621888fee66574UL, + /* 174 */ 0x956f409645290a1eUL, 0xef0bf8e3263a962eUL, + 0xed6a50eb5ec2647bUL, 0x0694283a9dca7502UL, + /* 175 */ 0x769b963643a2dcd1UL, 0x42b7c8ea09fc5353UL, + 0x4f002aee13397eabUL, 0x63005e2c19b7d63aUL, + /* 176 */ 0xca6736da63023beaUL, 0x966c7f6db12a99b7UL, + 0xace09390c537c5e1UL, 0x0b696063a1aa89eeUL, + /* 177 */ 0xebb03e97288c56e5UL, 0x432a9f9f938c8be8UL, + 0xa6a5a93d5b717f71UL, 0x1a5fb4c3e18f9d97UL, + /* 178 */ 0x1c94e7ad1c60cdceUL, 0xee202a43fc02c4a0UL, + 0x8dafe4d867c46a20UL, 0x0a10263c8ac27b58UL, + /* 179 */ 0xd0dea9dfe4432a4aUL, 0x856af87bbe9277c5UL, + 0xce8472acc212c71aUL, 0x6f151b6d9bbb1e91UL, + /* 180 */ 0x26776c527ceed56aUL, 0x7d211cb7fbf8faecUL, + 0x37ae66a6fd4609ccUL, 0x1f81b702d2770c42UL, + /* 181 */ 0x2fb0b057eac58392UL, 0xe1dd89fe29744e9dUL, + 0xc964f8eb17beb4f8UL, 0x29571073c9a2d41eUL, + /* 182 */ 0xa948a18981c0e254UL, 0x2df6369b65b22830UL, + 0xa33eb2d75fcfd3c6UL, 0x078cd6ec4199a01fUL, + /* 183 */ 0x4a584a41ad900d2fUL, 0x32142b78e2c74c52UL, + 0x68c4e8338431c978UL, 0x7f69ea9008689fc2UL, + /* 184 */ 0x52f2c81e46a38265UL, 0xfd78072d04a832fdUL, + 0x8cd7d5fa25359e94UL, 0x4de71b7454cc29d2UL, + /* 185 */ 0x42eb60ad1eda6ac9UL, 0x0aad37dfdbc09c3aUL, + 0x81004b71e33cc191UL, 0x44e6be345122803cUL, + /* 186 */ 0x03fe8388ba1920dbUL, 0xf5d57c32150db008UL, + 0x49c8c4281af60c29UL, 0x21edb518de701aeeUL, + /* 187 */ 0x7fb63e418f06dc99UL, 
0xa4460d99c166d7b8UL, + 0x24dd5248ce520a83UL, 0x5ec3ad712b928358UL, + /* 188 */ 0x15022a5fbd17930fUL, 0xa4f64a77d82570e3UL, + 0x12bc8d6915783712UL, 0x498194c0fc620abbUL, + /* 189 */ 0x38a2d9d255686c82UL, 0x785c6bd9193e21f0UL, + 0xe4d5c81ab24a5484UL, 0x56307860b2e20989UL, + /* 190 */ 0x429d55f78b4d74c4UL, 0x22f1834643350131UL, + 0x1e60c24598c71fffUL, 0x59f2f014979983efUL, + /* 191 */ 0x46a47d56eb494a44UL, 0x3e22a854d636a18eUL, + 0xb346e15274491c3bUL, 0x2ceafd4e5390cde7UL, + /* 192 */ 0xba8a8538be0d6675UL, 0x4b9074bb50818e23UL, + 0xcbdab89085d304c3UL, 0x61a24fe0e56192c4UL, + /* 193 */ 0xcb7615e6db525bcbUL, 0xdd7d8c35a567e4caUL, + 0xe6b4153acafcdd69UL, 0x2d668e097f3c9766UL, + /* 194 */ 0xa57e7e265ce55ef0UL, 0x5d9f4e527cd4b967UL, + 0xfbc83606492fd1e5UL, 0x090d52beb7c3f7aeUL, + /* 195 */ 0x09b9515a1e7b4d7cUL, 0x1f266a2599da44c0UL, + 0xa1c49548e2c55504UL, 0x7ef04287126f15ccUL, + /* 196 */ 0xfed1659dbd30ef15UL, 0x8b4ab9eec4e0277bUL, + 0x884d6236a5df3291UL, 0x1fd96ea6bf5cf788UL, + /* 197 */ 0x42a161981f190d9aUL, 0x61d849507e6052c1UL, + 0x9fe113bf285a2cd5UL, 0x7c22d676dbad85d8UL, + /* 198 */ 0x82e770ed2bfbd27dUL, 0x4c05b2ece996f5a5UL, + 0xcd40a9c2b0900150UL, 0x5895319213d9bf64UL, + /* 199 */ 0xe7cc5d703fea2e08UL, 0xb50c491258e2188cUL, + 0xcce30baa48205bf0UL, 0x537c659ccfa32d62UL, + /* 200 */ 0x37b6623a98cfc088UL, 0xfe9bed1fa4d6aca4UL, + 0x04d29b8e56a8d1b0UL, 0x725f71c40b519575UL, + /* 201 */ 0x28c7f89cd0339ce6UL, 0x8367b14469ddc18bUL, + 0x883ada83a6a1652cUL, 0x585f1974034d6c17UL, + /* 202 */ 0x89cfb266f1b19188UL, 0xe63b4863e7c35217UL, + 0xd88c9da6b4c0526aUL, 0x3e035c9df0954635UL, + /* 203 */ 0xdd9d5412fb45de9dUL, 0xdd684532e4cff40dUL, + 0x4b5c999b151d671cUL, 0x2d8c2cc811e7f690UL, + /* 204 */ 0x7f54be1d90055d40UL, 0xa464c5df464aaf40UL, + 0x33979624f0e917beUL, 0x2c018dc527356b30UL, + /* 205 */ 0xa5415024e330b3d4UL, 0x73ff3d96691652d3UL, + 0x94ec42c4ef9b59f1UL, 0x0747201618d08e5aUL, + /* 206 */ 0x4d6ca48aca411c53UL, 0x66415f2fcfa66119UL, + 0x9c4dd40051e227ffUL, 0x59810bc09a02f7ebUL, + /* 207 */ 0x2a7eb171b3dc101dUL, 0x441c5ab99ffef68eUL, + 0x32025c9b93b359eaUL, 0x5e8ce0a71e9d112fUL, + /* 208 */ 0xbfcccb92429503fdUL, 0xd271ba752f095d55UL, + 0x345ead5e972d091eUL, 0x18c8df11a83103baUL, + /* 209 */ 0x90cd949a9aed0f4cUL, 0xc5d1f4cb6660e37eUL, + 0xb8cac52d56c52e0bUL, 0x6e42e400c5808e0dUL, + /* 210 */ 0xa3b46966eeaefd23UL, 0x0c4f1f0be39ecdcaUL, + 0x189dc8c9d683a51dUL, 0x51f27f054c09351bUL, + /* 211 */ 0x4c487ccd2a320682UL, 0x587ea95bb3df1c96UL, + 0xc8ccf79e555cb8e8UL, 0x547dc829a206d73dUL, + /* 212 */ 0xb822a6cd80c39b06UL, 0xe96d54732000d4c6UL, + 0x28535b6f91463b4dUL, 0x228f4660e2486e1dUL, + /* 213 */ 0x98799538de8d3abfUL, 0x8cd8330045ebca6eUL, + 0x79952a008221e738UL, 0x4322e1a7535cd2bbUL, + /* 214 */ 0xb114c11819d1801cUL, 0x2016e4d84f3f5ec7UL, + 0xdd0e2df409260f4cUL, 0x5ec362c0ae5f7266UL, + /* 215 */ 0xc0462b18b8b2b4eeUL, 0x7cc8d950274d1afbUL, + 0xf25f7105436b02d2UL, 0x43bbf8dcbff9ccd3UL, + /* 216 */ 0xb6ad1767a039e9dfUL, 0xb0714da8f69d3583UL, + 0x5e55fa18b42931f5UL, 0x4ed5558f33c60961UL, + /* 217 */ 0x1fe37901c647a5ddUL, 0x593ddf1f8081d357UL, + 0x0249a4fd813fd7a6UL, 0x69acca274e9caf61UL, + /* 218 */ 0x047ba3ea330721c9UL, 0x83423fc20e7e1ea0UL, + 0x1df4c0af01314a60UL, 0x09a62dab89289527UL, + /* 219 */ 0xa5b325a49cc6cb00UL, 0xe94b5dc654b56cb6UL, + 0x3be28779adc994a0UL, 0x4296e8f8ba3a4aadUL, + /* 220 */ 0x328689761e451eabUL, 0x2e4d598bff59594aUL, + 0x49b96853d7a7084aUL, 0x4980a319601420a8UL, + /* 221 */ 0x9565b9e12f552c42UL, 0x8a5318db7100fe96UL, + 0x05c90b4d43add0d7UL, 0x538b4cd66a5d4edaUL, + /* 222 */ 
0xf4e94fc3e89f039fUL, 0x592c9af26f618045UL, + 0x08a36eb5fd4b9550UL, 0x25fffaf6c2ed1419UL, + /* 223 */ 0x34434459cc79d354UL, 0xeeecbfb4b1d5476bUL, + 0xddeb34a061615d99UL, 0x5129cecceb64b773UL, + /* 224 */ 0xee43215894993520UL, 0x772f9c7cf14c0b3bUL, + 0xd2e2fce306bedad5UL, 0x715f42b546f06a97UL, + /* 225 */ 0x434ecdceda5b5f1aUL, 0x0da17115a49741a9UL, + 0x680bd77c73edad2eUL, 0x487c02354edd9041UL, + /* 226 */ 0xb8efeff3a70ed9c4UL, 0x56a32aa3e857e302UL, + 0xdf3a68bd48a2a5a0UL, 0x07f650b73176c444UL, + /* 227 */ 0xe38b9b1626e0ccb1UL, 0x79e053c18b09fb36UL, + 0x56d90319c9f94964UL, 0x1ca941e7ac9ff5c4UL, + /* 228 */ 0x49c4df29162fa0bbUL, 0x8488cf3282b33305UL, + 0x95dfda14cabb437dUL, 0x3391f78264d5ad86UL, + /* 229 */ 0x729ae06ae2b5095dUL, 0xd58a58d73259a946UL, + 0xe9834262d13921edUL, 0x27fedafaa54bb592UL, + /* 230 */ 0xa99dc5b829ad48bbUL, 0x5f025742499ee260UL, + 0x802c8ecd5d7513fdUL, 0x78ceb3ef3f6dd938UL, + /* 231 */ 0xc342f44f8a135d94UL, 0x7b9edb44828cdda3UL, + 0x9436d11a0537cfe7UL, 0x5064b164ec1ab4c8UL, + /* 232 */ 0x7020eccfd37eb2fcUL, 0x1f31ea3ed90d25fcUL, + 0x1b930d7bdfa1bb34UL, 0x5344467a48113044UL, + /* 233 */ 0x70073170f25e6dfbUL, 0xe385dc1a50114cc8UL, + 0x2348698ac8fc4f00UL, 0x2a77a55284dd40d8UL, + /* 234 */ 0xfe06afe0c98c6ce4UL, 0xc235df96dddfd6e4UL, + 0x1428d01e33bf1ed3UL, 0x785768ec9300bdafUL, + /* 235 */ 0x9702e57a91deb63bUL, 0x61bdb8bfe5ce8b80UL, + 0x645b426f3d1d58acUL, 0x4804a82227a557bcUL, + /* 236 */ 0x8e57048ab44d2601UL, 0x68d6501a4b3a6935UL, + 0xc39c9ec3f9e1c293UL, 0x4172f257d4de63e2UL, + /* 237 */ 0xd368b450330c6401UL, 0x040d3017418f2391UL, + 0x2c34bb6090b7d90dUL, 0x16f649228fdfd51fUL, + /* 238 */ 0xbea6818e2b928ef5UL, 0xe28ccf91cdc11e72UL, + 0x594aaa68e77a36cdUL, 0x313034806c7ffd0fUL, + /* 239 */ 0x8a9d27ac2249bd65UL, 0x19a3b464018e9512UL, + 0xc26ccff352b37ec7UL, 0x056f68341d797b21UL, + /* 240 */ 0x5e79d6757efd2327UL, 0xfabdbcb6553afe15UL, + 0xd3e7222c6eaf5a60UL, 0x7046c76d4dae743bUL, + /* 241 */ 0x660be872b18d4a55UL, 0x19992518574e1496UL, + 0xc103053a302bdcbbUL, 0x3ed8e9800b218e8eUL, + /* 242 */ 0x7b0b9239fa75e03eUL, 0xefe9fb684633c083UL, + 0x98a35fbe391a7793UL, 0x6065510fe2d0fe34UL, + /* 243 */ 0x55cb668548abad0cUL, 0xb4584548da87e527UL, + 0x2c43ecea0107c1ddUL, 0x526028809372de35UL, + /* 244 */ 0x3415c56af9213b1fUL, 0x5bee1a4d017e98dbUL, + 0x13f6b105b5cf709bUL, 0x5ff20e3482b29ab6UL, + /* 245 */ 0x0aa29c75cc2e6c90UL, 0xfc7d73ca3a70e206UL, + 0x899fc38fc4b5c515UL, 0x250386b124ffc207UL, + /* 246 */ 0x54ea28d5ae3d2b56UL, 0x9913149dd6de60ceUL, + 0x16694fc58f06d6c1UL, 0x46b23975eb018fc7UL, + /* 247 */ 0x470a6a0fb4b7b4e2UL, 0x5d92475a8f7253deUL, + 0xabeee5b52fbd3adbUL, 0x7fa20801a0806968UL, + /* 248 */ 0x76f3faf19f7714d2UL, 0xb3e840c12f4660c3UL, + 0x0fb4cd8df212744eUL, 0x4b065a251d3a2dd2UL, + /* 249 */ 0x5cebde383d77cd4aUL, 0x6adf39df882c9cb1UL, + 0xa2dd242eb09af759UL, 0x3147c0e50e5f6422UL, + /* 250 */ 0x164ca5101d1350dbUL, 0xf8d13479c33fc962UL, + 0xe640ce4d13e5da08UL, 0x4bdee0c45061f8baUL, + /* 251 */ 0xd7c46dc1a4edb1c9UL, 0x5514d7b6437fd98aUL, + 0x58942f6bb2a1c00bUL, 0x2dffb2ab1d70710eUL, + /* 252 */ 0xccdfcf2fc18b6d68UL, 0xa8ebcba8b7806167UL, + 0x980697f95e2937e3UL, 0x02fbba1cd0126e8cUL +}; + +/* c is two 512-bit products: c0[0:7]=a0[0:3]*b0[0:3] and c1[8:15]=a1[4:7]*b1[4:7] + * a is two 256-bit integers: a0[0:3] and a1[4:7] + * b is two 256-bit integers: b0[0:3] and b1[4:7] + */ +static void mul2_256x256_integer_adx(u64 *const c, const u64 *const a, + const u64 *const b) +{ + asm volatile( + "xorl %%r14d, %%r14d ;" + "movq (%1), %%rdx; " /* A[0] */ + "mulx (%2), %%r8, %%r15; " /* 
A[0]*B[0] */ + "xorl %%r10d, %%r10d ;" + "movq %%r8, (%0) ;" + "mulx 8(%2), %%r10, %%rax; " /* A[0]*B[1] */ + "adox %%r10, %%r15 ;" + "mulx 16(%2), %%r8, %%rbx; " /* A[0]*B[2] */ + "adox %%r8, %%rax ;" + "mulx 24(%2), %%r10, %%rcx; " /* A[0]*B[3] */ + "adox %%r10, %%rbx ;" + /******************************************/ + "adox %%r14, %%rcx ;" + + "movq 8(%1), %%rdx; " /* A[1] */ + "mulx (%2), %%r8, %%r9; " /* A[1]*B[0] */ + "adox %%r15, %%r8 ;" + "movq %%r8, 8(%0) ;" + "mulx 8(%2), %%r10, %%r11; " /* A[1]*B[1] */ + "adox %%r10, %%r9 ;" + "adcx %%r9, %%rax ;" + "mulx 16(%2), %%r8, %%r13; " /* A[1]*B[2] */ + "adox %%r8, %%r11 ;" + "adcx %%r11, %%rbx ;" + "mulx 24(%2), %%r10, %%r15; " /* A[1]*B[3] */ + "adox %%r10, %%r13 ;" + "adcx %%r13, %%rcx ;" + /******************************************/ + "adox %%r14, %%r15 ;" + "adcx %%r14, %%r15 ;" + + "movq 16(%1), %%rdx; " /* A[2] */ + "xorl %%r10d, %%r10d ;" + "mulx (%2), %%r8, %%r9; " /* A[2]*B[0] */ + "adox %%rax, %%r8 ;" + "movq %%r8, 16(%0) ;" + "mulx 8(%2), %%r10, %%r11; " /* A[2]*B[1] */ + "adox %%r10, %%r9 ;" + "adcx %%r9, %%rbx ;" + "mulx 16(%2), %%r8, %%r13; " /* A[2]*B[2] */ + "adox %%r8, %%r11 ;" + "adcx %%r11, %%rcx ;" + "mulx 24(%2), %%r10, %%rax; " /* A[2]*B[3] */ + "adox %%r10, %%r13 ;" + "adcx %%r13, %%r15 ;" + /******************************************/ + "adox %%r14, %%rax ;" + "adcx %%r14, %%rax ;" + + "movq 24(%1), %%rdx; " /* A[3] */ + "xorl %%r10d, %%r10d ;" + "mulx (%2), %%r8, %%r9; " /* A[3]*B[0] */ + "adox %%rbx, %%r8 ;" + "movq %%r8, 24(%0) ;" + "mulx 8(%2), %%r10, %%r11; " /* A[3]*B[1] */ + "adox %%r10, %%r9 ;" + "adcx %%r9, %%rcx ;" + "movq %%rcx, 32(%0) ;" + "mulx 16(%2), %%r8, %%r13; " /* A[3]*B[2] */ + "adox %%r8, %%r11 ;" + "adcx %%r11, %%r15 ;" + "movq %%r15, 40(%0) ;" + "mulx 24(%2), %%r10, %%rbx; " /* A[3]*B[3] */ + "adox %%r10, %%r13 ;" + "adcx %%r13, %%rax ;" + "movq %%rax, 48(%0) ;" + /******************************************/ + "adox %%r14, %%rbx ;" + "adcx %%r14, %%rbx ;" + "movq %%rbx, 56(%0) ;" + + "movq 32(%1), %%rdx; " /* C[0] */ + "mulx 32(%2), %%r8, %%r15; " /* C[0]*D[0] */ + "xorl %%r10d, %%r10d ;" + "movq %%r8, 64(%0);" + "mulx 40(%2), %%r10, %%rax; " /* C[0]*D[1] */ + "adox %%r10, %%r15 ;" + "mulx 48(%2), %%r8, %%rbx; " /* C[0]*D[2] */ + "adox %%r8, %%rax ;" + "mulx 56(%2), %%r10, %%rcx; " /* C[0]*D[3] */ + "adox %%r10, %%rbx ;" + /******************************************/ + "adox %%r14, %%rcx ;" + + "movq 40(%1), %%rdx; " /* C[1] */ + "xorl %%r10d, %%r10d ;" + "mulx 32(%2), %%r8, %%r9; " /* C[1]*D[0] */ + "adox %%r15, %%r8 ;" + "movq %%r8, 72(%0);" + "mulx 40(%2), %%r10, %%r11; " /* C[1]*D[1] */ + "adox %%r10, %%r9 ;" + "adcx %%r9, %%rax ;" + "mulx 48(%2), %%r8, %%r13; " /* C[1]*D[2] */ + "adox %%r8, %%r11 ;" + "adcx %%r11, %%rbx ;" + "mulx 56(%2), %%r10, %%r15; " /* C[1]*D[3] */ + "adox %%r10, %%r13 ;" + "adcx %%r13, %%rcx ;" + /******************************************/ + "adox %%r14, %%r15 ;" + "adcx %%r14, %%r15 ;" + + "movq 48(%1), %%rdx; " /* C[2] */ + "xorl %%r10d, %%r10d ;" + "mulx 32(%2), %%r8, %%r9; " /* C[2]*D[0] */ + "adox %%rax, %%r8 ;" + "movq %%r8, 80(%0);" + "mulx 40(%2), %%r10, %%r11; " /* C[2]*D[1] */ + "adox %%r10, %%r9 ;" + "adcx %%r9, %%rbx ;" + "mulx 48(%2), %%r8, %%r13; " /* C[2]*D[2] */ + "adox %%r8, %%r11 ;" + "adcx %%r11, %%rcx ;" + "mulx 56(%2), %%r10, %%rax; " /* C[2]*D[3] */ + "adox %%r10, %%r13 ;" + "adcx %%r13, %%r15 ;" + /******************************************/ + "adox %%r14, %%rax ;" + "adcx %%r14, %%rax ;" + + "movq 56(%1), %%rdx; " /* C[3] */ + "xorl 
%%r10d, %%r10d ;" + "mulx 32(%2), %%r8, %%r9; " /* C[3]*D[0] */ + "adox %%rbx, %%r8 ;" + "movq %%r8, 88(%0);" + "mulx 40(%2), %%r10, %%r11; " /* C[3]*D[1] */ + "adox %%r10, %%r9 ;" + "adcx %%r9, %%rcx ;" + "movq %%rcx, 96(%0) ;" + "mulx 48(%2), %%r8, %%r13; " /* C[3]*D[2] */ + "adox %%r8, %%r11 ;" + "adcx %%r11, %%r15 ;" + "movq %%r15, 104(%0) ;" + "mulx 56(%2), %%r10, %%rbx; " /* C[3]*D[3] */ + "adox %%r10, %%r13 ;" + "adcx %%r13, %%rax ;" + "movq %%rax, 112(%0) ;" + /******************************************/ + "adox %%r14, %%rbx ;" + "adcx %%r14, %%rbx ;" + "movq %%rbx, 120(%0) ;" + : + : "r"(c), "r"(a), "r"(b) + : "memory", "cc", "%rax", "%rbx", "%rcx", "%rdx", "%r8", "%r9", + "%r10", "%r11", "%r13", "%r14", "%r15"); +} + +static void mul2_256x256_integer_bmi2(u64 *const c, const u64 *const a, + const u64 *const b) +{ + asm volatile( + "movq (%1), %%rdx; " /* A[0] */ + "mulx (%2), %%r8, %%r15; " /* A[0]*B[0] */ + "movq %%r8, (%0) ;" + "mulx 8(%2), %%r10, %%rax; " /* A[0]*B[1] */ + "addq %%r10, %%r15 ;" + "mulx 16(%2), %%r8, %%rbx; " /* A[0]*B[2] */ + "adcq %%r8, %%rax ;" + "mulx 24(%2), %%r10, %%rcx; " /* A[0]*B[3] */ + "adcq %%r10, %%rbx ;" + /******************************************/ + "adcq $0, %%rcx ;" + + "movq 8(%1), %%rdx; " /* A[1] */ + "mulx (%2), %%r8, %%r9; " /* A[1]*B[0] */ + "addq %%r15, %%r8 ;" + "movq %%r8, 8(%0) ;" + "mulx 8(%2), %%r10, %%r11; " /* A[1]*B[1] */ + "adcq %%r10, %%r9 ;" + "mulx 16(%2), %%r8, %%r13; " /* A[1]*B[2] */ + "adcq %%r8, %%r11 ;" + "mulx 24(%2), %%r10, %%r15; " /* A[1]*B[3] */ + "adcq %%r10, %%r13 ;" + /******************************************/ + "adcq $0, %%r15 ;" + + "addq %%r9, %%rax ;" + "adcq %%r11, %%rbx ;" + "adcq %%r13, %%rcx ;" + "adcq $0, %%r15 ;" + + "movq 16(%1), %%rdx; " /* A[2] */ + "mulx (%2), %%r8, %%r9; " /* A[2]*B[0] */ + "addq %%rax, %%r8 ;" + "movq %%r8, 16(%0) ;" + "mulx 8(%2), %%r10, %%r11; " /* A[2]*B[1] */ + "adcq %%r10, %%r9 ;" + "mulx 16(%2), %%r8, %%r13; " /* A[2]*B[2] */ + "adcq %%r8, %%r11 ;" + "mulx 24(%2), %%r10, %%rax; " /* A[2]*B[3] */ + "adcq %%r10, %%r13 ;" + /******************************************/ + "adcq $0, %%rax ;" + + "addq %%r9, %%rbx ;" + "adcq %%r11, %%rcx ;" + "adcq %%r13, %%r15 ;" + "adcq $0, %%rax ;" + + "movq 24(%1), %%rdx; " /* A[3] */ + "mulx (%2), %%r8, %%r9; " /* A[3]*B[0] */ + "addq %%rbx, %%r8 ;" + "movq %%r8, 24(%0) ;" + "mulx 8(%2), %%r10, %%r11; " /* A[3]*B[1] */ + "adcq %%r10, %%r9 ;" + "mulx 16(%2), %%r8, %%r13; " /* A[3]*B[2] */ + "adcq %%r8, %%r11 ;" + "mulx 24(%2), %%r10, %%rbx; " /* A[3]*B[3] */ + "adcq %%r10, %%r13 ;" + /******************************************/ + "adcq $0, %%rbx ;" + + "addq %%r9, %%rcx ;" + "movq %%rcx, 32(%0) ;" + "adcq %%r11, %%r15 ;" + "movq %%r15, 40(%0) ;" + "adcq %%r13, %%rax ;" + "movq %%rax, 48(%0) ;" + "adcq $0, %%rbx ;" + "movq %%rbx, 56(%0) ;" + + "movq 32(%1), %%rdx; " /* C[0] */ + "mulx 32(%2), %%r8, %%r15; " /* C[0]*D[0] */ + "movq %%r8, 64(%0) ;" + "mulx 40(%2), %%r10, %%rax; " /* C[0]*D[1] */ + "addq %%r10, %%r15 ;" + "mulx 48(%2), %%r8, %%rbx; " /* C[0]*D[2] */ + "adcq %%r8, %%rax ;" + "mulx 56(%2), %%r10, %%rcx; " /* C[0]*D[3] */ + "adcq %%r10, %%rbx ;" + /******************************************/ + "adcq $0, %%rcx ;" + + "movq 40(%1), %%rdx; " /* C[1] */ + "mulx 32(%2), %%r8, %%r9; " /* C[1]*D[0] */ + "addq %%r15, %%r8 ;" + "movq %%r8, 72(%0) ;" + "mulx 40(%2), %%r10, %%r11; " /* C[1]*D[1] */ + "adcq %%r10, %%r9 ;" + "mulx 48(%2), %%r8, %%r13; " /* C[1]*D[2] */ + "adcq %%r8, %%r11 ;" + "mulx 56(%2), %%r10, %%r15; " /* C[1]*D[3] */ + 
"adcq %%r10, %%r13 ;" + /******************************************/ + "adcq $0, %%r15 ;" + + "addq %%r9, %%rax ;" + "adcq %%r11, %%rbx ;" + "adcq %%r13, %%rcx ;" + "adcq $0, %%r15 ;" + + "movq 48(%1), %%rdx; " /* C[2] */ + "mulx 32(%2), %%r8, %%r9; " /* C[2]*D[0] */ + "addq %%rax, %%r8 ;" + "movq %%r8, 80(%0) ;" + "mulx 40(%2), %%r10, %%r11; " /* C[2]*D[1] */ + "adcq %%r10, %%r9 ;" + "mulx 48(%2), %%r8, %%r13; " /* C[2]*D[2] */ + "adcq %%r8, %%r11 ;" + "mulx 56(%2), %%r10, %%rax; " /* C[2]*D[3] */ + "adcq %%r10, %%r13 ;" + /******************************************/ + "adcq $0, %%rax ;" + + "addq %%r9, %%rbx ;" + "adcq %%r11, %%rcx ;" + "adcq %%r13, %%r15 ;" + "adcq $0, %%rax ;" + + "movq 56(%1), %%rdx; " /* C[3] */ + "mulx 32(%2), %%r8, %%r9; " /* C[3]*D[0] */ + "addq %%rbx, %%r8 ;" + "movq %%r8, 88(%0) ;" + "mulx 40(%2), %%r10, %%r11; " /* C[3]*D[1] */ + "adcq %%r10, %%r9 ;" + "mulx 48(%2), %%r8, %%r13; " /* C[3]*D[2] */ + "adcq %%r8, %%r11 ;" + "mulx 56(%2), %%r10, %%rbx; " /* C[3]*D[3] */ + "adcq %%r10, %%r13 ;" + /******************************************/ + "adcq $0, %%rbx ;" + + "addq %%r9, %%rcx ;" + "movq %%rcx, 96(%0) ;" + "adcq %%r11, %%r15 ;" + "movq %%r15, 104(%0) ;" + "adcq %%r13, %%rax ;" + "movq %%rax, 112(%0) ;" + "adcq $0, %%rbx ;" + "movq %%rbx, 120(%0) ;" + : + : "r"(c), "r"(a), "r"(b) + : "memory", "cc", "%rax", "%rbx", "%rcx", "%rdx", "%r8", "%r9", + "%r10", "%r11", "%r13", "%r15"); +} + +static void sqr2_256x256_integer_adx(u64 *const c, const u64 *const a) +{ + asm volatile( + "movq (%1), %%rdx ;" /* A[0] */ + "mulx 8(%1), %%r8, %%r14 ;" /* A[1]*A[0] */ + "xorl %%r15d, %%r15d;" + "mulx 16(%1), %%r9, %%r10 ;" /* A[2]*A[0] */ + "adcx %%r14, %%r9 ;" + "mulx 24(%1), %%rax, %%rcx ;" /* A[3]*A[0] */ + "adcx %%rax, %%r10 ;" + "movq 24(%1), %%rdx ;" /* A[3] */ + "mulx 8(%1), %%r11, %%rbx ;" /* A[1]*A[3] */ + "adcx %%rcx, %%r11 ;" + "mulx 16(%1), %%rax, %%r13 ;" /* A[2]*A[3] */ + "adcx %%rax, %%rbx ;" + "movq 8(%1), %%rdx ;" /* A[1] */ + "adcx %%r15, %%r13 ;" + "mulx 16(%1), %%rax, %%rcx ;" /* A[2]*A[1] */ + "movq $0, %%r14 ;" + /******************************************/ + "adcx %%r15, %%r14 ;" + + "xorl %%r15d, %%r15d;" + "adox %%rax, %%r10 ;" + "adcx %%r8, %%r8 ;" + "adox %%rcx, %%r11 ;" + "adcx %%r9, %%r9 ;" + "adox %%r15, %%rbx ;" + "adcx %%r10, %%r10 ;" + "adox %%r15, %%r13 ;" + "adcx %%r11, %%r11 ;" + "adox %%r15, %%r14 ;" + "adcx %%rbx, %%rbx ;" + "adcx %%r13, %%r13 ;" + "adcx %%r14, %%r14 ;" + + "movq (%1), %%rdx ;" + "mulx %%rdx, %%rax, %%rcx ;" /* A[0]^2 */ + /*******************/ + "movq %%rax, 0(%0) ;" + "addq %%rcx, %%r8 ;" + "movq %%r8, 8(%0) ;" + "movq 8(%1), %%rdx ;" + "mulx %%rdx, %%rax, %%rcx ;" /* A[1]^2 */ + "adcq %%rax, %%r9 ;" + "movq %%r9, 16(%0) ;" + "adcq %%rcx, %%r10 ;" + "movq %%r10, 24(%0) ;" + "movq 16(%1), %%rdx ;" + "mulx %%rdx, %%rax, %%rcx ;" /* A[2]^2 */ + "adcq %%rax, %%r11 ;" + "movq %%r11, 32(%0) ;" + "adcq %%rcx, %%rbx ;" + "movq %%rbx, 40(%0) ;" + "movq 24(%1), %%rdx ;" + "mulx %%rdx, %%rax, %%rcx ;" /* A[3]^2 */ + "adcq %%rax, %%r13 ;" + "movq %%r13, 48(%0) ;" + "adcq %%rcx, %%r14 ;" + "movq %%r14, 56(%0) ;" + + + "movq 32(%1), %%rdx ;" /* B[0] */ + "mulx 40(%1), %%r8, %%r14 ;" /* B[1]*B[0] */ + "xorl %%r15d, %%r15d;" + "mulx 48(%1), %%r9, %%r10 ;" /* B[2]*B[0] */ + "adcx %%r14, %%r9 ;" + "mulx 56(%1), %%rax, %%rcx ;" /* B[3]*B[0] */ + "adcx %%rax, %%r10 ;" + "movq 56(%1), %%rdx ;" /* B[3] */ + "mulx 40(%1), %%r11, %%rbx ;" /* B[1]*B[3] */ + "adcx %%rcx, %%r11 ;" + "mulx 48(%1), %%rax, %%r13 ;" /* B[2]*B[3] */ + "adcx %%rax, %%rbx 
;" + "movq 40(%1), %%rdx ;" /* B[1] */ + "adcx %%r15, %%r13 ;" + "mulx 48(%1), %%rax, %%rcx ;" /* B[2]*B[1] */ + "movq $0, %%r14 ;" + /******************************************/ + "adcx %%r15, %%r14 ;" + + "xorl %%r15d, %%r15d;" + "adox %%rax, %%r10 ;" + "adcx %%r8, %%r8 ;" + "adox %%rcx, %%r11 ;" + "adcx %%r9, %%r9 ;" + "adox %%r15, %%rbx ;" + "adcx %%r10, %%r10 ;" + "adox %%r15, %%r13 ;" + "adcx %%r11, %%r11 ;" + "adox %%r15, %%r14 ;" + "adcx %%rbx, %%rbx ;" + "adcx %%r13, %%r13 ;" + "adcx %%r14, %%r14 ;" + + "movq 32(%1), %%rdx ;" + "mulx %%rdx, %%rax, %%rcx ;" /* B[0]^2 */ + /*******************/ + "movq %%rax, 64(%0) ;" + "addq %%rcx, %%r8 ;" + "movq %%r8, 72(%0) ;" + "movq 40(%1), %%rdx ;" + "mulx %%rdx, %%rax, %%rcx ;" /* B[1]^2 */ + "adcq %%rax, %%r9 ;" + "movq %%r9, 80(%0) ;" + "adcq %%rcx, %%r10 ;" + "movq %%r10, 88(%0) ;" + "movq 48(%1), %%rdx ;" + "mulx %%rdx, %%rax, %%rcx ;" /* B[2]^2 */ + "adcq %%rax, %%r11 ;" + "movq %%r11, 96(%0) ;" + "adcq %%rcx, %%rbx ;" + "movq %%rbx, 104(%0) ;" + "movq 56(%1), %%rdx ;" + "mulx %%rdx, %%rax, %%rcx ;" /* B[3]^2 */ + "adcq %%rax, %%r13 ;" + "movq %%r13, 112(%0) ;" + "adcq %%rcx, %%r14 ;" + "movq %%r14, 120(%0) ;" + : + : "r"(c), "r"(a) + : "memory", "cc", "%rax", "%rbx", "%rcx", "%rdx", "%r8", "%r9", + "%r10", "%r11", "%r13", "%r14", "%r15"); +} + +static void sqr2_256x256_integer_bmi2(u64 *const c, const u64 *const a) +{ + asm volatile( + "movq 8(%1), %%rdx ;" /* A[1] */ + "mulx (%1), %%r8, %%r9 ;" /* A[0]*A[1] */ + "mulx 16(%1), %%r10, %%r11 ;" /* A[2]*A[1] */ + "mulx 24(%1), %%rcx, %%r14 ;" /* A[3]*A[1] */ + + "movq 16(%1), %%rdx ;" /* A[2] */ + "mulx 24(%1), %%r15, %%r13 ;" /* A[3]*A[2] */ + "mulx (%1), %%rax, %%rdx ;" /* A[0]*A[2] */ + + "addq %%rax, %%r9 ;" + "adcq %%rdx, %%r10 ;" + "adcq %%rcx, %%r11 ;" + "adcq %%r14, %%r15 ;" + "adcq $0, %%r13 ;" + "movq $0, %%r14 ;" + "adcq $0, %%r14 ;" + + "movq (%1), %%rdx ;" /* A[0] */ + "mulx 24(%1), %%rax, %%rcx ;" /* A[0]*A[3] */ + + "addq %%rax, %%r10 ;" + "adcq %%rcx, %%r11 ;" + "adcq $0, %%r15 ;" + "adcq $0, %%r13 ;" + "adcq $0, %%r14 ;" + + "shldq $1, %%r13, %%r14 ;" + "shldq $1, %%r15, %%r13 ;" + "shldq $1, %%r11, %%r15 ;" + "shldq $1, %%r10, %%r11 ;" + "shldq $1, %%r9, %%r10 ;" + "shldq $1, %%r8, %%r9 ;" + "shlq $1, %%r8 ;" + + /*******************/ + "mulx %%rdx, %%rax, %%rcx ; " /* A[0]^2 */ + /*******************/ + "movq %%rax, 0(%0) ;" + "addq %%rcx, %%r8 ;" + "movq %%r8, 8(%0) ;" + "movq 8(%1), %%rdx ;" + "mulx %%rdx, %%rax, %%rcx ; " /* A[1]^2 */ + "adcq %%rax, %%r9 ;" + "movq %%r9, 16(%0) ;" + "adcq %%rcx, %%r10 ;" + "movq %%r10, 24(%0) ;" + "movq 16(%1), %%rdx ;" + "mulx %%rdx, %%rax, %%rcx ; " /* A[2]^2 */ + "adcq %%rax, %%r11 ;" + "movq %%r11, 32(%0) ;" + "adcq %%rcx, %%r15 ;" + "movq %%r15, 40(%0) ;" + "movq 24(%1), %%rdx ;" + "mulx %%rdx, %%rax, %%rcx ; " /* A[3]^2 */ + "adcq %%rax, %%r13 ;" + "movq %%r13, 48(%0) ;" + "adcq %%rcx, %%r14 ;" + "movq %%r14, 56(%0) ;" + + "movq 40(%1), %%rdx ;" /* B[1] */ + "mulx 32(%1), %%r8, %%r9 ;" /* B[0]*B[1] */ + "mulx 48(%1), %%r10, %%r11 ;" /* B[2]*B[1] */ + "mulx 56(%1), %%rcx, %%r14 ;" /* B[3]*B[1] */ + + "movq 48(%1), %%rdx ;" /* B[2] */ + "mulx 56(%1), %%r15, %%r13 ;" /* B[3]*B[2] */ + "mulx 32(%1), %%rax, %%rdx ;" /* B[0]*B[2] */ + + "addq %%rax, %%r9 ;" + "adcq %%rdx, %%r10 ;" + "adcq %%rcx, %%r11 ;" + "adcq %%r14, %%r15 ;" + "adcq $0, %%r13 ;" + "movq $0, %%r14 ;" + "adcq $0, %%r14 ;" + + "movq 32(%1), %%rdx ;" /* B[0] */ + "mulx 56(%1), %%rax, %%rcx ;" /* B[0]*B[3] */ + + "addq %%rax, %%r10 ;" + "adcq %%rcx, %%r11 ;" + "adcq 
$0, %%r15 ;" + "adcq $0, %%r13 ;" + "adcq $0, %%r14 ;" + + "shldq $1, %%r13, %%r14 ;" + "shldq $1, %%r15, %%r13 ;" + "shldq $1, %%r11, %%r15 ;" + "shldq $1, %%r10, %%r11 ;" + "shldq $1, %%r9, %%r10 ;" + "shldq $1, %%r8, %%r9 ;" + "shlq $1, %%r8 ;" + + /*******************/ + "mulx %%rdx, %%rax, %%rcx ; " /* B[0]^2 */ + /*******************/ + "movq %%rax, 64(%0) ;" + "addq %%rcx, %%r8 ;" + "movq %%r8, 72(%0) ;" + "movq 40(%1), %%rdx ;" + "mulx %%rdx, %%rax, %%rcx ; " /* B[1]^2 */ + "adcq %%rax, %%r9 ;" + "movq %%r9, 80(%0) ;" + "adcq %%rcx, %%r10 ;" + "movq %%r10, 88(%0) ;" + "movq 48(%1), %%rdx ;" + "mulx %%rdx, %%rax, %%rcx ; " /* B[2]^2 */ + "adcq %%rax, %%r11 ;" + "movq %%r11, 96(%0) ;" + "adcq %%rcx, %%r15 ;" + "movq %%r15, 104(%0) ;" + "movq 56(%1), %%rdx ;" + "mulx %%rdx, %%rax, %%rcx ; " /* B[3]^2 */ + "adcq %%rax, %%r13 ;" + "movq %%r13, 112(%0) ;" + "adcq %%rcx, %%r14 ;" + "movq %%r14, 120(%0) ;" + : + : "r"(c), "r"(a) + : "memory", "cc", "%rax", "%rcx", "%rdx", "%r8", "%r9", "%r10", + "%r11", "%r13", "%r14", "%r15"); +} + +static void red_eltfp25519_2w_adx(u64 *const c, const u64 *const a) +{ + asm volatile( + "movl $38, %%edx; " /* 2*c = 38 = 2^256 */ + "mulx 32(%1), %%r8, %%r10; " /* c*C[4] */ + "xorl %%ebx, %%ebx ;" + "adox (%1), %%r8 ;" + "mulx 40(%1), %%r9, %%r11; " /* c*C[5] */ + "adcx %%r10, %%r9 ;" + "adox 8(%1), %%r9 ;" + "mulx 48(%1), %%r10, %%rax; " /* c*C[6] */ + "adcx %%r11, %%r10 ;" + "adox 16(%1), %%r10 ;" + "mulx 56(%1), %%r11, %%rcx; " /* c*C[7] */ + "adcx %%rax, %%r11 ;" + "adox 24(%1), %%r11 ;" + /***************************************/ + "adcx %%rbx, %%rcx ;" + "adox %%rbx, %%rcx ;" + "imul %%rdx, %%rcx ;" /* c*C[4], cf=0, of=0 */ + "adcx %%rcx, %%r8 ;" + "adcx %%rbx, %%r9 ;" + "movq %%r9, 8(%0) ;" + "adcx %%rbx, %%r10 ;" + "movq %%r10, 16(%0) ;" + "adcx %%rbx, %%r11 ;" + "movq %%r11, 24(%0) ;" + "mov $0, %%ecx ;" + "cmovc %%edx, %%ecx ;" + "addq %%rcx, %%r8 ;" + "movq %%r8, (%0) ;" + + "mulx 96(%1), %%r8, %%r10; " /* c*C[4] */ + "xorl %%ebx, %%ebx ;" + "adox 64(%1), %%r8 ;" + "mulx 104(%1), %%r9, %%r11; " /* c*C[5] */ + "adcx %%r10, %%r9 ;" + "adox 72(%1), %%r9 ;" + "mulx 112(%1), %%r10, %%rax; " /* c*C[6] */ + "adcx %%r11, %%r10 ;" + "adox 80(%1), %%r10 ;" + "mulx 120(%1), %%r11, %%rcx; " /* c*C[7] */ + "adcx %%rax, %%r11 ;" + "adox 88(%1), %%r11 ;" + /****************************************/ + "adcx %%rbx, %%rcx ;" + "adox %%rbx, %%rcx ;" + "imul %%rdx, %%rcx ;" /* c*C[4], cf=0, of=0 */ + "adcx %%rcx, %%r8 ;" + "adcx %%rbx, %%r9 ;" + "movq %%r9, 40(%0) ;" + "adcx %%rbx, %%r10 ;" + "movq %%r10, 48(%0) ;" + "adcx %%rbx, %%r11 ;" + "movq %%r11, 56(%0) ;" + "mov $0, %%ecx ;" + "cmovc %%edx, %%ecx ;" + "addq %%rcx, %%r8 ;" + "movq %%r8, 32(%0) ;" + : + : "r"(c), "r"(a) + : "memory", "cc", "%rax", "%rbx", "%rcx", "%rdx", "%r8", "%r9", + "%r10", "%r11"); +} + +static void red_eltfp25519_2w_bmi2(u64 *const c, const u64 *const a) +{ + asm volatile( + "movl $38, %%edx ; " /* 2*c = 38 = 2^256 */ + "mulx 32(%1), %%r8, %%r10 ;" /* c*C[4] */ + "mulx 40(%1), %%r9, %%r11 ;" /* c*C[5] */ + "addq %%r10, %%r9 ;" + "mulx 48(%1), %%r10, %%rax ;" /* c*C[6] */ + "adcq %%r11, %%r10 ;" + "mulx 56(%1), %%r11, %%rcx ;" /* c*C[7] */ + "adcq %%rax, %%r11 ;" + /***************************************/ + "adcq $0, %%rcx ;" + "addq (%1), %%r8 ;" + "adcq 8(%1), %%r9 ;" + "adcq 16(%1), %%r10 ;" + "adcq 24(%1), %%r11 ;" + "adcq $0, %%rcx ;" + "imul %%rdx, %%rcx ;" /* c*C[4], cf=0 */ + "addq %%rcx, %%r8 ;" + "adcq $0, %%r9 ;" + "movq %%r9, 8(%0) ;" + "adcq $0, %%r10 ;" + "movq %%r10, 
16(%0) ;" + "adcq $0, %%r11 ;" + "movq %%r11, 24(%0) ;" + "mov $0, %%ecx ;" + "cmovc %%edx, %%ecx ;" + "addq %%rcx, %%r8 ;" + "movq %%r8, (%0) ;" + + "mulx 96(%1), %%r8, %%r10 ;" /* c*C[4] */ + "mulx 104(%1), %%r9, %%r11 ;" /* c*C[5] */ + "addq %%r10, %%r9 ;" + "mulx 112(%1), %%r10, %%rax ;" /* c*C[6] */ + "adcq %%r11, %%r10 ;" + "mulx 120(%1), %%r11, %%rcx ;" /* c*C[7] */ + "adcq %%rax, %%r11 ;" + /****************************************/ + "adcq $0, %%rcx ;" + "addq 64(%1), %%r8 ;" + "adcq 72(%1), %%r9 ;" + "adcq 80(%1), %%r10 ;" + "adcq 88(%1), %%r11 ;" + "adcq $0, %%rcx ;" + "imul %%rdx, %%rcx ;" /* c*C[4], cf=0 */ + "addq %%rcx, %%r8 ;" + "adcq $0, %%r9 ;" + "movq %%r9, 40(%0) ;" + "adcq $0, %%r10 ;" + "movq %%r10, 48(%0) ;" + "adcq $0, %%r11 ;" + "movq %%r11, 56(%0) ;" + "mov $0, %%ecx ;" + "cmovc %%edx, %%ecx ;" + "addq %%rcx, %%r8 ;" + "movq %%r8, 32(%0) ;" + : + : "r"(c), "r"(a) + : "memory", "cc", "%rax", "%rcx", "%rdx", "%r8", "%r9", "%r10", + "%r11"); +} + +static void mul_256x256_integer_adx(u64 *const c, const u64 *const a, + const u64 *const b) +{ + asm volatile( + "movq (%1), %%rdx; " /* A[0] */ + "mulx (%2), %%r8, %%r9; " /* A[0]*B[0] */ + "xorl %%r10d, %%r10d ;" + "movq %%r8, (%0) ;" + "mulx 8(%2), %%r10, %%r11; " /* A[0]*B[1] */ + "adox %%r9, %%r10 ;" + "movq %%r10, 8(%0) ;" + "mulx 16(%2), %%r15, %%r13; " /* A[0]*B[2] */ + "adox %%r11, %%r15 ;" + "mulx 24(%2), %%r14, %%rdx; " /* A[0]*B[3] */ + "adox %%r13, %%r14 ;" + "movq $0, %%rax ;" + /******************************************/ + "adox %%rdx, %%rax ;" + + "movq 8(%1), %%rdx; " /* A[1] */ + "mulx (%2), %%r8, %%r9; " /* A[1]*B[0] */ + "xorl %%r10d, %%r10d ;" + "adcx 8(%0), %%r8 ;" + "movq %%r8, 8(%0) ;" + "mulx 8(%2), %%r10, %%r11; " /* A[1]*B[1] */ + "adox %%r9, %%r10 ;" + "adcx %%r15, %%r10 ;" + "movq %%r10, 16(%0) ;" + "mulx 16(%2), %%r15, %%r13; " /* A[1]*B[2] */ + "adox %%r11, %%r15 ;" + "adcx %%r14, %%r15 ;" + "movq $0, %%r8 ;" + "mulx 24(%2), %%r14, %%rdx; " /* A[1]*B[3] */ + "adox %%r13, %%r14 ;" + "adcx %%rax, %%r14 ;" + "movq $0, %%rax ;" + /******************************************/ + "adox %%rdx, %%rax ;" + "adcx %%r8, %%rax ;" + + "movq 16(%1), %%rdx; " /* A[2] */ + "mulx (%2), %%r8, %%r9; " /* A[2]*B[0] */ + "xorl %%r10d, %%r10d ;" + "adcx 16(%0), %%r8 ;" + "movq %%r8, 16(%0) ;" + "mulx 8(%2), %%r10, %%r11; " /* A[2]*B[1] */ + "adox %%r9, %%r10 ;" + "adcx %%r15, %%r10 ;" + "movq %%r10, 24(%0) ;" + "mulx 16(%2), %%r15, %%r13; " /* A[2]*B[2] */ + "adox %%r11, %%r15 ;" + "adcx %%r14, %%r15 ;" + "movq $0, %%r8 ;" + "mulx 24(%2), %%r14, %%rdx; " /* A[2]*B[3] */ + "adox %%r13, %%r14 ;" + "adcx %%rax, %%r14 ;" + "movq $0, %%rax ;" + /******************************************/ + "adox %%rdx, %%rax ;" + "adcx %%r8, %%rax ;" + + "movq 24(%1), %%rdx; " /* A[3] */ + "mulx (%2), %%r8, %%r9; " /* A[3]*B[0] */ + "xorl %%r10d, %%r10d ;" + "adcx 24(%0), %%r8 ;" + "movq %%r8, 24(%0) ;" + "mulx 8(%2), %%r10, %%r11; " /* A[3]*B[1] */ + "adox %%r9, %%r10 ;" + "adcx %%r15, %%r10 ;" + "movq %%r10, 32(%0) ;" + "mulx 16(%2), %%r15, %%r13; " /* A[3]*B[2] */ + "adox %%r11, %%r15 ;" + "adcx %%r14, %%r15 ;" + "movq %%r15, 40(%0) ;" + "movq $0, %%r8 ;" + "mulx 24(%2), %%r14, %%rdx; " /* A[3]*B[3] */ + "adox %%r13, %%r14 ;" + "adcx %%rax, %%r14 ;" + "movq %%r14, 48(%0) ;" + "movq $0, %%rax ;" + /******************************************/ + "adox %%rdx, %%rax ;" + "adcx %%r8, %%rax ;" + "movq %%rax, 56(%0) ;" + : + : "r"(c), "r"(a), "r"(b) + : "memory", "cc", "%rax", "%rdx", "%r8", "%r9", "%r10", "%r11", + "%r13", "%r14", "%r15"); 
+} + +static void mul_256x256_integer_bmi2(u64 *const c, const u64 *const a, + const u64 *const b) +{ + asm volatile( + "movq (%1), %%rdx; " /* A[0] */ + "mulx (%2), %%r8, %%r15; " /* A[0]*B[0] */ + "movq %%r8, (%0) ;" + "mulx 8(%2), %%r10, %%rax; " /* A[0]*B[1] */ + "addq %%r10, %%r15 ;" + "mulx 16(%2), %%r8, %%rbx; " /* A[0]*B[2] */ + "adcq %%r8, %%rax ;" + "mulx 24(%2), %%r10, %%rcx; " /* A[0]*B[3] */ + "adcq %%r10, %%rbx ;" + /******************************************/ + "adcq $0, %%rcx ;" + + "movq 8(%1), %%rdx; " /* A[1] */ + "mulx (%2), %%r8, %%r9; " /* A[1]*B[0] */ + "addq %%r15, %%r8 ;" + "movq %%r8, 8(%0) ;" + "mulx 8(%2), %%r10, %%r11; " /* A[1]*B[1] */ + "adcq %%r10, %%r9 ;" + "mulx 16(%2), %%r8, %%r13; " /* A[1]*B[2] */ + "adcq %%r8, %%r11 ;" + "mulx 24(%2), %%r10, %%r15; " /* A[1]*B[3] */ + "adcq %%r10, %%r13 ;" + /******************************************/ + "adcq $0, %%r15 ;" + + "addq %%r9, %%rax ;" + "adcq %%r11, %%rbx ;" + "adcq %%r13, %%rcx ;" + "adcq $0, %%r15 ;" + + "movq 16(%1), %%rdx; " /* A[2] */ + "mulx (%2), %%r8, %%r9; " /* A[2]*B[0] */ + "addq %%rax, %%r8 ;" + "movq %%r8, 16(%0) ;" + "mulx 8(%2), %%r10, %%r11; " /* A[2]*B[1] */ + "adcq %%r10, %%r9 ;" + "mulx 16(%2), %%r8, %%r13; " /* A[2]*B[2] */ + "adcq %%r8, %%r11 ;" + "mulx 24(%2), %%r10, %%rax; " /* A[2]*B[3] */ + "adcq %%r10, %%r13 ;" + /******************************************/ + "adcq $0, %%rax ;" + + "addq %%r9, %%rbx ;" + "adcq %%r11, %%rcx ;" + "adcq %%r13, %%r15 ;" + "adcq $0, %%rax ;" + + "movq 24(%1), %%rdx; " /* A[3] */ + "mulx (%2), %%r8, %%r9; " /* A[3]*B[0] */ + "addq %%rbx, %%r8 ;" + "movq %%r8, 24(%0) ;" + "mulx 8(%2), %%r10, %%r11; " /* A[3]*B[1] */ + "adcq %%r10, %%r9 ;" + "mulx 16(%2), %%r8, %%r13; " /* A[3]*B[2] */ + "adcq %%r8, %%r11 ;" + "mulx 24(%2), %%r10, %%rbx; " /* A[3]*B[3] */ + "adcq %%r10, %%r13 ;" + /******************************************/ + "adcq $0, %%rbx ;" + + "addq %%r9, %%rcx ;" + "movq %%rcx, 32(%0) ;" + "adcq %%r11, %%r15 ;" + "movq %%r15, 40(%0) ;" + "adcq %%r13, %%rax ;" + "movq %%rax, 48(%0) ;" + "adcq $0, %%rbx ;" + "movq %%rbx, 56(%0) ;" + : + : "r"(c), "r"(a), "r"(b) + : "memory", "cc", "%rax", "%rbx", "%rcx", "%rdx", "%r8", "%r9", + "%r10", "%r11", "%r13", "%r15"); +} + +static void sqr_256x256_integer_adx(u64 *const c, const u64 *const a) +{ + asm volatile( + "movq (%1), %%rdx ;" /* A[0] */ + "mulx 8(%1), %%r8, %%r14 ;" /* A[1]*A[0] */ + "xorl %%r15d, %%r15d;" + "mulx 16(%1), %%r9, %%r10 ;" /* A[2]*A[0] */ + "adcx %%r14, %%r9 ;" + "mulx 24(%1), %%rax, %%rcx ;" /* A[3]*A[0] */ + "adcx %%rax, %%r10 ;" + "movq 24(%1), %%rdx ;" /* A[3] */ + "mulx 8(%1), %%r11, %%rbx ;" /* A[1]*A[3] */ + "adcx %%rcx, %%r11 ;" + "mulx 16(%1), %%rax, %%r13 ;" /* A[2]*A[3] */ + "adcx %%rax, %%rbx ;" + "movq 8(%1), %%rdx ;" /* A[1] */ + "adcx %%r15, %%r13 ;" + "mulx 16(%1), %%rax, %%rcx ;" /* A[2]*A[1] */ + "movq $0, %%r14 ;" + /******************************************/ + "adcx %%r15, %%r14 ;" + + "xorl %%r15d, %%r15d;" + "adox %%rax, %%r10 ;" + "adcx %%r8, %%r8 ;" + "adox %%rcx, %%r11 ;" + "adcx %%r9, %%r9 ;" + "adox %%r15, %%rbx ;" + "adcx %%r10, %%r10 ;" + "adox %%r15, %%r13 ;" + "adcx %%r11, %%r11 ;" + "adox %%r15, %%r14 ;" + "adcx %%rbx, %%rbx ;" + "adcx %%r13, %%r13 ;" + "adcx %%r14, %%r14 ;" + + "movq (%1), %%rdx ;" + "mulx %%rdx, %%rax, %%rcx ;" /* A[0]^2 */ + /*******************/ + "movq %%rax, 0(%0) ;" + "addq %%rcx, %%r8 ;" + "movq %%r8, 8(%0) ;" + "movq 8(%1), %%rdx ;" + "mulx %%rdx, %%rax, %%rcx ;" /* A[1]^2 */ + "adcq %%rax, %%r9 ;" + "movq %%r9, 16(%0) ;" + 
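	/*
	 * Editor's note (not part of the patch): the sqr*_256x256_integer_*
	 * routines rely on the usual squaring identity
	 *
	 *   (sum_i a_i*2^(64i))^2 = sum_i a_i^2 * 2^(128i)
	 *                           + 2*sum_{i<j} a_i*a_j * 2^(64(i+j))
	 *
	 * so the six cross products a_i*a_j are computed once and doubled
	 * (via the "adcx reg,reg" chain above in the ADX variants, via
	 * shldq in the BMI2 variants) before the four diagonal squares
	 * a_i^2 are folded in by the mulx/adcq sequence around this point.
	 */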
"adcq %%rcx, %%r10 ;" + "movq %%r10, 24(%0) ;" + "movq 16(%1), %%rdx ;" + "mulx %%rdx, %%rax, %%rcx ;" /* A[2]^2 */ + "adcq %%rax, %%r11 ;" + "movq %%r11, 32(%0) ;" + "adcq %%rcx, %%rbx ;" + "movq %%rbx, 40(%0) ;" + "movq 24(%1), %%rdx ;" + "mulx %%rdx, %%rax, %%rcx ;" /* A[3]^2 */ + "adcq %%rax, %%r13 ;" + "movq %%r13, 48(%0) ;" + "adcq %%rcx, %%r14 ;" + "movq %%r14, 56(%0) ;" + : + : "r"(c), "r"(a) + : "memory", "cc", "%rax", "%rbx", "%rcx", "%rdx", "%r8", "%r9", + "%r10", "%r11", "%r13", "%r14", "%r15"); +} + +static void sqr_256x256_integer_bmi2(u64 *const c, const u64 *const a) +{ + asm volatile( + "movq 8(%1), %%rdx ;" /* A[1] */ + "mulx (%1), %%r8, %%r9 ;" /* A[0]*A[1] */ + "mulx 16(%1), %%r10, %%r11 ;" /* A[2]*A[1] */ + "mulx 24(%1), %%rcx, %%r14 ;" /* A[3]*A[1] */ + + "movq 16(%1), %%rdx ;" /* A[2] */ + "mulx 24(%1), %%r15, %%r13 ;" /* A[3]*A[2] */ + "mulx (%1), %%rax, %%rdx ;" /* A[0]*A[2] */ + + "addq %%rax, %%r9 ;" + "adcq %%rdx, %%r10 ;" + "adcq %%rcx, %%r11 ;" + "adcq %%r14, %%r15 ;" + "adcq $0, %%r13 ;" + "movq $0, %%r14 ;" + "adcq $0, %%r14 ;" + + "movq (%1), %%rdx ;" /* A[0] */ + "mulx 24(%1), %%rax, %%rcx ;" /* A[0]*A[3] */ + + "addq %%rax, %%r10 ;" + "adcq %%rcx, %%r11 ;" + "adcq $0, %%r15 ;" + "adcq $0, %%r13 ;" + "adcq $0, %%r14 ;" + + "shldq $1, %%r13, %%r14 ;" + "shldq $1, %%r15, %%r13 ;" + "shldq $1, %%r11, %%r15 ;" + "shldq $1, %%r10, %%r11 ;" + "shldq $1, %%r9, %%r10 ;" + "shldq $1, %%r8, %%r9 ;" + "shlq $1, %%r8 ;" + + /*******************/ + "mulx %%rdx, %%rax, %%rcx ;" /* A[0]^2 */ + /*******************/ + "movq %%rax, 0(%0) ;" + "addq %%rcx, %%r8 ;" + "movq %%r8, 8(%0) ;" + "movq 8(%1), %%rdx ;" + "mulx %%rdx, %%rax, %%rcx ;" /* A[1]^2 */ + "adcq %%rax, %%r9 ;" + "movq %%r9, 16(%0) ;" + "adcq %%rcx, %%r10 ;" + "movq %%r10, 24(%0) ;" + "movq 16(%1), %%rdx ;" + "mulx %%rdx, %%rax, %%rcx ;" /* A[2]^2 */ + "adcq %%rax, %%r11 ;" + "movq %%r11, 32(%0) ;" + "adcq %%rcx, %%r15 ;" + "movq %%r15, 40(%0) ;" + "movq 24(%1), %%rdx ;" + "mulx %%rdx, %%rax, %%rcx ;" /* A[3]^2 */ + "adcq %%rax, %%r13 ;" + "movq %%r13, 48(%0) ;" + "adcq %%rcx, %%r14 ;" + "movq %%r14, 56(%0) ;" + : + : "r"(c), "r"(a) + : "memory", "cc", "%rax", "%rcx", "%rdx", "%r8", "%r9", "%r10", + "%r11", "%r13", "%r14", "%r15"); +} + +static void red_eltfp25519_1w_adx(u64 *const c, const u64 *const a) +{ + asm volatile( + "movl $38, %%edx ;" /* 2*c = 38 = 2^256 */ + "mulx 32(%1), %%r8, %%r10 ;" /* c*C[4] */ + "xorl %%ebx, %%ebx ;" + "adox (%1), %%r8 ;" + "mulx 40(%1), %%r9, %%r11 ;" /* c*C[5] */ + "adcx %%r10, %%r9 ;" + "adox 8(%1), %%r9 ;" + "mulx 48(%1), %%r10, %%rax ;" /* c*C[6] */ + "adcx %%r11, %%r10 ;" + "adox 16(%1), %%r10 ;" + "mulx 56(%1), %%r11, %%rcx ;" /* c*C[7] */ + "adcx %%rax, %%r11 ;" + "adox 24(%1), %%r11 ;" + /***************************************/ + "adcx %%rbx, %%rcx ;" + "adox %%rbx, %%rcx ;" + "imul %%rdx, %%rcx ;" /* c*C[4], cf=0, of=0 */ + "adcx %%rcx, %%r8 ;" + "adcx %%rbx, %%r9 ;" + "movq %%r9, 8(%0) ;" + "adcx %%rbx, %%r10 ;" + "movq %%r10, 16(%0) ;" + "adcx %%rbx, %%r11 ;" + "movq %%r11, 24(%0) ;" + "mov $0, %%ecx ;" + "cmovc %%edx, %%ecx ;" + "addq %%rcx, %%r8 ;" + "movq %%r8, (%0) ;" + : + : "r"(c), "r"(a) + : "memory", "cc", "%rax", "%rbx", "%rcx", "%rdx", "%r8", "%r9", + "%r10", "%r11"); +} + +static void red_eltfp25519_1w_bmi2(u64 *const c, const u64 *const a) +{ + asm volatile( + "movl $38, %%edx ;" /* 2*c = 38 = 2^256 */ + "mulx 32(%1), %%r8, %%r10 ;" /* c*C[4] */ + "mulx 40(%1), %%r9, %%r11 ;" /* c*C[5] */ + "addq %%r10, %%r9 ;" + "mulx 48(%1), %%r10, %%rax ;" /* 
c*C[6] */ + "adcq %%r11, %%r10 ;" + "mulx 56(%1), %%r11, %%rcx ;" /* c*C[7] */ + "adcq %%rax, %%r11 ;" + /***************************************/ + "adcq $0, %%rcx ;" + "addq (%1), %%r8 ;" + "adcq 8(%1), %%r9 ;" + "adcq 16(%1), %%r10 ;" + "adcq 24(%1), %%r11 ;" + "adcq $0, %%rcx ;" + "imul %%rdx, %%rcx ;" /* c*C[4], cf=0 */ + "addq %%rcx, %%r8 ;" + "adcq $0, %%r9 ;" + "movq %%r9, 8(%0) ;" + "adcq $0, %%r10 ;" + "movq %%r10, 16(%0) ;" + "adcq $0, %%r11 ;" + "movq %%r11, 24(%0) ;" + "mov $0, %%ecx ;" + "cmovc %%edx, %%ecx ;" + "addq %%rcx, %%r8 ;" + "movq %%r8, (%0) ;" + : + : "r"(c), "r"(a) + : "memory", "cc", "%rax", "%rcx", "%rdx", "%r8", "%r9", "%r10", + "%r11"); +} + +static __always_inline void +add_eltfp25519_1w_adx(u64 *const c, const u64 *const a, const u64 *const b) +{ + asm volatile( + "mov $38, %%eax ;" + "xorl %%ecx, %%ecx ;" + "movq (%2), %%r8 ;" + "adcx (%1), %%r8 ;" + "movq 8(%2), %%r9 ;" + "adcx 8(%1), %%r9 ;" + "movq 16(%2), %%r10 ;" + "adcx 16(%1), %%r10 ;" + "movq 24(%2), %%r11 ;" + "adcx 24(%1), %%r11 ;" + "cmovc %%eax, %%ecx ;" + "xorl %%eax, %%eax ;" + "adcx %%rcx, %%r8 ;" + "adcx %%rax, %%r9 ;" + "movq %%r9, 8(%0) ;" + "adcx %%rax, %%r10 ;" + "movq %%r10, 16(%0) ;" + "adcx %%rax, %%r11 ;" + "movq %%r11, 24(%0) ;" + "mov $38, %%ecx ;" + "cmovc %%ecx, %%eax ;" + "addq %%rax, %%r8 ;" + "movq %%r8, (%0) ;" + : + : "r"(c), "r"(a), "r"(b) + : "memory", "cc", "%rax", "%rcx", "%r8", "%r9", "%r10", "%r11"); +} + +static __always_inline void +add_eltfp25519_1w_bmi2(u64 *const c, const u64 *const a, const u64 *const b) +{ + asm volatile( + "mov $38, %%eax ;" + "movq (%2), %%r8 ;" + "addq (%1), %%r8 ;" + "movq 8(%2), %%r9 ;" + "adcq 8(%1), %%r9 ;" + "movq 16(%2), %%r10 ;" + "adcq 16(%1), %%r10 ;" + "movq 24(%2), %%r11 ;" + "adcq 24(%1), %%r11 ;" + "mov $0, %%ecx ;" + "cmovc %%eax, %%ecx ;" + "addq %%rcx, %%r8 ;" + "adcq $0, %%r9 ;" + "movq %%r9, 8(%0) ;" + "adcq $0, %%r10 ;" + "movq %%r10, 16(%0) ;" + "adcq $0, %%r11 ;" + "movq %%r11, 24(%0) ;" + "mov $0, %%ecx ;" + "cmovc %%eax, %%ecx ;" + "addq %%rcx, %%r8 ;" + "movq %%r8, (%0) ;" + : + : "r"(c), "r"(a), "r"(b) + : "memory", "cc", "%rax", "%rcx", "%r8", "%r9", "%r10", "%r11"); +} + +static __always_inline void +sub_eltfp25519_1w(u64 *const c, const u64 *const a, const u64 *const b) +{ + asm volatile( + "mov $38, %%eax ;" + "movq (%1), %%r8 ;" + "subq (%2), %%r8 ;" + "movq 8(%1), %%r9 ;" + "sbbq 8(%2), %%r9 ;" + "movq 16(%1), %%r10 ;" + "sbbq 16(%2), %%r10 ;" + "movq 24(%1), %%r11 ;" + "sbbq 24(%2), %%r11 ;" + "mov $0, %%ecx ;" + "cmovc %%eax, %%ecx ;" + "subq %%rcx, %%r8 ;" + "sbbq $0, %%r9 ;" + "movq %%r9, 8(%0) ;" + "sbbq $0, %%r10 ;" + "movq %%r10, 16(%0) ;" + "sbbq $0, %%r11 ;" + "movq %%r11, 24(%0) ;" + "mov $0, %%ecx ;" + "cmovc %%eax, %%ecx ;" + "subq %%rcx, %%r8 ;" + "movq %%r8, (%0) ;" + : + : "r"(c), "r"(a), "r"(b) + : "memory", "cc", "%rax", "%rcx", "%r8", "%r9", "%r10", "%r11"); +} + +/* Multiplication by a24 = (A+2)/4 = (486662+2)/4 = 121666 */ +static __always_inline void +mul_a24_eltfp25519_1w(u64 *const c, const u64 *const a) +{ + const u64 a24 = 121666; + asm volatile( + "movq %2, %%rdx ;" + "mulx (%1), %%r8, %%r10 ;" + "mulx 8(%1), %%r9, %%r11 ;" + "addq %%r10, %%r9 ;" + "mulx 16(%1), %%r10, %%rax ;" + "adcq %%r11, %%r10 ;" + "mulx 24(%1), %%r11, %%rcx ;" + "adcq %%rax, %%r11 ;" + /**************************/ + "adcq $0, %%rcx ;" + "movl $38, %%edx ;" /* 2*c = 38 = 2^256 mod 2^255-19*/ + "imul %%rdx, %%rcx ;" + "addq %%rcx, %%r8 ;" + "adcq $0, %%r9 ;" + "movq %%r9, 8(%0) ;" + "adcq $0, %%r10 ;" + "movq %%r10, 
16(%0) ;" + "adcq $0, %%r11 ;" + "movq %%r11, 24(%0) ;" + "mov $0, %%ecx ;" + "cmovc %%edx, %%ecx ;" + "addq %%rcx, %%r8 ;" + "movq %%r8, (%0) ;" + : + : "r"(c), "r"(a), "r"(a24) + : "memory", "cc", "%rax", "%rcx", "%rdx", "%r8", "%r9", "%r10", + "%r11"); +} + +static void inv_eltfp25519_1w_adx(u64 *const c, const u64 *const a) +{ + struct { + eltfp25519_1w_buffer buffer; + eltfp25519_1w x0, x1, x2; + } __aligned(32) m; + u64 *T[4]; + + T[0] = m.x0; + T[1] = c; /* x^(-1) */ + T[2] = m.x1; + T[3] = m.x2; + + copy_eltfp25519_1w(T[1], a); + sqrn_eltfp25519_1w_adx(T[1], 1); + copy_eltfp25519_1w(T[2], T[1]); + sqrn_eltfp25519_1w_adx(T[2], 2); + mul_eltfp25519_1w_adx(T[0], a, T[2]); + mul_eltfp25519_1w_adx(T[1], T[1], T[0]); + copy_eltfp25519_1w(T[2], T[1]); + sqrn_eltfp25519_1w_adx(T[2], 1); + mul_eltfp25519_1w_adx(T[0], T[0], T[2]); + copy_eltfp25519_1w(T[2], T[0]); + sqrn_eltfp25519_1w_adx(T[2], 5); + mul_eltfp25519_1w_adx(T[0], T[0], T[2]); + copy_eltfp25519_1w(T[2], T[0]); + sqrn_eltfp25519_1w_adx(T[2], 10); + mul_eltfp25519_1w_adx(T[2], T[2], T[0]); + copy_eltfp25519_1w(T[3], T[2]); + sqrn_eltfp25519_1w_adx(T[3], 20); + mul_eltfp25519_1w_adx(T[3], T[3], T[2]); + sqrn_eltfp25519_1w_adx(T[3], 10); + mul_eltfp25519_1w_adx(T[3], T[3], T[0]); + copy_eltfp25519_1w(T[0], T[3]); + sqrn_eltfp25519_1w_adx(T[0], 50); + mul_eltfp25519_1w_adx(T[0], T[0], T[3]); + copy_eltfp25519_1w(T[2], T[0]); + sqrn_eltfp25519_1w_adx(T[2], 100); + mul_eltfp25519_1w_adx(T[2], T[2], T[0]); + sqrn_eltfp25519_1w_adx(T[2], 50); + mul_eltfp25519_1w_adx(T[2], T[2], T[3]); + sqrn_eltfp25519_1w_adx(T[2], 5); + mul_eltfp25519_1w_adx(T[1], T[1], T[2]); + + memzero_explicit(&m, sizeof(m)); +} + +static void inv_eltfp25519_1w_bmi2(u64 *const c, const u64 *const a) +{ + struct { + eltfp25519_1w_buffer buffer; + eltfp25519_1w x0, x1, x2; + } __aligned(32) m; + u64 *T[5]; + + T[0] = m.x0; + T[1] = c; /* x^(-1) */ + T[2] = m.x1; + T[3] = m.x2; + + copy_eltfp25519_1w(T[1], a); + sqrn_eltfp25519_1w_bmi2(T[1], 1); + copy_eltfp25519_1w(T[2], T[1]); + sqrn_eltfp25519_1w_bmi2(T[2], 2); + mul_eltfp25519_1w_bmi2(T[0], a, T[2]); + mul_eltfp25519_1w_bmi2(T[1], T[1], T[0]); + copy_eltfp25519_1w(T[2], T[1]); + sqrn_eltfp25519_1w_bmi2(T[2], 1); + mul_eltfp25519_1w_bmi2(T[0], T[0], T[2]); + copy_eltfp25519_1w(T[2], T[0]); + sqrn_eltfp25519_1w_bmi2(T[2], 5); + mul_eltfp25519_1w_bmi2(T[0], T[0], T[2]); + copy_eltfp25519_1w(T[2], T[0]); + sqrn_eltfp25519_1w_bmi2(T[2], 10); + mul_eltfp25519_1w_bmi2(T[2], T[2], T[0]); + copy_eltfp25519_1w(T[3], T[2]); + sqrn_eltfp25519_1w_bmi2(T[3], 20); + mul_eltfp25519_1w_bmi2(T[3], T[3], T[2]); + sqrn_eltfp25519_1w_bmi2(T[3], 10); + mul_eltfp25519_1w_bmi2(T[3], T[3], T[0]); + copy_eltfp25519_1w(T[0], T[3]); + sqrn_eltfp25519_1w_bmi2(T[0], 50); + mul_eltfp25519_1w_bmi2(T[0], T[0], T[3]); + copy_eltfp25519_1w(T[2], T[0]); + sqrn_eltfp25519_1w_bmi2(T[2], 100); + mul_eltfp25519_1w_bmi2(T[2], T[2], T[0]); + sqrn_eltfp25519_1w_bmi2(T[2], 50); + mul_eltfp25519_1w_bmi2(T[2], T[2], T[3]); + sqrn_eltfp25519_1w_bmi2(T[2], 5); + mul_eltfp25519_1w_bmi2(T[1], T[1], T[2]); + + memzero_explicit(&m, sizeof(m)); +} + +/* Given c, a 256-bit number, fred_eltfp25519_1w updates c + * with a number such that 0 <= C < 2**255-19. + */ +static __always_inline void fred_eltfp25519_1w(u64 *const c) +{ + u64 tmp0 = 38, tmp1 = 19; + asm volatile( + "btrq $63, %3 ;" /* Put bit 255 in carry flag and clear */ + "cmovncl %k5, %k4 ;" /* c[255] ? 
38 : 19 */ + + /* Add either 19 or 38 to c */ + "addq %4, %0 ;" + "adcq $0, %1 ;" + "adcq $0, %2 ;" + "adcq $0, %3 ;" + + /* Test for bit 255 again; only triggered on overflow modulo 2^255-19 */ + "movl $0, %k4 ;" + "cmovnsl %k5, %k4 ;" /* c[255] ? 0 : 19 */ + "btrq $63, %3 ;" /* Clear bit 255 */ + + /* Subtract 19 if necessary */ + "subq %4, %0 ;" + "sbbq $0, %1 ;" + "sbbq $0, %2 ;" + "sbbq $0, %3 ;" + + : "+r"(c[0]), "+r"(c[1]), "+r"(c[2]), "+r"(c[3]), "+r"(tmp0), + "+r"(tmp1) + : + : "memory", "cc"); +} + +static __always_inline void cswap(u8 bit, u64 *const px, u64 *const py) +{ + u64 temp; + asm volatile( + "test %9, %9 ;" + "movq %0, %8 ;" + "cmovnzq %4, %0 ;" + "cmovnzq %8, %4 ;" + "movq %1, %8 ;" + "cmovnzq %5, %1 ;" + "cmovnzq %8, %5 ;" + "movq %2, %8 ;" + "cmovnzq %6, %2 ;" + "cmovnzq %8, %6 ;" + "movq %3, %8 ;" + "cmovnzq %7, %3 ;" + "cmovnzq %8, %7 ;" + : "+r"(px[0]), "+r"(px[1]), "+r"(px[2]), "+r"(px[3]), + "+r"(py[0]), "+r"(py[1]), "+r"(py[2]), "+r"(py[3]), + "=r"(temp) + : "r"(bit) + : "cc" + ); +} + +static __always_inline void cselect(u8 bit, u64 *const px, const u64 *const py) +{ + asm volatile( + "test %4, %4 ;" + "cmovnzq %5, %0 ;" + "cmovnzq %6, %1 ;" + "cmovnzq %7, %2 ;" + "cmovnzq %8, %3 ;" + : "+r"(px[0]), "+r"(px[1]), "+r"(px[2]), "+r"(px[3]) + : "r"(bit), "rm"(py[0]), "rm"(py[1]), "rm"(py[2]), "rm"(py[3]) + : "cc" + ); +} + +static void curve25519_adx(u8 shared[CURVE25519_KEY_SIZE], + const u8 private_key[CURVE25519_KEY_SIZE], + const u8 session_key[CURVE25519_KEY_SIZE]) +{ + struct { + u64 buffer[4 * NUM_WORDS_ELTFP25519]; + u64 coordinates[4 * NUM_WORDS_ELTFP25519]; + u64 workspace[6 * NUM_WORDS_ELTFP25519]; + u8 session[CURVE25519_KEY_SIZE]; + u8 private[CURVE25519_KEY_SIZE]; + } __aligned(32) m; + + int i = 0, j = 0; + u64 prev = 0; + u64 *const X1 = (u64 *)m.session; + u64 *const key = (u64 *)m.private; + u64 *const Px = m.coordinates + 0; + u64 *const Pz = m.coordinates + 4; + u64 *const Qx = m.coordinates + 8; + u64 *const Qz = m.coordinates + 12; + u64 *const X2 = Qx; + u64 *const Z2 = Qz; + u64 *const X3 = Px; + u64 *const Z3 = Pz; + u64 *const X2Z2 = Qx; + u64 *const X3Z3 = Px; + + u64 *const A = m.workspace + 0; + u64 *const B = m.workspace + 4; + u64 *const D = m.workspace + 8; + u64 *const C = m.workspace + 12; + u64 *const DA = m.workspace + 16; + u64 *const CB = m.workspace + 20; + u64 *const AB = A; + u64 *const DC = D; + u64 *const DACB = DA; + + memcpy(m.private, private_key, sizeof(m.private)); + memcpy(m.session, session_key, sizeof(m.session)); + + curve25519_clamp_secret(m.private); + + /* As in the draft: + * When receiving such an array, implementations of curve25519 + * MUST mask the most-significant bit in the final byte. 
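	 * (Editor's note: concretely (1 << (255 % 8)) - 1 == 0x7f, so the
	 * masking statement below clears bit 255 of the little-endian
	 * u-coordinate by ANDing its last byte with 0x7f.)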
This + * is done to preserve compatibility with point formats which + * reserve the sign bit for use in other protocols and to + * increase resistance to implementation fingerprinting + */ + m.session[CURVE25519_KEY_SIZE - 1] &= (1 << (255 % 8)) - 1; + + copy_eltfp25519_1w(Px, X1); + setzero_eltfp25519_1w(Pz); + setzero_eltfp25519_1w(Qx); + setzero_eltfp25519_1w(Qz); + + Pz[0] = 1; + Qx[0] = 1; + + /* main-loop */ + prev = 0; + j = 62; + for (i = 3; i >= 0; --i) { + while (j >= 0) { + u64 bit = (key[i] >> j) & 0x1; + u64 swap = bit ^ prev; + prev = bit; + + add_eltfp25519_1w_adx(A, X2, Z2); /* A = (X2+Z2) */ + sub_eltfp25519_1w(B, X2, Z2); /* B = (X2-Z2) */ + add_eltfp25519_1w_adx(C, X3, Z3); /* C = (X3+Z3) */ + sub_eltfp25519_1w(D, X3, Z3); /* D = (X3-Z3) */ + mul_eltfp25519_2w_adx(DACB, AB, DC); /* [DA|CB] = [A|B]*[D|C] */ + + cselect(swap, A, C); + cselect(swap, B, D); + + sqr_eltfp25519_2w_adx(AB); /* [AA|BB] = [A^2|B^2] */ + add_eltfp25519_1w_adx(X3, DA, CB); /* X3 = (DA+CB) */ + sub_eltfp25519_1w(Z3, DA, CB); /* Z3 = (DA-CB) */ + sqr_eltfp25519_2w_adx(X3Z3); /* [X3|Z3] = [(DA+CB)|(DA+CB)]^2 */ + + copy_eltfp25519_1w(X2, B); /* X2 = B^2 */ + sub_eltfp25519_1w(Z2, A, B); /* Z2 = E = AA-BB */ + + mul_a24_eltfp25519_1w(B, Z2); /* B = a24*E */ + add_eltfp25519_1w_adx(B, B, X2); /* B = a24*E+B */ + mul_eltfp25519_2w_adx(X2Z2, X2Z2, AB); /* [X2|Z2] = [B|E]*[A|a24*E+B] */ + mul_eltfp25519_1w_adx(Z3, Z3, X1); /* Z3 = Z3*X1 */ + --j; + } + j = 63; + } + + inv_eltfp25519_1w_adx(A, Qz); + mul_eltfp25519_1w_adx((u64 *)shared, Qx, A); + fred_eltfp25519_1w((u64 *)shared); + + memzero_explicit(&m, sizeof(m)); +} + +static void curve25519_adx_base(u8 session_key[CURVE25519_KEY_SIZE], + const u8 private_key[CURVE25519_KEY_SIZE]) +{ + struct { + u64 buffer[4 * NUM_WORDS_ELTFP25519]; + u64 coordinates[4 * NUM_WORDS_ELTFP25519]; + u64 workspace[4 * NUM_WORDS_ELTFP25519]; + u8 private[CURVE25519_KEY_SIZE]; + } __aligned(32) m; + + const int ite[4] = { 64, 64, 64, 63 }; + const int q = 3; + u64 swap = 1; + + int i = 0, j = 0, k = 0; + u64 *const key = (u64 *)m.private; + u64 *const Ur1 = m.coordinates + 0; + u64 *const Zr1 = m.coordinates + 4; + u64 *const Ur2 = m.coordinates + 8; + u64 *const Zr2 = m.coordinates + 12; + + u64 *const UZr1 = m.coordinates + 0; + u64 *const ZUr2 = m.coordinates + 8; + + u64 *const A = m.workspace + 0; + u64 *const B = m.workspace + 4; + u64 *const C = m.workspace + 8; + u64 *const D = m.workspace + 12; + + u64 *const AB = m.workspace + 0; + u64 *const CD = m.workspace + 8; + + const u64 *const P = table_ladder_8k; + + memcpy(m.private, private_key, sizeof(m.private)); + + curve25519_clamp_secret(m.private); + + setzero_eltfp25519_1w(Ur1); + setzero_eltfp25519_1w(Zr1); + setzero_eltfp25519_1w(Zr2); + Ur1[0] = 1; + Zr1[0] = 1; + Zr2[0] = 1; + + /* G-S */ + Ur2[3] = 0x1eaecdeee27cab34UL; + Ur2[2] = 0xadc7a0b9235d48e2UL; + Ur2[1] = 0xbbf095ae14b2edf8UL; + Ur2[0] = 0x7e94e1fec82faabdUL; + + /* main-loop */ + j = q; + for (i = 0; i < NUM_WORDS_ELTFP25519; ++i) { + while (j < ite[i]) { + u64 bit = (key[i] >> j) & 0x1; + k = (64 * i + j - q); + swap = swap ^ bit; + cswap(swap, Ur1, Ur2); + cswap(swap, Zr1, Zr2); + swap = bit; + /* Addition */ + sub_eltfp25519_1w(B, Ur1, Zr1); /* B = Ur1-Zr1 */ + add_eltfp25519_1w_adx(A, Ur1, Zr1); /* A = Ur1+Zr1 */ + mul_eltfp25519_1w_adx(C, &P[4 * k], B); /* C = M0-B */ + sub_eltfp25519_1w(B, A, C); /* B = (Ur1+Zr1) - M*(Ur1-Zr1) */ + add_eltfp25519_1w_adx(A, A, C); /* A = (Ur1+Zr1) + M*(Ur1-Zr1) */ + sqr_eltfp25519_2w_adx(AB); /* A = A^2 | B = B^2 
*/ + mul_eltfp25519_2w_adx(UZr1, ZUr2, AB); /* Ur1 = Zr2*A | Zr1 = Ur2*B */ + ++j; + } + j = 0; + } + + /* Doubling */ + for (i = 0; i < q; ++i) { + add_eltfp25519_1w_adx(A, Ur1, Zr1); /* A = Ur1+Zr1 */ + sub_eltfp25519_1w(B, Ur1, Zr1); /* B = Ur1-Zr1 */ + sqr_eltfp25519_2w_adx(AB); /* A = A**2 B = B**2 */ + copy_eltfp25519_1w(C, B); /* C = B */ + sub_eltfp25519_1w(B, A, B); /* B = A-B */ + mul_a24_eltfp25519_1w(D, B); /* D = my_a24*B */ + add_eltfp25519_1w_adx(D, D, C); /* D = D+C */ + mul_eltfp25519_2w_adx(UZr1, AB, CD); /* Ur1 = A*B Zr1 = Zr1*A */ + } + + /* Convert to affine coordinates */ + inv_eltfp25519_1w_adx(A, Zr1); + mul_eltfp25519_1w_adx((u64 *)session_key, Ur1, A); + fred_eltfp25519_1w((u64 *)session_key); + + memzero_explicit(&m, sizeof(m)); +} + +static void curve25519_bmi2(u8 shared[CURVE25519_KEY_SIZE], + const u8 private_key[CURVE25519_KEY_SIZE], + const u8 session_key[CURVE25519_KEY_SIZE]) +{ + struct { + u64 buffer[4 * NUM_WORDS_ELTFP25519]; + u64 coordinates[4 * NUM_WORDS_ELTFP25519]; + u64 workspace[6 * NUM_WORDS_ELTFP25519]; + u8 session[CURVE25519_KEY_SIZE]; + u8 private[CURVE25519_KEY_SIZE]; + } __aligned(32) m; + + int i = 0, j = 0; + u64 prev = 0; + u64 *const X1 = (u64 *)m.session; + u64 *const key = (u64 *)m.private; + u64 *const Px = m.coordinates + 0; + u64 *const Pz = m.coordinates + 4; + u64 *const Qx = m.coordinates + 8; + u64 *const Qz = m.coordinates + 12; + u64 *const X2 = Qx; + u64 *const Z2 = Qz; + u64 *const X3 = Px; + u64 *const Z3 = Pz; + u64 *const X2Z2 = Qx; + u64 *const X3Z3 = Px; + + u64 *const A = m.workspace + 0; + u64 *const B = m.workspace + 4; + u64 *const D = m.workspace + 8; + u64 *const C = m.workspace + 12; + u64 *const DA = m.workspace + 16; + u64 *const CB = m.workspace + 20; + u64 *const AB = A; + u64 *const DC = D; + u64 *const DACB = DA; + + memcpy(m.private, private_key, sizeof(m.private)); + memcpy(m.session, session_key, sizeof(m.session)); + + curve25519_clamp_secret(m.private); + + /* As in the draft: + * When receiving such an array, implementations of curve25519 + * MUST mask the most-significant bit in the final byte. 
This + * is done to preserve compatibility with point formats which + * reserve the sign bit for use in other protocols and to + * increase resistance to implementation fingerprinting + */ + m.session[CURVE25519_KEY_SIZE - 1] &= (1 << (255 % 8)) - 1; + + copy_eltfp25519_1w(Px, X1); + setzero_eltfp25519_1w(Pz); + setzero_eltfp25519_1w(Qx); + setzero_eltfp25519_1w(Qz); + + Pz[0] = 1; + Qx[0] = 1; + + /* main-loop */ + prev = 0; + j = 62; + for (i = 3; i >= 0; --i) { + while (j >= 0) { + u64 bit = (key[i] >> j) & 0x1; + u64 swap = bit ^ prev; + prev = bit; + + add_eltfp25519_1w_bmi2(A, X2, Z2); /* A = (X2+Z2) */ + sub_eltfp25519_1w(B, X2, Z2); /* B = (X2-Z2) */ + add_eltfp25519_1w_bmi2(C, X3, Z3); /* C = (X3+Z3) */ + sub_eltfp25519_1w(D, X3, Z3); /* D = (X3-Z3) */ + mul_eltfp25519_2w_bmi2(DACB, AB, DC); /* [DA|CB] = [A|B]*[D|C] */ + + cselect(swap, A, C); + cselect(swap, B, D); + + sqr_eltfp25519_2w_bmi2(AB); /* [AA|BB] = [A^2|B^2] */ + add_eltfp25519_1w_bmi2(X3, DA, CB); /* X3 = (DA+CB) */ + sub_eltfp25519_1w(Z3, DA, CB); /* Z3 = (DA-CB) */ + sqr_eltfp25519_2w_bmi2(X3Z3); /* [X3|Z3] = [(DA+CB)|(DA+CB)]^2 */ + + copy_eltfp25519_1w(X2, B); /* X2 = B^2 */ + sub_eltfp25519_1w(Z2, A, B); /* Z2 = E = AA-BB */ + + mul_a24_eltfp25519_1w(B, Z2); /* B = a24*E */ + add_eltfp25519_1w_bmi2(B, B, X2); /* B = a24*E+B */ + mul_eltfp25519_2w_bmi2(X2Z2, X2Z2, AB); /* [X2|Z2] = [B|E]*[A|a24*E+B] */ + mul_eltfp25519_1w_bmi2(Z3, Z3, X1); /* Z3 = Z3*X1 */ + --j; + } + j = 63; + } + + inv_eltfp25519_1w_bmi2(A, Qz); + mul_eltfp25519_1w_bmi2((u64 *)shared, Qx, A); + fred_eltfp25519_1w((u64 *)shared); + + memzero_explicit(&m, sizeof(m)); +} + +static void curve25519_bmi2_base(u8 session_key[CURVE25519_KEY_SIZE], + const u8 private_key[CURVE25519_KEY_SIZE]) +{ + struct { + u64 buffer[4 * NUM_WORDS_ELTFP25519]; + u64 coordinates[4 * NUM_WORDS_ELTFP25519]; + u64 workspace[4 * NUM_WORDS_ELTFP25519]; + u8 private[CURVE25519_KEY_SIZE]; + } __aligned(32) m; + + const int ite[4] = { 64, 64, 64, 63 }; + const int q = 3; + u64 swap = 1; + + int i = 0, j = 0, k = 0; + u64 *const key = (u64 *)m.private; + u64 *const Ur1 = m.coordinates + 0; + u64 *const Zr1 = m.coordinates + 4; + u64 *const Ur2 = m.coordinates + 8; + u64 *const Zr2 = m.coordinates + 12; + + u64 *const UZr1 = m.coordinates + 0; + u64 *const ZUr2 = m.coordinates + 8; + + u64 *const A = m.workspace + 0; + u64 *const B = m.workspace + 4; + u64 *const C = m.workspace + 8; + u64 *const D = m.workspace + 12; + + u64 *const AB = m.workspace + 0; + u64 *const CD = m.workspace + 8; + + const u64 *const P = table_ladder_8k; + + memcpy(m.private, private_key, sizeof(m.private)); + + curve25519_clamp_secret(m.private); + + setzero_eltfp25519_1w(Ur1); + setzero_eltfp25519_1w(Zr1); + setzero_eltfp25519_1w(Zr2); + Ur1[0] = 1; + Zr1[0] = 1; + Zr2[0] = 1; + + /* G-S */ + Ur2[3] = 0x1eaecdeee27cab34UL; + Ur2[2] = 0xadc7a0b9235d48e2UL; + Ur2[1] = 0xbbf095ae14b2edf8UL; + Ur2[0] = 0x7e94e1fec82faabdUL; + + /* main-loop */ + j = q; + for (i = 0; i < NUM_WORDS_ELTFP25519; ++i) { + while (j < ite[i]) { + u64 bit = (key[i] >> j) & 0x1; + k = (64 * i + j - q); + swap = swap ^ bit; + cswap(swap, Ur1, Ur2); + cswap(swap, Zr1, Zr2); + swap = bit; + /* Addition */ + sub_eltfp25519_1w(B, Ur1, Zr1); /* B = Ur1-Zr1 */ + add_eltfp25519_1w_bmi2(A, Ur1, Zr1); /* A = Ur1+Zr1 */ + mul_eltfp25519_1w_bmi2(C, &P[4 * k], B);/* C = M0-B */ + sub_eltfp25519_1w(B, A, C); /* B = (Ur1+Zr1) - M*(Ur1-Zr1) */ + add_eltfp25519_1w_bmi2(A, A, C); /* A = (Ur1+Zr1) + M*(Ur1-Zr1) */ + sqr_eltfp25519_2w_bmi2(AB); /* A 
= A^2 | B = B^2 */ + mul_eltfp25519_2w_bmi2(UZr1, ZUr2, AB); /* Ur1 = Zr2*A | Zr1 = Ur2*B */ + ++j; + } + j = 0; + } + + /* Doubling */ + for (i = 0; i < q; ++i) { + add_eltfp25519_1w_bmi2(A, Ur1, Zr1); /* A = Ur1+Zr1 */ + sub_eltfp25519_1w(B, Ur1, Zr1); /* B = Ur1-Zr1 */ + sqr_eltfp25519_2w_bmi2(AB); /* A = A**2 B = B**2 */ + copy_eltfp25519_1w(C, B); /* C = B */ + sub_eltfp25519_1w(B, A, B); /* B = A-B */ + mul_a24_eltfp25519_1w(D, B); /* D = my_a24*B */ + add_eltfp25519_1w_bmi2(D, D, C); /* D = D+C */ + mul_eltfp25519_2w_bmi2(UZr1, AB, CD); /* Ur1 = A*B Zr1 = Zr1*A */ + } + + /* Convert to affine coordinates */ + inv_eltfp25519_1w_bmi2(A, Zr1); + mul_eltfp25519_1w_bmi2((u64 *)session_key, Ur1, A); + fred_eltfp25519_1w((u64 *)session_key); + + memzero_explicit(&m, sizeof(m)); +} + +void curve25519_arch(u8 mypublic[CURVE25519_KEY_SIZE], + const u8 secret[CURVE25519_KEY_SIZE], + const u8 basepoint[CURVE25519_KEY_SIZE]) +{ + if (static_branch_likely(&curve25519_use_adx)) + curve25519_adx(mypublic, secret, basepoint); + else if (static_branch_likely(&curve25519_use_bmi2)) + curve25519_bmi2(mypublic, secret, basepoint); + else + curve25519_generic(mypublic, secret, basepoint); +} +EXPORT_SYMBOL(curve25519_arch); + +void curve25519_base_arch(u8 pub[CURVE25519_KEY_SIZE], + const u8 secret[CURVE25519_KEY_SIZE]) +{ + if (static_branch_likely(&curve25519_use_adx)) + curve25519_adx_base(pub, secret); + else if (static_branch_likely(&curve25519_use_bmi2)) + curve25519_bmi2_base(pub, secret); + else + curve25519_generic(pub, secret, curve25519_base_point); +} +EXPORT_SYMBOL(curve25519_base_arch); + +static int curve25519_set_secret(struct crypto_kpp *tfm, const void *buf, + unsigned int len) +{ + u8 *secret = kpp_tfm_ctx(tfm); + + if (!len) + curve25519_generate_secret(secret); + else if (len == CURVE25519_KEY_SIZE && + crypto_memneq(buf, curve25519_null_point, CURVE25519_KEY_SIZE)) + memcpy(secret, buf, CURVE25519_KEY_SIZE); + else + return -EINVAL; + return 0; +} + +static int curve25519_generate_public_key(struct kpp_request *req) +{ + struct crypto_kpp *tfm = crypto_kpp_reqtfm(req); + const u8 *secret = kpp_tfm_ctx(tfm); + u8 buf[CURVE25519_KEY_SIZE]; + int copied, nbytes; + + if (req->src) + return -EINVAL; + + curve25519_base_arch(buf, secret); + + /* might want less than we've got */ + nbytes = min_t(size_t, CURVE25519_KEY_SIZE, req->dst_len); + copied = sg_copy_from_buffer(req->dst, sg_nents_for_len(req->dst, + nbytes), + buf, nbytes); + if (copied != nbytes) + return -EINVAL; + return 0; +} + +static int curve25519_compute_shared_secret(struct kpp_request *req) +{ + struct crypto_kpp *tfm = crypto_kpp_reqtfm(req); + const u8 *secret = kpp_tfm_ctx(tfm); + u8 public_key[CURVE25519_KEY_SIZE]; + u8 buf[CURVE25519_KEY_SIZE]; + int copied, nbytes; + + if (!req->src) + return -EINVAL; + + copied = sg_copy_to_buffer(req->src, + sg_nents_for_len(req->src, + CURVE25519_KEY_SIZE), + public_key, CURVE25519_KEY_SIZE); + if (copied != CURVE25519_KEY_SIZE) + return -EINVAL; + + curve25519_arch(buf, secret, public_key); + + /* might want less than we've got */ + nbytes = min_t(size_t, CURVE25519_KEY_SIZE, req->dst_len); + copied = sg_copy_from_buffer(req->dst, sg_nents_for_len(req->dst, + nbytes), + buf, nbytes); + if (copied != nbytes) + return -EINVAL; + return 0; +} + +static unsigned int curve25519_max_size(struct crypto_kpp *tfm) +{ + return CURVE25519_KEY_SIZE; +} + +static struct kpp_alg curve25519_alg = { + .base.cra_name = "curve25519", + .base.cra_driver_name = "curve25519-x86", + 
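	/*
	 * Editor's note: the generic C implementation is assumed to register
	 * at a lower cra_priority (typically 100), so the crypto API prefers
	 * this accelerated driver whenever both are available.
	 */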
.base.cra_priority = 200, + .base.cra_module = THIS_MODULE, + .base.cra_ctxsize = CURVE25519_KEY_SIZE, + + .set_secret = curve25519_set_secret, + .generate_public_key = curve25519_generate_public_key, + .compute_shared_secret = curve25519_compute_shared_secret, + .max_size = curve25519_max_size, +}; + +static int __init curve25519_mod_init(void) +{ + if (boot_cpu_has(X86_FEATURE_BMI2)) + static_branch_enable(&curve25519_use_bmi2); + else if (boot_cpu_has(X86_FEATURE_ADX)) + static_branch_enable(&curve25519_use_adx); + else + return 0; + return crypto_register_kpp(&curve25519_alg); +} + +static void __exit curve25519_mod_exit(void) +{ + if (boot_cpu_has(X86_FEATURE_BMI2) || + boot_cpu_has(X86_FEATURE_ADX)) + crypto_unregister_kpp(&curve25519_alg); +} + +module_init(curve25519_mod_init); +module_exit(curve25519_mod_exit); + +MODULE_ALIAS_CRYPTO("curve25519"); +MODULE_ALIAS_CRYPTO("curve25519-x86"); +MODULE_LICENSE("GPL v2"); diff --git a/arch/x86/crypto/poly1305_glue.c b/arch/x86/crypto/poly1305_glue.c index 4a1c05dce950..370cd88068ec 100644 --- a/arch/x86/crypto/poly1305_glue.c +++ b/arch/x86/crypto/poly1305_glue.c @@ -7,47 +7,23 @@ #include <crypto/algapi.h> #include <crypto/internal/hash.h> +#include <crypto/internal/poly1305.h> #include <crypto/internal/simd.h> -#include <crypto/poly1305.h> #include <linux/crypto.h> +#include <linux/jump_label.h> #include <linux/kernel.h> #include <linux/module.h> #include <asm/simd.h> -struct poly1305_simd_desc_ctx { - struct poly1305_desc_ctx base; - /* derived key u set? */ - bool uset; -#ifdef CONFIG_AS_AVX2 - /* derived keys r^3, r^4 set? */ - bool wset; -#endif - /* derived Poly1305 key r^2 */ - u32 u[5]; - /* ... silently appended r^3 and r^4 when using AVX2 */ -}; - asmlinkage void poly1305_block_sse2(u32 *h, const u8 *src, const u32 *r, unsigned int blocks); asmlinkage void poly1305_2block_sse2(u32 *h, const u8 *src, const u32 *r, unsigned int blocks, const u32 *u); -#ifdef CONFIG_AS_AVX2 asmlinkage void poly1305_4block_avx2(u32 *h, const u8 *src, const u32 *r, unsigned int blocks, const u32 *u); -static bool poly1305_use_avx2; -#endif - -static int poly1305_simd_init(struct shash_desc *desc) -{ - struct poly1305_simd_desc_ctx *sctx = shash_desc_ctx(desc); - sctx->uset = false; -#ifdef CONFIG_AS_AVX2 - sctx->wset = false; -#endif - - return crypto_poly1305_init(desc); -} +static __ro_after_init DEFINE_STATIC_KEY_FALSE(poly1305_use_simd); +static __ro_after_init DEFINE_STATIC_KEY_FALSE(poly1305_use_avx2); static void poly1305_simd_mult(u32 *a, const u32 *b) { @@ -60,72 +36,85 @@ static void poly1305_simd_mult(u32 *a, const u32 *b) poly1305_block_sse2(a, m, b, 1); } +static unsigned int poly1305_scalar_blocks(struct poly1305_desc_ctx *dctx, + const u8 *src, unsigned int srclen) +{ + unsigned int datalen; + + if (unlikely(!dctx->sset)) { + datalen = crypto_poly1305_setdesckey(dctx, src, srclen); + src += srclen - datalen; + srclen = datalen; + } + if (srclen >= POLY1305_BLOCK_SIZE) { + poly1305_core_blocks(&dctx->h, dctx->r, src, + srclen / POLY1305_BLOCK_SIZE, 1); + srclen %= POLY1305_BLOCK_SIZE; + } + return srclen; +} + static unsigned int poly1305_simd_blocks(struct poly1305_desc_ctx *dctx, const u8 *src, unsigned int srclen) { - struct poly1305_simd_desc_ctx *sctx; unsigned int blocks, datalen; - BUILD_BUG_ON(offsetof(struct poly1305_simd_desc_ctx, base)); - sctx = container_of(dctx, struct poly1305_simd_desc_ctx, base); - if (unlikely(!dctx->sset)) { datalen = crypto_poly1305_setdesckey(dctx, src, srclen); src += srclen - datalen; srclen = 
datalen; } -#ifdef CONFIG_AS_AVX2 - if (poly1305_use_avx2 && srclen >= POLY1305_BLOCK_SIZE * 4) { - if (unlikely(!sctx->wset)) { - if (!sctx->uset) { - memcpy(sctx->u, dctx->r.r, sizeof(sctx->u)); - poly1305_simd_mult(sctx->u, dctx->r.r); - sctx->uset = true; + if (IS_ENABLED(CONFIG_AS_AVX2) && + static_branch_likely(&poly1305_use_avx2) && + srclen >= POLY1305_BLOCK_SIZE * 4) { + if (unlikely(dctx->rset < 4)) { + if (dctx->rset < 2) { + dctx->r[1] = dctx->r[0]; + poly1305_simd_mult(dctx->r[1].r, dctx->r[0].r); } - memcpy(sctx->u + 5, sctx->u, sizeof(sctx->u)); - poly1305_simd_mult(sctx->u + 5, dctx->r.r); - memcpy(sctx->u + 10, sctx->u + 5, sizeof(sctx->u)); - poly1305_simd_mult(sctx->u + 10, dctx->r.r); - sctx->wset = true; + dctx->r[2] = dctx->r[1]; + poly1305_simd_mult(dctx->r[2].r, dctx->r[0].r); + dctx->r[3] = dctx->r[2]; + poly1305_simd_mult(dctx->r[3].r, dctx->r[0].r); + dctx->rset = 4; } blocks = srclen / (POLY1305_BLOCK_SIZE * 4); - poly1305_4block_avx2(dctx->h.h, src, dctx->r.r, blocks, - sctx->u); + poly1305_4block_avx2(dctx->h.h, src, dctx->r[0].r, blocks, + dctx->r[1].r); src += POLY1305_BLOCK_SIZE * 4 * blocks; srclen -= POLY1305_BLOCK_SIZE * 4 * blocks; } -#endif + if (likely(srclen >= POLY1305_BLOCK_SIZE * 2)) { - if (unlikely(!sctx->uset)) { - memcpy(sctx->u, dctx->r.r, sizeof(sctx->u)); - poly1305_simd_mult(sctx->u, dctx->r.r); - sctx->uset = true; + if (unlikely(dctx->rset < 2)) { + dctx->r[1] = dctx->r[0]; + poly1305_simd_mult(dctx->r[1].r, dctx->r[0].r); + dctx->rset = 2; } blocks = srclen / (POLY1305_BLOCK_SIZE * 2); - poly1305_2block_sse2(dctx->h.h, src, dctx->r.r, blocks, - sctx->u); + poly1305_2block_sse2(dctx->h.h, src, dctx->r[0].r, + blocks, dctx->r[1].r); src += POLY1305_BLOCK_SIZE * 2 * blocks; srclen -= POLY1305_BLOCK_SIZE * 2 * blocks; } if (srclen >= POLY1305_BLOCK_SIZE) { - poly1305_block_sse2(dctx->h.h, src, dctx->r.r, 1); + poly1305_block_sse2(dctx->h.h, src, dctx->r[0].r, 1); srclen -= POLY1305_BLOCK_SIZE; } return srclen; } -static int poly1305_simd_update(struct shash_desc *desc, - const u8 *src, unsigned int srclen) +void poly1305_init_arch(struct poly1305_desc_ctx *desc, const u8 *key) { - struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc); - unsigned int bytes; - - /* kernel_fpu_begin/end is costly, use fallback for small updates */ - if (srclen <= 288 || !crypto_simd_usable()) - return crypto_poly1305_update(desc, src, srclen); + poly1305_init_generic(desc, key); +} +EXPORT_SYMBOL(poly1305_init_arch); - kernel_fpu_begin(); +void poly1305_update_arch(struct poly1305_desc_ctx *dctx, const u8 *src, + unsigned int srclen) +{ + unsigned int bytes; if (unlikely(dctx->buflen)) { bytes = min(srclen, POLY1305_BLOCK_SIZE - dctx->buflen); @@ -135,34 +124,84 @@ static int poly1305_simd_update(struct shash_desc *desc, dctx->buflen += bytes; if (dctx->buflen == POLY1305_BLOCK_SIZE) { - poly1305_simd_blocks(dctx, dctx->buf, - POLY1305_BLOCK_SIZE); + if (static_branch_likely(&poly1305_use_simd) && + likely(crypto_simd_usable())) { + kernel_fpu_begin(); + poly1305_simd_blocks(dctx, dctx->buf, + POLY1305_BLOCK_SIZE); + kernel_fpu_end(); + } else { + poly1305_scalar_blocks(dctx, dctx->buf, + POLY1305_BLOCK_SIZE); + } dctx->buflen = 0; } } if (likely(srclen >= POLY1305_BLOCK_SIZE)) { - bytes = poly1305_simd_blocks(dctx, src, srclen); + if (static_branch_likely(&poly1305_use_simd) && + likely(crypto_simd_usable())) { + kernel_fpu_begin(); + bytes = poly1305_simd_blocks(dctx, src, srclen); + kernel_fpu_end(); + } else { + bytes = poly1305_scalar_blocks(dctx, src, 
srclen); + } src += srclen - bytes; srclen = bytes; } - kernel_fpu_end(); - if (unlikely(srclen)) { dctx->buflen = srclen; memcpy(dctx->buf, src, srclen); } +} +EXPORT_SYMBOL(poly1305_update_arch); + +void poly1305_final_arch(struct poly1305_desc_ctx *desc, u8 *digest) +{ + poly1305_final_generic(desc, digest); +} +EXPORT_SYMBOL(poly1305_final_arch); + +static int crypto_poly1305_init(struct shash_desc *desc) +{ + struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc); + + poly1305_core_init(&dctx->h); + dctx->buflen = 0; + dctx->rset = 0; + dctx->sset = false; + + return 0; +} + +static int crypto_poly1305_final(struct shash_desc *desc, u8 *dst) +{ + struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc); + + if (unlikely(!dctx->sset)) + return -ENOKEY; + + poly1305_final_generic(dctx, dst); + return 0; +} + +static int poly1305_simd_update(struct shash_desc *desc, + const u8 *src, unsigned int srclen) +{ + struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc); + poly1305_update_arch(dctx, src, srclen); return 0; } static struct shash_alg alg = { .digestsize = POLY1305_DIGEST_SIZE, - .init = poly1305_simd_init, + .init = crypto_poly1305_init, .update = poly1305_simd_update, .final = crypto_poly1305_final, - .descsize = sizeof(struct poly1305_simd_desc_ctx), + .descsize = sizeof(struct poly1305_desc_ctx), .base = { .cra_name = "poly1305", .cra_driver_name = "poly1305-simd", @@ -175,16 +214,16 @@ static struct shash_alg alg = { static int __init poly1305_simd_mod_init(void) { if (!boot_cpu_has(X86_FEATURE_XMM2)) - return -ENODEV; - -#ifdef CONFIG_AS_AVX2 - poly1305_use_avx2 = boot_cpu_has(X86_FEATURE_AVX) && - boot_cpu_has(X86_FEATURE_AVX2) && - cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM, NULL); - alg.descsize = sizeof(struct poly1305_simd_desc_ctx); - if (poly1305_use_avx2) - alg.descsize += 10 * sizeof(u32); -#endif + return 0; + + static_branch_enable(&poly1305_use_simd); + + if (IS_ENABLED(CONFIG_AS_AVX2) && + boot_cpu_has(X86_FEATURE_AVX) && + boot_cpu_has(X86_FEATURE_AVX2) && + cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM, NULL)) + static_branch_enable(&poly1305_use_avx2); + return crypto_register_shash(&alg); } diff --git a/arch/x86/events/amd/core.c b/arch/x86/events/amd/core.c index e7d35f60d53f..64c3e70b0556 100644 --- a/arch/x86/events/amd/core.c +++ b/arch/x86/events/amd/core.c @@ -5,12 +5,14 @@ #include <linux/init.h> #include <linux/slab.h> #include <linux/delay.h> +#include <linux/jiffies.h> #include <asm/apicdef.h> #include <asm/nmi.h> #include "../perf_event.h" -static DEFINE_PER_CPU(unsigned int, perf_nmi_counter); +static DEFINE_PER_CPU(unsigned long, perf_nmi_tstamp); +static unsigned long perf_nmi_window; static __initconst const u64 amd_hw_cache_event_ids [PERF_COUNT_HW_CACHE_MAX] @@ -641,11 +643,12 @@ static void amd_pmu_disable_event(struct perf_event *event) * handler when multiple PMCs are active or PMC overflow while handling some * other source of an NMI. * - * Attempt to mitigate this by using the number of active PMCs to determine - * whether to return NMI_HANDLED if the perf NMI handler did not handle/reset - * any PMCs. The per-CPU perf_nmi_counter variable is set to a minimum of the - * number of active PMCs or 2. The value of 2 is used in case an NMI does not - * arrive at the LAPIC in time to be collapsed into an already pending NMI. + * Attempt to mitigate this by creating an NMI window in which un-handled NMIs + * received during this window will be claimed. 
This prevents extending the + * window past when it is possible that latent NMIs should be received. The + * per-CPU perf_nmi_tstamp will be set to the window end time whenever perf has + * handled a counter. When an un-handled NMI is received, it will be claimed + * only if arriving within that window. */ static int amd_pmu_handle_irq(struct pt_regs *regs) { @@ -663,21 +666,19 @@ static int amd_pmu_handle_irq(struct pt_regs *regs) handled = x86_pmu_handle_irq(regs); /* - * If a counter was handled, record the number of possible remaining - * NMIs that can occur. + * If a counter was handled, record a timestamp such that un-handled + * NMIs will be claimed if arriving within that window. */ if (handled) { - this_cpu_write(perf_nmi_counter, - min_t(unsigned int, 2, active)); + this_cpu_write(perf_nmi_tstamp, + jiffies + perf_nmi_window); return handled; } - if (!this_cpu_read(perf_nmi_counter)) + if (time_after(jiffies, this_cpu_read(perf_nmi_tstamp))) return NMI_DONE; - this_cpu_dec(perf_nmi_counter); - return NMI_HANDLED; } @@ -909,6 +910,9 @@ static int __init amd_core_pmu_init(void) if (!boot_cpu_has(X86_FEATURE_PERFCTR_CORE)) return 0; + /* Avoid calulating the value each time in the NMI handler */ + perf_nmi_window = msecs_to_jiffies(100); + switch (boot_cpu_data.x86) { case 0x15: pr_cont("Fam15h "); diff --git a/arch/x86/events/amd/ibs.c b/arch/x86/events/amd/ibs.c index 5b35b7ea5d72..26c36357c4c9 100644 --- a/arch/x86/events/amd/ibs.c +++ b/arch/x86/events/amd/ibs.c @@ -377,7 +377,8 @@ static inline void perf_ibs_disable_event(struct perf_ibs *perf_ibs, struct hw_perf_event *hwc, u64 config) { config &= ~perf_ibs->cnt_mask; - wrmsrl(hwc->config_base, config); + if (boot_cpu_data.x86 == 0x10) + wrmsrl(hwc->config_base, config); config &= ~perf_ibs->enable_mask; wrmsrl(hwc->config_base, config); } @@ -553,7 +554,8 @@ static struct perf_ibs perf_ibs_op = { }, .msr = MSR_AMD64_IBSOPCTL, .config_mask = IBS_OP_CONFIG_MASK, - .cnt_mask = IBS_OP_MAX_CNT, + .cnt_mask = IBS_OP_MAX_CNT | IBS_OP_CUR_CNT | + IBS_OP_CUR_CNT_RAND, .enable_mask = IBS_OP_ENABLE, .valid_mask = IBS_OP_VAL, .max_period = IBS_OP_MAX_CNT << 4, @@ -614,7 +616,7 @@ fail: if (event->attr.sample_type & PERF_SAMPLE_RAW) offset_max = perf_ibs->offset_max; else if (check_rip) - offset_max = 2; + offset_max = 3; else offset_max = 1; do { diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c index 27ee47a7be66..937363b803c1 100644 --- a/arch/x86/events/intel/core.c +++ b/arch/x86/events/intel/core.c @@ -3323,8 +3323,19 @@ static int intel_pmu_hw_config(struct perf_event *event) return 0; } +#ifdef CONFIG_RETPOLINE +static struct perf_guest_switch_msr *core_guest_get_msrs(int *nr); +static struct perf_guest_switch_msr *intel_guest_get_msrs(int *nr); +#endif + struct perf_guest_switch_msr *perf_guest_get_msrs(int *nr) { +#ifdef CONFIG_RETPOLINE + if (x86_pmu.guest_get_msrs == intel_guest_get_msrs) + return intel_guest_get_msrs(nr); + else if (x86_pmu.guest_get_msrs == core_guest_get_msrs) + return core_guest_get_msrs(nr); +#endif if (x86_pmu.guest_get_msrs) return x86_pmu.guest_get_msrs(nr); *nr = 0; @@ -4983,6 +4994,8 @@ __init int intel_pmu_init(void) case INTEL_FAM6_SKYLAKE: case INTEL_FAM6_KABYLAKE_L: case INTEL_FAM6_KABYLAKE: + case INTEL_FAM6_COMETLAKE_L: + case INTEL_FAM6_COMETLAKE: x86_add_quirk(intel_pebs_isolation_quirk); x86_pmu.late_ack = true; memcpy(hw_cache_event_ids, skl_hw_cache_event_ids, sizeof(hw_cache_event_ids)); @@ -5031,6 +5044,8 @@ __init int intel_pmu_init(void) /* fall through */ case 
INTEL_FAM6_ICELAKE_L: case INTEL_FAM6_ICELAKE: + case INTEL_FAM6_TIGERLAKE_L: + case INTEL_FAM6_TIGERLAKE: x86_pmu.late_ack = true; memcpy(hw_cache_event_ids, skl_hw_cache_event_ids, sizeof(hw_cache_event_ids)); memcpy(hw_cache_extra_regs, skl_hw_cache_extra_regs, sizeof(hw_cache_extra_regs)); diff --git a/arch/x86/events/intel/cstate.c b/arch/x86/events/intel/cstate.c index 9f2f39003d96..e1daf4151e11 100644 --- a/arch/x86/events/intel/cstate.c +++ b/arch/x86/events/intel/cstate.c @@ -45,46 +45,49 @@ * MSR_CORE_C3_RESIDENCY: CORE C3 Residency Counter * perf code: 0x01 * Available model: NHM,WSM,SNB,IVB,HSW,BDW,SKL,GLM, - CNL + * CNL,KBL,CML * Scope: Core * MSR_CORE_C6_RESIDENCY: CORE C6 Residency Counter * perf code: 0x02 * Available model: SLM,AMT,NHM,WSM,SNB,IVB,HSW,BDW, - * SKL,KNL,GLM,CNL + * SKL,KNL,GLM,CNL,KBL,CML,ICL,TGL * Scope: Core * MSR_CORE_C7_RESIDENCY: CORE C7 Residency Counter * perf code: 0x03 - * Available model: SNB,IVB,HSW,BDW,SKL,CNL + * Available model: SNB,IVB,HSW,BDW,SKL,CNL,KBL,CML, + * ICL,TGL * Scope: Core * MSR_PKG_C2_RESIDENCY: Package C2 Residency Counter. * perf code: 0x00 - * Available model: SNB,IVB,HSW,BDW,SKL,KNL,GLM,CNL + * Available model: SNB,IVB,HSW,BDW,SKL,KNL,GLM,CNL, + * KBL,CML,ICL,TGL * Scope: Package (physical package) * MSR_PKG_C3_RESIDENCY: Package C3 Residency Counter. * perf code: 0x01 * Available model: NHM,WSM,SNB,IVB,HSW,BDW,SKL,KNL, - * GLM,CNL + * GLM,CNL,KBL,CML,ICL,TGL * Scope: Package (physical package) * MSR_PKG_C6_RESIDENCY: Package C6 Residency Counter. * perf code: 0x02 * Available model: SLM,AMT,NHM,WSM,SNB,IVB,HSW,BDW - * SKL,KNL,GLM,CNL + * SKL,KNL,GLM,CNL,KBL,CML,ICL,TGL * Scope: Package (physical package) * MSR_PKG_C7_RESIDENCY: Package C7 Residency Counter. * perf code: 0x03 - * Available model: NHM,WSM,SNB,IVB,HSW,BDW,SKL,CNL + * Available model: NHM,WSM,SNB,IVB,HSW,BDW,SKL,CNL, + * KBL,CML,ICL,TGL * Scope: Package (physical package) * MSR_PKG_C8_RESIDENCY: Package C8 Residency Counter. * perf code: 0x04 - * Available model: HSW ULT,KBL,CNL + * Available model: HSW ULT,KBL,CNL,CML,ICL,TGL * Scope: Package (physical package) * MSR_PKG_C9_RESIDENCY: Package C9 Residency Counter. * perf code: 0x05 - * Available model: HSW ULT,KBL,CNL + * Available model: HSW ULT,KBL,CNL,CML,ICL,TGL * Scope: Package (physical package) * MSR_PKG_C10_RESIDENCY: Package C10 Residency Counter. 
* perf code: 0x06 - * Available model: HSW ULT,KBL,GLM,CNL + * Available model: HSW ULT,KBL,GLM,CNL,CML,ICL,TGL * Scope: Package (physical package) * */ @@ -544,6 +547,19 @@ static const struct cstate_model cnl_cstates __initconst = { BIT(PERF_CSTATE_PKG_C10_RES), }; +static const struct cstate_model icl_cstates __initconst = { + .core_events = BIT(PERF_CSTATE_CORE_C6_RES) | + BIT(PERF_CSTATE_CORE_C7_RES), + + .pkg_events = BIT(PERF_CSTATE_PKG_C2_RES) | + BIT(PERF_CSTATE_PKG_C3_RES) | + BIT(PERF_CSTATE_PKG_C6_RES) | + BIT(PERF_CSTATE_PKG_C7_RES) | + BIT(PERF_CSTATE_PKG_C8_RES) | + BIT(PERF_CSTATE_PKG_C9_RES) | + BIT(PERF_CSTATE_PKG_C10_RES), +}; + static const struct cstate_model slm_cstates __initconst = { .core_events = BIT(PERF_CSTATE_CORE_C1_RES) | BIT(PERF_CSTATE_CORE_C6_RES), @@ -614,6 +630,8 @@ static const struct x86_cpu_id intel_cstates_match[] __initconst = { X86_CSTATES_MODEL(INTEL_FAM6_KABYLAKE_L, hswult_cstates), X86_CSTATES_MODEL(INTEL_FAM6_KABYLAKE, hswult_cstates), + X86_CSTATES_MODEL(INTEL_FAM6_COMETLAKE_L, hswult_cstates), + X86_CSTATES_MODEL(INTEL_FAM6_COMETLAKE, hswult_cstates), X86_CSTATES_MODEL(INTEL_FAM6_CANNONLAKE_L, cnl_cstates), @@ -625,8 +643,10 @@ static const struct x86_cpu_id intel_cstates_match[] __initconst = { X86_CSTATES_MODEL(INTEL_FAM6_ATOM_GOLDMONT_PLUS, glm_cstates), - X86_CSTATES_MODEL(INTEL_FAM6_ICELAKE_L, snb_cstates), - X86_CSTATES_MODEL(INTEL_FAM6_ICELAKE, snb_cstates), + X86_CSTATES_MODEL(INTEL_FAM6_ICELAKE_L, icl_cstates), + X86_CSTATES_MODEL(INTEL_FAM6_ICELAKE, icl_cstates), + X86_CSTATES_MODEL(INTEL_FAM6_TIGERLAKE_L, icl_cstates), + X86_CSTATES_MODEL(INTEL_FAM6_TIGERLAKE, icl_cstates), { }, }; MODULE_DEVICE_TABLE(x86cpu, intel_cstates_match); diff --git a/arch/x86/events/intel/pt.c b/arch/x86/events/intel/pt.c index 74e80ed9c6c4..05e43d0f430b 100644 --- a/arch/x86/events/intel/pt.c +++ b/arch/x86/events/intel/pt.c @@ -627,7 +627,7 @@ static struct topa *topa_alloc(int cpu, gfp_t gfp) * link as the 2nd entry in the table */ if (!intel_pt_validate_hw_cap(PT_CAP_topa_multiple_entries)) { - TOPA_ENTRY(&tp->topa, 1)->base = page_to_phys(p); + TOPA_ENTRY(&tp->topa, 1)->base = page_to_phys(p) >> TOPA_SHIFT; TOPA_ENTRY(&tp->topa, 1)->end = 1; } diff --git a/arch/x86/events/intel/uncore.c b/arch/x86/events/intel/uncore.c index 6fc2e06ab4c6..86467f85c383 100644 --- a/arch/x86/events/intel/uncore.c +++ b/arch/x86/events/intel/uncore.c @@ -502,10 +502,8 @@ void uncore_pmu_event_start(struct perf_event *event, int flags) local64_set(&event->hw.prev_count, uncore_read_counter(box, event)); uncore_enable_event(box, event); - if (box->n_active == 1) { - uncore_enable_box(box); + if (box->n_active == 1) uncore_pmu_start_hrtimer(box); - } } void uncore_pmu_event_stop(struct perf_event *event, int flags) @@ -529,10 +527,8 @@ void uncore_pmu_event_stop(struct perf_event *event, int flags) WARN_ON_ONCE(hwc->state & PERF_HES_STOPPED); hwc->state |= PERF_HES_STOPPED; - if (box->n_active == 0) { - uncore_disable_box(box); + if (box->n_active == 0) uncore_pmu_cancel_hrtimer(box); - } } if ((flags & PERF_EF_UPDATE) && !(hwc->state & PERF_HES_UPTODATE)) { @@ -778,6 +774,40 @@ static int uncore_pmu_event_init(struct perf_event *event) return ret; } +static void uncore_pmu_enable(struct pmu *pmu) +{ + struct intel_uncore_pmu *uncore_pmu; + struct intel_uncore_box *box; + + uncore_pmu = container_of(pmu, struct intel_uncore_pmu, pmu); + if (!uncore_pmu) + return; + + box = uncore_pmu_to_box(uncore_pmu, smp_processor_id()); + if (!box) + return; + + if 
(uncore_pmu->type->ops->enable_box) + uncore_pmu->type->ops->enable_box(box); +} + +static void uncore_pmu_disable(struct pmu *pmu) +{ + struct intel_uncore_pmu *uncore_pmu; + struct intel_uncore_box *box; + + uncore_pmu = container_of(pmu, struct intel_uncore_pmu, pmu); + if (!uncore_pmu) + return; + + box = uncore_pmu_to_box(uncore_pmu, smp_processor_id()); + if (!box) + return; + + if (uncore_pmu->type->ops->disable_box) + uncore_pmu->type->ops->disable_box(box); +} + static ssize_t uncore_get_attr_cpumask(struct device *dev, struct device_attribute *attr, char *buf) { @@ -803,6 +833,8 @@ static int uncore_pmu_register(struct intel_uncore_pmu *pmu) pmu->pmu = (struct pmu) { .attr_groups = pmu->type->attr_groups, .task_ctx_nr = perf_invalid_context, + .pmu_enable = uncore_pmu_enable, + .pmu_disable = uncore_pmu_disable, .event_init = uncore_pmu_event_init, .add = uncore_pmu_event_add, .del = uncore_pmu_event_del, diff --git a/arch/x86/events/intel/uncore.h b/arch/x86/events/intel/uncore.h index f36f7bebbc1b..bbfdaa720b45 100644 --- a/arch/x86/events/intel/uncore.h +++ b/arch/x86/events/intel/uncore.h @@ -441,18 +441,6 @@ static inline int uncore_freerunning_hw_config(struct intel_uncore_box *box, return -EINVAL; } -static inline void uncore_disable_box(struct intel_uncore_box *box) -{ - if (box->pmu->type->ops->disable_box) - box->pmu->type->ops->disable_box(box); -} - -static inline void uncore_enable_box(struct intel_uncore_box *box) -{ - if (box->pmu->type->ops->enable_box) - box->pmu->type->ops->enable_box(box); -} - static inline void uncore_disable_event(struct intel_uncore_box *box, struct perf_event *event) { diff --git a/arch/x86/events/msr.c b/arch/x86/events/msr.c index b1afc77f0704..6f86650b3f77 100644 --- a/arch/x86/events/msr.c +++ b/arch/x86/events/msr.c @@ -89,7 +89,14 @@ static bool test_intel(int idx, void *data) case INTEL_FAM6_SKYLAKE_X: case INTEL_FAM6_KABYLAKE_L: case INTEL_FAM6_KABYLAKE: + case INTEL_FAM6_COMETLAKE_L: + case INTEL_FAM6_COMETLAKE: case INTEL_FAM6_ICELAKE_L: + case INTEL_FAM6_ICELAKE: + case INTEL_FAM6_ICELAKE_X: + case INTEL_FAM6_ICELAKE_D: + case INTEL_FAM6_TIGERLAKE_L: + case INTEL_FAM6_TIGERLAKE: if (idx == PERF_MSR_SMI || idx == PERF_MSR_PPERF) return true; break; diff --git a/arch/x86/hyperv/hv_apic.c b/arch/x86/hyperv/hv_apic.c index 5c056b8aebef..e01078e93dd3 100644 --- a/arch/x86/hyperv/hv_apic.c +++ b/arch/x86/hyperv/hv_apic.c @@ -260,11 +260,21 @@ void __init hv_apic_init(void) } if (ms_hyperv.hints & HV_X64_APIC_ACCESS_RECOMMENDED) { - pr_info("Hyper-V: Using MSR based APIC access\n"); + pr_info("Hyper-V: Using enlightened APIC (%s mode)", + x2apic_enabled() ? "x2apic" : "xapic"); + /* + * With x2apic, architectural x2apic MSRs are equivalent to the + * respective synthetic MSRs, so there's no need to override + * the apic accessors. The only exception is + * hv_apic_eoi_write, because it benefits from lazy EOI when + * available, but it works for both xapic and x2apic modes. 
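As an aside on the equivalence noted in the comment above, a minimal illustration of why no read/write override is needed in x2apic mode (not part of this patch; register and MSR names are taken from the generic x2apic and Hyper-V headers, and only one of the two writes would ever be issued):

	/* illustrative only: either MSR write programs the same ICR state,
	 * so the generic x2apic accessors work unmodified under Hyper-V */
	wrmsrl(APIC_BASE_MSR + (APIC_ICR >> 4), val);	/* architectural x2apic MSR */
	wrmsrl(HV_X64_MSR_ICR, val);			/* Hyper-V synthetic MSR    */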
+ */ apic_set_eoi_write(hv_apic_eoi_write); - apic->read = hv_apic_read; - apic->write = hv_apic_write; - apic->icr_write = hv_apic_icr_write; - apic->icr_read = hv_apic_icr_read; + if (!x2apic_enabled()) { + apic->read = hv_apic_read; + apic->write = hv_apic_write; + apic->icr_write = hv_apic_icr_write; + apic->icr_read = hv_apic_icr_read; + } } } diff --git a/arch/x86/include/asm/cpu_entry_area.h b/arch/x86/include/asm/cpu_entry_area.h index cff3f3f3bfe0..8348f7d69fd5 100644 --- a/arch/x86/include/asm/cpu_entry_area.h +++ b/arch/x86/include/asm/cpu_entry_area.h @@ -1,4 +1,4 @@ -// SPDX-License-Identifier: GPL-2.0 +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef _ASM_X86_CPU_ENTRY_AREA_H #define _ASM_X86_CPU_ENTRY_AREA_H diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index 0652d3eed9bd..c4fbe379cc0b 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -399,5 +399,7 @@ #define X86_BUG_MDS X86_BUG(19) /* CPU is affected by Microarchitectural data sampling */ #define X86_BUG_MSBDS_ONLY X86_BUG(20) /* CPU is only affected by the MSDBS variant of BUG_MDS */ #define X86_BUG_SWAPGS X86_BUG(21) /* CPU is affected by speculation through SWAPGS */ +#define X86_BUG_TAA X86_BUG(22) /* CPU is affected by TSX Async Abort(TAA) */ +#define X86_BUG_ITLB_MULTIHIT X86_BUG(23) /* CPU may incur MCE during certain page attribute changes */ #endif /* _ASM_X86_CPUFEATURES_H */ diff --git a/arch/x86/include/asm/intel-family.h b/arch/x86/include/asm/intel-family.h index f04622500da3..c606c0b70738 100644 --- a/arch/x86/include/asm/intel-family.h +++ b/arch/x86/include/asm/intel-family.h @@ -83,6 +83,9 @@ #define INTEL_FAM6_TIGERLAKE_L 0x8C #define INTEL_FAM6_TIGERLAKE 0x8D +#define INTEL_FAM6_COMETLAKE 0xA5 +#define INTEL_FAM6_COMETLAKE_L 0xA6 + /* "Small Core" Processors (Atom) */ #define INTEL_FAM6_ATOM_BONNELL 0x1C /* Diamondville, Pineview */ diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 23edf56cf577..b79cd6aa4075 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -156,10 +156,8 @@ enum kvm_reg { VCPU_REGS_R15 = __VCPU_REGS_R15, #endif VCPU_REGS_RIP, - NR_VCPU_REGS -}; + NR_VCPU_REGS, -enum kvm_reg_ex { VCPU_EXREG_PDPTR = NR_VCPU_REGS, VCPU_EXREG_CR3, VCPU_EXREG_RFLAGS, @@ -219,13 +217,6 @@ enum { PFERR_WRITE_MASK | \ PFERR_PRESENT_MASK) -/* - * The mask used to denote special SPTEs, which can be either MMIO SPTEs or - * Access Tracking SPTEs. We use bit 62 instead of bit 63 to avoid conflicting - * with the SVE bit in EPT PTEs. - */ -#define SPTE_SPECIAL_MASK (1ULL << 62) - /* apic attention bits */ #define KVM_APIC_CHECK_VAPIC 0 /* @@ -319,9 +310,12 @@ struct kvm_rmap_head { struct kvm_mmu_page { struct list_head link; struct hlist_node hash_link; + struct list_head lpage_disallowed_link; + bool unsync; u8 mmu_valid_gen; bool mmio_cached; + bool lpage_disallowed; /* Can't be replaced by an equiv large page */ /* * The following two entries are used to key the shadow page in the @@ -458,6 +452,11 @@ struct kvm_pmc { u64 eventsel; struct perf_event *perf_event; struct kvm_vcpu *vcpu; + /* + * eventsel value for general purpose counters, + * ctrl value for fixed counters. 
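A sketch of how the cached configuration introduced here is meant to be used (hedged paraphrase, not a quote of the patch): when the guest rewrites a counter's control MSR with an unchanged value, the existing perf_event can be kept instead of being torn down and re-created.

	/* illustrative reprogramming path */
	if (pmc->current_config == new_config) {
		/* same eventsel/ctrl as before: keep the current perf_event */
		return;
	}
	pmc->current_config = new_config;
	/* otherwise release the old perf_event and program a new one */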
+ */ + u64 current_config; }; struct kvm_pmu { @@ -476,7 +475,21 @@ struct kvm_pmu { struct kvm_pmc gp_counters[INTEL_PMC_MAX_GENERIC]; struct kvm_pmc fixed_counters[INTEL_PMC_MAX_FIXED]; struct irq_work irq_work; - u64 reprogram_pmi; + DECLARE_BITMAP(reprogram_pmi, X86_PMC_IDX_MAX); + DECLARE_BITMAP(all_valid_pmc_idx, X86_PMC_IDX_MAX); + DECLARE_BITMAP(pmc_in_use, X86_PMC_IDX_MAX); + + /* + * The gate to release perf_events not marked in + * pmc_in_use only once in a vcpu time slice. + */ + bool need_cleanup; + + /* + * The total number of programmed perf_events and it helps to avoid + * redundant check before cleanup if guest don't use vPMU at all. + */ + u8 event_count; }; struct kvm_pmu_ops; @@ -569,6 +582,7 @@ struct kvm_vcpu_arch { u64 smbase; u64 smi_count; bool tpr_access_reporting; + bool xsaves_enabled; u64 ia32_xss; u64 microcode_version; u64 arch_capabilities; @@ -866,6 +880,7 @@ struct kvm_arch { */ struct list_head active_mmu_pages; struct list_head zapped_obsolete_pages; + struct list_head lpage_disallowed_mmu_pages; struct kvm_page_track_notifier_node mmu_sp_tracker; struct kvm_page_track_notifier_head track_notifier_head; @@ -940,6 +955,7 @@ struct kvm_arch { bool exception_payload_enabled; struct kvm_pmu_event_filter *pmu_event_filter; + struct task_struct *nx_lpage_recovery_thread; }; struct kvm_vm_stat { @@ -953,6 +969,7 @@ struct kvm_vm_stat { ulong mmu_unsync; ulong remote_tlb_flush; ulong lpages; + ulong nx_lpage_splits; ulong max_mmu_page_hash_collisions; }; @@ -1042,7 +1059,6 @@ struct kvm_x86_ops { struct kvm_segment *var, int seg); void (*get_cs_db_l_bits)(struct kvm_vcpu *vcpu, int *db, int *l); void (*decache_cr0_guest_bits)(struct kvm_vcpu *vcpu); - void (*decache_cr3)(struct kvm_vcpu *vcpu); void (*decache_cr4_guest_bits)(struct kvm_vcpu *vcpu); void (*set_cr0)(struct kvm_vcpu *vcpu, unsigned long cr0); void (*set_cr3)(struct kvm_vcpu *vcpu, unsigned long cr3); @@ -1091,7 +1107,7 @@ struct kvm_x86_ops { void (*enable_nmi_window)(struct kvm_vcpu *vcpu); void (*enable_irq_window)(struct kvm_vcpu *vcpu); void (*update_cr8_intercept)(struct kvm_vcpu *vcpu, int tpr, int irr); - bool (*get_enable_apicv)(struct kvm_vcpu *vcpu); + bool (*get_enable_apicv)(struct kvm *kvm); void (*refresh_apicv_exec_ctrl)(struct kvm_vcpu *vcpu); void (*hwapic_irr_update)(struct kvm_vcpu *vcpu, int max_irr); void (*hwapic_isr_update)(struct kvm_vcpu *vcpu, int isr); @@ -1196,7 +1212,7 @@ struct kvm_x86_ops { int (*set_nested_state)(struct kvm_vcpu *vcpu, struct kvm_nested_state __user *user_kvm_nested_state, struct kvm_nested_state *kvm_state); - void (*get_vmcs12_pages)(struct kvm_vcpu *vcpu); + bool (*get_vmcs12_pages)(struct kvm_vcpu *vcpu); int (*smi_allowed)(struct kvm_vcpu *vcpu); int (*pre_enter_smm)(struct kvm_vcpu *vcpu, char *smstate); @@ -1358,6 +1374,7 @@ int kvm_emulate_instruction_from_buffer(struct kvm_vcpu *vcpu, void kvm_enable_efer_bits(u64); bool kvm_valid_efer(struct kvm_vcpu *vcpu, u64 efer); +int __kvm_get_msr(struct kvm_vcpu *vcpu, u32 index, u64 *data, bool host_initiated); int kvm_get_msr(struct kvm_vcpu *vcpu, u32 index, u64 *data); int kvm_set_msr(struct kvm_vcpu *vcpu, u32 index, u64 data); int kvm_emulate_rdmsr(struct kvm_vcpu *vcpu); @@ -1578,6 +1595,8 @@ bool kvm_is_linear_rip(struct kvm_vcpu *vcpu, unsigned long linear_rip); void kvm_make_mclock_inprogress_request(struct kvm *kvm); void kvm_make_scan_ioapic_request(struct kvm *kvm); +void kvm_make_scan_ioapic_request_mask(struct kvm *kvm, + unsigned long *vcpu_bitmap); void 
kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu, struct kvm_async_pf *work); diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index 20ce682a2540..084e98da04a7 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h @@ -93,6 +93,18 @@ * Microarchitectural Data * Sampling (MDS) vulnerabilities. */ +#define ARCH_CAP_PSCHANGE_MC_NO BIT(6) /* + * The processor is not susceptible to a + * machine check error due to modifying the + * code page size along with either the + * physical address or cache type + * without TLB invalidation. + */ +#define ARCH_CAP_TSX_CTRL_MSR BIT(7) /* MSR for TSX control is available. */ +#define ARCH_CAP_TAA_NO BIT(8) /* + * Not susceptible to + * TSX Async Abort (TAA) vulnerabilities. + */ #define MSR_IA32_FLUSH_CMD 0x0000010b #define L1D_FLUSH BIT(0) /* @@ -103,6 +115,10 @@ #define MSR_IA32_BBL_CR_CTL 0x00000119 #define MSR_IA32_BBL_CR_CTL3 0x0000011e +#define MSR_IA32_TSX_CTRL 0x00000122 +#define TSX_CTRL_RTM_DISABLE BIT(0) /* Disable RTM feature */ +#define TSX_CTRL_CPUID_CLEAR BIT(1) /* Disable TSX enumeration */ + #define MSR_IA32_SYSENTER_CS 0x00000174 #define MSR_IA32_SYSENTER_ESP 0x00000175 #define MSR_IA32_SYSENTER_EIP 0x00000176 @@ -393,6 +409,8 @@ #define MSR_AMD_PSTATE_DEF_BASE 0xc0010064 #define MSR_AMD64_OSVW_ID_LENGTH 0xc0010140 #define MSR_AMD64_OSVW_STATUS 0xc0010141 +#define MSR_AMD_PPIN_CTL 0xc00102f0 +#define MSR_AMD_PPIN 0xc00102f1 #define MSR_AMD64_LS_CFG 0xc0011020 #define MSR_AMD64_DC_CFG 0xc0011022 #define MSR_AMD64_BU_CFG2 0xc001102a diff --git a/arch/x86/include/asm/mwait.h b/arch/x86/include/asm/mwait.h index e28f8b723b5c..9d5252c9685c 100644 --- a/arch/x86/include/asm/mwait.h +++ b/arch/x86/include/asm/mwait.h @@ -21,7 +21,7 @@ #define MWAIT_ECX_INTERRUPT_BREAK 0x1 #define MWAITX_ECX_TIMER_ENABLE BIT(1) #define MWAITX_MAX_LOOPS ((u32)-1) -#define MWAITX_DISABLE_CSTATES 0xf +#define MWAITX_DISABLE_CSTATES 0xf0 static inline void __monitor(const void *eax, unsigned long ecx, unsigned long edx) diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h index 80bc209c0708..5c24a7b35166 100644 --- a/arch/x86/include/asm/nospec-branch.h +++ b/arch/x86/include/asm/nospec-branch.h @@ -314,7 +314,7 @@ DECLARE_STATIC_KEY_FALSE(mds_idle_clear); #include <asm/segment.h> /** - * mds_clear_cpu_buffers - Mitigation for MDS vulnerability + * mds_clear_cpu_buffers - Mitigation for MDS and TAA vulnerability * * This uses the otherwise unused and obsolete VERW instruction in * combination with microcode which triggers a CPU buffer flush when the @@ -337,7 +337,7 @@ static inline void mds_clear_cpu_buffers(void) } /** - * mds_user_clear_cpu_buffers - Mitigation for MDS vulnerability + * mds_user_clear_cpu_buffers - Mitigation for MDS and TAA vulnerability * * Clear CPU buffers if the corresponding static key is enabled */ diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h index 0bc530c4eb13..ad97dc155195 100644 --- a/arch/x86/include/asm/pgtable.h +++ b/arch/x86/include/asm/pgtable.h @@ -1463,6 +1463,12 @@ static inline bool arch_has_pfn_modify_check(void) return boot_cpu_has_bug(X86_BUG_L1TF); } +#define arch_faults_on_old_pte arch_faults_on_old_pte +static inline bool arch_faults_on_old_pte(void) +{ + return false; +} + #include <asm-generic/pgtable.h> #endif /* __ASSEMBLY__ */ diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index 6e0a3b43d027..54f5d54280f6 100644 --- a/arch/x86/include/asm/processor.h 
+++ b/arch/x86/include/asm/processor.h @@ -988,4 +988,11 @@ enum mds_mitigations { MDS_MITIGATION_VMWERV, }; +enum taa_mitigations { + TAA_MITIGATION_OFF, + TAA_MITIGATION_UCODE_NEEDED, + TAA_MITIGATION_VERW, + TAA_MITIGATION_TSX_DISABLED, +}; + #endif /* _ASM_X86_PROCESSOR_H */ diff --git a/arch/x86/include/asm/pti.h b/arch/x86/include/asm/pti.h index 5df09a0b80b8..07375b476c4f 100644 --- a/arch/x86/include/asm/pti.h +++ b/arch/x86/include/asm/pti.h @@ -1,4 +1,4 @@ -// SPDX-License-Identifier: GPL-2.0 +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef _ASM_X86_PTI_H #define _ASM_X86_PTI_H #ifndef __ASSEMBLY__ diff --git a/arch/x86/include/asm/text-patching.h b/arch/x86/include/asm/text-patching.h index 5e8319bb207a..23c626a742e8 100644 --- a/arch/x86/include/asm/text-patching.h +++ b/arch/x86/include/asm/text-patching.h @@ -26,10 +26,11 @@ static inline void apply_paravirt(struct paravirt_patch_site *start, #define POKE_MAX_OPCODE_SIZE 5 struct text_poke_loc { - void *detour; void *addr; - size_t len; - const char opcode[POKE_MAX_OPCODE_SIZE]; + int len; + s32 rel32; + u8 opcode; + const u8 text[POKE_MAX_OPCODE_SIZE]; }; extern void text_poke_early(void *addr, const void *opcode, size_t len); @@ -51,8 +52,10 @@ extern void text_poke_early(void *addr, const void *opcode, size_t len); extern void *text_poke(void *addr, const void *opcode, size_t len); extern void *text_poke_kgdb(void *addr, const void *opcode, size_t len); extern int poke_int3_handler(struct pt_regs *regs); -extern void text_poke_bp(void *addr, const void *opcode, size_t len, void *handler); +extern void text_poke_bp(void *addr, const void *opcode, size_t len, const void *emulate); extern void text_poke_bp_batch(struct text_poke_loc *tp, unsigned int nr_entries); +extern void text_poke_loc_init(struct text_poke_loc *tp, void *addr, + const void *opcode, size_t len, const void *emulate); extern int after_bootmem; extern __ro_after_init struct mm_struct *poking_mm; extern __ro_after_init unsigned long poking_addr; @@ -63,8 +66,17 @@ static inline void int3_emulate_jmp(struct pt_regs *regs, unsigned long ip) regs->ip = ip; } -#define INT3_INSN_SIZE 1 -#define CALL_INSN_SIZE 5 +#define INT3_INSN_SIZE 1 +#define INT3_INSN_OPCODE 0xCC + +#define CALL_INSN_SIZE 5 +#define CALL_INSN_OPCODE 0xE8 + +#define JMP32_INSN_SIZE 5 +#define JMP32_INSN_OPCODE 0xE9 + +#define JMP8_INSN_SIZE 2 +#define JMP8_INSN_OPCODE 0xEB static inline void int3_emulate_push(struct pt_regs *regs, unsigned long val) { diff --git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h index 35c225ede0e4..61d93f062a36 100644 --- a/arch/x86/include/asm/uaccess.h +++ b/arch/x86/include/asm/uaccess.h @@ -734,5 +734,28 @@ do { \ if (unlikely(__gu_err)) goto err_label; \ } while (0) +/* + * We want the unsafe accessors to always be inlined and use + * the error labels - thus the macro games. 
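For context, a minimal caller of the macros defined just below might look like this (illustrative only; it assumes the usual user_access_begin()/user_access_end() bracketing and a local error label, which is exactly the "error labels" pattern the comment refers to):

	if (!user_access_begin(udst, len))
		return -EFAULT;
	unsafe_copy_to_user(udst, ksrc, len, Efault);
	user_access_end();
	return 0;
Efault:
	user_access_end();
	return -EFAULT;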
+ */ +#define unsafe_copy_loop(dst, src, len, type, label) \ + while (len >= sizeof(type)) { \ + unsafe_put_user(*(type *)src,(type __user *)dst,label); \ + dst += sizeof(type); \ + src += sizeof(type); \ + len -= sizeof(type); \ + } + +#define unsafe_copy_to_user(_dst,_src,_len,label) \ +do { \ + char __user *__ucu_dst = (_dst); \ + const char *__ucu_src = (_src); \ + size_t __ucu_len = (_len); \ + unsafe_copy_loop(__ucu_dst, __ucu_src, __ucu_len, u64, label); \ + unsafe_copy_loop(__ucu_dst, __ucu_src, __ucu_len, u32, label); \ + unsafe_copy_loop(__ucu_dst, __ucu_src, __ucu_len, u16, label); \ + unsafe_copy_loop(__ucu_dst, __ucu_src, __ucu_len, u8, label); \ +} while (0) + #endif /* _ASM_X86_UACCESS_H */ diff --git a/arch/x86/include/asm/vmware.h b/arch/x86/include/asm/vmware.h index e00c9e875933..ac9fc51e2b18 100644 --- a/arch/x86/include/asm/vmware.h +++ b/arch/x86/include/asm/vmware.h @@ -4,6 +4,7 @@ #include <asm/cpufeatures.h> #include <asm/alternative.h> +#include <linux/stringify.h> /* * The hypercall definitions differ in the low word of the %edx argument @@ -20,8 +21,8 @@ */ /* Old port-based version */ -#define VMWARE_HYPERVISOR_PORT "0x5658" -#define VMWARE_HYPERVISOR_PORT_HB "0x5659" +#define VMWARE_HYPERVISOR_PORT 0x5658 +#define VMWARE_HYPERVISOR_PORT_HB 0x5659 /* Current vmcall / vmmcall version */ #define VMWARE_HYPERVISOR_HB BIT(0) @@ -29,7 +30,8 @@ /* The low bandwidth call. The low word of edx is presumed clear. */ #define VMWARE_HYPERCALL \ - ALTERNATIVE_2("movw $" VMWARE_HYPERVISOR_PORT ", %%dx; inl (%%dx)", \ + ALTERNATIVE_2("movw $" __stringify(VMWARE_HYPERVISOR_PORT) ", %%dx; " \ + "inl (%%dx), %%eax", \ "vmcall", X86_FEATURE_VMCALL, \ "vmmcall", X86_FEATURE_VMW_VMMCALL) @@ -38,7 +40,8 @@ * HB and OUT bits set. */ #define VMWARE_HYPERCALL_HB_OUT \ - ALTERNATIVE_2("movw $" VMWARE_HYPERVISOR_PORT_HB ", %%dx; rep outsb", \ + ALTERNATIVE_2("movw $" __stringify(VMWARE_HYPERVISOR_PORT_HB) ", %%dx; " \ + "rep outsb", \ "vmcall", X86_FEATURE_VMCALL, \ "vmmcall", X86_FEATURE_VMW_VMMCALL) @@ -47,7 +50,8 @@ * HB bit set. */ #define VMWARE_HYPERCALL_HB_IN \ - ALTERNATIVE_2("movw $" VMWARE_HYPERVISOR_PORT_HB ", %%dx; rep insb", \ + ALTERNATIVE_2("movw $" __stringify(VMWARE_HYPERVISOR_PORT_HB) ", %%dx; " \ + "rep insb", \ "vmcall", X86_FEATURE_VMCALL, \ "vmmcall", X86_FEATURE_VMW_VMMCALL) #endif diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c index 9d3a971ea364..9ec463fe96f2 100644 --- a/arch/x86/kernel/alternative.c +++ b/arch/x86/kernel/alternative.c @@ -956,16 +956,15 @@ NOKPROBE_SYMBOL(patch_cmp); int poke_int3_handler(struct pt_regs *regs) { struct text_poke_loc *tp; - unsigned char int3 = 0xcc; void *ip; /* * Having observed our INT3 instruction, we now must observe * bp_patching.nr_entries. * - * nr_entries != 0 INT3 - * WMB RMB - * write INT3 if (nr_entries) + * nr_entries != 0 INT3 + * WMB RMB + * write INT3 if (nr_entries) * * Idem for other elements in bp_patching. */ @@ -978,9 +977,9 @@ int poke_int3_handler(struct pt_regs *regs) return 0; /* - * Discount the sizeof(int3). See text_poke_bp_batch(). + * Discount the INT3. See text_poke_bp_batch(). */ - ip = (void *) regs->ip - sizeof(int3); + ip = (void *) regs->ip - INT3_INSN_SIZE; /* * Skip the binary search if there is a single member in the vector. 
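In the hunk below, the handler no longer redirects to a prearranged detour; it looks up the text_poke_loc entry for the patched address and emulates the instruction directly from the recorded opcode and displacement. A short sketch of the displacement arithmetic (illustrative, not a quote of the final code):

	/* ip points at the patched instruction (regs->ip - INT3_INSN_SIZE) */
	void *next_ip = ip + tp->len;				/* address following the instruction */
	unsigned long target = (unsigned long)next_ip + tp->rel32;	/* rel32 is relative to next_ip */

	if (tp->opcode == CALL_INSN_OPCODE)
		int3_emulate_call(regs, target);	/* pushes next_ip as return address, then jumps */
	else
		int3_emulate_jmp(regs, target);		/* JMP32 and JMP8 both resolve this way */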
@@ -997,8 +996,28 @@ int poke_int3_handler(struct pt_regs *regs) return 0; } - /* set up the specified breakpoint detour */ - regs->ip = (unsigned long) tp->detour; + ip += tp->len; + + switch (tp->opcode) { + case INT3_INSN_OPCODE: + /* + * Someone poked an explicit INT3, they'll want to handle it, + * do not consume. + */ + return 0; + + case CALL_INSN_OPCODE: + int3_emulate_call(regs, (long)ip + tp->rel32); + break; + + case JMP32_INSN_OPCODE: + case JMP8_INSN_OPCODE: + int3_emulate_jmp(regs, (long)ip + tp->rel32); + break; + + default: + BUG(); + } return 1; } @@ -1014,7 +1033,7 @@ NOKPROBE_SYMBOL(poke_int3_handler); * synchronization using int3 breakpoint. * * The way it is done: - * - For each entry in the vector: + * - For each entry in the vector: * - add a int3 trap to the address that will be patched * - sync cores * - For each entry in the vector: @@ -1027,9 +1046,9 @@ NOKPROBE_SYMBOL(poke_int3_handler); */ void text_poke_bp_batch(struct text_poke_loc *tp, unsigned int nr_entries) { - int patched_all_but_first = 0; - unsigned char int3 = 0xcc; + unsigned char int3 = INT3_INSN_OPCODE; unsigned int i; + int do_sync; lockdep_assert_held(&text_mutex); @@ -1053,16 +1072,16 @@ void text_poke_bp_batch(struct text_poke_loc *tp, unsigned int nr_entries) /* * Second step: update all but the first byte of the patched range. */ - for (i = 0; i < nr_entries; i++) { + for (do_sync = 0, i = 0; i < nr_entries; i++) { if (tp[i].len - sizeof(int3) > 0) { text_poke((char *)tp[i].addr + sizeof(int3), - (const char *)tp[i].opcode + sizeof(int3), + (const char *)tp[i].text + sizeof(int3), tp[i].len - sizeof(int3)); - patched_all_but_first++; + do_sync++; } } - if (patched_all_but_first) { + if (do_sync) { /* * According to Intel, this core syncing is very likely * not necessary and we'd be safe even without it. But @@ -1075,10 +1094,17 @@ void text_poke_bp_batch(struct text_poke_loc *tp, unsigned int nr_entries) * Third step: replace the first byte (int3) by the first byte of * replacing opcode. */ - for (i = 0; i < nr_entries; i++) - text_poke(tp[i].addr, tp[i].opcode, sizeof(int3)); + for (do_sync = 0, i = 0; i < nr_entries; i++) { + if (tp[i].text[0] == INT3_INSN_OPCODE) + continue; + + text_poke(tp[i].addr, tp[i].text, sizeof(int3)); + do_sync++; + } + + if (do_sync) + on_each_cpu(do_sync_core, NULL, 1); - on_each_cpu(do_sync_core, NULL, 1); /* * sync_core() implies an smp_mb() and orders this store against * the writing of the new instruction. 
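The next hunk adds text_poke_loc_init() and reworks text_poke_bp() to take the instruction to emulate instead of a handler address. A hedged usage sketch (the 'site' and 'target' symbols are made up for illustration): to live-patch a 5-byte site into a near JMP, a caller builds the instruction bytes and lets the decoder derive opcode and rel32:

	unsigned char insn[JMP32_INSN_SIZE];
	s32 rel = (s32)((long)target - ((long)site + JMP32_INSN_SIZE));

	insn[0] = JMP32_INSN_OPCODE;		/* 0xE9 */
	memcpy(&insn[1], &rel, sizeof(rel));

	/* emulate == NULL: the bytes being written are also what gets emulated
	 * if another CPU hits the transient INT3 during the transition */
	text_poke_bp(site, insn, JMP32_INSN_SIZE, NULL);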
@@ -1087,6 +1113,60 @@ void text_poke_bp_batch(struct text_poke_loc *tp, unsigned int nr_entries) bp_patching.nr_entries = 0; } +void text_poke_loc_init(struct text_poke_loc *tp, void *addr, + const void *opcode, size_t len, const void *emulate) +{ + struct insn insn; + + if (!opcode) + opcode = (void *)tp->text; + else + memcpy((void *)tp->text, opcode, len); + + if (!emulate) + emulate = opcode; + + kernel_insn_init(&insn, emulate, MAX_INSN_SIZE); + insn_get_length(&insn); + + BUG_ON(!insn_complete(&insn)); + BUG_ON(len != insn.length); + + tp->addr = addr; + tp->len = len; + tp->opcode = insn.opcode.bytes[0]; + + switch (tp->opcode) { + case INT3_INSN_OPCODE: + break; + + case CALL_INSN_OPCODE: + case JMP32_INSN_OPCODE: + case JMP8_INSN_OPCODE: + tp->rel32 = insn.immediate.value; + break; + + default: /* assume NOP */ + switch (len) { + case 2: /* NOP2 -- emulate as JMP8+0 */ + BUG_ON(memcmp(emulate, ideal_nops[len], len)); + tp->opcode = JMP8_INSN_OPCODE; + tp->rel32 = 0; + break; + + case 5: /* NOP5 -- emulate as JMP32+0 */ + BUG_ON(memcmp(emulate, ideal_nops[NOP_ATOMIC5], len)); + tp->opcode = JMP32_INSN_OPCODE; + tp->rel32 = 0; + break; + + default: /* unknown instruction */ + BUG(); + } + break; + } +} + /** * text_poke_bp() -- update instructions on live kernel on SMP * @addr: address to patch @@ -1098,20 +1178,10 @@ void text_poke_bp_batch(struct text_poke_loc *tp, unsigned int nr_entries) * dynamically allocated memory. This function should be used when it is * not possible to allocate memory. */ -void text_poke_bp(void *addr, const void *opcode, size_t len, void *handler) +void text_poke_bp(void *addr, const void *opcode, size_t len, const void *emulate) { - struct text_poke_loc tp = { - .detour = handler, - .addr = addr, - .len = len, - }; - - if (len > POKE_MAX_OPCODE_SIZE) { - WARN_ONCE(1, "len is larger than %d\n", POKE_MAX_OPCODE_SIZE); - return; - } - - memcpy((void *)tp.opcode, opcode, len); + struct text_poke_loc tp; + text_poke_loc_init(&tp, addr, opcode, len, emulate); text_poke_bp_batch(&tp, 1); } diff --git a/arch/x86/kernel/amd_gart_64.c b/arch/x86/kernel/amd_gart_64.c index a6ac3712db8b..4bbccb9d16dc 100644 --- a/arch/x86/kernel/amd_gart_64.c +++ b/arch/x86/kernel/amd_gart_64.c @@ -510,10 +510,9 @@ static __init unsigned long check_iommu_size(unsigned long aper, u64 aper_size) iommu_size -= round_up(a, PMD_PAGE_SIZE) - a; if (iommu_size < 64*1024*1024) { - pr_warning( - "PCI-DMA: Warning: Small IOMMU %luMB." + pr_warn("PCI-DMA: Warning: Small IOMMU %luMB." 
" Consider increasing the AGP aperture in BIOS\n", - iommu_size >> 20); + iommu_size >> 20); } return iommu_size; @@ -665,8 +664,7 @@ static __init int init_amd_gatt(struct agp_kern_info *info) nommu: /* Should not happen anymore */ - pr_warning("PCI-DMA: More than 4GB of RAM and no IOMMU\n" - "falling back to iommu=soft.\n"); + pr_warn("PCI-DMA: More than 4GB of RAM and no IOMMU - falling back to iommu=soft.\n"); return -1; } @@ -733,8 +731,8 @@ int __init gart_iommu_init(void) !gart_iommu_aperture || (no_agp && init_amd_gatt(&info) < 0)) { if (max_pfn > MAX_DMA32_PFN) { - pr_warning("More than 4GB of memory but GART IOMMU not available.\n"); - pr_warning("falling back to iommu=soft.\n"); + pr_warn("More than 4GB of memory but GART IOMMU not available.\n"); + pr_warn("falling back to iommu=soft.\n"); } return 0; } diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index 81471064235c..28446fa6bf18 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c @@ -780,8 +780,8 @@ calibrate_by_pmtimer(long deltapm, long *delta, long *deltatsc) res = (((u64)deltapm) * mult) >> 22; do_div(res, 1000000); - pr_warning("APIC calibration not consistent " - "with PM-Timer: %ldms instead of 100ms\n",(long)res); + pr_warn("APIC calibration not consistent " + "with PM-Timer: %ldms instead of 100ms\n", (long)res); /* Correct the lapic counter value */ res = (((u64)(*delta)) * pm_100ms); @@ -977,7 +977,7 @@ static int __init calibrate_APIC_clock(void) */ if (lapic_timer_period < (1000000 / HZ)) { local_irq_enable(); - pr_warning("APIC frequency too slow, disabling apic timer\n"); + pr_warn("APIC frequency too slow, disabling apic timer\n"); return -1; } @@ -1021,7 +1021,7 @@ static int __init calibrate_APIC_clock(void) local_irq_enable(); if (levt->features & CLOCK_EVT_FEAT_DUMMY) { - pr_warning("APIC timer disabled due to verification failure\n"); + pr_warn("APIC timer disabled due to verification failure\n"); return -1; } @@ -1095,8 +1095,8 @@ static void local_apic_timer_interrupt(void) * spurious. */ if (!evt->event_handler) { - pr_warning("Spurious LAPIC timer interrupt on cpu %d\n", - smp_processor_id()); + pr_warn("Spurious LAPIC timer interrupt on cpu %d\n", + smp_processor_id()); /* Switch it off */ lapic_timer_shutdown(evt); return; @@ -1586,9 +1586,6 @@ static void setup_local_APIC(void) { int cpu = smp_processor_id(); unsigned int value; -#ifdef CONFIG_X86_32 - int logical_apicid, ldr_apicid; -#endif if (disable_apic) { disable_ioapic_support(); @@ -1626,16 +1623,21 @@ static void setup_local_APIC(void) apic->init_apic_ldr(); #ifdef CONFIG_X86_32 - /* - * APIC LDR is initialized. If logical_apicid mapping was - * initialized during get_smp_config(), make sure it matches the - * actual value. - */ - logical_apicid = early_per_cpu(x86_cpu_to_logical_apicid, cpu); - ldr_apicid = GET_APIC_LOGICAL_ID(apic_read(APIC_LDR)); - WARN_ON(logical_apicid != BAD_APICID && logical_apicid != ldr_apicid); - /* always use the value from LDR */ - early_per_cpu(x86_cpu_to_logical_apicid, cpu) = ldr_apicid; + if (apic->dest_logical) { + int logical_apicid, ldr_apicid; + + /* + * APIC LDR is initialized. If logical_apicid mapping was + * initialized during get_smp_config(), make sure it matches + * the actual value. + */ + logical_apicid = early_per_cpu(x86_cpu_to_logical_apicid, cpu); + ldr_apicid = GET_APIC_LOGICAL_ID(apic_read(APIC_LDR)); + if (logical_apicid != BAD_APICID) + WARN_ON(logical_apicid != ldr_apicid); + /* Always use the value from LDR. 
*/ + early_per_cpu(x86_cpu_to_logical_apicid, cpu) = ldr_apicid; + } #endif /* @@ -1809,11 +1811,11 @@ static int __init setup_nox2apic(char *str) int apicid = native_apic_msr_read(APIC_ID); if (apicid >= 255) { - pr_warning("Apicid: %08x, cannot enforce nox2apic\n", - apicid); + pr_warn("Apicid: %08x, cannot enforce nox2apic\n", + apicid); return 0; } - pr_warning("x2apic already enabled.\n"); + pr_warn("x2apic already enabled.\n"); __x2apic_disable(); } setup_clear_cpu_cap(X86_FEATURE_X2APIC); @@ -1981,7 +1983,7 @@ static int __init apic_verify(void) */ features = cpuid_edx(1); if (!(features & (1 << X86_FEATURE_APIC))) { - pr_warning("Could not enable APIC!\n"); + pr_warn("Could not enable APIC!\n"); return -1; } set_cpu_cap(&boot_cpu_data, X86_FEATURE_APIC); @@ -2408,9 +2410,8 @@ int generic_processor_info(int apicid, int version) disabled_cpu_apicid == apicid) { int thiscpu = num_processors + disabled_cpus; - pr_warning("APIC: Disabling requested cpu." - " Processor %d/0x%x ignored.\n", - thiscpu, apicid); + pr_warn("APIC: Disabling requested cpu." + " Processor %d/0x%x ignored.\n", thiscpu, apicid); disabled_cpus++; return -ENODEV; @@ -2424,8 +2425,7 @@ int generic_processor_info(int apicid, int version) apicid != boot_cpu_physical_apicid) { int thiscpu = max + disabled_cpus - 1; - pr_warning( - "APIC: NR_CPUS/possible_cpus limit of %i almost" + pr_warn("APIC: NR_CPUS/possible_cpus limit of %i almost" " reached. Keeping one slot for boot cpu." " Processor %d/0x%x ignored.\n", max, thiscpu, apicid); @@ -2436,9 +2436,8 @@ int generic_processor_info(int apicid, int version) if (num_processors >= nr_cpu_ids) { int thiscpu = max + disabled_cpus; - pr_warning("APIC: NR_CPUS/possible_cpus limit of %i " - "reached. Processor %d/0x%x ignored.\n", - max, thiscpu, apicid); + pr_warn("APIC: NR_CPUS/possible_cpus limit of %i reached. 
" + "Processor %d/0x%x ignored.\n", max, thiscpu, apicid); disabled_cpus++; return -EINVAL; @@ -2468,13 +2467,13 @@ int generic_processor_info(int apicid, int version) * Validate version */ if (version == 0x0) { - pr_warning("BIOS bug: APIC version is 0 for CPU %d/0x%x, fixing up to 0x10\n", - cpu, apicid); + pr_warn("BIOS bug: APIC version is 0 for CPU %d/0x%x, fixing up to 0x10\n", + cpu, apicid); version = 0x10; } if (version != boot_cpu_apic_version) { - pr_warning("BIOS bug: APIC version mismatch, boot CPU: %x, CPU %d: version %x\n", + pr_warn("BIOS bug: APIC version mismatch, boot CPU: %x, CPU %d: version %x\n", boot_cpu_apic_version, cpu, version); } @@ -2843,7 +2842,7 @@ static int __init apic_set_verbosity(char *arg) apic_verbosity = APIC_VERBOSE; #ifdef CONFIG_X86_64 else { - pr_warning("APIC Verbosity level %s not recognised" + pr_warn("APIC Verbosity level %s not recognised" " use apic=verbose or apic=debug\n", arg); return -EINVAL; } diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index d6af97fd170a..913c88617848 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -1725,19 +1725,20 @@ static bool io_apic_level_ack_pending(struct mp_chip_data *data) return false; } -static inline bool ioapic_irqd_mask(struct irq_data *data) +static inline bool ioapic_prepare_move(struct irq_data *data) { - /* If we are moving the irq we need to mask it */ + /* If we are moving the IRQ we need to mask it */ if (unlikely(irqd_is_setaffinity_pending(data))) { - mask_ioapic_irq(data); + if (!irqd_irq_masked(data)) + mask_ioapic_irq(data); return true; } return false; } -static inline void ioapic_irqd_unmask(struct irq_data *data, bool masked) +static inline void ioapic_finish_move(struct irq_data *data, bool moveit) { - if (unlikely(masked)) { + if (unlikely(moveit)) { /* Only migrate the irq if the ack has been received. 
* * On rare occasions the broadcast level triggered ack gets @@ -1766,15 +1767,17 @@ static inline void ioapic_irqd_unmask(struct irq_data *data, bool masked) */ if (!io_apic_level_ack_pending(data->chip_data)) irq_move_masked_irq(data); - unmask_ioapic_irq(data); + /* If the IRQ is masked in the core, leave it: */ + if (!irqd_irq_masked(data)) + unmask_ioapic_irq(data); } } #else -static inline bool ioapic_irqd_mask(struct irq_data *data) +static inline bool ioapic_prepare_move(struct irq_data *data) { return false; } -static inline void ioapic_irqd_unmask(struct irq_data *data, bool masked) +static inline void ioapic_finish_move(struct irq_data *data, bool moveit) { } #endif @@ -1783,11 +1786,11 @@ static void ioapic_ack_level(struct irq_data *irq_data) { struct irq_cfg *cfg = irqd_cfg(irq_data); unsigned long v; - bool masked; + bool moveit; int i; irq_complete_move(cfg); - masked = ioapic_irqd_mask(irq_data); + moveit = ioapic_prepare_move(irq_data); /* * It appears there is an erratum which affects at least version 0x11 @@ -1842,7 +1845,7 @@ static void ioapic_ack_level(struct irq_data *irq_data) eoi_ioapic_pin(cfg->vector, irq_data->chip_data); } - ioapic_irqd_unmask(irq_data, masked); + ioapic_finish_move(irq_data, moveit); } static void ioapic_ir_ack_level(struct irq_data *irq_data) diff --git a/arch/x86/kernel/apic/x2apic_cluster.c b/arch/x86/kernel/apic/x2apic_cluster.c index 45e92cba92f5..b0889c48a2ac 100644 --- a/arch/x86/kernel/apic/x2apic_cluster.c +++ b/arch/x86/kernel/apic/x2apic_cluster.c @@ -156,7 +156,8 @@ static int x2apic_dead_cpu(unsigned int dead_cpu) { struct cluster_mask *cmsk = per_cpu(cluster_masks, dead_cpu); - cpumask_clear_cpu(dead_cpu, &cmsk->mask); + if (cmsk) + cpumask_clear_cpu(dead_cpu, &cmsk->mask); free_cpumask_var(per_cpu(ipi_mask, dead_cpu)); return 0; } diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile index d7a1e5a9331c..890f60083eca 100644 --- a/arch/x86/kernel/cpu/Makefile +++ b/arch/x86/kernel/cpu/Makefile @@ -30,7 +30,7 @@ obj-$(CONFIG_PROC_FS) += proc.o obj-$(CONFIG_X86_FEATURE_NAMES) += capflags.o powerflags.o ifdef CONFIG_CPU_SUP_INTEL -obj-y += intel.o intel_pconfig.o +obj-y += intel.o intel_pconfig.o tsx.o obj-$(CONFIG_PM) += intel_epb.o endif obj-$(CONFIG_CPU_SUP_AMD) += amd.o diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 91c2561b905f..4c7b0fa15a19 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -39,6 +39,7 @@ static void __init spectre_v2_select_mitigation(void); static void __init ssb_select_mitigation(void); static void __init l1tf_select_mitigation(void); static void __init mds_select_mitigation(void); +static void __init taa_select_mitigation(void); /* The base value of the SPEC_CTRL MSR that always has to be preserved. 
*/ u64 x86_spec_ctrl_base; @@ -105,6 +106,7 @@ void __init check_bugs(void) ssb_select_mitigation(); l1tf_select_mitigation(); mds_select_mitigation(); + taa_select_mitigation(); arch_smt_update(); @@ -269,6 +271,100 @@ static int __init mds_cmdline(char *str) early_param("mds", mds_cmdline); #undef pr_fmt +#define pr_fmt(fmt) "TAA: " fmt + +/* Default mitigation for TAA-affected CPUs */ +static enum taa_mitigations taa_mitigation __ro_after_init = TAA_MITIGATION_VERW; +static bool taa_nosmt __ro_after_init; + +static const char * const taa_strings[] = { + [TAA_MITIGATION_OFF] = "Vulnerable", + [TAA_MITIGATION_UCODE_NEEDED] = "Vulnerable: Clear CPU buffers attempted, no microcode", + [TAA_MITIGATION_VERW] = "Mitigation: Clear CPU buffers", + [TAA_MITIGATION_TSX_DISABLED] = "Mitigation: TSX disabled", +}; + +static void __init taa_select_mitigation(void) +{ + u64 ia32_cap; + + if (!boot_cpu_has_bug(X86_BUG_TAA)) { + taa_mitigation = TAA_MITIGATION_OFF; + return; + } + + /* TSX previously disabled by tsx=off */ + if (!boot_cpu_has(X86_FEATURE_RTM)) { + taa_mitigation = TAA_MITIGATION_TSX_DISABLED; + goto out; + } + + if (cpu_mitigations_off()) { + taa_mitigation = TAA_MITIGATION_OFF; + return; + } + + /* TAA mitigation is turned off on the cmdline (tsx_async_abort=off) */ + if (taa_mitigation == TAA_MITIGATION_OFF) + goto out; + + if (boot_cpu_has(X86_FEATURE_MD_CLEAR)) + taa_mitigation = TAA_MITIGATION_VERW; + else + taa_mitigation = TAA_MITIGATION_UCODE_NEEDED; + + /* + * VERW doesn't clear the CPU buffers when MD_CLEAR=1 and MDS_NO=1. + * A microcode update fixes this behavior to clear CPU buffers. It also + * adds support for MSR_IA32_TSX_CTRL which is enumerated by the + * ARCH_CAP_TSX_CTRL_MSR bit. + * + * On MDS_NO=1 CPUs if ARCH_CAP_TSX_CTRL_MSR is not set, microcode + * update is required. + */ + ia32_cap = x86_read_arch_cap_msr(); + if ( (ia32_cap & ARCH_CAP_MDS_NO) && + !(ia32_cap & ARCH_CAP_TSX_CTRL_MSR)) + taa_mitigation = TAA_MITIGATION_UCODE_NEEDED; + + /* + * TSX is enabled, select alternate mitigation for TAA which is + * the same as MDS. Enable MDS static branch to clear CPU buffers. + * + * For guests that can't determine whether the correct microcode is + * present on host, enable the mitigation for UCODE_NEEDED as well. + */ + static_branch_enable(&mds_user_clear); + + if (taa_nosmt || cpu_mitigations_auto_nosmt()) + cpu_smt_disable(false); + +out: + pr_info("%s\n", taa_strings[taa_mitigation]); +} + +static int __init tsx_async_abort_parse_cmdline(char *str) +{ + if (!boot_cpu_has_bug(X86_BUG_TAA)) + return 0; + + if (!str) + return -EINVAL; + + if (!strcmp(str, "off")) { + taa_mitigation = TAA_MITIGATION_OFF; + } else if (!strcmp(str, "full")) { + taa_mitigation = TAA_MITIGATION_VERW; + } else if (!strcmp(str, "full,nosmt")) { + taa_mitigation = TAA_MITIGATION_VERW; + taa_nosmt = true; + } + + return 0; +} +early_param("tsx_async_abort", tsx_async_abort_parse_cmdline); + +#undef pr_fmt #define pr_fmt(fmt) "Spectre V1 : " fmt enum spectre_v1_mitigation { @@ -786,13 +882,10 @@ static void update_mds_branch_idle(void) } #define MDS_MSG_SMT "MDS CPU bug present and SMT on, data leak possible. See https://www.kernel.org/doc/html/latest/admin-guide/hw-vuln/mds.html for more details.\n" +#define TAA_MSG_SMT "TAA CPU bug present and SMT on, data leak possible. See https://www.kernel.org/doc/html/latest/admin-guide/hw-vuln/tsx_async_abort.html for more details.\n" void cpu_bugs_smt_update(void) { - /* Enhanced IBRS implies STIBP. No update required. 
*/ - if (spectre_v2_enabled == SPECTRE_V2_IBRS_ENHANCED) - return; - mutex_lock(&spec_ctrl_mutex); switch (spectre_v2_user) { @@ -819,6 +912,17 @@ void cpu_bugs_smt_update(void) break; } + switch (taa_mitigation) { + case TAA_MITIGATION_VERW: + case TAA_MITIGATION_UCODE_NEEDED: + if (sched_smt_active()) + pr_warn_once(TAA_MSG_SMT); + break; + case TAA_MITIGATION_TSX_DISABLED: + case TAA_MITIGATION_OFF: + break; + } + mutex_unlock(&spec_ctrl_mutex); } @@ -1149,6 +1253,9 @@ void x86_spec_ctrl_setup_ap(void) x86_amd_ssb_disable(); } +bool itlb_multihit_kvm_mitigation; +EXPORT_SYMBOL_GPL(itlb_multihit_kvm_mitigation); + #undef pr_fmt #define pr_fmt(fmt) "L1TF: " fmt @@ -1304,11 +1411,24 @@ static ssize_t l1tf_show_state(char *buf) l1tf_vmx_states[l1tf_vmx_mitigation], sched_smt_active() ? "vulnerable" : "disabled"); } + +static ssize_t itlb_multihit_show_state(char *buf) +{ + if (itlb_multihit_kvm_mitigation) + return sprintf(buf, "KVM: Mitigation: Split huge pages\n"); + else + return sprintf(buf, "KVM: Vulnerable\n"); +} #else static ssize_t l1tf_show_state(char *buf) { return sprintf(buf, "%s\n", L1TF_DEFAULT_MSG); } + +static ssize_t itlb_multihit_show_state(char *buf) +{ + return sprintf(buf, "Processor vulnerable\n"); +} #endif static ssize_t mds_show_state(char *buf) @@ -1328,6 +1448,21 @@ static ssize_t mds_show_state(char *buf) sched_smt_active() ? "vulnerable" : "disabled"); } +static ssize_t tsx_async_abort_show_state(char *buf) +{ + if ((taa_mitigation == TAA_MITIGATION_TSX_DISABLED) || + (taa_mitigation == TAA_MITIGATION_OFF)) + return sprintf(buf, "%s\n", taa_strings[taa_mitigation]); + + if (boot_cpu_has(X86_FEATURE_HYPERVISOR)) { + return sprintf(buf, "%s; SMT Host state unknown\n", + taa_strings[taa_mitigation]); + } + + return sprintf(buf, "%s; SMT %s\n", taa_strings[taa_mitigation], + sched_smt_active() ? 
"vulnerable" : "disabled"); +} + static char *stibp_state(void) { if (spectre_v2_enabled == SPECTRE_V2_IBRS_ENHANCED) @@ -1398,6 +1533,12 @@ static ssize_t cpu_show_common(struct device *dev, struct device_attribute *attr case X86_BUG_MDS: return mds_show_state(buf); + case X86_BUG_TAA: + return tsx_async_abort_show_state(buf); + + case X86_BUG_ITLB_MULTIHIT: + return itlb_multihit_show_state(buf); + default: break; } @@ -1434,4 +1575,14 @@ ssize_t cpu_show_mds(struct device *dev, struct device_attribute *attr, char *bu { return cpu_show_common(dev, attr, buf, X86_BUG_MDS); } + +ssize_t cpu_show_tsx_async_abort(struct device *dev, struct device_attribute *attr, char *buf) +{ + return cpu_show_common(dev, attr, buf, X86_BUG_TAA); +} + +ssize_t cpu_show_itlb_multihit(struct device *dev, struct device_attribute *attr, char *buf) +{ + return cpu_show_common(dev, attr, buf, X86_BUG_ITLB_MULTIHIT); +} #endif diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 9ae7d1bcd4f4..fffe21945374 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -1016,13 +1016,14 @@ static void identify_cpu_without_cpuid(struct cpuinfo_x86 *c) #endif } -#define NO_SPECULATION BIT(0) -#define NO_MELTDOWN BIT(1) -#define NO_SSB BIT(2) -#define NO_L1TF BIT(3) -#define NO_MDS BIT(4) -#define MSBDS_ONLY BIT(5) -#define NO_SWAPGS BIT(6) +#define NO_SPECULATION BIT(0) +#define NO_MELTDOWN BIT(1) +#define NO_SSB BIT(2) +#define NO_L1TF BIT(3) +#define NO_MDS BIT(4) +#define MSBDS_ONLY BIT(5) +#define NO_SWAPGS BIT(6) +#define NO_ITLB_MULTIHIT BIT(7) #define VULNWL(_vendor, _family, _model, _whitelist) \ { X86_VENDOR_##_vendor, _family, _model, X86_FEATURE_ANY, _whitelist } @@ -1043,27 +1044,27 @@ static const __initconst struct x86_cpu_id cpu_vuln_whitelist[] = { VULNWL(NSC, 5, X86_MODEL_ANY, NO_SPECULATION), /* Intel Family 6 */ - VULNWL_INTEL(ATOM_SALTWELL, NO_SPECULATION), - VULNWL_INTEL(ATOM_SALTWELL_TABLET, NO_SPECULATION), - VULNWL_INTEL(ATOM_SALTWELL_MID, NO_SPECULATION), - VULNWL_INTEL(ATOM_BONNELL, NO_SPECULATION), - VULNWL_INTEL(ATOM_BONNELL_MID, NO_SPECULATION), - - VULNWL_INTEL(ATOM_SILVERMONT, NO_SSB | NO_L1TF | MSBDS_ONLY | NO_SWAPGS), - VULNWL_INTEL(ATOM_SILVERMONT_D, NO_SSB | NO_L1TF | MSBDS_ONLY | NO_SWAPGS), - VULNWL_INTEL(ATOM_SILVERMONT_MID, NO_SSB | NO_L1TF | MSBDS_ONLY | NO_SWAPGS), - VULNWL_INTEL(ATOM_AIRMONT, NO_SSB | NO_L1TF | MSBDS_ONLY | NO_SWAPGS), - VULNWL_INTEL(XEON_PHI_KNL, NO_SSB | NO_L1TF | MSBDS_ONLY | NO_SWAPGS), - VULNWL_INTEL(XEON_PHI_KNM, NO_SSB | NO_L1TF | MSBDS_ONLY | NO_SWAPGS), + VULNWL_INTEL(ATOM_SALTWELL, NO_SPECULATION | NO_ITLB_MULTIHIT), + VULNWL_INTEL(ATOM_SALTWELL_TABLET, NO_SPECULATION | NO_ITLB_MULTIHIT), + VULNWL_INTEL(ATOM_SALTWELL_MID, NO_SPECULATION | NO_ITLB_MULTIHIT), + VULNWL_INTEL(ATOM_BONNELL, NO_SPECULATION | NO_ITLB_MULTIHIT), + VULNWL_INTEL(ATOM_BONNELL_MID, NO_SPECULATION | NO_ITLB_MULTIHIT), + + VULNWL_INTEL(ATOM_SILVERMONT, NO_SSB | NO_L1TF | MSBDS_ONLY | NO_SWAPGS | NO_ITLB_MULTIHIT), + VULNWL_INTEL(ATOM_SILVERMONT_D, NO_SSB | NO_L1TF | MSBDS_ONLY | NO_SWAPGS | NO_ITLB_MULTIHIT), + VULNWL_INTEL(ATOM_SILVERMONT_MID, NO_SSB | NO_L1TF | MSBDS_ONLY | NO_SWAPGS | NO_ITLB_MULTIHIT), + VULNWL_INTEL(ATOM_AIRMONT, NO_SSB | NO_L1TF | MSBDS_ONLY | NO_SWAPGS | NO_ITLB_MULTIHIT), + VULNWL_INTEL(XEON_PHI_KNL, NO_SSB | NO_L1TF | MSBDS_ONLY | NO_SWAPGS | NO_ITLB_MULTIHIT), + VULNWL_INTEL(XEON_PHI_KNM, NO_SSB | NO_L1TF | MSBDS_ONLY | NO_SWAPGS | NO_ITLB_MULTIHIT), VULNWL_INTEL(CORE_YONAH, NO_SSB), - 
VULNWL_INTEL(ATOM_AIRMONT_MID, NO_L1TF | MSBDS_ONLY | NO_SWAPGS), - VULNWL_INTEL(ATOM_AIRMONT_NP, NO_L1TF | NO_SWAPGS), + VULNWL_INTEL(ATOM_AIRMONT_MID, NO_L1TF | MSBDS_ONLY | NO_SWAPGS | NO_ITLB_MULTIHIT), + VULNWL_INTEL(ATOM_AIRMONT_NP, NO_L1TF | NO_SWAPGS | NO_ITLB_MULTIHIT), - VULNWL_INTEL(ATOM_GOLDMONT, NO_MDS | NO_L1TF | NO_SWAPGS), - VULNWL_INTEL(ATOM_GOLDMONT_D, NO_MDS | NO_L1TF | NO_SWAPGS), - VULNWL_INTEL(ATOM_GOLDMONT_PLUS, NO_MDS | NO_L1TF | NO_SWAPGS), + VULNWL_INTEL(ATOM_GOLDMONT, NO_MDS | NO_L1TF | NO_SWAPGS | NO_ITLB_MULTIHIT), + VULNWL_INTEL(ATOM_GOLDMONT_D, NO_MDS | NO_L1TF | NO_SWAPGS | NO_ITLB_MULTIHIT), + VULNWL_INTEL(ATOM_GOLDMONT_PLUS, NO_MDS | NO_L1TF | NO_SWAPGS | NO_ITLB_MULTIHIT), /* * Technically, swapgs isn't serializing on AMD (despite it previously @@ -1073,15 +1074,17 @@ static const __initconst struct x86_cpu_id cpu_vuln_whitelist[] = { * good enough for our purposes. */ + VULNWL_INTEL(ATOM_TREMONT_D, NO_ITLB_MULTIHIT), + /* AMD Family 0xf - 0x12 */ - VULNWL_AMD(0x0f, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS), - VULNWL_AMD(0x10, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS), - VULNWL_AMD(0x11, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS), - VULNWL_AMD(0x12, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS), + VULNWL_AMD(0x0f, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT), + VULNWL_AMD(0x10, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT), + VULNWL_AMD(0x11, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT), + VULNWL_AMD(0x12, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT), /* FAMILY_ANY must be last, otherwise 0x0f - 0x12 matches won't work */ - VULNWL_AMD(X86_FAMILY_ANY, NO_MELTDOWN | NO_L1TF | NO_MDS | NO_SWAPGS), - VULNWL_HYGON(X86_FAMILY_ANY, NO_MELTDOWN | NO_L1TF | NO_MDS | NO_SWAPGS), + VULNWL_AMD(X86_FAMILY_ANY, NO_MELTDOWN | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT), + VULNWL_HYGON(X86_FAMILY_ANY, NO_MELTDOWN | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT), {} }; @@ -1092,19 +1095,30 @@ static bool __init cpu_matches(unsigned long which) return m && !!(m->driver_data & which); } -static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c) +u64 x86_read_arch_cap_msr(void) { u64 ia32_cap = 0; + if (boot_cpu_has(X86_FEATURE_ARCH_CAPABILITIES)) + rdmsrl(MSR_IA32_ARCH_CAPABILITIES, ia32_cap); + + return ia32_cap; +} + +static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c) +{ + u64 ia32_cap = x86_read_arch_cap_msr(); + + /* Set ITLB_MULTIHIT bug if cpu is not in the whitelist and not mitigated */ + if (!cpu_matches(NO_ITLB_MULTIHIT) && !(ia32_cap & ARCH_CAP_PSCHANGE_MC_NO)) + setup_force_cpu_bug(X86_BUG_ITLB_MULTIHIT); + if (cpu_matches(NO_SPECULATION)) return; setup_force_cpu_bug(X86_BUG_SPECTRE_V1); setup_force_cpu_bug(X86_BUG_SPECTRE_V2); - if (cpu_has(c, X86_FEATURE_ARCH_CAPABILITIES)) - rdmsrl(MSR_IA32_ARCH_CAPABILITIES, ia32_cap); - if (!cpu_matches(NO_SSB) && !(ia32_cap & ARCH_CAP_SSB_NO) && !cpu_has(c, X86_FEATURE_AMD_SSB_NO)) setup_force_cpu_bug(X86_BUG_SPEC_STORE_BYPASS); @@ -1121,6 +1135,21 @@ static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c) if (!cpu_matches(NO_SWAPGS)) setup_force_cpu_bug(X86_BUG_SWAPGS); + /* + * When the CPU is not mitigated for TAA (TAA_NO=0) set TAA bug when: + * - TSX is supported or + * - TSX_CTRL is present + * + * TSX_CTRL check is needed for cases when TSX could be disabled before + * the kernel boot e.g. kexec. 
+ * TSX_CTRL check alone is not sufficient for cases when the microcode + * update is not present or running as guest that don't get TSX_CTRL. + */ + if (!(ia32_cap & ARCH_CAP_TAA_NO) && + (cpu_has(c, X86_FEATURE_RTM) || + (ia32_cap & ARCH_CAP_TSX_CTRL_MSR))) + setup_force_cpu_bug(X86_BUG_TAA); + if (cpu_matches(NO_MELTDOWN)) return; @@ -1554,6 +1583,8 @@ void __init identify_boot_cpu(void) #endif cpu_detect_tlb(&boot_cpu_data); setup_cr_pinning(); + + tsx_init(); } void identify_secondary_cpu(struct cpuinfo_x86 *c) diff --git a/arch/x86/kernel/cpu/cpu.h b/arch/x86/kernel/cpu/cpu.h index c0e2407abdd6..38ab6e115eac 100644 --- a/arch/x86/kernel/cpu/cpu.h +++ b/arch/x86/kernel/cpu/cpu.h @@ -44,6 +44,22 @@ struct _tlb_table { extern const struct cpu_dev *const __x86_cpu_dev_start[], *const __x86_cpu_dev_end[]; +#ifdef CONFIG_CPU_SUP_INTEL +enum tsx_ctrl_states { + TSX_CTRL_ENABLE, + TSX_CTRL_DISABLE, + TSX_CTRL_NOT_SUPPORTED, +}; + +extern __ro_after_init enum tsx_ctrl_states tsx_ctrl_state; + +extern void __init tsx_init(void); +extern void tsx_enable(void); +extern void tsx_disable(void); +#else +static inline void tsx_init(void) { } +#endif /* CONFIG_CPU_SUP_INTEL */ + extern void get_cpu_cap(struct cpuinfo_x86 *c); extern void get_cpu_address_sizes(struct cpuinfo_x86 *c); extern void cpu_detect_cache_sizes(struct cpuinfo_x86 *c); @@ -62,4 +78,6 @@ unsigned int aperfmperf_get_khz(int cpu); extern void x86_spec_ctrl_setup_ap(void); +extern u64 x86_read_arch_cap_msr(void); + #endif /* ARCH_X86_CPU_H */ diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index c2fdc00df163..11d5c5950e2d 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c @@ -762,6 +762,11 @@ static void init_intel(struct cpuinfo_x86 *c) detect_tme(c); init_intel_misc_features(c); + + if (tsx_ctrl_state == TSX_CTRL_ENABLE) + tsx_enable(); + if (tsx_ctrl_state == TSX_CTRL_DISABLE) + tsx_disable(); } #ifdef CONFIG_X86_32 diff --git a/arch/x86/kernel/cpu/mce/amd.c b/arch/x86/kernel/cpu/mce/amd.c index 6ea7fdc82f3c..5167bd2bb6b1 100644 --- a/arch/x86/kernel/cpu/mce/amd.c +++ b/arch/x86/kernel/cpu/mce/amd.c @@ -583,7 +583,7 @@ bool amd_filter_mce(struct mce *m) * - Prevent possible spurious interrupts from the IF bank on Family 0x17 * Models 0x10-0x2F due to Erratum #1114. 
*/ -void disable_err_thresholding(struct cpuinfo_x86 *c, unsigned int bank) +static void disable_err_thresholding(struct cpuinfo_x86 *c, unsigned int bank) { int i, num_msrs; u64 hwcr; diff --git a/arch/x86/kernel/cpu/mce/core.c b/arch/x86/kernel/cpu/mce/core.c index 743370ee4983..5f42f25bac8f 100644 --- a/arch/x86/kernel/cpu/mce/core.c +++ b/arch/x86/kernel/cpu/mce/core.c @@ -488,8 +488,9 @@ int mce_usable_address(struct mce *m) if (!(m->status & MCI_STATUS_ADDRV)) return 0; - /* Checks after this one are Intel-specific: */ - if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL) + /* Checks after this one are Intel/Zhaoxin-specific: */ + if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL && + boot_cpu_data.x86_vendor != X86_VENDOR_ZHAOXIN) return 1; if (!(m->status & MCI_STATUS_MISCV)) @@ -507,10 +508,13 @@ EXPORT_SYMBOL_GPL(mce_usable_address); bool mce_is_memory_error(struct mce *m) { - if (m->cpuvendor == X86_VENDOR_AMD || - m->cpuvendor == X86_VENDOR_HYGON) { + switch (m->cpuvendor) { + case X86_VENDOR_AMD: + case X86_VENDOR_HYGON: return amd_mce_is_memory_error(m); - } else if (m->cpuvendor == X86_VENDOR_INTEL) { + + case X86_VENDOR_INTEL: + case X86_VENDOR_ZHAOXIN: /* * Intel SDM Volume 3B - 15.9.2 Compound Error Codes * @@ -527,9 +531,10 @@ bool mce_is_memory_error(struct mce *m) return (m->status & 0xef80) == BIT(7) || (m->status & 0xef00) == BIT(8) || (m->status & 0xeffc) == 0xc; - } - return false; + default: + return false; + } } EXPORT_SYMBOL_GPL(mce_is_memory_error); @@ -1127,6 +1132,12 @@ static bool __mc_check_crashing_cpu(int cpu) u64 mcgstatus; mcgstatus = mce_rdmsrl(MSR_IA32_MCG_STATUS); + + if (boot_cpu_data.x86_vendor == X86_VENDOR_ZHAOXIN) { + if (mcgstatus & MCG_STATUS_LMCES) + return false; + } + if (mcgstatus & MCG_STATUS_RIPV) { mce_wrmsrl(MSR_IA32_MCG_STATUS, 0); return true; @@ -1277,9 +1288,10 @@ void do_machine_check(struct pt_regs *regs, long error_code) /* * Check if this MCE is signaled to only this logical processor, - * on Intel only. + * on Intel, Zhaoxin only. */ - if (m.cpuvendor == X86_VENDOR_INTEL) + if (m.cpuvendor == X86_VENDOR_INTEL || + m.cpuvendor == X86_VENDOR_ZHAOXIN) lmce = m.mcgstatus & MCG_STATUS_LMCES; /* @@ -1697,6 +1709,18 @@ static int __mcheck_cpu_apply_quirks(struct cpuinfo_x86 *c) if (c->x86 == 6 && c->x86_model == 45) quirk_no_way_out = quirk_sandybridge_ifu; } + + if (c->x86_vendor == X86_VENDOR_ZHAOXIN) { + /* + * All newer Zhaoxin CPUs support MCE broadcasting. Enable + * synchronization with a one second timeout. + */ + if (c->x86 > 6 || (c->x86_model == 0x19 || c->x86_model == 0x1f)) { + if (cfg->monarch_timeout < 0) + cfg->monarch_timeout = USEC_PER_SEC; + } + } + if (cfg->monarch_timeout < 0) cfg->monarch_timeout = 0; if (cfg->bootlog != 0) @@ -1760,6 +1784,35 @@ static void mce_centaur_feature_init(struct cpuinfo_x86 *c) } } +static void mce_zhaoxin_feature_init(struct cpuinfo_x86 *c) +{ + struct mce_bank *mce_banks = this_cpu_ptr(mce_banks_array); + + /* + * These CPUs have MCA bank 8 which reports only one error type called + * SVAD (System View Address Decoder). The reporting of that error is + * controlled by IA32_MC8.CTL.0. + * + * If enabled, prefetching on these CPUs will cause SVAD MCE when + * virtual machines start and result in a system panic. Always disable + * bank 8 SVAD error by default. 
+ */ + if ((c->x86 == 7 && c->x86_model == 0x1b) || + (c->x86_model == 0x19 || c->x86_model == 0x1f)) { + if (this_cpu_read(mce_num_banks) > 8) + mce_banks[8].ctl = 0; + } + + intel_init_cmci(); + intel_init_lmce(); + mce_adjust_timer = cmci_intel_adjust_timer; +} + +static void mce_zhaoxin_feature_clear(struct cpuinfo_x86 *c) +{ + intel_clear_lmce(); +} + static void __mcheck_cpu_init_vendor(struct cpuinfo_x86 *c) { switch (c->x86_vendor) { @@ -1781,6 +1834,10 @@ static void __mcheck_cpu_init_vendor(struct cpuinfo_x86 *c) mce_centaur_feature_init(c); break; + case X86_VENDOR_ZHAOXIN: + mce_zhaoxin_feature_init(c); + break; + default: break; } @@ -1792,6 +1849,11 @@ static void __mcheck_cpu_clear_vendor(struct cpuinfo_x86 *c) case X86_VENDOR_INTEL: mce_intel_feature_clear(c); break; + + case X86_VENDOR_ZHAOXIN: + mce_zhaoxin_feature_clear(c); + break; + default: break; } @@ -2014,15 +2076,16 @@ static void mce_disable_error_reporting(void) static void vendor_disable_error_reporting(void) { /* - * Don't clear on Intel or AMD or Hygon CPUs. Some of these MSRs - * are socket-wide. - * Disabling them for just a single offlined CPU is bad, since it will - * inhibit reporting for all shared resources on the socket like the - * last level cache (LLC), the integrated memory controller (iMC), etc. + * Don't clear on Intel or AMD or Hygon or Zhaoxin CPUs. Some of these + * MSRs are socket-wide. Disabling them for just a single offlined CPU + * is bad, since it will inhibit reporting for all shared resources on + * the socket like the last level cache (LLC), the integrated memory + * controller (iMC), etc. */ if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL || boot_cpu_data.x86_vendor == X86_VENDOR_HYGON || - boot_cpu_data.x86_vendor == X86_VENDOR_AMD) + boot_cpu_data.x86_vendor == X86_VENDOR_AMD || + boot_cpu_data.x86_vendor == X86_VENDOR_ZHAOXIN) return; mce_disable_error_reporting(); diff --git a/arch/x86/kernel/cpu/mce/intel.c b/arch/x86/kernel/cpu/mce/intel.c index 88cd9598fa57..e270d0770134 100644 --- a/arch/x86/kernel/cpu/mce/intel.c +++ b/arch/x86/kernel/cpu/mce/intel.c @@ -85,8 +85,10 @@ static int cmci_supported(int *banks) * initialization is vendor keyed and this * makes sure none of the backdoors are entered otherwise. 
*/ - if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL) + if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL && + boot_cpu_data.x86_vendor != X86_VENDOR_ZHAOXIN) return 0; + if (!boot_cpu_has(X86_FEATURE_APIC) || lapic_get_maxlvt() < 6) return 0; rdmsrl(MSR_IA32_MCG_CAP, cap); @@ -423,7 +425,7 @@ void cmci_disable_bank(int bank) raw_spin_unlock_irqrestore(&cmci_discover_lock, flags); } -static void intel_init_cmci(void) +void intel_init_cmci(void) { int banks; @@ -442,7 +444,7 @@ static void intel_init_cmci(void) cmci_recheck(); } -static void intel_init_lmce(void) +void intel_init_lmce(void) { u64 val; @@ -455,7 +457,7 @@ static void intel_init_lmce(void) wrmsrl(MSR_IA32_MCG_EXT_CTL, val | MCG_EXT_CTL_LMCE_EN); } -static void intel_clear_lmce(void) +void intel_clear_lmce(void) { u64 val; @@ -482,6 +484,7 @@ static void intel_ppin_init(struct cpuinfo_x86 *c) case INTEL_FAM6_BROADWELL_D: case INTEL_FAM6_BROADWELL_X: case INTEL_FAM6_SKYLAKE_X: + case INTEL_FAM6_ICELAKE_X: case INTEL_FAM6_XEON_PHI_KNL: case INTEL_FAM6_XEON_PHI_KNM: diff --git a/arch/x86/kernel/cpu/mce/internal.h b/arch/x86/kernel/cpu/mce/internal.h index 43031db429d2..842b273bce31 100644 --- a/arch/x86/kernel/cpu/mce/internal.h +++ b/arch/x86/kernel/cpu/mce/internal.h @@ -45,11 +45,17 @@ unsigned long cmci_intel_adjust_timer(unsigned long interval); bool mce_intel_cmci_poll(void); void mce_intel_hcpu_update(unsigned long cpu); void cmci_disable_bank(int bank); +void intel_init_cmci(void); +void intel_init_lmce(void); +void intel_clear_lmce(void); #else # define cmci_intel_adjust_timer mce_adjust_timer_default static inline bool mce_intel_cmci_poll(void) { return false; } static inline void mce_intel_hcpu_update(unsigned long cpu) { } static inline void cmci_disable_bank(int bank) { } +static inline void intel_init_cmci(void) { } +static inline void intel_init_lmce(void) { } +static inline void intel_clear_lmce(void) { } #endif void mce_timer_kick(unsigned long interval); diff --git a/arch/x86/kernel/cpu/mce/therm_throt.c b/arch/x86/kernel/cpu/mce/therm_throt.c index 6e2becf547c5..d01e0da0163a 100644 --- a/arch/x86/kernel/cpu/mce/therm_throt.c +++ b/arch/x86/kernel/cpu/mce/therm_throt.c @@ -40,15 +40,58 @@ #define THERMAL_THROTTLING_EVENT 0 #define POWER_LIMIT_EVENT 1 -/* - * Current thermal event state: +/** + * struct _thermal_state - Represent the current thermal event state + * @next_check: Stores the next timestamp, when it is allowed + * to log the next warning message. + * @last_interrupt_time: Stores the timestamp for the last threshold + * high event. + * @therm_work: Delayed workqueue structure + * @count: Stores the current running count for thermal + * or power threshold interrupts. + * @last_count: Stores the previous running count for thermal + * or power threshold interrupts. + * @max_time_ms: This shows the maximum amount of time CPU was + * in throttled state for a single thermal + * threshold high to low state. + * @total_time_ms: This is a cumulative time during which CPU was + * in the throttled state. + * @rate_control_active: Set when a throttling message is logged. + * This is used for the purpose of rate-control. + * @new_event: Stores the last high/low status of the + * THERM_STATUS_PROCHOT or + * THERM_STATUS_POWER_LIMIT. + * @level: Stores whether this _thermal_state instance is + * for a CORE level or for PACKAGE level. + * @sample_index: Index for storing the next sample in the buffer + * temp_samples[]. + * @sample_count: Total number of samples collected in the buffer + * temp_samples[]. 
+ * @average: The last moving average of temperature samples + * @baseline_temp: Temperature at which thermal threshold high + * interrupt was generated. + * @temp_samples: Storage for temperature samples to calculate + * moving average. + * + * This structure is used to represent data related to thermal state for a CPU. + * There is a separate storage for core and package level for each CPU. */ struct _thermal_state { - bool new_event; - int event; u64 next_check; + u64 last_interrupt_time; + struct delayed_work therm_work; unsigned long count; unsigned long last_count; + unsigned long max_time_ms; + unsigned long total_time_ms; + bool rate_control_active; + bool new_event; + u8 level; + u8 sample_index; + u8 sample_count; + u8 average; + u8 baseline_temp; + u8 temp_samples[3]; }; struct thermal_state { @@ -121,8 +164,22 @@ define_therm_throt_device_one_ro(package_throttle_count); define_therm_throt_device_show_func(package_power_limit, count); define_therm_throt_device_one_ro(package_power_limit_count); +define_therm_throt_device_show_func(core_throttle, max_time_ms); +define_therm_throt_device_one_ro(core_throttle_max_time_ms); + +define_therm_throt_device_show_func(package_throttle, max_time_ms); +define_therm_throt_device_one_ro(package_throttle_max_time_ms); + +define_therm_throt_device_show_func(core_throttle, total_time_ms); +define_therm_throt_device_one_ro(core_throttle_total_time_ms); + +define_therm_throt_device_show_func(package_throttle, total_time_ms); +define_therm_throt_device_one_ro(package_throttle_total_time_ms); + static struct attribute *thermal_throttle_attrs[] = { &dev_attr_core_throttle_count.attr, + &dev_attr_core_throttle_max_time_ms.attr, + &dev_attr_core_throttle_total_time_ms.attr, NULL }; @@ -135,6 +192,105 @@ static const struct attribute_group thermal_attr_group = { #define CORE_LEVEL 0 #define PACKAGE_LEVEL 1 +#define THERM_THROT_POLL_INTERVAL HZ +#define THERM_STATUS_PROCHOT_LOG BIT(1) + +static void clear_therm_status_log(int level) +{ + int msr; + u64 msr_val; + + if (level == CORE_LEVEL) + msr = MSR_IA32_THERM_STATUS; + else + msr = MSR_IA32_PACKAGE_THERM_STATUS; + + rdmsrl(msr, msr_val); + wrmsrl(msr, msr_val & ~THERM_STATUS_PROCHOT_LOG); +} + +static void get_therm_status(int level, bool *proc_hot, u8 *temp) +{ + int msr; + u64 msr_val; + + if (level == CORE_LEVEL) + msr = MSR_IA32_THERM_STATUS; + else + msr = MSR_IA32_PACKAGE_THERM_STATUS; + + rdmsrl(msr, msr_val); + if (msr_val & THERM_STATUS_PROCHOT_LOG) + *proc_hot = true; + else + *proc_hot = false; + + *temp = (msr_val >> 16) & 0x7F; +} + +static void throttle_active_work(struct work_struct *work) +{ + struct _thermal_state *state = container_of(to_delayed_work(work), + struct _thermal_state, therm_work); + unsigned int i, avg, this_cpu = smp_processor_id(); + u64 now = get_jiffies_64(); + bool hot; + u8 temp; + + get_therm_status(state->level, &hot, &temp); + /* temperature value is offset from the max so lesser means hotter */ + if (!hot && temp > state->baseline_temp) { + if (state->rate_control_active) + pr_info("CPU%d: %s temperature/speed normal (total events = %lu)\n", + this_cpu, + state->level == CORE_LEVEL ? 
"Core" : "Package", + state->count); + + state->rate_control_active = false; + return; + } + + if (time_before64(now, state->next_check) && + state->rate_control_active) + goto re_arm; + + state->next_check = now + CHECK_INTERVAL; + + if (state->count != state->last_count) { + /* There was one new thermal interrupt */ + state->last_count = state->count; + state->average = 0; + state->sample_count = 0; + state->sample_index = 0; + } + + state->temp_samples[state->sample_index] = temp; + state->sample_count++; + state->sample_index = (state->sample_index + 1) % ARRAY_SIZE(state->temp_samples); + if (state->sample_count < ARRAY_SIZE(state->temp_samples)) + goto re_arm; + + avg = 0; + for (i = 0; i < ARRAY_SIZE(state->temp_samples); ++i) + avg += state->temp_samples[i]; + + avg /= ARRAY_SIZE(state->temp_samples); + + if (state->average > avg) { + pr_warn("CPU%d: %s temperature is above threshold, cpu clock is throttled (total events = %lu)\n", + this_cpu, + state->level == CORE_LEVEL ? "Core" : "Package", + state->count); + state->rate_control_active = true; + } + + state->average = avg; + +re_arm: + clear_therm_status_log(state->level); + schedule_delayed_work_on(this_cpu, &state->therm_work, THERM_THROT_POLL_INTERVAL); +} + /*** * therm_throt_process - Process thermal throttling event from interrupt * @curr: Whether the condition is current or not (boolean), since the @@ -178,27 +334,33 @@ static void therm_throt_process(bool new_event, int event, int level) if (new_event) state->count++; - if (time_before64(now, state->next_check) && - state->count != state->last_count) + if (event != THERMAL_THROTTLING_EVENT) return; - state->next_check = now + CHECK_INTERVAL; - state->last_count = state->count; + if (new_event && !state->last_interrupt_time) { + bool hot; + u8 temp; + + get_therm_status(state->level, &hot, &temp); + /* + * Ignore short temperature spike as the system is not close + * to PROCHOT. 10C offset is large enough to ignore. It is + * already dropped from the high threshold temperature. + */ + if (temp > 10) + return; - /* if we just entered the thermal event */ - if (new_event) { - if (event == THERMAL_THROTTLING_EVENT) - pr_crit("CPU%d: %s temperature above threshold, cpu clock throttled (total events = %lu)\n", - this_cpu, - level == CORE_LEVEL ? "Core" : "Package", - state->count); - return; - } - if (old_event) { - if (event == THERMAL_THROTTLING_EVENT) - pr_info("CPU%d: %s temperature/speed normal\n", this_cpu, - level == CORE_LEVEL ? 
"Core" : "Package"); - return; + state->baseline_temp = temp; + state->last_interrupt_time = now; + schedule_delayed_work_on(this_cpu, &state->therm_work, THERM_THROT_POLL_INTERVAL); + } else if (old_event && state->last_interrupt_time) { + unsigned long throttle_time; + + throttle_time = jiffies_delta_to_msecs(now - state->last_interrupt_time); + if (throttle_time > state->max_time_ms) + state->max_time_ms = throttle_time; + state->total_time_ms += throttle_time; + state->last_interrupt_time = 0; } } @@ -244,20 +406,47 @@ static int thermal_throttle_add_dev(struct device *dev, unsigned int cpu) if (err) return err; - if (cpu_has(c, X86_FEATURE_PLN) && int_pln_enable) + if (cpu_has(c, X86_FEATURE_PLN) && int_pln_enable) { err = sysfs_add_file_to_group(&dev->kobj, &dev_attr_core_power_limit_count.attr, thermal_attr_group.name); + if (err) + goto del_group; + } + if (cpu_has(c, X86_FEATURE_PTS)) { err = sysfs_add_file_to_group(&dev->kobj, &dev_attr_package_throttle_count.attr, thermal_attr_group.name); - if (cpu_has(c, X86_FEATURE_PLN) && int_pln_enable) + if (err) + goto del_group; + + err = sysfs_add_file_to_group(&dev->kobj, + &dev_attr_package_throttle_max_time_ms.attr, + thermal_attr_group.name); + if (err) + goto del_group; + + err = sysfs_add_file_to_group(&dev->kobj, + &dev_attr_package_throttle_total_time_ms.attr, + thermal_attr_group.name); + if (err) + goto del_group; + + if (cpu_has(c, X86_FEATURE_PLN) && int_pln_enable) { err = sysfs_add_file_to_group(&dev->kobj, &dev_attr_package_power_limit_count.attr, thermal_attr_group.name); + if (err) + goto del_group; + } } + return 0; + +del_group: + sysfs_remove_group(&dev->kobj, &thermal_attr_group); + return err; } @@ -269,15 +458,29 @@ static void thermal_throttle_remove_dev(struct device *dev) /* Get notified when a cpu comes on/off. Be hotplug friendly. 
*/ static int thermal_throttle_online(unsigned int cpu) { + struct thermal_state *state = &per_cpu(thermal_state, cpu); struct device *dev = get_cpu_device(cpu); + state->package_throttle.level = PACKAGE_LEVEL; + state->core_throttle.level = CORE_LEVEL; + + INIT_DELAYED_WORK(&state->package_throttle.therm_work, throttle_active_work); + INIT_DELAYED_WORK(&state->core_throttle.therm_work, throttle_active_work); + return thermal_throttle_add_dev(dev, cpu); } static int thermal_throttle_offline(unsigned int cpu) { + struct thermal_state *state = &per_cpu(thermal_state, cpu); struct device *dev = get_cpu_device(cpu); + cancel_delayed_work(&state->package_throttle.therm_work); + cancel_delayed_work(&state->core_throttle.therm_work); + + state->package_throttle.rate_control_active = false; + state->core_throttle.rate_control_active = false; + thermal_throttle_remove_dev(dev); return 0; } diff --git a/arch/x86/kernel/cpu/microcode/amd.c b/arch/x86/kernel/cpu/microcode/amd.c index a0e52bd00ecc..3f6b137ef4e6 100644 --- a/arch/x86/kernel/cpu/microcode/amd.c +++ b/arch/x86/kernel/cpu/microcode/amd.c @@ -567,7 +567,7 @@ int __init save_microcode_in_initrd_amd(unsigned int cpuid_1_eax) void reload_ucode_amd(void) { struct microcode_amd *mc; - u32 rev, dummy; + u32 rev, dummy __always_unused; mc = (struct microcode_amd *)amd_ucode_patch; @@ -673,7 +673,7 @@ static enum ucode_state apply_microcode_amd(int cpu) struct ucode_cpu_info *uci; struct ucode_patch *p; enum ucode_state ret; - u32 rev, dummy; + u32 rev, dummy __always_unused; BUG_ON(raw_smp_processor_id() != cpu); diff --git a/arch/x86/kernel/cpu/microcode/core.c b/arch/x86/kernel/cpu/microcode/core.c index cb0fdcaf1415..7019d4b2df0c 100644 --- a/arch/x86/kernel/cpu/microcode/core.c +++ b/arch/x86/kernel/cpu/microcode/core.c @@ -63,11 +63,6 @@ LIST_HEAD(microcode_cache); */ static DEFINE_MUTEX(microcode_mutex); -/* - * Serialize late loading so that CPUs get updated one-by-one. - */ -static DEFINE_RAW_SPINLOCK(update_lock); - struct ucode_cpu_info ucode_cpu_info[NR_CPUS]; struct cpu_info_ctx { @@ -566,11 +561,18 @@ static int __reload_late(void *info) if (__wait_for_cpus(&late_cpus_in, NSEC_PER_SEC)) return -1; - raw_spin_lock(&update_lock); - apply_microcode_local(&err); - raw_spin_unlock(&update_lock); + /* + * On an SMT system, it suffices to load the microcode on one sibling of + * the core because the microcode engine is shared between the threads. + * Synchronization still needs to take place so that no concurrent + * loading attempts happen on multiple threads of an SMT core. See + * below. + */ + if (cpumask_first(topology_sibling_cpumask(cpu)) == cpu) + apply_microcode_local(&err); + else + goto wait_for_siblings; - /* siblings return UCODE_OK because their engine got updated already */ if (err > UCODE_NFOUND) { pr_warn("Error reloading microcode on CPU %d\n", cpu); ret = -1; @@ -578,14 +580,18 @@ static int __reload_late(void *info) ret = 1; } +wait_for_siblings: + if (__wait_for_cpus(&late_cpus_out, NSEC_PER_SEC)) + panic("Timeout during microcode update!\n"); + /* - * Increase the wait timeout to a safe value here since we're - * serializing the microcode update and that could take a while on a - * large number of CPUs. And that is fine as the *actual* timeout will - * be determined by the last CPU finished updating and thus cut short. + * At least one thread has completed update on each core. + * For others, simply call the update to make sure the + * per-cpu cpuinfo can be updated with right microcode + * revision. 
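
As an aside, the one-writer-per-core rule used in the microcode hunk above (only the first sibling of each core performs the actual update; the other threads essentially just re-read their revision afterwards) can be modelled in a few lines of plain C. The sibling masks, CPU numbering and helper name below are invented for illustration and are not part of the patch.

#include <stdbool.h>
#include <stdio.h>

/* A CPU loads microcode for its core only if it is the lowest-numbered
 * bit in that core's sibling mask, mirroring the
 * cpumask_first(topology_sibling_cpumask(cpu)) == cpu test. */
static bool is_core_leader(unsigned long sibling_mask, unsigned int cpu)
{
	return sibling_mask &&
	       (unsigned int)__builtin_ctzl(sibling_mask) == cpu;
}

int main(void)
{
	/* 4 cores x 2 threads; sibling pairs are (0,4), (1,5), (2,6), (3,7) */
	unsigned long sibling_mask_of[8] = {
		0x11, 0x22, 0x44, 0x88, 0x11, 0x22, 0x44, 0x88
	};

	for (unsigned int cpu = 0; cpu < 8; cpu++)
		printf("cpu%u: %s\n", cpu,
		       is_core_leader(sibling_mask_of[cpu], cpu) ?
		       "applies the update" : "waits, then re-reads the revision");
	return 0;
}
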
*/ - if (__wait_for_cpus(&late_cpus_out, NSEC_PER_SEC * num_online_cpus())) - panic("Timeout during microcode update!\n"); + if (cpumask_first(topology_sibling_cpumask(cpu)) != cpu) + apply_microcode_local(&err); return ret; } diff --git a/arch/x86/kernel/cpu/microcode/intel.c b/arch/x86/kernel/cpu/microcode/intel.c index ce799cfe9434..6a99535d7f37 100644 --- a/arch/x86/kernel/cpu/microcode/intel.c +++ b/arch/x86/kernel/cpu/microcode/intel.c @@ -791,6 +791,7 @@ static enum ucode_state apply_microcode_intel(int cpu) { struct ucode_cpu_info *uci = ucode_cpu_info + cpu; struct cpuinfo_x86 *c = &cpu_data(cpu); + bool bsp = c->cpu_index == boot_cpu_data.cpu_index; struct microcode_intel *mc; enum ucode_state ret; static int prev_rev; @@ -836,7 +837,7 @@ static enum ucode_state apply_microcode_intel(int cpu) return UCODE_ERROR; } - if (rev != prev_rev) { + if (bsp && rev != prev_rev) { pr_info("updated to revision 0x%x, date = %04x-%02x-%02x\n", rev, mc->hdr.date & 0xffff, @@ -852,7 +853,7 @@ out: c->microcode = rev; /* Update boot_cpu_data's revision too, if we're on the BSP: */ - if (c->cpu_index == boot_cpu_data.cpu_index) + if (bsp) boot_cpu_data.microcode = rev; return ret; diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c index 267daad8c036..c656d92cd708 100644 --- a/arch/x86/kernel/cpu/mshyperv.c +++ b/arch/x86/kernel/cpu/mshyperv.c @@ -216,6 +216,10 @@ static void __init ms_hyperv_init_platform(void) int hv_host_info_ecx; int hv_host_info_edx; +#ifdef CONFIG_PARAVIRT + pv_info.name = "Hyper-V"; +#endif + /* * Extract the features and hints */ diff --git a/arch/x86/kernel/cpu/resctrl/ctrlmondata.c b/arch/x86/kernel/cpu/resctrl/ctrlmondata.c index efbd54cc4e69..055c8613b531 100644 --- a/arch/x86/kernel/cpu/resctrl/ctrlmondata.c +++ b/arch/x86/kernel/cpu/resctrl/ctrlmondata.c @@ -522,6 +522,10 @@ int rdtgroup_mondata_show(struct seq_file *m, void *arg) int ret = 0; rdtgrp = rdtgroup_kn_lock_live(of->kn); + if (!rdtgrp) { + ret = -ENOENT; + goto out; + } md.priv = of->kn->priv; resid = md.u.rid; diff --git a/arch/x86/kernel/cpu/resctrl/rdtgroup.c b/arch/x86/kernel/cpu/resctrl/rdtgroup.c index a46dee8e78db..2e3b06d6bbc6 100644 --- a/arch/x86/kernel/cpu/resctrl/rdtgroup.c +++ b/arch/x86/kernel/cpu/resctrl/rdtgroup.c @@ -461,10 +461,8 @@ static ssize_t rdtgroup_cpus_write(struct kernfs_open_file *of, } rdtgrp = rdtgroup_kn_lock_live(of->kn); - rdt_last_cmd_clear(); if (!rdtgrp) { ret = -ENOENT; - rdt_last_cmd_puts("Directory was removed\n"); goto unlock; } @@ -2648,10 +2646,8 @@ static int mkdir_rdt_prepare(struct kernfs_node *parent_kn, int ret; prdtgrp = rdtgroup_kn_lock_live(prgrp_kn); - rdt_last_cmd_clear(); if (!prdtgrp) { ret = -ENODEV; - rdt_last_cmd_puts("Directory was removed\n"); goto out_unlock; } diff --git a/arch/x86/kernel/cpu/tsx.c b/arch/x86/kernel/cpu/tsx.c new file mode 100644 index 000000000000..3e20d322bc98 --- /dev/null +++ b/arch/x86/kernel/cpu/tsx.c @@ -0,0 +1,140 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Intel Transactional Synchronization Extensions (TSX) control. 
+ * + * Copyright (C) 2019 Intel Corporation + * + * Author: + * Pawan Gupta <pawan.kumar.gupta@linux.intel.com> + */ + +#include <linux/cpufeature.h> + +#include <asm/cmdline.h> + +#include "cpu.h" + +enum tsx_ctrl_states tsx_ctrl_state __ro_after_init = TSX_CTRL_NOT_SUPPORTED; + +void tsx_disable(void) +{ + u64 tsx; + + rdmsrl(MSR_IA32_TSX_CTRL, tsx); + + /* Force all transactions to immediately abort */ + tsx |= TSX_CTRL_RTM_DISABLE; + + /* + * Ensure TSX support is not enumerated in CPUID. + * This is visible to userspace and will ensure they + * do not waste resources trying TSX transactions that + * will always abort. + */ + tsx |= TSX_CTRL_CPUID_CLEAR; + + wrmsrl(MSR_IA32_TSX_CTRL, tsx); +} + +void tsx_enable(void) +{ + u64 tsx; + + rdmsrl(MSR_IA32_TSX_CTRL, tsx); + + /* Enable the RTM feature in the cpu */ + tsx &= ~TSX_CTRL_RTM_DISABLE; + + /* + * Ensure TSX support is enumerated in CPUID. + * This is visible to userspace and will ensure they + * can enumerate and use the TSX feature. + */ + tsx &= ~TSX_CTRL_CPUID_CLEAR; + + wrmsrl(MSR_IA32_TSX_CTRL, tsx); +} + +static bool __init tsx_ctrl_is_supported(void) +{ + u64 ia32_cap = x86_read_arch_cap_msr(); + + /* + * TSX is controlled via MSR_IA32_TSX_CTRL. However, support for this + * MSR is enumerated by ARCH_CAP_TSX_MSR bit in MSR_IA32_ARCH_CAPABILITIES. + * + * TSX control (aka MSR_IA32_TSX_CTRL) is only available after a + * microcode update on CPUs that have their MSR_IA32_ARCH_CAPABILITIES + * bit MDS_NO=1. CPUs with MDS_NO=0 are not planned to get + * MSR_IA32_TSX_CTRL support even after a microcode update. Thus, + * tsx= cmdline requests will do nothing on CPUs without + * MSR_IA32_TSX_CTRL support. + */ + return !!(ia32_cap & ARCH_CAP_TSX_CTRL_MSR); +} + +static enum tsx_ctrl_states x86_get_tsx_auto_mode(void) +{ + if (boot_cpu_has_bug(X86_BUG_TAA)) + return TSX_CTRL_DISABLE; + + return TSX_CTRL_ENABLE; +} + +void __init tsx_init(void) +{ + char arg[5] = {}; + int ret; + + if (!tsx_ctrl_is_supported()) + return; + + ret = cmdline_find_option(boot_command_line, "tsx", arg, sizeof(arg)); + if (ret >= 0) { + if (!strcmp(arg, "on")) { + tsx_ctrl_state = TSX_CTRL_ENABLE; + } else if (!strcmp(arg, "off")) { + tsx_ctrl_state = TSX_CTRL_DISABLE; + } else if (!strcmp(arg, "auto")) { + tsx_ctrl_state = x86_get_tsx_auto_mode(); + } else { + tsx_ctrl_state = TSX_CTRL_DISABLE; + pr_err("tsx: invalid option, defaulting to off\n"); + } + } else { + /* tsx= not provided */ + if (IS_ENABLED(CONFIG_X86_INTEL_TSX_MODE_AUTO)) + tsx_ctrl_state = x86_get_tsx_auto_mode(); + else if (IS_ENABLED(CONFIG_X86_INTEL_TSX_MODE_OFF)) + tsx_ctrl_state = TSX_CTRL_DISABLE; + else + tsx_ctrl_state = TSX_CTRL_ENABLE; + } + + if (tsx_ctrl_state == TSX_CTRL_DISABLE) { + tsx_disable(); + + /* + * tsx_disable() will change the state of the + * RTM CPUID bit. Clear it here since it is now + * expected to be not set. + */ + setup_clear_cpu_cap(X86_FEATURE_RTM); + } else if (tsx_ctrl_state == TSX_CTRL_ENABLE) { + + /* + * HW defaults TSX to be enabled at bootup. + * We may still need the TSX enable support + * during init for special cases like + * kexec after TSX is disabled. + */ + tsx_enable(); + + /* + * tsx_enable() will change the state of the + * RTM CPUID bit. Force it here since it is now + * expected to be set. 
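
For reference, the two MSR_IA32_TSX_CTRL bits manipulated by tsx_disable()/tsx_enable() above can be modelled in plain C. The bit positions assumed here (RTM_DISABLE in bit 0, CPUID_CLEAR in bit 1) follow Intel's description of this MSR; the helper names and the sample values are illustrative only and not part of the patch.

#include <stdint.h>
#include <stdio.h>

#define TSX_CTRL_RTM_DISABLE	(1ULL << 0)	/* force RTM transactions to abort */
#define TSX_CTRL_CPUID_CLEAR	(1ULL << 1)	/* hide RTM/HLE from CPUID */

/* What the disable path writes back: abort all transactions, hide the feature */
static uint64_t tsx_ctrl_disable(uint64_t msr)
{
	return msr | TSX_CTRL_RTM_DISABLE | TSX_CTRL_CPUID_CLEAR;
}

/* What the enable path writes back: clear both control bits again */
static uint64_t tsx_ctrl_enable(uint64_t msr)
{
	return msr & ~(TSX_CTRL_RTM_DISABLE | TSX_CTRL_CPUID_CLEAR);
}

int main(void)
{
	uint64_t msr = 0;

	msr = tsx_ctrl_disable(msr);
	printf("after disable: %#llx\n", (unsigned long long)msr);	/* 0x3 */
	msr = tsx_ctrl_enable(msr);
	printf("after enable:  %#llx\n", (unsigned long long)msr);	/* 0 */
	return 0;
}
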
+ */ + setup_force_cpu_cap(X86_FEATURE_RTM); + } +} diff --git a/arch/x86/kernel/cpu/vmware.c b/arch/x86/kernel/cpu/vmware.c index 9735139cfdf8..46d732696c1c 100644 --- a/arch/x86/kernel/cpu/vmware.c +++ b/arch/x86/kernel/cpu/vmware.c @@ -49,7 +49,7 @@ #define VMWARE_CMD_VCPU_RESERVED 31 #define VMWARE_PORT(cmd, eax, ebx, ecx, edx) \ - __asm__("inl (%%dx)" : \ + __asm__("inl (%%dx), %%eax" : \ "=a"(eax), "=c"(ecx), "=d"(edx), "=b"(ebx) : \ "a"(VMWARE_HYPERVISOR_MAGIC), \ "c"(VMWARE_CMD_##cmd), \ diff --git a/arch/x86/kernel/dumpstack_64.c b/arch/x86/kernel/dumpstack_64.c index 753b8cfe8b8a..87b97897a881 100644 --- a/arch/x86/kernel/dumpstack_64.c +++ b/arch/x86/kernel/dumpstack_64.c @@ -94,6 +94,13 @@ static bool in_exception_stack(unsigned long *stack, struct stack_info *info) BUILD_BUG_ON(N_EXCEPTION_STACKS != 6); begin = (unsigned long)__this_cpu_read(cea_exception_stacks); + /* + * Handle the case where stack trace is collected _before_ + * cea_exception_stacks had been initialized. + */ + if (!begin) + return false; + end = begin + sizeof(struct cea_exception_stacks); /* Bail if @stack is outside the exception stack area. */ if (stk < begin || stk >= end) diff --git a/arch/x86/kernel/early-quirks.c b/arch/x86/kernel/early-quirks.c index 6f6b1d04dadf..4cba91ec8049 100644 --- a/arch/x86/kernel/early-quirks.c +++ b/arch/x86/kernel/early-quirks.c @@ -710,6 +710,8 @@ static struct chipset early_qrk[] __initdata = { */ { PCI_VENDOR_ID_INTEL, 0x0f00, PCI_CLASS_BRIDGE_HOST, PCI_ANY_ID, 0, force_disable_hpet}, + { PCI_VENDOR_ID_INTEL, 0x3ec4, + PCI_CLASS_BRIDGE_HOST, PCI_ANY_ID, 0, force_disable_hpet}, { PCI_VENDOR_ID_BROADCOM, 0x4331, PCI_CLASS_NETWORK_OTHER, PCI_ANY_ID, 0, apple_airport_reset}, {} diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c index 29ffa495bd1c..206a4b6144c2 100644 --- a/arch/x86/kernel/head64.c +++ b/arch/x86/kernel/head64.c @@ -222,13 +222,31 @@ unsigned long __head __startup_64(unsigned long physaddr, * we might write invalid pmds, when the kernel is relocated * cleanup_highmap() fixes this up along with the mappings * beyond _end. + * + * Only the region occupied by the kernel image has so far + * been checked against the table of usable memory regions + * provided by the firmware, so invalidate pages outside that + * region. A page table entry that maps to a reserved area of + * memory would allow processor speculation into that area, + * and on some hardware (particularly the UV platform) even + * speculative access to some reserved areas is caught as an + * error, causing the BIOS to halt the system. 
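
A worked example of the pmd_index() arithmetic the head64 change here relies on (x86-64 values: 2 MiB per PMD entry, 512 entries per table) may help. The _text/_end addresses below are made up, and the sketch assumes a 64-bit build; it is not part of the patch.

#include <stdio.h>

#define PMD_SHIFT	21
#define PTRS_PER_PMD	512

static unsigned int pmd_index(unsigned long addr)
{
	return (addr >> PMD_SHIFT) & (PTRS_PER_PMD - 1);
}

int main(void)
{
	unsigned long text = 0xffffffff81000000UL;	/* example _text */
	unsigned long end  = 0xffffffff82e53000UL;	/* example _end  */

	/* Entries pmd_index(_text)..pmd_index(_end) keep _PAGE_PRESENT and get
	 * load_delta applied; every other entry is invalidated. */
	printf("keep entries %u..%u of %d\n",
	       pmd_index(text), pmd_index(end), PTRS_PER_PMD);	/* 8..23 of 512 */
	return 0;
}
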
*/ pmd = fixup_pointer(level2_kernel_pgt, physaddr); - for (i = 0; i < PTRS_PER_PMD; i++) { + + /* invalidate pages before the kernel image */ + for (i = 0; i < pmd_index((unsigned long)_text); i++) + pmd[i] &= ~_PAGE_PRESENT; + + /* fixup pages that are part of the kernel image */ + for (; i <= pmd_index((unsigned long)_end); i++) if (pmd[i] & _PAGE_PRESENT) pmd[i] += load_delta; - } + + /* invalidate pages after the kernel image */ + for (; i < PTRS_PER_PMD; i++) + pmd[i] &= ~_PAGE_PRESENT; /* * Fixup phys_base - remove the memory encryption mask to obtain diff --git a/arch/x86/kernel/jump_label.c b/arch/x86/kernel/jump_label.c index 044053235302..c1a8b9e71408 100644 --- a/arch/x86/kernel/jump_label.c +++ b/arch/x86/kernel/jump_label.c @@ -89,8 +89,7 @@ static void __ref __jump_label_transform(struct jump_entry *entry, return; } - text_poke_bp((void *)jump_entry_code(entry), &code, JUMP_LABEL_NOP_SIZE, - (void *)jump_entry_code(entry) + JUMP_LABEL_NOP_SIZE); + text_poke_bp((void *)jump_entry_code(entry), &code, JUMP_LABEL_NOP_SIZE, NULL); } void arch_jump_label_transform(struct jump_entry *entry, @@ -147,11 +146,9 @@ bool arch_jump_label_transform_queue(struct jump_entry *entry, } __jump_label_set_jump_code(entry, type, - (union jump_code_union *) &tp->opcode, 0); + (union jump_code_union *)&tp->text, 0); - tp->addr = entry_code; - tp->detour = entry_code + JUMP_LABEL_NOP_SIZE; - tp->len = JUMP_LABEL_NOP_SIZE; + text_poke_loc_init(tp, entry_code, NULL, JUMP_LABEL_NOP_SIZE, NULL); tp_vec_nr++; diff --git a/arch/x86/kernel/kprobes/opt.c b/arch/x86/kernel/kprobes/opt.c index b348dd506d58..8900329c28a7 100644 --- a/arch/x86/kernel/kprobes/opt.c +++ b/arch/x86/kernel/kprobes/opt.c @@ -437,8 +437,7 @@ void arch_optimize_kprobes(struct list_head *oplist) insn_buff[0] = RELATIVEJUMP_OPCODE; *(s32 *)(&insn_buff[1]) = rel; - text_poke_bp(op->kp.addr, insn_buff, RELATIVEJUMP_SIZE, - op->optinsn.insn); + text_poke_bp(op->kp.addr, insn_buff, RELATIVEJUMP_SIZE, NULL); list_del_init(&op->list); } @@ -448,12 +447,18 @@ void arch_optimize_kprobes(struct list_head *oplist) void arch_unoptimize_kprobe(struct optimized_kprobe *op) { u8 insn_buff[RELATIVEJUMP_SIZE]; + u8 emulate_buff[RELATIVEJUMP_SIZE]; /* Set int3 to first byte for kprobes */ insn_buff[0] = BREAKPOINT_INSTRUCTION; memcpy(insn_buff + 1, op->optinsn.copied_insn, RELATIVE_ADDR_SIZE); + + emulate_buff[0] = RELATIVEJUMP_OPCODE; + *(s32 *)(&emulate_buff[1]) = (s32)((long)op->optinsn.insn - + ((long)op->kp.addr + RELATIVEJUMP_SIZE)); + text_poke_bp(op->kp.addr, insn_buff, RELATIVEJUMP_SIZE, - op->optinsn.insn); + emulate_buff); } /* diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c index e820568ed4d5..32ef1ee733b7 100644 --- a/arch/x86/kernel/kvm.c +++ b/arch/x86/kernel/kvm.c @@ -33,6 +33,7 @@ #include <asm/apicdef.h> #include <asm/hypervisor.h> #include <asm/tlb.h> +#include <asm/cpuidle_haltpoll.h> static int kvmapf = 1; diff --git a/arch/x86/kernel/process.h b/arch/x86/kernel/process.h index 320ab978fb1f..1d0797b2338a 100644 --- a/arch/x86/kernel/process.h +++ b/arch/x86/kernel/process.h @@ -1,4 +1,4 @@ -// SPDX-License-Identifier: GPL-2.0 +/* SPDX-License-Identifier: GPL-2.0 */ // // Code shared between 32 and 64 bit diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c index 86663874ef04..e6d7894ad127 100644 --- a/arch/x86/kernel/setup_percpu.c +++ b/arch/x86/kernel/setup_percpu.c @@ -207,8 +207,8 @@ void __init setup_per_cpu_areas(void) pcpu_cpu_distance, pcpu_fc_alloc, pcpu_fc_free); if (rc < 0) - 
pr_warning("%s allocator failed (%d), falling back to page size\n", - pcpu_fc_names[pcpu_chosen_fc], rc); + pr_warn("%s allocator failed (%d), falling back to page size\n", + pcpu_fc_names[pcpu_chosen_fc], rc); } if (rc < 0) rc = pcpu_page_first_chunk(PERCPU_FIRST_CHUNK_RESERVE, diff --git a/arch/x86/kernel/tboot.c b/arch/x86/kernel/tboot.c index a49fe1dcb47e..4c61f0713832 100644 --- a/arch/x86/kernel/tboot.c +++ b/arch/x86/kernel/tboot.c @@ -57,7 +57,7 @@ void __init tboot_probe(void) */ if (!e820__mapped_any(boot_params.tboot_addr, boot_params.tboot_addr, E820_TYPE_RESERVED)) { - pr_warning("non-0 tboot_addr but it is not of type E820_TYPE_RESERVED\n"); + pr_warn("non-0 tboot_addr but it is not of type E820_TYPE_RESERVED\n"); return; } @@ -65,13 +65,12 @@ void __init tboot_probe(void) set_fixmap(FIX_TBOOT_BASE, boot_params.tboot_addr); tboot = (struct tboot *)fix_to_virt(FIX_TBOOT_BASE); if (memcmp(&tboot_uuid, &tboot->uuid, sizeof(tboot->uuid))) { - pr_warning("tboot at 0x%llx is invalid\n", - boot_params.tboot_addr); + pr_warn("tboot at 0x%llx is invalid\n", boot_params.tboot_addr); tboot = NULL; return; } if (tboot->version < 5) { - pr_warning("tboot version is invalid: %u\n", tboot->version); + pr_warn("tboot version is invalid: %u\n", tboot->version); tboot = NULL; return; } @@ -289,7 +288,7 @@ static int tboot_sleep(u8 sleep_state, u32 pm1a_control, u32 pm1b_control) if (sleep_state >= ACPI_S_STATE_COUNT || acpi_shutdown_map[sleep_state] == -1) { - pr_warning("unsupported sleep state 0x%x\n", sleep_state); + pr_warn("unsupported sleep state 0x%x\n", sleep_state); return -1; } @@ -302,7 +301,7 @@ static int tboot_extended_sleep(u8 sleep_state, u32 val_a, u32 val_b) if (!tboot_enabled()) return 0; - pr_warning("tboot is not able to suspend on platforms with reduced hardware sleep (ACPIv5)"); + pr_warn("tboot is not able to suspend on platforms with reduced hardware sleep (ACPIv5)"); return -ENODEV; } @@ -320,7 +319,7 @@ static int tboot_wait_for_aps(int num_aps) } if (timeout) - pr_warning("tboot wait for APs timeout\n"); + pr_warn("tboot wait for APs timeout\n"); return !(atomic_read((atomic_t *)&tboot->num_in_wfs) == num_aps); } @@ -516,7 +515,7 @@ int tboot_force_iommu(void) return 1; if (no_iommu || swiotlb || dmar_disabled) - pr_warning("Forcing Intel-IOMMU to enabled\n"); + pr_warn("Forcing Intel-IOMMU to enabled\n"); dmar_disabled = 0; #ifdef CONFIG_SWIOTLB diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c index c59454c382fd..7e322e2daaf5 100644 --- a/arch/x86/kernel/tsc.c +++ b/arch/x86/kernel/tsc.c @@ -1505,6 +1505,9 @@ void __init tsc_init(void) return; } + if (tsc_clocksource_reliable || no_tsc_watchdog) + clocksource_tsc_early.flags &= ~CLOCK_SOURCE_MUST_VERIFY; + clocksource_register_khz(&clocksource_tsc_early, tsc_khz); detect_art(); } diff --git a/arch/x86/kernel/tsc_sync.c b/arch/x86/kernel/tsc_sync.c index ec534f978867..b8acf639abd1 100644 --- a/arch/x86/kernel/tsc_sync.c +++ b/arch/x86/kernel/tsc_sync.c @@ -364,12 +364,12 @@ retry: /* Force it to 0 if random warps brought us here */ atomic_set(&test_runs, 0); - pr_warning("TSC synchronization [CPU#%d -> CPU#%d]:\n", + pr_warn("TSC synchronization [CPU#%d -> CPU#%d]:\n", smp_processor_id(), cpu); - pr_warning("Measured %Ld cycles TSC warp between CPUs, " - "turning off TSC clock.\n", max_warp); + pr_warn("Measured %Ld cycles TSC warp between CPUs, " + "turning off TSC clock.\n", max_warp); if (random_warps) - pr_warning("TSC warped randomly between CPUs\n"); + pr_warn("TSC warped randomly between CPUs\n"); 
mark_tsc_unstable("check_tsc_sync_source failed"); } diff --git a/arch/x86/kernel/umip.c b/arch/x86/kernel/umip.c index 548fefed71ee..b4a304893189 100644 --- a/arch/x86/kernel/umip.c +++ b/arch/x86/kernel/umip.c @@ -91,7 +91,7 @@ const char * const umip_insns[5] = { #define umip_pr_err(regs, fmt, ...) \ umip_printk(regs, KERN_ERR, fmt, ##__VA_ARGS__) -#define umip_pr_warning(regs, fmt, ...) \ +#define umip_pr_warn(regs, fmt, ...) \ umip_printk(regs, KERN_WARNING, fmt, ##__VA_ARGS__) /** @@ -380,14 +380,14 @@ bool fixup_umip_exception(struct pt_regs *regs) if (umip_inst < 0) return false; - umip_pr_warning(regs, "%s instruction cannot be used by applications.\n", + umip_pr_warn(regs, "%s instruction cannot be used by applications.\n", umip_insns[umip_inst]); /* Do not emulate (spoof) SLDT or STR. */ if (umip_inst == UMIP_INST_STR || umip_inst == UMIP_INST_SLDT) return false; - umip_pr_warning(regs, "For now, expensive software emulation returns the result.\n"); + umip_pr_warn(regs, "For now, expensive software emulation returns the result.\n"); if (emulate_umip_insn(&insn, umip_inst, dummy_data, &dummy_data_size, user_64bit_mode(regs))) diff --git a/arch/x86/kvm/Makefile b/arch/x86/kvm/Makefile index 31ecf7a76d5a..b19ef421084d 100644 --- a/arch/x86/kvm/Makefile +++ b/arch/x86/kvm/Makefile @@ -8,9 +8,9 @@ kvm-y += $(KVM)/kvm_main.o $(KVM)/coalesced_mmio.o \ $(KVM)/eventfd.o $(KVM)/irqchip.o $(KVM)/vfio.o kvm-$(CONFIG_KVM_ASYNC_PF) += $(KVM)/async_pf.o -kvm-y += x86.o mmu.o emulate.o i8259.o irq.o lapic.o \ +kvm-y += x86.o emulate.o i8259.o irq.o lapic.o \ i8254.o ioapic.o irq_comm.o cpuid.o pmu.o mtrr.o \ - hyperv.o page_track.o debugfs.o + hyperv.o debugfs.o mmu/mmu.o mmu/page_track.o kvm-intel-y += vmx/vmx.o vmx/vmenter.o vmx/pmu_intel.o vmx/vmcs12.o vmx/evmcs.o vmx/nested.o kvm-amd-y += svm.o pmu_amd.o diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c index 63316036f85a..c0aa07487eb8 100644 --- a/arch/x86/kvm/cpuid.c +++ b/arch/x86/kvm/cpuid.c @@ -363,7 +363,7 @@ static inline void do_cpuid_7_mask(struct kvm_cpuid_entry2 *entry, int index) /* cpuid 7.0.ecx*/ const u32 kvm_cpuid_7_0_ecx_x86_features = - F(AVX512VBMI) | F(LA57) | F(PKU) | 0 /*OSPKE*/ | + F(AVX512VBMI) | F(LA57) | F(PKU) | 0 /*OSPKE*/ | F(RDPID) | F(AVX512_VPOPCNTDQ) | F(UMIP) | F(AVX512_VBMI2) | F(GFNI) | F(VAES) | F(VPCLMULQDQ) | F(AVX512_VNNI) | F(AVX512_BITALG) | F(CLDEMOTE) | F(MOVDIRI) | F(MOVDIR64B) | 0 /*WAITPKG*/; @@ -485,6 +485,7 @@ static inline int __do_cpuid_func(struct kvm_cpuid_entry2 *entry, u32 function, /* cpuid 0x80000008.ebx */ const u32 kvm_cpuid_8000_0008_ebx_x86_features = + F(CLZERO) | F(XSAVEERPTR) | F(WBNOINVD) | F(AMD_IBPB) | F(AMD_IBRS) | F(AMD_SSBD) | F(VIRT_SSBD) | F(AMD_SSB_NO) | F(AMD_STIBP) | F(AMD_STIBP_ALWAYS_ON); @@ -618,16 +619,20 @@ static inline int __do_cpuid_func(struct kvm_cpuid_entry2 *entry, u32 function, */ case 0x1f: case 0xb: { - int i, level_type; + int i; - /* read more entries until level_type is zero */ - for (i = 1; ; ++i) { + /* + * We filled in entry[0] for CPUID(EAX=<function>, + * ECX=00H) above. If its level type (ECX[15:8]) is + * zero, then the leaf is unimplemented, and we're + * done. Otherwise, continue to populate entries + * until the level type (ECX[15:8]) of the previously + * added entry is zero. 
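
The termination rule described above (stop once the level type in ECX[15:8] of the entry just added reads zero) is the same rule a user-space enumerator would follow. A small sketch using GCC's <cpuid.h>, purely for illustration and assuming an x86 host:

#include <cpuid.h>
#include <stdio.h>

int main(void)
{
	unsigned int eax, ebx, ecx, edx, i;

	for (i = 0; __get_cpuid_count(0xb, i, &eax, &ebx, &ecx, &edx); i++) {
		printf("subleaf %u: level type %u, x2APIC id 0x%x\n",
		       i, (ecx >> 8) & 0xff, edx);
		if (!(ecx & 0xff00))	/* level type 0: this was the last level */
			break;
	}
	return 0;
}
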
+ */ + for (i = 1; entry[i - 1].ecx & 0xff00; ++i) { if (*nent >= maxnent) goto out; - level_type = entry[i - 1].ecx & 0xff00; - if (!level_type) - break; do_host_cpuid(&entry[i], function, i); ++*nent; } @@ -811,8 +816,6 @@ static int do_cpuid_func(struct kvm_cpuid_entry2 *entry, u32 func, return __do_cpuid_func(entry, func, nent, maxnent); } -#undef F - struct kvm_cpuid_param { u32 func; bool (*qualifier)(const struct kvm_cpuid_param *param); @@ -969,53 +972,72 @@ struct kvm_cpuid_entry2 *kvm_find_cpuid_entry(struct kvm_vcpu *vcpu, EXPORT_SYMBOL_GPL(kvm_find_cpuid_entry); /* - * If no match is found, check whether we exceed the vCPU's limit - * and return the content of the highest valid _standard_ leaf instead. - * This is to satisfy the CPUID specification. + * If the basic or extended CPUID leaf requested is higher than the + * maximum supported basic or extended leaf, respectively, then it is + * out of range. */ -static struct kvm_cpuid_entry2* check_cpuid_limit(struct kvm_vcpu *vcpu, - u32 function, u32 index) +static bool cpuid_function_in_range(struct kvm_vcpu *vcpu, u32 function) { - struct kvm_cpuid_entry2 *maxlevel; - - maxlevel = kvm_find_cpuid_entry(vcpu, function & 0x80000000, 0); - if (!maxlevel || maxlevel->eax >= function) - return NULL; - if (function & 0x80000000) { - maxlevel = kvm_find_cpuid_entry(vcpu, 0, 0); - if (!maxlevel) - return NULL; - } - return kvm_find_cpuid_entry(vcpu, maxlevel->eax, index); + struct kvm_cpuid_entry2 *max; + + max = kvm_find_cpuid_entry(vcpu, function & 0x80000000, 0); + return max && function <= max->eax; } bool kvm_cpuid(struct kvm_vcpu *vcpu, u32 *eax, u32 *ebx, u32 *ecx, u32 *edx, bool check_limit) { u32 function = *eax, index = *ecx; - struct kvm_cpuid_entry2 *best; - bool entry_found = true; - - best = kvm_find_cpuid_entry(vcpu, function, index); - - if (!best) { - entry_found = false; - if (!check_limit) - goto out; + struct kvm_cpuid_entry2 *entry; + struct kvm_cpuid_entry2 *max; + bool found; - best = check_cpuid_limit(vcpu, function, index); + entry = kvm_find_cpuid_entry(vcpu, function, index); + found = entry; + /* + * Intel CPUID semantics treats any query for an out-of-range + * leaf as if the highest basic leaf (i.e. CPUID.0H:EAX) were + * requested. AMD CPUID semantics returns all zeroes for any + * undefined leaf, whether or not the leaf is in range. + */ + if (!entry && check_limit && !guest_cpuid_is_amd(vcpu) && + !cpuid_function_in_range(vcpu, function)) { + max = kvm_find_cpuid_entry(vcpu, 0, 0); + if (max) { + function = max->eax; + entry = kvm_find_cpuid_entry(vcpu, function, index); + } } - -out: - if (best) { - *eax = best->eax; - *ebx = best->ebx; - *ecx = best->ecx; - *edx = best->edx; - } else + if (entry) { + *eax = entry->eax; + *ebx = entry->ebx; + *ecx = entry->ecx; + *edx = entry->edx; + if (function == 7 && index == 0) { + u64 data; + if (!__kvm_get_msr(vcpu, MSR_IA32_TSX_CTRL, &data, true) && + (data & TSX_CTRL_CPUID_CLEAR)) + *ebx &= ~(F(RTM) | F(HLE)); + } + } else { *eax = *ebx = *ecx = *edx = 0; - trace_kvm_cpuid(function, *eax, *ebx, *ecx, *edx, entry_found); - return entry_found; + /* + * When leaf 0BH or 1FH is defined, CL is pass-through + * and EDX is always the x2APIC ID, even for undefined + * subleaves. Index 1 will exist iff the leaf is + * implemented, so we pass through CL iff leaf 1 + * exists. EDX can be copied from any existing index. 
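
To make the out-of-range handling in this kvm_cpuid() hunk concrete: a leaf is "in range" when it does not exceed the maximum leaf of its own class (basic or 0x8000xxxx), and only Intel semantics then fall back to the highest basic leaf. The minimal model below uses invented limits and a hypothetical helper name; it is not part of the patch.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Same shape as cpuid_function_in_range(): compare the requested leaf
 * against the maximum leaf of its own class (basic vs. 0x8000xxxx). */
static bool leaf_in_range(uint32_t function, uint32_t max_basic, uint32_t max_ext)
{
	uint32_t class_max = (function & 0x80000000) ? max_ext : max_basic;

	return function <= class_max;
}

int main(void)
{
	/* invented limits: CPUID.0H:EAX = 0x16, CPUID.80000000H:EAX = 0x80000008 */
	printf("%d %d %d\n",
	       leaf_in_range(0x00000007, 0x16, 0x80000008),	/* 1: valid basic leaf */
	       leaf_in_range(0x00000040, 0x16, 0x80000008),	/* 0: above basic max  */
	       leaf_in_range(0x8000001f, 0x16, 0x80000008));	/* 0: above ext. max   */
	return 0;
}
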
+ */ + if (function == 0xb || function == 0x1f) { + entry = kvm_find_cpuid_entry(vcpu, function, 1); + if (entry) { + *ecx = index & 0xff; + *edx = entry->edx; + } + } + } + trace_kvm_cpuid(function, *eax, *ebx, *ecx, *edx, found); + return found; } EXPORT_SYMBOL_GPL(kvm_cpuid); diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index 698efb8c3897..952d1a4f4d7e 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -2770,11 +2770,10 @@ static int em_syscall(struct x86_emulate_ctxt *ctxt) return emulate_ud(ctxt); ops->get_msr(ctxt, MSR_EFER, &efer); - setup_syscalls_segments(ctxt, &cs, &ss); - if (!(efer & EFER_SCE)) return emulate_ud(ctxt); + setup_syscalls_segments(ctxt, &cs, &ss); ops->get_msr(ctxt, MSR_STAR, &msr_data); msr_data >>= 32; cs_sel = (u16)(msr_data & 0xfffc); @@ -2838,12 +2837,11 @@ static int em_sysenter(struct x86_emulate_ctxt *ctxt) if (ctxt->mode == X86EMUL_MODE_PROT64) return X86EMUL_UNHANDLEABLE; - setup_syscalls_segments(ctxt, &cs, &ss); - ops->get_msr(ctxt, MSR_IA32_SYSENTER_CS, &msr_data); if ((msr_data & 0xfffc) == 0x0) return emulate_gp(ctxt, 0); + setup_syscalls_segments(ctxt, &cs, &ss); ctxt->eflags &= ~(X86_EFLAGS_VM | X86_EFLAGS_IF); cs_sel = (u16)msr_data & ~SEGMENT_RPL_MASK; ss_sel = cs_sel + 8; diff --git a/arch/x86/kvm/ioapic.c b/arch/x86/kvm/ioapic.c index d859ae8890d0..9fd2dd89a1c5 100644 --- a/arch/x86/kvm/ioapic.c +++ b/arch/x86/kvm/ioapic.c @@ -271,8 +271,9 @@ static void ioapic_write_indirect(struct kvm_ioapic *ioapic, u32 val) { unsigned index; bool mask_before, mask_after; - int old_remote_irr, old_delivery_status; union kvm_ioapic_redirect_entry *e; + unsigned long vcpu_bitmap; + int old_remote_irr, old_delivery_status, old_dest_id, old_dest_mode; switch (ioapic->ioregsel) { case IOAPIC_REG_VERSION: @@ -296,6 +297,8 @@ static void ioapic_write_indirect(struct kvm_ioapic *ioapic, u32 val) /* Preserve read-only fields */ old_remote_irr = e->fields.remote_irr; old_delivery_status = e->fields.delivery_status; + old_dest_id = e->fields.dest_id; + old_dest_mode = e->fields.dest_mode; if (ioapic->ioregsel & 1) { e->bits &= 0xffffffff; e->bits |= (u64) val << 32; @@ -321,7 +324,34 @@ static void ioapic_write_indirect(struct kvm_ioapic *ioapic, u32 val) if (e->fields.trig_mode == IOAPIC_LEVEL_TRIG && ioapic->irr & (1 << index)) ioapic_service(ioapic, index, false); - kvm_make_scan_ioapic_request(ioapic->kvm); + if (e->fields.delivery_mode == APIC_DM_FIXED) { + struct kvm_lapic_irq irq; + + irq.shorthand = 0; + irq.vector = e->fields.vector; + irq.delivery_mode = e->fields.delivery_mode << 8; + irq.dest_id = e->fields.dest_id; + irq.dest_mode = e->fields.dest_mode; + bitmap_zero(&vcpu_bitmap, 16); + kvm_bitmap_or_dest_vcpus(ioapic->kvm, &irq, + &vcpu_bitmap); + if (old_dest_mode != e->fields.dest_mode || + old_dest_id != e->fields.dest_id) { + /* + * Update vcpu_bitmap with vcpus specified in + * the previous request as well. This is done to + * keep ioapic_handled_vectors synchronized. 
+ */ + irq.dest_id = old_dest_id; + irq.dest_mode = old_dest_mode; + kvm_bitmap_or_dest_vcpus(ioapic->kvm, &irq, + &vcpu_bitmap); + } + kvm_make_scan_ioapic_request_mask(ioapic->kvm, + &vcpu_bitmap); + } else { + kvm_make_scan_ioapic_request(ioapic->kvm); + } break; } } diff --git a/arch/x86/kvm/kvm_cache_regs.h b/arch/x86/kvm/kvm_cache_regs.h index 1cc6c47dc77e..58767020de41 100644 --- a/arch/x86/kvm/kvm_cache_regs.h +++ b/arch/x86/kvm/kvm_cache_regs.h @@ -37,22 +37,50 @@ BUILD_KVM_GPR_ACCESSORS(r14, R14) BUILD_KVM_GPR_ACCESSORS(r15, R15) #endif -static inline unsigned long kvm_register_read(struct kvm_vcpu *vcpu, - enum kvm_reg reg) +static inline bool kvm_register_is_available(struct kvm_vcpu *vcpu, + enum kvm_reg reg) { - if (!test_bit(reg, (unsigned long *)&vcpu->arch.regs_avail)) + return test_bit(reg, (unsigned long *)&vcpu->arch.regs_avail); +} + +static inline bool kvm_register_is_dirty(struct kvm_vcpu *vcpu, + enum kvm_reg reg) +{ + return test_bit(reg, (unsigned long *)&vcpu->arch.regs_dirty); +} + +static inline void kvm_register_mark_available(struct kvm_vcpu *vcpu, + enum kvm_reg reg) +{ + __set_bit(reg, (unsigned long *)&vcpu->arch.regs_avail); +} + +static inline void kvm_register_mark_dirty(struct kvm_vcpu *vcpu, + enum kvm_reg reg) +{ + __set_bit(reg, (unsigned long *)&vcpu->arch.regs_avail); + __set_bit(reg, (unsigned long *)&vcpu->arch.regs_dirty); +} + +static inline unsigned long kvm_register_read(struct kvm_vcpu *vcpu, int reg) +{ + if (WARN_ON_ONCE((unsigned int)reg >= NR_VCPU_REGS)) + return 0; + + if (!kvm_register_is_available(vcpu, reg)) kvm_x86_ops->cache_reg(vcpu, reg); return vcpu->arch.regs[reg]; } -static inline void kvm_register_write(struct kvm_vcpu *vcpu, - enum kvm_reg reg, +static inline void kvm_register_write(struct kvm_vcpu *vcpu, int reg, unsigned long val) { + if (WARN_ON_ONCE((unsigned int)reg >= NR_VCPU_REGS)) + return; + vcpu->arch.regs[reg] = val; - __set_bit(reg, (unsigned long *)&vcpu->arch.regs_dirty); - __set_bit(reg, (unsigned long *)&vcpu->arch.regs_avail); + kvm_register_mark_dirty(vcpu, reg); } static inline unsigned long kvm_rip_read(struct kvm_vcpu *vcpu) @@ -79,9 +107,8 @@ static inline u64 kvm_pdptr_read(struct kvm_vcpu *vcpu, int index) { might_sleep(); /* on svm */ - if (!test_bit(VCPU_EXREG_PDPTR, - (unsigned long *)&vcpu->arch.regs_avail)) - kvm_x86_ops->cache_reg(vcpu, (enum kvm_reg)VCPU_EXREG_PDPTR); + if (!kvm_register_is_available(vcpu, VCPU_EXREG_PDPTR)) + kvm_x86_ops->cache_reg(vcpu, VCPU_EXREG_PDPTR); return vcpu->arch.walk_mmu->pdptrs[index]; } @@ -109,8 +136,8 @@ static inline ulong kvm_read_cr4_bits(struct kvm_vcpu *vcpu, ulong mask) static inline ulong kvm_read_cr3(struct kvm_vcpu *vcpu) { - if (!test_bit(VCPU_EXREG_CR3, (ulong *)&vcpu->arch.regs_avail)) - kvm_x86_ops->decache_cr3(vcpu); + if (!kvm_register_is_available(vcpu, VCPU_EXREG_CR3)) + kvm_x86_ops->cache_reg(vcpu, VCPU_EXREG_CR3); return vcpu->arch.cr3; } diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index 3a3a6854dcca..cf9177b4a07f 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -66,9 +66,10 @@ #define X2APIC_BROADCAST 0xFFFFFFFFul static bool lapic_timer_advance_dynamic __read_mostly; -#define LAPIC_TIMER_ADVANCE_ADJUST_MIN 100 -#define LAPIC_TIMER_ADVANCE_ADJUST_MAX 5000 -#define LAPIC_TIMER_ADVANCE_ADJUST_INIT 1000 +#define LAPIC_TIMER_ADVANCE_ADJUST_MIN 100 /* clock cycles */ +#define LAPIC_TIMER_ADVANCE_ADJUST_MAX 10000 /* clock cycles */ +#define LAPIC_TIMER_ADVANCE_NS_INIT 1000 +#define LAPIC_TIMER_ADVANCE_NS_MAX 5000 /* 
step-by-step approximation to mitigate fluctuation */ #define LAPIC_TIMER_ADVANCE_ADJUST_STEP 8 @@ -110,11 +111,6 @@ static inline int apic_enabled(struct kvm_lapic *apic) (LVT_MASK | APIC_MODE_MASK | APIC_INPUT_POLARITY | \ APIC_LVT_REMOTE_IRR | APIC_LVT_LEVEL_TRIGGER) -static inline u8 kvm_xapic_id(struct kvm_lapic *apic) -{ - return kvm_lapic_get_reg(apic, APIC_ID) >> 24; -} - static inline u32 kvm_x2apic_id(struct kvm_lapic *apic) { return apic->vcpu->vcpu_id; @@ -561,60 +557,53 @@ int kvm_apic_set_irq(struct kvm_vcpu *vcpu, struct kvm_lapic_irq *irq, irq->level, irq->trig_mode, dest_map); } +static int __pv_send_ipi(unsigned long *ipi_bitmap, struct kvm_apic_map *map, + struct kvm_lapic_irq *irq, u32 min) +{ + int i, count = 0; + struct kvm_vcpu *vcpu; + + if (min > map->max_apic_id) + return 0; + + for_each_set_bit(i, ipi_bitmap, + min((u32)BITS_PER_LONG, (map->max_apic_id - min + 1))) { + if (map->phys_map[min + i]) { + vcpu = map->phys_map[min + i]->vcpu; + count += kvm_apic_set_irq(vcpu, irq, NULL); + } + } + + return count; +} + int kvm_pv_send_ipi(struct kvm *kvm, unsigned long ipi_bitmap_low, unsigned long ipi_bitmap_high, u32 min, unsigned long icr, int op_64_bit) { - int i; struct kvm_apic_map *map; - struct kvm_vcpu *vcpu; struct kvm_lapic_irq irq = {0}; int cluster_size = op_64_bit ? 64 : 32; - int count = 0; + int count; + + if (icr & (APIC_DEST_MASK | APIC_SHORT_MASK)) + return -KVM_EINVAL; irq.vector = icr & APIC_VECTOR_MASK; irq.delivery_mode = icr & APIC_MODE_MASK; irq.level = (icr & APIC_INT_ASSERT) != 0; irq.trig_mode = icr & APIC_INT_LEVELTRIG; - if (icr & APIC_DEST_MASK) - return -KVM_EINVAL; - if (icr & APIC_SHORT_MASK) - return -KVM_EINVAL; - rcu_read_lock(); map = rcu_dereference(kvm->arch.apic_map); - if (unlikely(!map)) { - count = -EOPNOTSUPP; - goto out; - } - - if (min > map->max_apic_id) - goto out; - /* Bits above cluster_size are masked in the caller. */ - for_each_set_bit(i, &ipi_bitmap_low, - min((u32)BITS_PER_LONG, (map->max_apic_id - min + 1))) { - if (map->phys_map[min + i]) { - vcpu = map->phys_map[min + i]->vcpu; - count += kvm_apic_set_irq(vcpu, &irq, NULL); - } - } - - min += cluster_size; - - if (min > map->max_apic_id) - goto out; - - for_each_set_bit(i, &ipi_bitmap_high, - min((u32)BITS_PER_LONG, (map->max_apic_id - min + 1))) { - if (map->phys_map[min + i]) { - vcpu = map->phys_map[min + i]->vcpu; - count += kvm_apic_set_irq(vcpu, &irq, NULL); - } + count = -EOPNOTSUPP; + if (likely(map)) { + count = __pv_send_ipi(&ipi_bitmap_low, map, &irq, min); + min += cluster_size; + count += __pv_send_ipi(&ipi_bitmap_high, map, &irq, min); } -out: rcu_read_unlock(); return count; } @@ -1128,6 +1117,50 @@ static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode, return result; } +/* + * This routine identifies the destination vcpus mask meant to receive the + * IOAPIC interrupts. It either uses kvm_apic_map_get_dest_lapic() to find + * out the destination vcpus array and set the bitmap or it traverses to + * each available vcpu to identify the same. 
+ */ +void kvm_bitmap_or_dest_vcpus(struct kvm *kvm, struct kvm_lapic_irq *irq, + unsigned long *vcpu_bitmap) +{ + struct kvm_lapic **dest_vcpu = NULL; + struct kvm_lapic *src = NULL; + struct kvm_apic_map *map; + struct kvm_vcpu *vcpu; + unsigned long bitmap; + int i, vcpu_idx; + bool ret; + + rcu_read_lock(); + map = rcu_dereference(kvm->arch.apic_map); + + ret = kvm_apic_map_get_dest_lapic(kvm, &src, irq, map, &dest_vcpu, + &bitmap); + if (ret) { + for_each_set_bit(i, &bitmap, 16) { + if (!dest_vcpu[i]) + continue; + vcpu_idx = dest_vcpu[i]->vcpu->vcpu_idx; + __set_bit(vcpu_idx, vcpu_bitmap); + } + } else { + kvm_for_each_vcpu(i, vcpu, kvm) { + if (!kvm_apic_present(vcpu)) + continue; + if (!kvm_apic_match_dest(vcpu, NULL, + irq->delivery_mode, + irq->dest_id, + irq->dest_mode)) + continue; + __set_bit(i, vcpu_bitmap); + } + } + rcu_read_unlock(); +} + int kvm_apic_compare_prio(struct kvm_vcpu *vcpu1, struct kvm_vcpu *vcpu2) { return vcpu1->arch.apic_arb_prio - vcpu2->arch.apic_arb_prio; @@ -1504,8 +1537,8 @@ static inline void adjust_lapic_timer_advance(struct kvm_vcpu *vcpu, timer_advance_ns += ns/LAPIC_TIMER_ADVANCE_ADJUST_STEP; } - if (unlikely(timer_advance_ns > LAPIC_TIMER_ADVANCE_ADJUST_MAX)) - timer_advance_ns = LAPIC_TIMER_ADVANCE_ADJUST_INIT; + if (unlikely(timer_advance_ns > LAPIC_TIMER_ADVANCE_NS_MAX)) + timer_advance_ns = LAPIC_TIMER_ADVANCE_NS_INIT; apic->lapic_timer.timer_advance_ns = timer_advance_ns; } @@ -2302,7 +2335,7 @@ int kvm_create_lapic(struct kvm_vcpu *vcpu, int timer_advance_ns) HRTIMER_MODE_ABS_HARD); apic->lapic_timer.timer.function = apic_timer_fn; if (timer_advance_ns == -1) { - apic->lapic_timer.timer_advance_ns = LAPIC_TIMER_ADVANCE_ADJUST_INIT; + apic->lapic_timer.timer_advance_ns = LAPIC_TIMER_ADVANCE_NS_INIT; lapic_timer_advance_dynamic = true; } else { apic->lapic_timer.timer_advance_ns = timer_advance_ns; @@ -2713,7 +2746,7 @@ void kvm_apic_accept_events(struct kvm_vcpu *vcpu) * KVM_MP_STATE_INIT_RECEIVED state), just eat SIPIs * and leave the INIT pending. 
*/ - if (is_smm(vcpu) || kvm_x86_ops->apic_init_signal_blocked(vcpu)) { + if (kvm_vcpu_latch_init(vcpu)) { WARN_ON_ONCE(vcpu->arch.mp_state == KVM_MP_STATE_INIT_RECEIVED); if (test_bit(KVM_APIC_SIPI, &apic->pending_events)) clear_bit(KVM_APIC_SIPI, &apic->pending_events); diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h index 2aad7e226fc0..39925afdfcdc 100644 --- a/arch/x86/kvm/lapic.h +++ b/arch/x86/kvm/lapic.h @@ -226,6 +226,9 @@ bool kvm_apic_pending_eoi(struct kvm_vcpu *vcpu, int vector); void kvm_wait_lapic_expire(struct kvm_vcpu *vcpu); +void kvm_bitmap_or_dest_vcpus(struct kvm *kvm, struct kvm_lapic_irq *irq, + unsigned long *vcpu_bitmap); + bool kvm_intr_is_single_vcpu_fast(struct kvm *kvm, struct kvm_lapic_irq *irq, struct kvm_vcpu **dest_vcpu); int kvm_vector_to_index(u32 vector, u32 dest_vcpus, @@ -242,4 +245,9 @@ static inline enum lapic_mode kvm_apic_mode(u64 apic_base) return apic_base & (MSR_IA32_APICBASE_ENABLE | X2APIC_ENABLE); } +static inline u8 kvm_xapic_id(struct kvm_lapic *apic) +{ + return kvm_lapic_get_reg(apic, APIC_ID) >> 24; +} + #endif diff --git a/arch/x86/kvm/mmu.h b/arch/x86/kvm/mmu.h index 11f8ec89433b..d55674f44a18 100644 --- a/arch/x86/kvm/mmu.h +++ b/arch/x86/kvm/mmu.h @@ -210,4 +210,8 @@ void kvm_mmu_gfn_allow_lpage(struct kvm_memory_slot *slot, gfn_t gfn); bool kvm_mmu_slot_gfn_write_protect(struct kvm *kvm, struct kvm_memory_slot *slot, u64 gfn); int kvm_arch_write_log_dirty(struct kvm_vcpu *vcpu); + +int kvm_mmu_post_init_vm(struct kvm *kvm); +void kvm_mmu_pre_destroy_vm(struct kvm *kvm); + #endif diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu/mmu.c index 5269aa057dfa..6f92b40d798c 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu/mmu.c @@ -37,6 +37,7 @@ #include <linux/uaccess.h> #include <linux/hash.h> #include <linux/kern_levels.h> +#include <linux/kthread.h> #include <asm/page.h> #include <asm/pat.h> @@ -47,6 +48,35 @@ #include <asm/kvm_page_track.h> #include "trace.h" +extern bool itlb_multihit_kvm_mitigation; + +static int __read_mostly nx_huge_pages = -1; +#ifdef CONFIG_PREEMPT_RT +/* Recovery can cause latency spikes, disable it for PREEMPT_RT. */ +static uint __read_mostly nx_huge_pages_recovery_ratio = 0; +#else +static uint __read_mostly nx_huge_pages_recovery_ratio = 60; +#endif + +static int set_nx_huge_pages(const char *val, const struct kernel_param *kp); +static int set_nx_huge_pages_recovery_ratio(const char *val, const struct kernel_param *kp); + +static struct kernel_param_ops nx_huge_pages_ops = { + .set = set_nx_huge_pages, + .get = param_get_bool, +}; + +static struct kernel_param_ops nx_huge_pages_recovery_ratio_ops = { + .set = set_nx_huge_pages_recovery_ratio, + .get = param_get_uint, +}; + +module_param_cb(nx_huge_pages, &nx_huge_pages_ops, &nx_huge_pages, 0644); +__MODULE_PARM_TYPE(nx_huge_pages, "bool"); +module_param_cb(nx_huge_pages_recovery_ratio, &nx_huge_pages_recovery_ratio_ops, + &nx_huge_pages_recovery_ratio, 0644); +__MODULE_PARM_TYPE(nx_huge_pages_recovery_ratio, "uint"); + /* * When setting this variable to true it enables Two-Dimensional-Paging * where the hardware walks 2 page tables: @@ -83,7 +113,17 @@ module_param(dbg, bool, 0644); #define PTE_PREFETCH_NUM 8 #define PT_FIRST_AVAIL_BITS_SHIFT 10 -#define PT64_SECOND_AVAIL_BITS_SHIFT 52 +#define PT64_SECOND_AVAIL_BITS_SHIFT 54 + +/* + * The mask used to denote special SPTEs, which can be either MMIO SPTEs or + * Access Tracking SPTEs. 
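
The encoding introduced just below packs a 2-bit selector into SPTE bits 53:52. A tiny decoder makes the four states explicit; the mask values are copied from the hunk, while the helper itself is only an illustration.

#include <stdint.h>
#include <stdio.h>

#define SPTE_SPECIAL_MASK		(3ULL << 52)
#define SPTE_AD_ENABLED_MASK		(0ULL << 52)
#define SPTE_AD_DISABLED_MASK		(1ULL << 52)
#define SPTE_AD_WRPROT_ONLY_MASK	(2ULL << 52)
#define SPTE_MMIO_MASK			(3ULL << 52)

static const char *spte_kind(uint64_t spte)
{
	switch (spte & SPTE_SPECIAL_MASK) {
	case SPTE_AD_ENABLED_MASK:	return "hardware A/D bits in use";
	case SPTE_AD_DISABLED_MASK:	return "access-tracked, no A/D bits";
	case SPTE_AD_WRPROT_ONLY_MASK:	return "dirty logging via write protection";
	default:			return "MMIO";
	}
}

int main(void)
{
	printf("%s\n", spte_kind(2ULL << 52));	/* dirty logging via write protection */
	return 0;
}
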
+ */ +#define SPTE_SPECIAL_MASK (3ULL << 52) +#define SPTE_AD_ENABLED_MASK (0ULL << 52) +#define SPTE_AD_DISABLED_MASK (1ULL << 52) +#define SPTE_AD_WRPROT_ONLY_MASK (2ULL << 52) +#define SPTE_MMIO_MASK (3ULL << 52) #define PT64_LEVEL_BITS 9 @@ -219,12 +259,11 @@ static u64 __read_mostly shadow_present_mask; static u64 __read_mostly shadow_me_mask; /* - * SPTEs used by MMUs without A/D bits are marked with shadow_acc_track_value. - * Non-present SPTEs with shadow_acc_track_value set are in place for access - * tracking. + * SPTEs used by MMUs without A/D bits are marked with SPTE_AD_DISABLED_MASK; + * shadow_acc_track_mask is the set of bits to be cleared in non-accessed + * pages. */ static u64 __read_mostly shadow_acc_track_mask; -static const u64 shadow_acc_track_value = SPTE_SPECIAL_MASK; /* * The mask/shift to use for saving the original R/X bits when marking the PTE @@ -304,7 +343,7 @@ void kvm_mmu_set_mmio_spte_mask(u64 mmio_mask, u64 mmio_value, u64 access_mask) { BUG_ON((u64)(unsigned)access_mask != access_mask); BUG_ON((mmio_mask & mmio_value) != mmio_value); - shadow_mmio_value = mmio_value | SPTE_SPECIAL_MASK; + shadow_mmio_value = mmio_value | SPTE_MMIO_MASK; shadow_mmio_mask = mmio_mask | SPTE_SPECIAL_MASK; shadow_mmio_access_mask = access_mask; } @@ -320,10 +359,32 @@ static inline bool sp_ad_disabled(struct kvm_mmu_page *sp) return sp->role.ad_disabled; } +static inline bool kvm_vcpu_ad_need_write_protect(struct kvm_vcpu *vcpu) +{ + /* + * When using the EPT page-modification log, the GPAs in the log + * would come from L2 rather than L1. Therefore, we need to rely + * on write protection to record dirty pages. This also bypasses + * PML, since writes now result in a vmexit. + */ + return vcpu->arch.mmu == &vcpu->arch.guest_mmu; +} + static inline bool spte_ad_enabled(u64 spte) { MMU_WARN_ON(is_mmio_spte(spte)); - return !(spte & shadow_acc_track_value); + return (spte & SPTE_SPECIAL_MASK) != SPTE_AD_DISABLED_MASK; +} + +static inline bool spte_ad_need_write_protect(u64 spte) +{ + MMU_WARN_ON(is_mmio_spte(spte)); + return (spte & SPTE_SPECIAL_MASK) != SPTE_AD_ENABLED_MASK; +} + +static bool is_nx_huge_page_enabled(void) +{ + return READ_ONCE(nx_huge_pages); } static inline u64 spte_shadow_accessed_mask(u64 spte) @@ -461,7 +522,7 @@ void kvm_mmu_set_mask_ptes(u64 user_mask, u64 accessed_mask, { BUG_ON(!dirty_mask != !accessed_mask); BUG_ON(!accessed_mask && !acc_track_mask); - BUG_ON(acc_track_mask & shadow_acc_track_value); + BUG_ON(acc_track_mask & SPTE_SPECIAL_MASK); shadow_user_mask = user_mask; shadow_accessed_mask = accessed_mask; @@ -1164,6 +1225,17 @@ static void account_shadowed(struct kvm *kvm, struct kvm_mmu_page *sp) kvm_mmu_gfn_disallow_lpage(slot, gfn); } +static void account_huge_nx_page(struct kvm *kvm, struct kvm_mmu_page *sp) +{ + if (sp->lpage_disallowed) + return; + + ++kvm->stat.nx_lpage_splits; + list_add_tail(&sp->lpage_disallowed_link, + &kvm->arch.lpage_disallowed_mmu_pages); + sp->lpage_disallowed = true; +} + static void unaccount_shadowed(struct kvm *kvm, struct kvm_mmu_page *sp) { struct kvm_memslots *slots; @@ -1181,6 +1253,13 @@ static void unaccount_shadowed(struct kvm *kvm, struct kvm_mmu_page *sp) kvm_mmu_gfn_allow_lpage(slot, gfn); } +static void unaccount_huge_nx_page(struct kvm *kvm, struct kvm_mmu_page *sp) +{ + --kvm->stat.nx_lpage_splits; + sp->lpage_disallowed = false; + list_del(&sp->lpage_disallowed_link); +} + static bool __mmu_gfn_lpage_is_disallowed(gfn_t gfn, int level, struct kvm_memory_slot *slot) { @@ -1589,16 +1668,16 @@ 
static bool spte_clear_dirty(u64 *sptep) rmap_printk("rmap_clear_dirty: spte %p %llx\n", sptep, *sptep); + MMU_WARN_ON(!spte_ad_enabled(spte)); spte &= ~shadow_dirty_mask; - return mmu_spte_update(sptep, spte); } -static bool wrprot_ad_disabled_spte(u64 *sptep) +static bool spte_wrprot_for_clear_dirty(u64 *sptep) { bool was_writable = test_and_clear_bit(PT_WRITABLE_SHIFT, (unsigned long *)sptep); - if (was_writable) + if (was_writable && !spte_ad_enabled(*sptep)) kvm_set_pfn_dirty(spte_to_pfn(*sptep)); return was_writable; @@ -1617,10 +1696,10 @@ static bool __rmap_clear_dirty(struct kvm *kvm, struct kvm_rmap_head *rmap_head) bool flush = false; for_each_rmap_spte(rmap_head, &iter, sptep) - if (spte_ad_enabled(*sptep)) - flush |= spte_clear_dirty(sptep); + if (spte_ad_need_write_protect(*sptep)) + flush |= spte_wrprot_for_clear_dirty(sptep); else - flush |= wrprot_ad_disabled_spte(sptep); + flush |= spte_clear_dirty(sptep); return flush; } @@ -1631,6 +1710,11 @@ static bool spte_set_dirty(u64 *sptep) rmap_printk("rmap_set_dirty: spte %p %llx\n", sptep, *sptep); + /* + * Similar to the !kvm_x86_ops->slot_disable_log_dirty case, + * do not bother adding back write access to pages marked + * SPTE_AD_WRPROT_ONLY_MASK. + */ spte |= shadow_dirty_mask; return mmu_spte_update(sptep, spte); @@ -2622,7 +2706,7 @@ static void link_shadow_page(struct kvm_vcpu *vcpu, u64 *sptep, shadow_user_mask | shadow_x_mask | shadow_me_mask; if (sp_ad_disabled(sp)) - spte |= shadow_acc_track_value; + spte |= SPTE_AD_DISABLED_MASK; else spte |= shadow_accessed_mask; @@ -2761,6 +2845,9 @@ static bool __kvm_mmu_prepare_zap_page(struct kvm *kvm, kvm_reload_remote_mmus(kvm); } + if (sp->lpage_disallowed) + unaccount_huge_nx_page(kvm, sp); + sp->role.invalid = 1; return list_unstable; } @@ -2968,7 +3055,9 @@ static int set_spte(struct kvm_vcpu *vcpu, u64 *sptep, sp = page_header(__pa(sptep)); if (sp_ad_disabled(sp)) - spte |= shadow_acc_track_value; + spte |= SPTE_AD_DISABLED_MASK; + else if (kvm_vcpu_ad_need_write_protect(vcpu)) + spte |= SPTE_AD_WRPROT_ONLY_MASK; /* * For the EPT case, shadow_present_mask is 0 if hardware @@ -2980,6 +3069,11 @@ static int set_spte(struct kvm_vcpu *vcpu, u64 *sptep, if (!speculative) spte |= spte_shadow_accessed_mask(spte); + if (level > PT_PAGE_TABLE_LEVEL && (pte_access & ACC_EXEC_MASK) && + is_nx_huge_page_enabled()) { + pte_access &= ~ACC_EXEC_MASK; + } + if (pte_access & ACC_EXEC_MASK) spte |= shadow_x_mask; else @@ -3200,9 +3294,32 @@ static void direct_pte_prefetch(struct kvm_vcpu *vcpu, u64 *sptep) __direct_pte_prefetch(vcpu, sp, sptep); } +static void disallowed_hugepage_adjust(struct kvm_shadow_walk_iterator it, + gfn_t gfn, kvm_pfn_t *pfnp, int *levelp) +{ + int level = *levelp; + u64 spte = *it.sptep; + + if (it.level == level && level > PT_PAGE_TABLE_LEVEL && + is_nx_huge_page_enabled() && + is_shadow_present_pte(spte) && + !is_large_pte(spte)) { + /* + * A small SPTE exists for this pfn, but FNAME(fetch) + * and __direct_map would like to create a large PTE + * instead: just force them to go down another level, + * patching back for them into pfn the next 9 bits of + * the address. 
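
Numerically, the patch-up performed by disallowed_hugepage_adjust() just below comes down to copying the low 9 bits of the gfn into the pfn when a would-be 2 MiB mapping is forced down to 4 KiB pages. The frame numbers in this sketch are invented; it is not part of the patch.

#include <stdint.h>
#include <stdio.h>

#define PAGES_PER_2M	512ULL	/* KVM_PAGES_PER_HPAGE(2) */
#define PAGES_PER_4K	1ULL	/* KVM_PAGES_PER_HPAGE(1) */

int main(void)
{
	uint64_t gfn = 0x12345;			/* faulting guest frame number     */
	uint64_t pfn = 0xabc00;			/* host frame of the 2 MiB backing */
	uint64_t page_mask = PAGES_PER_2M - PAGES_PER_4K;	/* 0x1ff */

	/* copy the low 9 bits of the gfn so the 4 KiB mapping points at the
	 * correct page inside the 2 MiB region */
	pfn |= gfn & page_mask;
	printf("pfn = %#llx\n", (unsigned long long)pfn);	/* 0xabd45 */
	return 0;
}
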
+ */ + u64 page_mask = KVM_PAGES_PER_HPAGE(level) - KVM_PAGES_PER_HPAGE(level - 1); + *pfnp |= gfn & page_mask; + (*levelp)--; + } +} + static int __direct_map(struct kvm_vcpu *vcpu, gpa_t gpa, int write, int map_writable, int level, kvm_pfn_t pfn, - bool prefault) + bool prefault, bool lpage_disallowed) { struct kvm_shadow_walk_iterator it; struct kvm_mmu_page *sp; @@ -3215,6 +3332,12 @@ static int __direct_map(struct kvm_vcpu *vcpu, gpa_t gpa, int write, trace_kvm_mmu_spte_requested(gpa, level, pfn); for_each_shadow_entry(vcpu, gpa, it) { + /* + * We cannot overwrite existing page tables with an NX + * large page, as the leaf could be executable. + */ + disallowed_hugepage_adjust(it, gfn, &pfn, &level); + base_gfn = gfn & ~(KVM_PAGES_PER_HPAGE(it.level) - 1); if (it.level == level) break; @@ -3225,6 +3348,8 @@ static int __direct_map(struct kvm_vcpu *vcpu, gpa_t gpa, int write, it.level - 1, true, ACC_ALL); link_shadow_page(vcpu, it.sptep, sp); + if (lpage_disallowed) + account_huge_nx_page(vcpu->kvm, sp); } } @@ -3273,7 +3398,7 @@ static void transparent_hugepage_adjust(struct kvm_vcpu *vcpu, * here. */ if (!is_error_noslot_pfn(pfn) && !kvm_is_reserved_pfn(pfn) && - level == PT_PAGE_TABLE_LEVEL && + !kvm_is_zone_device_pfn(pfn) && level == PT_PAGE_TABLE_LEVEL && PageTransCompoundMap(pfn_to_page(pfn)) && !mmu_gfn_lpage_is_disallowed(vcpu, gfn, PT_DIRECTORY_LEVEL)) { unsigned long mask; @@ -3517,11 +3642,14 @@ static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, u32 error_code, { int r; int level; - bool force_pt_level = false; + bool force_pt_level; kvm_pfn_t pfn; unsigned long mmu_seq; bool map_writable, write = error_code & PFERR_WRITE_MASK; + bool lpage_disallowed = (error_code & PFERR_FETCH_MASK) && + is_nx_huge_page_enabled(); + force_pt_level = lpage_disallowed; level = mapping_level(vcpu, gfn, &force_pt_level); if (likely(!force_pt_level)) { /* @@ -3555,7 +3683,8 @@ static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, u32 error_code, goto out_unlock; if (likely(!force_pt_level)) transparent_hugepage_adjust(vcpu, gfn, &pfn, &level); - r = __direct_map(vcpu, v, write, map_writable, level, pfn, prefault); + r = __direct_map(vcpu, v, write, map_writable, level, pfn, + prefault, false); out_unlock: spin_unlock(&vcpu->kvm->mmu_lock); kvm_release_pfn_clean(pfn); @@ -4141,6 +4270,8 @@ static int tdp_page_fault(struct kvm_vcpu *vcpu, gva_t gpa, u32 error_code, unsigned long mmu_seq; int write = error_code & PFERR_WRITE_MASK; bool map_writable; + bool lpage_disallowed = (error_code & PFERR_FETCH_MASK) && + is_nx_huge_page_enabled(); MMU_WARN_ON(!VALID_PAGE(vcpu->arch.mmu->root_hpa)); @@ -4151,8 +4282,9 @@ static int tdp_page_fault(struct kvm_vcpu *vcpu, gva_t gpa, u32 error_code, if (r) return r; - force_pt_level = !check_hugepage_cache_consistency(vcpu, gfn, - PT_DIRECTORY_LEVEL); + force_pt_level = + lpage_disallowed || + !check_hugepage_cache_consistency(vcpu, gfn, PT_DIRECTORY_LEVEL); level = mapping_level(vcpu, gfn, &force_pt_level); if (likely(!force_pt_level)) { if (level > PT_DIRECTORY_LEVEL && @@ -4181,7 +4313,8 @@ static int tdp_page_fault(struct kvm_vcpu *vcpu, gva_t gpa, u32 error_code, goto out_unlock; if (likely(!force_pt_level)) transparent_hugepage_adjust(vcpu, gfn, &pfn, &level); - r = __direct_map(vcpu, gpa, write, map_writable, level, pfn, prefault); + r = __direct_map(vcpu, gpa, write, map_writable, level, pfn, + prefault, lpage_disallowed); out_unlock: spin_unlock(&vcpu->kvm->mmu_lock); kvm_release_pfn_clean(pfn); @@ -4262,7 +4395,7 @@ static bool 
fast_cr3_switch(struct kvm_vcpu *vcpu, gpa_t new_cr3, kvm_make_request(KVM_REQ_LOAD_CR3, vcpu); if (!skip_tlb_flush) { kvm_make_request(KVM_REQ_MMU_SYNC, vcpu); - kvm_x86_ops->tlb_flush(vcpu, true); + kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu); } /* @@ -5881,9 +6014,9 @@ restart: * the guest, and the guest page table is using 4K page size * mapping if the indirect sp has level = 1. */ - if (sp->role.direct && - !kvm_is_reserved_pfn(pfn) && - PageTransCompoundMap(pfn_to_page(pfn))) { + if (sp->role.direct && !kvm_is_reserved_pfn(pfn) && + !kvm_is_zone_device_pfn(pfn) && + PageTransCompoundMap(pfn_to_page(pfn))) { pte_list_remove(rmap_head, sptep); if (kvm_available_flush_tlb_with_range()) @@ -6122,10 +6255,59 @@ static void kvm_set_mmio_spte_mask(void) kvm_mmu_set_mmio_spte_mask(mask, mask, ACC_WRITE_MASK | ACC_USER_MASK); } +static bool get_nx_auto_mode(void) +{ + /* Return true when CPU has the bug, and mitigations are ON */ + return boot_cpu_has_bug(X86_BUG_ITLB_MULTIHIT) && !cpu_mitigations_off(); +} + +static void __set_nx_huge_pages(bool val) +{ + nx_huge_pages = itlb_multihit_kvm_mitigation = val; +} + +static int set_nx_huge_pages(const char *val, const struct kernel_param *kp) +{ + bool old_val = nx_huge_pages; + bool new_val; + + /* In "auto" mode deploy workaround only if CPU has the bug. */ + if (sysfs_streq(val, "off")) + new_val = 0; + else if (sysfs_streq(val, "force")) + new_val = 1; + else if (sysfs_streq(val, "auto")) + new_val = get_nx_auto_mode(); + else if (strtobool(val, &new_val) < 0) + return -EINVAL; + + __set_nx_huge_pages(new_val); + + if (new_val != old_val) { + struct kvm *kvm; + + mutex_lock(&kvm_lock); + + list_for_each_entry(kvm, &vm_list, vm_list) { + mutex_lock(&kvm->slots_lock); + kvm_mmu_zap_all_fast(kvm); + mutex_unlock(&kvm->slots_lock); + + wake_up_process(kvm->arch.nx_lpage_recovery_thread); + } + mutex_unlock(&kvm_lock); + } + + return 0; +} + int kvm_mmu_module_init(void) { int ret = -ENOMEM; + if (nx_huge_pages == -1) + __set_nx_huge_pages(get_nx_auto_mode()); + /* * MMU roles use union aliasing which is, generally speaking, an * undefined behavior. However, we supposedly know how compilers behave @@ -6205,3 +6387,116 @@ void kvm_mmu_module_exit(void) unregister_shrinker(&mmu_shrinker); mmu_audit_disable(); } + +static int set_nx_huge_pages_recovery_ratio(const char *val, const struct kernel_param *kp) +{ + unsigned int old_val; + int err; + + old_val = nx_huge_pages_recovery_ratio; + err = param_set_uint(val, kp); + if (err) + return err; + + if (READ_ONCE(nx_huge_pages) && + !old_val && nx_huge_pages_recovery_ratio) { + struct kvm *kvm; + + mutex_lock(&kvm_lock); + + list_for_each_entry(kvm, &vm_list, vm_list) + wake_up_process(kvm->arch.nx_lpage_recovery_thread); + + mutex_unlock(&kvm_lock); + } + + return err; +} + +static void kvm_recover_nx_lpages(struct kvm *kvm) +{ + int rcu_idx; + struct kvm_mmu_page *sp; + unsigned int ratio; + LIST_HEAD(invalid_list); + ulong to_zap; + + rcu_idx = srcu_read_lock(&kvm->srcu); + spin_lock(&kvm->mmu_lock); + + ratio = READ_ONCE(nx_huge_pages_recovery_ratio); + to_zap = ratio ? DIV_ROUND_UP(kvm->stat.nx_lpage_splits, ratio) : 0; + while (to_zap && !list_empty(&kvm->arch.lpage_disallowed_mmu_pages)) { + /* + * We use a separate list instead of just using active_mmu_pages + * because the number of lpage_disallowed pages is expected to + * be relatively small compared to the total. 
+ */ + sp = list_first_entry(&kvm->arch.lpage_disallowed_mmu_pages, + struct kvm_mmu_page, + lpage_disallowed_link); + WARN_ON_ONCE(!sp->lpage_disallowed); + kvm_mmu_prepare_zap_page(kvm, sp, &invalid_list); + WARN_ON_ONCE(sp->lpage_disallowed); + + if (!--to_zap || need_resched() || spin_needbreak(&kvm->mmu_lock)) { + kvm_mmu_commit_zap_page(kvm, &invalid_list); + if (to_zap) + cond_resched_lock(&kvm->mmu_lock); + } + } + + spin_unlock(&kvm->mmu_lock); + srcu_read_unlock(&kvm->srcu, rcu_idx); +} + +static long get_nx_lpage_recovery_timeout(u64 start_time) +{ + return READ_ONCE(nx_huge_pages) && READ_ONCE(nx_huge_pages_recovery_ratio) + ? start_time + 60 * HZ - get_jiffies_64() + : MAX_SCHEDULE_TIMEOUT; +} + +static int kvm_nx_lpage_recovery_worker(struct kvm *kvm, uintptr_t data) +{ + u64 start_time; + long remaining_time; + + while (true) { + start_time = get_jiffies_64(); + remaining_time = get_nx_lpage_recovery_timeout(start_time); + + set_current_state(TASK_INTERRUPTIBLE); + while (!kthread_should_stop() && remaining_time > 0) { + schedule_timeout(remaining_time); + remaining_time = get_nx_lpage_recovery_timeout(start_time); + set_current_state(TASK_INTERRUPTIBLE); + } + + set_current_state(TASK_RUNNING); + + if (kthread_should_stop()) + return 0; + + kvm_recover_nx_lpages(kvm); + } +} + +int kvm_mmu_post_init_vm(struct kvm *kvm) +{ + int err; + + err = kvm_vm_create_worker_thread(kvm, kvm_nx_lpage_recovery_worker, 0, + "kvm-nx-lpage-recovery", + &kvm->arch.nx_lpage_recovery_thread); + if (!err) + kthread_unpark(kvm->arch.nx_lpage_recovery_thread); + + return err; +} + +void kvm_mmu_pre_destroy_vm(struct kvm *kvm) +{ + if (kvm->arch.nx_lpage_recovery_thread) + kthread_stop(kvm->arch.nx_lpage_recovery_thread); +} diff --git a/arch/x86/kvm/page_track.c b/arch/x86/kvm/mmu/page_track.c index 3521e2d176f2..3521e2d176f2 100644 --- a/arch/x86/kvm/page_track.c +++ b/arch/x86/kvm/mmu/page_track.c diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/mmu/paging_tmpl.h index 7d5cdb3af594..97b21e7fd013 100644 --- a/arch/x86/kvm/paging_tmpl.h +++ b/arch/x86/kvm/mmu/paging_tmpl.h @@ -614,13 +614,14 @@ static void FNAME(pte_prefetch)(struct kvm_vcpu *vcpu, struct guest_walker *gw, static int FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr, struct guest_walker *gw, int write_fault, int hlevel, - kvm_pfn_t pfn, bool map_writable, bool prefault) + kvm_pfn_t pfn, bool map_writable, bool prefault, + bool lpage_disallowed) { struct kvm_mmu_page *sp = NULL; struct kvm_shadow_walk_iterator it; unsigned direct_access, access = gw->pt_access; int top_level, ret; - gfn_t base_gfn; + gfn_t gfn, base_gfn; direct_access = gw->pte_access; @@ -665,13 +666,25 @@ static int FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr, link_shadow_page(vcpu, it.sptep, sp); } - base_gfn = gw->gfn; + /* + * FNAME(page_fault) might have clobbered the bottom bits of + * gw->gfn, restore them from the virtual address. + */ + gfn = gw->gfn | ((addr & PT_LVL_OFFSET_MASK(gw->level)) >> PAGE_SHIFT); + base_gfn = gfn; trace_kvm_mmu_spte_requested(addr, gw->level, pfn); for (; shadow_walk_okay(&it); shadow_walk_next(&it)) { clear_sp_write_flooding_count(it.sptep); - base_gfn = gw->gfn & ~(KVM_PAGES_PER_HPAGE(it.level) - 1); + + /* + * We cannot overwrite existing page tables with an NX + * large page, as the leaf could be executable. 
+ */ + disallowed_hugepage_adjust(it, gfn, &pfn, &hlevel); + + base_gfn = gfn & ~(KVM_PAGES_PER_HPAGE(it.level) - 1); if (it.level == hlevel) break; @@ -683,6 +696,8 @@ static int FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr, sp = kvm_mmu_get_page(vcpu, base_gfn, addr, it.level - 1, true, direct_access); link_shadow_page(vcpu, it.sptep, sp); + if (lpage_disallowed) + account_huge_nx_page(vcpu->kvm, sp); } } @@ -759,9 +774,11 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr, u32 error_code, int r; kvm_pfn_t pfn; int level = PT_PAGE_TABLE_LEVEL; - bool force_pt_level = false; unsigned long mmu_seq; bool map_writable, is_self_change_mapping; + bool lpage_disallowed = (error_code & PFERR_FETCH_MASK) && + is_nx_huge_page_enabled(); + bool force_pt_level = lpage_disallowed; pgprintk("%s: addr %lx err %x\n", __func__, addr, error_code); @@ -851,7 +868,7 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr, u32 error_code, if (!force_pt_level) transparent_hugepage_adjust(vcpu, walker.gfn, &pfn, &level); r = FNAME(fetch)(vcpu, addr, &walker, write_fault, - level, pfn, map_writable, prefault); + level, pfn, map_writable, prefault, lpage_disallowed); kvm_mmu_audit(vcpu, AUDIT_POST_PAGE_FAULT); out_unlock: diff --git a/arch/x86/kvm/pmu.c b/arch/x86/kvm/pmu.c index 46875bbd0419..d5e6d5b3f06f 100644 --- a/arch/x86/kvm/pmu.c +++ b/arch/x86/kvm/pmu.c @@ -62,8 +62,7 @@ static void kvm_perf_overflow(struct perf_event *perf_event, struct kvm_pmc *pmc = perf_event->overflow_handler_context; struct kvm_pmu *pmu = pmc_to_pmu(pmc); - if (!test_and_set_bit(pmc->idx, - (unsigned long *)&pmu->reprogram_pmi)) { + if (!test_and_set_bit(pmc->idx, pmu->reprogram_pmi)) { __set_bit(pmc->idx, (unsigned long *)&pmu->global_status); kvm_make_request(KVM_REQ_PMU, pmc->vcpu); } @@ -76,8 +75,7 @@ static void kvm_perf_overflow_intr(struct perf_event *perf_event, struct kvm_pmc *pmc = perf_event->overflow_handler_context; struct kvm_pmu *pmu = pmc_to_pmu(pmc); - if (!test_and_set_bit(pmc->idx, - (unsigned long *)&pmu->reprogram_pmi)) { + if (!test_and_set_bit(pmc->idx, pmu->reprogram_pmi)) { __set_bit(pmc->idx, (unsigned long *)&pmu->global_status); kvm_make_request(KVM_REQ_PMU, pmc->vcpu); @@ -137,7 +135,37 @@ static void pmc_reprogram_counter(struct kvm_pmc *pmc, u32 type, } pmc->perf_event = event; - clear_bit(pmc->idx, (unsigned long*)&pmc_to_pmu(pmc)->reprogram_pmi); + pmc_to_pmu(pmc)->event_count++; + clear_bit(pmc->idx, pmc_to_pmu(pmc)->reprogram_pmi); +} + +static void pmc_pause_counter(struct kvm_pmc *pmc) +{ + u64 counter = pmc->counter; + + if (!pmc->perf_event) + return; + + /* update counter, reset event value to avoid redundant accumulation */ + counter += perf_event_pause(pmc->perf_event, true); + pmc->counter = counter & pmc_bitmask(pmc); +} + +static bool pmc_resume_counter(struct kvm_pmc *pmc) +{ + if (!pmc->perf_event) + return false; + + /* recalibrate sample period and check if it's accepted by perf core */ + if (perf_event_period(pmc->perf_event, + (-pmc->counter) & pmc_bitmask(pmc))) + return false; + + /* reuse perf_event to serve as pmc_reprogram_counter() does*/ + perf_event_enable(pmc->perf_event); + + clear_bit(pmc->idx, (unsigned long *)&pmc_to_pmu(pmc)->reprogram_pmi); + return true; } void reprogram_gp_counter(struct kvm_pmc *pmc, u64 eventsel) @@ -154,7 +182,7 @@ void reprogram_gp_counter(struct kvm_pmc *pmc, u64 eventsel) pmc->eventsel = eventsel; - pmc_stop_counter(pmc); + pmc_pause_counter(pmc); if (!(eventsel & ARCH_PERFMON_EVENTSEL_ENABLE) || 
!pmc_is_enabled(pmc)) return; @@ -193,6 +221,12 @@ void reprogram_gp_counter(struct kvm_pmc *pmc, u64 eventsel) if (type == PERF_TYPE_RAW) config = eventsel & X86_RAW_EVENT_MASK; + if (pmc->current_config == eventsel && pmc_resume_counter(pmc)) + return; + + pmc_release_perf_event(pmc); + + pmc->current_config = eventsel; pmc_reprogram_counter(pmc, type, config, !(eventsel & ARCH_PERFMON_EVENTSEL_USR), !(eventsel & ARCH_PERFMON_EVENTSEL_OS), @@ -209,7 +243,7 @@ void reprogram_fixed_counter(struct kvm_pmc *pmc, u8 ctrl, int idx) struct kvm_pmu_event_filter *filter; struct kvm *kvm = pmc->vcpu->kvm; - pmc_stop_counter(pmc); + pmc_pause_counter(pmc); if (!en_field || !pmc_is_enabled(pmc)) return; @@ -224,6 +258,12 @@ void reprogram_fixed_counter(struct kvm_pmc *pmc, u8 ctrl, int idx) return; } + if (pmc->current_config == (u64)ctrl && pmc_resume_counter(pmc)) + return; + + pmc_release_perf_event(pmc); + + pmc->current_config = (u64)ctrl; pmc_reprogram_counter(pmc, PERF_TYPE_HARDWARE, kvm_x86_ops->pmu_ops->find_fixed_event(idx), !(en_field & 0x2), /* exclude user */ @@ -253,27 +293,32 @@ EXPORT_SYMBOL_GPL(reprogram_counter); void kvm_pmu_handle_event(struct kvm_vcpu *vcpu) { struct kvm_pmu *pmu = vcpu_to_pmu(vcpu); - u64 bitmask; int bit; - bitmask = pmu->reprogram_pmi; - - for_each_set_bit(bit, (unsigned long *)&bitmask, X86_PMC_IDX_MAX) { + for_each_set_bit(bit, pmu->reprogram_pmi, X86_PMC_IDX_MAX) { struct kvm_pmc *pmc = kvm_x86_ops->pmu_ops->pmc_idx_to_pmc(pmu, bit); if (unlikely(!pmc || !pmc->perf_event)) { - clear_bit(bit, (unsigned long *)&pmu->reprogram_pmi); + clear_bit(bit, pmu->reprogram_pmi); continue; } reprogram_counter(pmu, bit); } + + /* + * Unused perf_events are only released if the corresponding MSRs + * weren't accessed during the last vCPU time slice. kvm_arch_sched_in + * triggers KVM_REQ_PMU if cleanup is needed. 
+ */ + if (unlikely(pmu->need_cleanup)) + kvm_pmu_cleanup(vcpu); } /* check if idx is a valid index to access PMU */ -int kvm_pmu_is_valid_msr_idx(struct kvm_vcpu *vcpu, unsigned idx) +int kvm_pmu_is_valid_rdpmc_ecx(struct kvm_vcpu *vcpu, unsigned int idx) { - return kvm_x86_ops->pmu_ops->is_valid_msr_idx(vcpu, idx); + return kvm_x86_ops->pmu_ops->is_valid_rdpmc_ecx(vcpu, idx); } bool is_vmware_backdoor_pmc(u32 pmc_idx) @@ -323,7 +368,7 @@ int kvm_pmu_rdpmc(struct kvm_vcpu *vcpu, unsigned idx, u64 *data) if (is_vmware_backdoor_pmc(idx)) return kvm_pmu_rdpmc_vmware(vcpu, idx, data); - pmc = kvm_x86_ops->pmu_ops->msr_idx_to_pmc(vcpu, idx, &mask); + pmc = kvm_x86_ops->pmu_ops->rdpmc_ecx_to_pmc(vcpu, idx, &mask); if (!pmc) return 1; @@ -339,7 +384,17 @@ void kvm_pmu_deliver_pmi(struct kvm_vcpu *vcpu) bool kvm_pmu_is_valid_msr(struct kvm_vcpu *vcpu, u32 msr) { - return kvm_x86_ops->pmu_ops->is_valid_msr(vcpu, msr); + return kvm_x86_ops->pmu_ops->msr_idx_to_pmc(vcpu, msr) || + kvm_x86_ops->pmu_ops->is_valid_msr(vcpu, msr); +} + +static void kvm_pmu_mark_pmc_in_use(struct kvm_vcpu *vcpu, u32 msr) +{ + struct kvm_pmu *pmu = vcpu_to_pmu(vcpu); + struct kvm_pmc *pmc = kvm_x86_ops->pmu_ops->msr_idx_to_pmc(vcpu, msr); + + if (pmc) + __set_bit(pmc->idx, pmu->pmc_in_use); } int kvm_pmu_get_msr(struct kvm_vcpu *vcpu, u32 msr, u64 *data) @@ -349,6 +404,7 @@ int kvm_pmu_get_msr(struct kvm_vcpu *vcpu, u32 msr, u64 *data) int kvm_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) { + kvm_pmu_mark_pmc_in_use(vcpu, msr_info->index); return kvm_x86_ops->pmu_ops->set_msr(vcpu, msr_info); } @@ -376,9 +432,45 @@ void kvm_pmu_init(struct kvm_vcpu *vcpu) memset(pmu, 0, sizeof(*pmu)); kvm_x86_ops->pmu_ops->init(vcpu); init_irq_work(&pmu->irq_work, kvm_pmi_trigger_fn); + pmu->event_count = 0; + pmu->need_cleanup = false; kvm_pmu_refresh(vcpu); } +static inline bool pmc_speculative_in_use(struct kvm_pmc *pmc) +{ + struct kvm_pmu *pmu = pmc_to_pmu(pmc); + + if (pmc_is_fixed(pmc)) + return fixed_ctrl_field(pmu->fixed_ctr_ctrl, + pmc->idx - INTEL_PMC_IDX_FIXED) & 0x3; + + return pmc->eventsel & ARCH_PERFMON_EVENTSEL_ENABLE; +} + +/* Release perf_events for vPMCs that have been unused for a full time slice. 
*/ +void kvm_pmu_cleanup(struct kvm_vcpu *vcpu) +{ + struct kvm_pmu *pmu = vcpu_to_pmu(vcpu); + struct kvm_pmc *pmc = NULL; + DECLARE_BITMAP(bitmask, X86_PMC_IDX_MAX); + int i; + + pmu->need_cleanup = false; + + bitmap_andnot(bitmask, pmu->all_valid_pmc_idx, + pmu->pmc_in_use, X86_PMC_IDX_MAX); + + for_each_set_bit(i, bitmask, X86_PMC_IDX_MAX) { + pmc = kvm_x86_ops->pmu_ops->pmc_idx_to_pmc(pmu, i); + + if (pmc && pmc->perf_event && !pmc_speculative_in_use(pmc)) + pmc_stop_counter(pmc); + } + + bitmap_zero(pmu->pmc_in_use, X86_PMC_IDX_MAX); +} + void kvm_pmu_destroy(struct kvm_vcpu *vcpu) { kvm_pmu_reset(vcpu); diff --git a/arch/x86/kvm/pmu.h b/arch/x86/kvm/pmu.h index 58265f761c3b..7ebb62326c14 100644 --- a/arch/x86/kvm/pmu.h +++ b/arch/x86/kvm/pmu.h @@ -25,9 +25,10 @@ struct kvm_pmu_ops { unsigned (*find_fixed_event)(int idx); bool (*pmc_is_enabled)(struct kvm_pmc *pmc); struct kvm_pmc *(*pmc_idx_to_pmc)(struct kvm_pmu *pmu, int pmc_idx); - struct kvm_pmc *(*msr_idx_to_pmc)(struct kvm_vcpu *vcpu, unsigned idx, - u64 *mask); - int (*is_valid_msr_idx)(struct kvm_vcpu *vcpu, unsigned idx); + struct kvm_pmc *(*rdpmc_ecx_to_pmc)(struct kvm_vcpu *vcpu, + unsigned int idx, u64 *mask); + struct kvm_pmc *(*msr_idx_to_pmc)(struct kvm_vcpu *vcpu, u32 msr); + int (*is_valid_rdpmc_ecx)(struct kvm_vcpu *vcpu, unsigned int idx); bool (*is_valid_msr)(struct kvm_vcpu *vcpu, u32 msr); int (*get_msr)(struct kvm_vcpu *vcpu, u32 msr, u64 *data); int (*set_msr)(struct kvm_vcpu *vcpu, struct msr_data *msr_info); @@ -55,12 +56,21 @@ static inline u64 pmc_read_counter(struct kvm_pmc *pmc) return counter & pmc_bitmask(pmc); } -static inline void pmc_stop_counter(struct kvm_pmc *pmc) +static inline void pmc_release_perf_event(struct kvm_pmc *pmc) { if (pmc->perf_event) { - pmc->counter = pmc_read_counter(pmc); perf_event_release_kernel(pmc->perf_event); pmc->perf_event = NULL; + pmc->current_config = 0; + pmc_to_pmu(pmc)->event_count--; + } +} + +static inline void pmc_stop_counter(struct kvm_pmc *pmc) +{ + if (pmc->perf_event) { + pmc->counter = pmc_read_counter(pmc); + pmc_release_perf_event(pmc); } } @@ -79,6 +89,12 @@ static inline bool pmc_is_enabled(struct kvm_pmc *pmc) return kvm_x86_ops->pmu_ops->pmc_is_enabled(pmc); } +static inline bool kvm_valid_perf_global_ctrl(struct kvm_pmu *pmu, + u64 data) +{ + return !(pmu->global_ctrl_mask & data); +} + /* returns general purpose PMC with the specified MSR. Note that it can be * used for both PERFCTRn and EVNTSELn; that is why it accepts base as a * paramenter to tell them apart. 
@@ -110,13 +126,14 @@ void reprogram_counter(struct kvm_pmu *pmu, int pmc_idx); void kvm_pmu_deliver_pmi(struct kvm_vcpu *vcpu); void kvm_pmu_handle_event(struct kvm_vcpu *vcpu); int kvm_pmu_rdpmc(struct kvm_vcpu *vcpu, unsigned pmc, u64 *data); -int kvm_pmu_is_valid_msr_idx(struct kvm_vcpu *vcpu, unsigned idx); +int kvm_pmu_is_valid_rdpmc_ecx(struct kvm_vcpu *vcpu, unsigned int idx); bool kvm_pmu_is_valid_msr(struct kvm_vcpu *vcpu, u32 msr); int kvm_pmu_get_msr(struct kvm_vcpu *vcpu, u32 msr, u64 *data); int kvm_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info); void kvm_pmu_refresh(struct kvm_vcpu *vcpu); void kvm_pmu_reset(struct kvm_vcpu *vcpu); void kvm_pmu_init(struct kvm_vcpu *vcpu); +void kvm_pmu_cleanup(struct kvm_vcpu *vcpu); void kvm_pmu_destroy(struct kvm_vcpu *vcpu); int kvm_vm_ioctl_set_pmu_event_filter(struct kvm *kvm, void __user *argp); diff --git a/arch/x86/kvm/pmu_amd.c b/arch/x86/kvm/pmu_amd.c index c8388389a3b0..ce0b10fe5e2b 100644 --- a/arch/x86/kvm/pmu_amd.c +++ b/arch/x86/kvm/pmu_amd.c @@ -174,7 +174,7 @@ static struct kvm_pmc *amd_pmc_idx_to_pmc(struct kvm_pmu *pmu, int pmc_idx) } /* returns 0 if idx's corresponding MSR exists; otherwise returns 1. */ -static int amd_is_valid_msr_idx(struct kvm_vcpu *vcpu, unsigned idx) +static int amd_is_valid_rdpmc_ecx(struct kvm_vcpu *vcpu, unsigned int idx) { struct kvm_pmu *pmu = vcpu_to_pmu(vcpu); @@ -184,7 +184,8 @@ static int amd_is_valid_msr_idx(struct kvm_vcpu *vcpu, unsigned idx) } /* idx is the ECX register of RDPMC instruction */ -static struct kvm_pmc *amd_msr_idx_to_pmc(struct kvm_vcpu *vcpu, unsigned idx, u64 *mask) +static struct kvm_pmc *amd_rdpmc_ecx_to_pmc(struct kvm_vcpu *vcpu, + unsigned int idx, u64 *mask) { struct kvm_pmu *pmu = vcpu_to_pmu(vcpu); struct kvm_pmc *counters; @@ -199,13 +200,19 @@ static struct kvm_pmc *amd_msr_idx_to_pmc(struct kvm_vcpu *vcpu, unsigned idx, u static bool amd_is_valid_msr(struct kvm_vcpu *vcpu, u32 msr) { + /* All MSRs refer to exactly one PMC, so msr_idx_to_pmc is enough. */ + return false; +} + +static struct kvm_pmc *amd_msr_idx_to_pmc(struct kvm_vcpu *vcpu, u32 msr) +{ struct kvm_pmu *pmu = vcpu_to_pmu(vcpu); - int ret = false; + struct kvm_pmc *pmc; - ret = get_gp_pmc_amd(pmu, msr, PMU_TYPE_COUNTER) || - get_gp_pmc_amd(pmu, msr, PMU_TYPE_EVNTSEL); + pmc = get_gp_pmc_amd(pmu, msr, PMU_TYPE_COUNTER); + pmc = pmc ? 
pmc : get_gp_pmc_amd(pmu, msr, PMU_TYPE_EVNTSEL); - return ret; + return pmc; } static int amd_pmu_get_msr(struct kvm_vcpu *vcpu, u32 msr, u64 *data) @@ -272,6 +279,7 @@ static void amd_pmu_refresh(struct kvm_vcpu *vcpu) pmu->counter_bitmask[KVM_PMC_FIXED] = 0; pmu->nr_arch_fixed_counters = 0; pmu->global_status = 0; + bitmap_set(pmu->all_valid_pmc_idx, 0, pmu->nr_arch_gp_counters); } static void amd_pmu_init(struct kvm_vcpu *vcpu) @@ -285,6 +293,7 @@ static void amd_pmu_init(struct kvm_vcpu *vcpu) pmu->gp_counters[i].type = KVM_PMC_GP; pmu->gp_counters[i].vcpu = vcpu; pmu->gp_counters[i].idx = i; + pmu->gp_counters[i].current_config = 0; } } @@ -306,8 +315,9 @@ struct kvm_pmu_ops amd_pmu_ops = { .find_fixed_event = amd_find_fixed_event, .pmc_is_enabled = amd_pmc_is_enabled, .pmc_idx_to_pmc = amd_pmc_idx_to_pmc, + .rdpmc_ecx_to_pmc = amd_rdpmc_ecx_to_pmc, .msr_idx_to_pmc = amd_msr_idx_to_pmc, - .is_valid_msr_idx = amd_is_valid_msr_idx, + .is_valid_rdpmc_ecx = amd_is_valid_rdpmc_ecx, .is_valid_msr = amd_is_valid_msr, .get_msr = amd_pmu_get_msr, .set_msr = amd_pmu_set_msr, diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index f8ecb6df5106..362e874297e4 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -38,6 +38,7 @@ #include <linux/file.h> #include <linux/pagemap.h> #include <linux/swap.h> +#include <linux/rwsem.h> #include <asm/apic.h> #include <asm/perf_event.h> @@ -418,9 +419,13 @@ enum { #define VMCB_AVIC_APIC_BAR_MASK 0xFFFFFFFFFF000ULL +static int sev_flush_asids(void); +static DECLARE_RWSEM(sev_deactivate_lock); +static DEFINE_MUTEX(sev_bitmap_lock); static unsigned int max_sev_asid; static unsigned int min_sev_asid; static unsigned long *sev_asid_bitmap; +static unsigned long *sev_reclaim_asid_bitmap; #define __sme_page_pa(x) __sme_set(page_to_pfn(x) << PAGE_SHIFT) struct enc_region { @@ -734,8 +739,14 @@ static int get_npt_level(struct kvm_vcpu *vcpu) static void svm_set_efer(struct kvm_vcpu *vcpu, u64 efer) { vcpu->arch.efer = efer; - if (!npt_enabled && !(efer & EFER_LMA)) - efer &= ~EFER_LME; + + if (!npt_enabled) { + /* Shadow paging assumes NX to be available. 
*/ + efer |= EFER_NX; + + if (!(efer & EFER_LMA)) + efer &= ~EFER_LME; + } to_svm(vcpu)->vmcb->save.efer = efer | EFER_SVME; mark_dirty(to_svm(vcpu)->vmcb, VMCB_CR); @@ -1229,11 +1240,15 @@ static __init int sev_hardware_setup(void) /* Minimum ASID value that should be used for SEV guest */ min_sev_asid = cpuid_edx(0x8000001F); - /* Initialize SEV ASID bitmap */ + /* Initialize SEV ASID bitmaps */ sev_asid_bitmap = bitmap_zalloc(max_sev_asid, GFP_KERNEL); if (!sev_asid_bitmap) return 1; + sev_reclaim_asid_bitmap = bitmap_zalloc(max_sev_asid, GFP_KERNEL); + if (!sev_reclaim_asid_bitmap) + return 1; + status = kmalloc(sizeof(*status), GFP_KERNEL); if (!status) return 1; @@ -1412,8 +1427,12 @@ static __exit void svm_hardware_unsetup(void) { int cpu; - if (svm_sev_enabled()) + if (svm_sev_enabled()) { bitmap_free(sev_asid_bitmap); + bitmap_free(sev_reclaim_asid_bitmap); + + sev_flush_asids(); + } for_each_possible_cpu(cpu) svm_cpu_uninit(cpu); @@ -1723,25 +1742,22 @@ static int avic_init_backing_page(struct kvm_vcpu *vcpu) return 0; } -static void __sev_asid_free(int asid) +static void sev_asid_free(int asid) { struct svm_cpu_data *sd; int cpu, pos; + mutex_lock(&sev_bitmap_lock); + pos = asid - 1; - clear_bit(pos, sev_asid_bitmap); + __set_bit(pos, sev_reclaim_asid_bitmap); for_each_possible_cpu(cpu) { sd = per_cpu(svm_data, cpu); sd->sev_vmcbs[pos] = NULL; } -} -static void sev_asid_free(struct kvm *kvm) -{ - struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info; - - __sev_asid_free(sev->asid); + mutex_unlock(&sev_bitmap_lock); } static void sev_unbind_asid(struct kvm *kvm, unsigned int handle) @@ -1758,10 +1774,12 @@ static void sev_unbind_asid(struct kvm *kvm, unsigned int handle) /* deactivate handle */ data->handle = handle; + + /* Guard DEACTIVATE against WBINVD/DF_FLUSH used in ASID recycling */ + down_read(&sev_deactivate_lock); sev_guest_deactivate(data, NULL); + up_read(&sev_deactivate_lock); - wbinvd_on_all_cpus(); - sev_guest_df_flush(NULL); kfree(data); decommission = kzalloc(sizeof(*decommission), GFP_KERNEL); @@ -1910,7 +1928,7 @@ static void sev_vm_destroy(struct kvm *kvm) mutex_unlock(&kvm->lock); sev_unbind_asid(kvm, sev->handle); - sev_asid_free(kvm); + sev_asid_free(sev->asid); } static void avic_vm_destroy(struct kvm *kvm) @@ -2364,7 +2382,7 @@ static void svm_cache_reg(struct kvm_vcpu *vcpu, enum kvm_reg reg) load_pdptrs(vcpu, vcpu->arch.walk_mmu, kvm_read_cr3(vcpu)); break; default: - BUG(); + WARN_ON_ONCE(1); } } @@ -2517,10 +2535,6 @@ static void svm_decache_cr0_guest_bits(struct kvm_vcpu *vcpu) { } -static void svm_decache_cr3(struct kvm_vcpu *vcpu) -{ -} - static void svm_decache_cr4_guest_bits(struct kvm_vcpu *vcpu) { } @@ -4591,6 +4605,7 @@ static int avic_handle_ldr_update(struct kvm_vcpu *vcpu) int ret = 0; struct vcpu_svm *svm = to_svm(vcpu); u32 ldr = kvm_lapic_get_reg(vcpu->arch.apic, APIC_LDR); + u32 id = kvm_xapic_id(vcpu->arch.apic); if (ldr == svm->ldr_reg) return 0; @@ -4598,7 +4613,7 @@ static int avic_handle_ldr_update(struct kvm_vcpu *vcpu) avic_invalidate_logical_id_entry(vcpu); if (ldr) - ret = avic_ldr_write(vcpu, vcpu->vcpu_id, ldr); + ret = avic_ldr_write(vcpu, id, ldr); if (!ret) svm->ldr_reg = ldr; @@ -4610,8 +4625,7 @@ static int avic_handle_apic_id_update(struct kvm_vcpu *vcpu) { u64 *old, *new; struct vcpu_svm *svm = to_svm(vcpu); - u32 apic_id_reg = kvm_lapic_get_reg(vcpu->arch.apic, APIC_ID); - u32 id = (apic_id_reg >> 24) & 0xff; + u32 id = kvm_xapic_id(vcpu->arch.apic); if (vcpu->vcpu_id == id) return 0; @@ -4991,6 +5005,18 @@ static int 
handle_exit(struct kvm_vcpu *vcpu) return 0; } +#ifdef CONFIG_RETPOLINE + if (exit_code == SVM_EXIT_MSR) + return msr_interception(svm); + else if (exit_code == SVM_EXIT_VINTR) + return interrupt_window_interception(svm); + else if (exit_code == SVM_EXIT_INTR) + return intr_interception(svm); + else if (exit_code == SVM_EXIT_HLT) + return halt_interception(svm); + else if (exit_code == SVM_EXIT_NPF) + return npf_interception(svm); +#endif return svm_exit_handlers[exit_code](svm); } @@ -5086,8 +5112,7 @@ static void update_cr8_intercept(struct kvm_vcpu *vcpu, int tpr, int irr) { struct vcpu_svm *svm = to_svm(vcpu); - if (svm_nested_virtualize_tpr(vcpu) || - kvm_vcpu_apicv_active(vcpu)) + if (svm_nested_virtualize_tpr(vcpu)) return; clr_cr_intercept(svm, INTERCEPT_CR8_WRITE); @@ -5104,9 +5129,9 @@ static void svm_set_virtual_apic_mode(struct kvm_vcpu *vcpu) return; } -static bool svm_get_enable_apicv(struct kvm_vcpu *vcpu) +static bool svm_get_enable_apicv(struct kvm *kvm) { - return avic && irqchip_split(vcpu->kvm); + return avic && irqchip_split(kvm); } static void svm_hwapic_irr_update(struct kvm_vcpu *vcpu, int max_irr) @@ -5628,7 +5653,7 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu) svm->vmcb->save.cr2 = vcpu->arch.cr2; clgi(); - kvm_load_guest_xcr0(vcpu); + kvm_load_guest_xsave_state(vcpu); if (lapic_in_kernel(vcpu) && vcpu->arch.apic->lapic_timer.timer_advance_ns) @@ -5778,7 +5803,7 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu) if (unlikely(svm->vmcb->control.exit_code == SVM_EXIT_NMI)) kvm_before_interrupt(&svm->vcpu); - kvm_put_guest_xcr0(vcpu); + kvm_load_host_xsave_state(vcpu); stgi(); /* Any pending NMI will happen here */ @@ -5887,6 +5912,9 @@ static void svm_cpuid_update(struct kvm_vcpu *vcpu) { struct vcpu_svm *svm = to_svm(vcpu); + vcpu->arch.xsaves_enabled = guest_cpuid_has(vcpu, X86_FEATURE_XSAVE) && + boot_cpu_has(X86_FEATURE_XSAVES); + /* Update nrips enabled cache */ svm->nrips_enabled = !!guest_cpuid_has(&svm->vcpu, X86_FEATURE_NRIPS); @@ -5962,7 +5990,7 @@ static bool svm_mpx_supported(void) static bool svm_xsaves_supported(void) { - return false; + return boot_cpu_has(X86_FEATURE_XSAVES); } static bool svm_umip_emulated(void) @@ -6264,18 +6292,73 @@ static int enable_smi_window(struct kvm_vcpu *vcpu) return 0; } +static int sev_flush_asids(void) +{ + int ret, error; + + /* + * DEACTIVATE will clear the WBINVD indicator causing DF_FLUSH to fail, + * so it must be guarded. + */ + down_write(&sev_deactivate_lock); + + wbinvd_on_all_cpus(); + ret = sev_guest_df_flush(&error); + + up_write(&sev_deactivate_lock); + + if (ret) + pr_err("SEV: DF_FLUSH failed, ret=%d, error=%#x\n", ret, error); + + return ret; +} + +/* Must be called with the sev_bitmap_lock held */ +static bool __sev_recycle_asids(void) +{ + int pos; + + /* Check if there are any ASIDs to reclaim before performing a flush */ + pos = find_next_bit(sev_reclaim_asid_bitmap, + max_sev_asid, min_sev_asid - 1); + if (pos >= max_sev_asid) + return false; + + if (sev_flush_asids()) + return false; + + bitmap_xor(sev_asid_bitmap, sev_asid_bitmap, sev_reclaim_asid_bitmap, + max_sev_asid); + bitmap_zero(sev_reclaim_asid_bitmap, max_sev_asid); + + return true; +} + static int sev_asid_new(void) { + bool retry = true; int pos; + mutex_lock(&sev_bitmap_lock); + /* * SEV-enabled guest must use asid from min_sev_asid to max_sev_asid. 
*/ +again: pos = find_next_zero_bit(sev_asid_bitmap, max_sev_asid, min_sev_asid - 1); - if (pos >= max_sev_asid) + if (pos >= max_sev_asid) { + if (retry && __sev_recycle_asids()) { + retry = false; + goto again; + } + mutex_unlock(&sev_bitmap_lock); return -EBUSY; + } + + __set_bit(pos, sev_asid_bitmap); + + mutex_unlock(&sev_bitmap_lock); - set_bit(pos, sev_asid_bitmap); return pos + 1; } @@ -6303,7 +6386,7 @@ static int sev_guest_init(struct kvm *kvm, struct kvm_sev_cmd *argp) return 0; e_free: - __sev_asid_free(asid); + sev_asid_free(asid); return ret; } @@ -6313,12 +6396,6 @@ static int sev_bind_asid(struct kvm *kvm, unsigned int handle, int *error) int asid = sev_get_asid(kvm); int ret; - wbinvd_on_all_cpus(); - - ret = sev_guest_df_flush(error); - if (ret) - return ret; - data = kzalloc(sizeof(*data), GFP_KERNEL_ACCOUNT); if (!data) return -ENOMEM; @@ -7208,7 +7285,6 @@ static struct kvm_x86_ops svm_x86_ops __ro_after_init = { .get_cpl = svm_get_cpl, .get_cs_db_l_bits = kvm_get_cs_db_l_bits, .decache_cr0_guest_bits = svm_decache_cr0_guest_bits, - .decache_cr3 = svm_decache_cr3, .decache_cr4_guest_bits = svm_decache_cr4_guest_bits, .set_cr0 = svm_set_cr0, .set_cr3 = svm_set_cr3, diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c index 41abc62c9a8a..4aea7d304beb 100644 --- a/arch/x86/kvm/vmx/nested.c +++ b/arch/x86/kvm/vmx/nested.c @@ -10,6 +10,7 @@ #include "hyperv.h" #include "mmu.h" #include "nested.h" +#include "pmu.h" #include "trace.h" #include "x86.h" @@ -27,6 +28,16 @@ module_param(nested_early_check, bool, S_IRUGO); failed; \ }) +#define SET_MSR_OR_WARN(vcpu, idx, data) \ +({ \ + bool failed = kvm_set_msr(vcpu, idx, data); \ + if (failed) \ + pr_warn_ratelimited( \ + "%s cannot write MSR (0x%x, 0x%llx)\n", \ + __func__, idx, data); \ + failed; \ +}) + /* * Hyper-V requires all of these, so mark them as supported even though * they are just treated the same as all-context. @@ -257,7 +268,7 @@ static void free_nested(struct kvm_vcpu *vcpu) vmx->nested.cached_shadow_vmcs12 = NULL; /* Unpin physical memory we referred to in the vmcs02 */ if (vmx->nested.apic_access_page) { - kvm_release_page_dirty(vmx->nested.apic_access_page); + kvm_release_page_clean(vmx->nested.apic_access_page); vmx->nested.apic_access_page = NULL; } kvm_vcpu_unmap(vcpu, &vmx->nested.virtual_apic_map, true); @@ -929,6 +940,57 @@ fail: return i + 1; } +static bool nested_vmx_get_vmexit_msr_value(struct kvm_vcpu *vcpu, + u32 msr_index, + u64 *data) +{ + struct vcpu_vmx *vmx = to_vmx(vcpu); + + /* + * If the L0 hypervisor stored a more accurate value for the TSC that + * does not include the time taken for emulation of the L2->L1 + * VM-exit in L0, use the more accurate value. 
+ */ + if (msr_index == MSR_IA32_TSC) { + int index = vmx_find_msr_index(&vmx->msr_autostore.guest, + MSR_IA32_TSC); + + if (index >= 0) { + u64 val = vmx->msr_autostore.guest.val[index].value; + + *data = kvm_read_l1_tsc(vcpu, val); + return true; + } + } + + if (kvm_get_msr(vcpu, msr_index, data)) { + pr_debug_ratelimited("%s cannot read MSR (0x%x)\n", __func__, + msr_index); + return false; + } + return true; +} + +static bool read_and_check_msr_entry(struct kvm_vcpu *vcpu, u64 gpa, int i, + struct vmx_msr_entry *e) +{ + if (kvm_vcpu_read_guest(vcpu, + gpa + i * sizeof(*e), + e, 2 * sizeof(u32))) { + pr_debug_ratelimited( + "%s cannot read MSR entry (%u, 0x%08llx)\n", + __func__, i, gpa + i * sizeof(*e)); + return false; + } + if (nested_vmx_store_msr_check(vcpu, e)) { + pr_debug_ratelimited( + "%s check failed (%u, 0x%x, 0x%x)\n", + __func__, i, e->index, e->reserved); + return false; + } + return true; +} + static int nested_vmx_store_msr(struct kvm_vcpu *vcpu, u64 gpa, u32 count) { u64 data; @@ -940,26 +1002,12 @@ static int nested_vmx_store_msr(struct kvm_vcpu *vcpu, u64 gpa, u32 count) if (unlikely(i >= max_msr_list_size)) return -EINVAL; - if (kvm_vcpu_read_guest(vcpu, - gpa + i * sizeof(e), - &e, 2 * sizeof(u32))) { - pr_debug_ratelimited( - "%s cannot read MSR entry (%u, 0x%08llx)\n", - __func__, i, gpa + i * sizeof(e)); + if (!read_and_check_msr_entry(vcpu, gpa, i, &e)) return -EINVAL; - } - if (nested_vmx_store_msr_check(vcpu, &e)) { - pr_debug_ratelimited( - "%s check failed (%u, 0x%x, 0x%x)\n", - __func__, i, e.index, e.reserved); - return -EINVAL; - } - if (kvm_get_msr(vcpu, e.index, &data)) { - pr_debug_ratelimited( - "%s cannot read MSR (%u, 0x%x)\n", - __func__, i, e.index); + + if (!nested_vmx_get_vmexit_msr_value(vcpu, e.index, &data)) return -EINVAL; - } + if (kvm_vcpu_write_guest(vcpu, gpa + i * sizeof(e) + offsetof(struct vmx_msr_entry, value), @@ -973,6 +1021,60 @@ static int nested_vmx_store_msr(struct kvm_vcpu *vcpu, u64 gpa, u32 count) return 0; } +static bool nested_msr_store_list_has_msr(struct kvm_vcpu *vcpu, u32 msr_index) +{ + struct vmcs12 *vmcs12 = get_vmcs12(vcpu); + u32 count = vmcs12->vm_exit_msr_store_count; + u64 gpa = vmcs12->vm_exit_msr_store_addr; + struct vmx_msr_entry e; + u32 i; + + for (i = 0; i < count; i++) { + if (!read_and_check_msr_entry(vcpu, gpa, i, &e)) + return false; + + if (e.index == msr_index) + return true; + } + return false; +} + +static void prepare_vmx_msr_autostore_list(struct kvm_vcpu *vcpu, + u32 msr_index) +{ + struct vcpu_vmx *vmx = to_vmx(vcpu); + struct vmx_msrs *autostore = &vmx->msr_autostore.guest; + bool in_vmcs12_store_list; + int msr_autostore_index; + bool in_autostore_list; + int last; + + msr_autostore_index = vmx_find_msr_index(autostore, msr_index); + in_autostore_list = msr_autostore_index >= 0; + in_vmcs12_store_list = nested_msr_store_list_has_msr(vcpu, msr_index); + + if (in_vmcs12_store_list && !in_autostore_list) { + if (autostore->nr == NR_LOADSTORE_MSRS) { + /* + * Emulated VMEntry does not fail here. Instead a less + * accurate value will be returned by + * nested_vmx_get_vmexit_msr_value() using kvm_get_msr() + * instead of reading the value from the vmcs02 VMExit + * MSR-store area. + */ + pr_warn_ratelimited( + "Not enough msr entries in msr_autostore. 
Can't add msr %x\n", + msr_index); + return; + } + last = autostore->nr++; + autostore->val[last].index = msr_index; + } else if (!in_vmcs12_store_list && in_autostore_list) { + last = --autostore->nr; + autostore->val[msr_autostore_index] = autostore->val[last]; + } +} + static bool nested_cr3_valid(struct kvm_vcpu *vcpu, unsigned long val) { unsigned long invalid_mask; @@ -1012,7 +1114,7 @@ static int nested_vmx_load_cr3(struct kvm_vcpu *vcpu, unsigned long cr3, bool ne kvm_mmu_new_cr3(vcpu, cr3, false); vcpu->arch.cr3 = cr3; - __set_bit(VCPU_EXREG_CR3, (ulong *)&vcpu->arch.regs_avail); + kvm_register_mark_available(vcpu, VCPU_EXREG_CR3); kvm_init_mmu(vcpu, false); @@ -1024,7 +1126,9 @@ static int nested_vmx_load_cr3(struct kvm_vcpu *vcpu, unsigned long cr3, bool ne * populated by L2 differently than TLB entries populated * by L1. * - * If L1 uses EPT, then TLB entries are tagged with different EPTP. + * If L0 uses EPT, L1 and L2 run with different EPTP because + * guest_mode is part of kvm_mmu_page_role. Thus, TLB entries + * are tagged with different EPTP. * * If L1 uses VPID and we allocated a vpid02, TLB entries are tagged * with different VPID (L1 entries are tagged with vmx->vpid @@ -1034,7 +1138,7 @@ static bool nested_has_guest_tlb_tag(struct kvm_vcpu *vcpu) { struct vmcs12 *vmcs12 = get_vmcs12(vcpu); - return nested_cpu_has_ept(vmcs12) || + return enable_ept || (nested_cpu_has_vpid(vmcs12) && to_vmx(vcpu)->nested.vpid02); } @@ -2018,7 +2122,7 @@ static void prepare_vmcs02_constant_state(struct vcpu_vmx *vmx) * addresses are constant (for vmcs02), the counts can change based * on L2's behavior, e.g. switching to/from long mode. */ - vmcs_write32(VM_EXIT_MSR_STORE_COUNT, 0); + vmcs_write64(VM_EXIT_MSR_STORE_ADDR, __pa(vmx->msr_autostore.guest.val)); vmcs_write64(VM_EXIT_MSR_LOAD_ADDR, __pa(vmx->msr_autoload.host.val)); vmcs_write64(VM_ENTRY_MSR_LOAD_ADDR, __pa(vmx->msr_autoload.guest.val)); @@ -2073,6 +2177,7 @@ static void prepare_vmcs02_early(struct vcpu_vmx *vmx, struct vmcs12 *vmcs12) exec_control &= ~CPU_BASED_TPR_SHADOW; exec_control |= vmcs12->cpu_based_vm_exec_control; + vmx->nested.l1_tpr_threshold = -1; if (exec_control & CPU_BASED_TPR_SHADOW) vmcs_write32(TPR_THRESHOLD, vmcs12->tpr_threshold); #ifdef CONFIG_X86_64 @@ -2285,6 +2390,13 @@ static void prepare_vmcs02_rare(struct vcpu_vmx *vmx, struct vmcs12 *vmcs12) vmcs_write64(EOI_EXIT_BITMAP3, vmcs12->eoi_exit_bitmap3); } + /* + * Make sure the msr_autostore list is up to date before we set the + * count in the vmcs02. + */ + prepare_vmx_msr_autostore_list(&vmx->vcpu, MSR_IA32_TSC); + + vmcs_write32(VM_EXIT_MSR_STORE_COUNT, vmx->msr_autostore.guest.nr); vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, vmx->msr_autoload.host.nr); vmcs_write32(VM_ENTRY_MSR_LOAD_COUNT, vmx->msr_autoload.guest.nr); @@ -2381,9 +2493,6 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12, if (nested_cpu_has_ept(vmcs12)) nested_ept_init_mmu_context(vcpu); - else if (nested_cpu_has2(vmcs12, - SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES)) - vmx_flush_tlb(vcpu, true); /* * This sets GUEST_CR0 to vmcs12->guest_cr0, possibly modifying those @@ -2418,6 +2527,16 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12, entry_failure_code)) return -EINVAL; + /* + * Immediately write vmcs02.GUEST_CR3. It will be propagated to vmcs12 + * on nested VM-Exit, which can occur without actually running L2 and + * thus without hitting vmx_set_cr3(), e.g. 
if L1 is entering L2 with + * vmcs12.GUEST_ACTIVITYSTATE=HLT, in which case KVM will intercept the + * transition to HLT instead of running L2. + */ + if (enable_ept) + vmcs_writel(GUEST_CR3, vmcs12->guest_cr3); + /* Late preparation of GUEST_PDPTRs now that EFER and CRs are set. */ if (load_guest_pdptrs_vmcs12 && nested_cpu_has_ept(vmcs12) && is_pae_paging(vcpu)) { @@ -2430,6 +2549,11 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12, if (!enable_ept) vcpu->arch.walk_mmu->inject_page_fault = vmx_inject_page_fault_nested; + if ((vmcs12->vm_entry_controls & VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL) && + SET_MSR_OR_WARN(vcpu, MSR_CORE_PERF_GLOBAL_CTRL, + vmcs12->guest_ia32_perf_global_ctrl)) + return -EINVAL; + kvm_rsp_write(vcpu, vmcs12->guest_rsp); kvm_rip_write(vcpu, vmcs12->guest_rip); return 0; @@ -2610,7 +2734,7 @@ static int nested_check_vm_entry_controls(struct kvm_vcpu *vcpu, /* VM-entry exception error code */ if (CC(has_error_code && - vmcs12->vm_entry_exception_error_code & GENMASK(31, 15))) + vmcs12->vm_entry_exception_error_code & GENMASK(31, 16))) return -EINVAL; /* VM-entry interruption-info field: reserved bits */ @@ -2664,6 +2788,11 @@ static int nested_vmx_check_host_state(struct kvm_vcpu *vcpu, CC(!kvm_pat_valid(vmcs12->host_ia32_pat))) return -EINVAL; + if ((vmcs12->vm_exit_controls & VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL) && + CC(!kvm_valid_perf_global_ctrl(vcpu_to_pmu(vcpu), + vmcs12->host_ia32_perf_global_ctrl))) + return -EINVAL; + #ifdef CONFIG_X86_64 ia32e = !!(vcpu->arch.efer & EFER_LMA); #else @@ -2779,6 +2908,11 @@ static int nested_vmx_check_guest_state(struct kvm_vcpu *vcpu, return -EINVAL; } + if ((vmcs12->vm_entry_controls & VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL) && + CC(!kvm_valid_perf_global_ctrl(vcpu_to_pmu(vcpu), + vmcs12->guest_ia32_perf_global_ctrl))) + return -EINVAL; + /* * If the load IA32_EFER VM-entry control is 1, the following checks * are performed on the field for the IA32_EFER MSR: @@ -2917,7 +3051,7 @@ static int nested_vmx_check_vmentry_hw(struct kvm_vcpu *vcpu) static inline bool nested_vmx_prepare_msr_bitmap(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12); -static void nested_get_vmcs12_pages(struct kvm_vcpu *vcpu) +static bool nested_get_vmcs12_pages(struct kvm_vcpu *vcpu) { struct vmcs12 *vmcs12 = get_vmcs12(vcpu); struct vcpu_vmx *vmx = to_vmx(vcpu); @@ -2933,23 +3067,22 @@ static void nested_get_vmcs12_pages(struct kvm_vcpu *vcpu) * to it so we can release it later. */ if (vmx->nested.apic_access_page) { /* shouldn't happen */ - kvm_release_page_dirty(vmx->nested.apic_access_page); + kvm_release_page_clean(vmx->nested.apic_access_page); vmx->nested.apic_access_page = NULL; } page = kvm_vcpu_gpa_to_page(vcpu, vmcs12->apic_access_addr); - /* - * If translation failed, no matter: This feature asks - * to exit when accessing the given address, and if it - * can never be accessed, this feature won't do - * anything anyway. 
- */ if (!is_error_page(page)) { vmx->nested.apic_access_page = page; hpa = page_to_phys(vmx->nested.apic_access_page); vmcs_write64(APIC_ACCESS_ADDR, hpa); } else { - secondary_exec_controls_clearbit(vmx, - SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES); + pr_debug_ratelimited("%s: no backing 'struct page' for APIC-access address in vmcs12\n", + __func__); + vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR; + vcpu->run->internal.suberror = + KVM_INTERNAL_ERROR_EMULATION; + vcpu->run->internal.ndata = 0; + return false; } } @@ -2994,6 +3127,7 @@ static void nested_get_vmcs12_pages(struct kvm_vcpu *vcpu) exec_controls_setbit(vmx, CPU_BASED_USE_MSR_BITMAPS); else exec_controls_clearbit(vmx, CPU_BASED_USE_MSR_BITMAPS); + return true; } /* @@ -3032,13 +3166,15 @@ static void load_vmcs12_host_state(struct kvm_vcpu *vcpu, /* * If from_vmentry is false, this is being called from state restore (either RSM * or KVM_SET_NESTED_STATE). Otherwise it's called from vmlaunch/vmresume. -+ * -+ * Returns: -+ * 0 - success, i.e. proceed with actual VMEnter -+ * 1 - consistency check VMExit -+ * -1 - consistency check VMFail + * + * Returns: + * NVMX_ENTRY_SUCCESS: Entered VMX non-root mode + * NVMX_ENTRY_VMFAIL: Consistency check VMFail + * NVMX_ENTRY_VMEXIT: Consistency check VMExit + * NVMX_ENTRY_KVM_INTERNAL_ERROR: KVM internal error */ -int nested_vmx_enter_non_root_mode(struct kvm_vcpu *vcpu, bool from_vmentry) +enum nvmx_vmentry_status nested_vmx_enter_non_root_mode(struct kvm_vcpu *vcpu, + bool from_vmentry) { struct vcpu_vmx *vmx = to_vmx(vcpu); struct vmcs12 *vmcs12 = get_vmcs12(vcpu); @@ -3081,11 +3217,12 @@ int nested_vmx_enter_non_root_mode(struct kvm_vcpu *vcpu, bool from_vmentry) prepare_vmcs02_early(vmx, vmcs12); if (from_vmentry) { - nested_get_vmcs12_pages(vcpu); + if (unlikely(!nested_get_vmcs12_pages(vcpu))) + return NVMX_VMENTRY_KVM_INTERNAL_ERROR; if (nested_vmx_check_vmentry_hw(vcpu)) { vmx_switch_vmcs(vcpu, &vmx->vmcs01); - return -1; + return NVMX_VMENTRY_VMFAIL; } if (nested_vmx_check_guest_state(vcpu, vmcs12, &exit_qual)) @@ -3149,7 +3286,7 @@ int nested_vmx_enter_non_root_mode(struct kvm_vcpu *vcpu, bool from_vmentry) * returned as far as L1 is concerned. It will only return (and set * the success flag) when L2 exits (see nested_vmx_vmexit()). */ - return 0; + return NVMX_VMENTRY_SUCCESS; /* * A failed consistency check that leads to a VMExit during L1's @@ -3165,14 +3302,14 @@ vmentry_fail_vmexit: vmx_switch_vmcs(vcpu, &vmx->vmcs01); if (!from_vmentry) - return 1; + return NVMX_VMENTRY_VMEXIT; load_vmcs12_host_state(vcpu, vmcs12); vmcs12->vm_exit_reason = exit_reason | VMX_EXIT_REASONS_FAILED_VMENTRY; vmcs12->exit_qualification = exit_qual; if (enable_shadow_vmcs || vmx->nested.hv_evmcs) vmx->nested.need_vmcs12_to_shadow_sync = true; - return 1; + return NVMX_VMENTRY_VMEXIT; } /* @@ -3182,9 +3319,9 @@ vmentry_fail_vmexit: static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch) { struct vmcs12 *vmcs12; + enum nvmx_vmentry_status status; struct vcpu_vmx *vmx = to_vmx(vcpu); u32 interrupt_shadow = vmx_get_interrupt_shadow(vcpu); - int ret; if (!nested_vmx_check_permission(vcpu)) return 1; @@ -3244,13 +3381,9 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch) * the nested entry. 
*/ vmx->nested.nested_run_pending = 1; - ret = nested_vmx_enter_non_root_mode(vcpu, true); - vmx->nested.nested_run_pending = !ret; - if (ret > 0) - return 1; - else if (ret) - return nested_vmx_failValid(vcpu, - VMXERR_ENTRY_INVALID_CONTROL_FIELD); + status = nested_vmx_enter_non_root_mode(vcpu, true); + if (unlikely(status != NVMX_VMENTRY_SUCCESS)) + goto vmentry_failed; /* Hide L1D cache contents from the nested guest. */ vmx->vcpu.arch.l1tf_flush_l1d = true; @@ -3281,6 +3414,15 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch) return kvm_vcpu_halt(vcpu); } return 1; + +vmentry_failed: + vmx->nested.nested_run_pending = 0; + if (status == NVMX_VMENTRY_KVM_INTERNAL_ERROR) + return 0; + if (status == NVMX_VMENTRY_VMEXIT) + return 1; + WARN_ON_ONCE(status != NVMX_VMENTRY_VMFAIL); + return nested_vmx_failValid(vcpu, VMXERR_ENTRY_INVALID_CONTROL_FIELD); } /* @@ -3453,6 +3595,7 @@ static int vmx_check_nested_events(struct kvm_vcpu *vcpu, bool external_intr) test_bit(KVM_APIC_INIT, &apic->pending_events)) { if (block_nested_events) return -EBUSY; + clear_bit(KVM_APIC_INIT, &apic->pending_events); nested_vmx_vmexit(vcpu, EXIT_REASON_INIT_SIGNAL, 0, 0); return 0; } @@ -3856,8 +3999,8 @@ static void load_vmcs12_host_state(struct kvm_vcpu *vcpu, vcpu->arch.pat = vmcs12->host_ia32_pat; } if (vmcs12->vm_exit_controls & VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL) - vmcs_write64(GUEST_IA32_PERF_GLOBAL_CTRL, - vmcs12->host_ia32_perf_global_ctrl); + SET_MSR_OR_WARN(vcpu, MSR_CORE_PERF_GLOBAL_CTRL, + vmcs12->host_ia32_perf_global_ctrl); /* Set L1 segment info according to Intel SDM 27.5.2 Loading Host Segment and Descriptor-Table Registers */ @@ -3976,7 +4119,7 @@ static void nested_vmx_restore_host_state(struct kvm_vcpu *vcpu) nested_ept_uninit_mmu_context(vcpu); vcpu->arch.cr3 = vmcs_readl(GUEST_CR3); - __set_bit(VCPU_EXREG_CR3, (ulong *)&vcpu->arch.regs_avail); + kvm_register_mark_available(vcpu, VCPU_EXREG_CR3); /* * Use ept_save_pdptrs(vcpu) to load the MMU's cached PDPTRs @@ -4104,6 +4247,8 @@ void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 exit_reason, vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, vmx->msr_autoload.host.nr); vmcs_write32(VM_ENTRY_MSR_LOAD_COUNT, vmx->msr_autoload.guest.nr); vmcs_write64(TSC_OFFSET, vcpu->arch.tsc_offset); + if (vmx->nested.l1_tpr_threshold != -1) + vmcs_write32(TPR_THRESHOLD, vmx->nested.l1_tpr_threshold); if (kvm_has_tsc_control) decache_tsc_multiplier(vmx); @@ -4111,15 +4256,11 @@ void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 exit_reason, if (vmx->nested.change_vmcs01_virtual_apic_mode) { vmx->nested.change_vmcs01_virtual_apic_mode = false; vmx_set_virtual_apic_mode(vcpu); - } else if (!nested_cpu_has_ept(vmcs12) && - nested_cpu_has2(vmcs12, - SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES)) { - vmx_flush_tlb(vcpu, true); } /* Unpin physical memory we referred to in vmcs02 */ if (vmx->nested.apic_access_page) { - kvm_release_page_dirty(vmx->nested.apic_access_page); + kvm_release_page_clean(vmx->nested.apic_access_page); vmx->nested.apic_access_page = NULL; } kvm_vcpu_unmap(vcpu, &vmx->nested.virtual_apic_map, true); @@ -4319,6 +4460,27 @@ int get_vmx_mem_address(struct kvm_vcpu *vcpu, unsigned long exit_qualification, return 0; } +void nested_vmx_pmu_entry_exit_ctls_update(struct kvm_vcpu *vcpu) +{ + struct vcpu_vmx *vmx; + + if (!nested_vmx_allowed(vcpu)) + return; + + vmx = to_vmx(vcpu); + if (kvm_x86_ops->pmu_ops->is_valid_msr(vcpu, MSR_CORE_PERF_GLOBAL_CTRL)) { + vmx->nested.msrs.entry_ctls_high |= + VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL; + 
vmx->nested.msrs.exit_ctls_high |= + VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL; + } else { + vmx->nested.msrs.entry_ctls_high &= + ~VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL; + vmx->nested.msrs.exit_ctls_high &= + ~VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL; + } +} + static int nested_vmx_get_vmptr(struct kvm_vcpu *vcpu, gpa_t *vmpointer) { gva_t gva; @@ -5758,7 +5920,7 @@ error_guest_mode: return ret; } -void nested_vmx_vcpu_setup(void) +void nested_vmx_set_vmcs_shadowing_bitmap(void) { if (enable_shadow_vmcs) { vmcs_write64(VMREAD_BITMAP, __pa(vmx_vmread_bitmap)); @@ -6039,23 +6201,23 @@ __init int nested_vmx_hardware_setup(int (*exit_handlers[])(struct kvm_vcpu *)) init_vmcs_shadow_fields(); } - exit_handlers[EXIT_REASON_VMCLEAR] = handle_vmclear, - exit_handlers[EXIT_REASON_VMLAUNCH] = handle_vmlaunch, - exit_handlers[EXIT_REASON_VMPTRLD] = handle_vmptrld, - exit_handlers[EXIT_REASON_VMPTRST] = handle_vmptrst, - exit_handlers[EXIT_REASON_VMREAD] = handle_vmread, - exit_handlers[EXIT_REASON_VMRESUME] = handle_vmresume, - exit_handlers[EXIT_REASON_VMWRITE] = handle_vmwrite, - exit_handlers[EXIT_REASON_VMOFF] = handle_vmoff, - exit_handlers[EXIT_REASON_VMON] = handle_vmon, - exit_handlers[EXIT_REASON_INVEPT] = handle_invept, - exit_handlers[EXIT_REASON_INVVPID] = handle_invvpid, - exit_handlers[EXIT_REASON_VMFUNC] = handle_vmfunc, + exit_handlers[EXIT_REASON_VMCLEAR] = handle_vmclear; + exit_handlers[EXIT_REASON_VMLAUNCH] = handle_vmlaunch; + exit_handlers[EXIT_REASON_VMPTRLD] = handle_vmptrld; + exit_handlers[EXIT_REASON_VMPTRST] = handle_vmptrst; + exit_handlers[EXIT_REASON_VMREAD] = handle_vmread; + exit_handlers[EXIT_REASON_VMRESUME] = handle_vmresume; + exit_handlers[EXIT_REASON_VMWRITE] = handle_vmwrite; + exit_handlers[EXIT_REASON_VMOFF] = handle_vmoff; + exit_handlers[EXIT_REASON_VMON] = handle_vmon; + exit_handlers[EXIT_REASON_INVEPT] = handle_invept; + exit_handlers[EXIT_REASON_INVVPID] = handle_invvpid; + exit_handlers[EXIT_REASON_VMFUNC] = handle_vmfunc; kvm_x86_ops->check_nested_events = vmx_check_nested_events; kvm_x86_ops->get_nested_state = vmx_get_nested_state; kvm_x86_ops->set_nested_state = vmx_set_nested_state; - kvm_x86_ops->get_vmcs12_pages = nested_get_vmcs12_pages, + kvm_x86_ops->get_vmcs12_pages = nested_get_vmcs12_pages; kvm_x86_ops->nested_enable_evmcs = nested_enable_evmcs; kvm_x86_ops->nested_get_evmcs_version = nested_get_evmcs_version; diff --git a/arch/x86/kvm/vmx/nested.h b/arch/x86/kvm/vmx/nested.h index 187d39bf0bf1..fc874d4ead0f 100644 --- a/arch/x86/kvm/vmx/nested.h +++ b/arch/x86/kvm/vmx/nested.h @@ -6,14 +6,25 @@ #include "vmcs12.h" #include "vmx.h" +/* + * Status returned by nested_vmx_enter_non_root_mode(): + */ +enum nvmx_vmentry_status { + NVMX_VMENTRY_SUCCESS, /* Entered VMX non-root mode */ + NVMX_VMENTRY_VMFAIL, /* Consistency check VMFail */ + NVMX_VMENTRY_VMEXIT, /* Consistency check VMExit */ + NVMX_VMENTRY_KVM_INTERNAL_ERROR,/* KVM internal error */ +}; + void vmx_leave_nested(struct kvm_vcpu *vcpu); void nested_vmx_setup_ctls_msrs(struct nested_vmx_msrs *msrs, u32 ept_caps, bool apicv); void nested_vmx_hardware_unsetup(void); __init int nested_vmx_hardware_setup(int (*exit_handlers[])(struct kvm_vcpu *)); -void nested_vmx_vcpu_setup(void); +void nested_vmx_set_vmcs_shadowing_bitmap(void); void nested_vmx_free_vcpu(struct kvm_vcpu *vcpu); -int nested_vmx_enter_non_root_mode(struct kvm_vcpu *vcpu, bool from_vmentry); +enum nvmx_vmentry_status nested_vmx_enter_non_root_mode(struct kvm_vcpu *vcpu, + bool from_vmentry); bool nested_vmx_exit_reflected(struct 
kvm_vcpu *vcpu, u32 exit_reason); void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 exit_reason, u32 exit_intr_info, unsigned long exit_qualification); @@ -22,6 +33,7 @@ int vmx_set_vmx_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data); int vmx_get_vmx_msr(struct nested_vmx_msrs *msrs, u32 msr_index, u64 *pdata); int get_vmx_mem_address(struct kvm_vcpu *vcpu, unsigned long exit_qualification, u32 vmx_instruction_info, bool wr, int len, gva_t *ret); +void nested_vmx_pmu_entry_exit_ctls_update(struct kvm_vcpu *vcpu); static inline struct vmcs12 *get_vmcs12(struct kvm_vcpu *vcpu) { @@ -245,7 +257,7 @@ static inline bool fixed_bits_valid(u64 val, u64 fixed0, u64 fixed1) return ((val & fixed1) | fixed0) == val; } -static bool nested_guest_cr0_valid(struct kvm_vcpu *vcpu, unsigned long val) +static inline bool nested_guest_cr0_valid(struct kvm_vcpu *vcpu, unsigned long val) { u64 fixed0 = to_vmx(vcpu)->nested.msrs.cr0_fixed0; u64 fixed1 = to_vmx(vcpu)->nested.msrs.cr0_fixed1; @@ -259,7 +271,7 @@ static bool nested_guest_cr0_valid(struct kvm_vcpu *vcpu, unsigned long val) return fixed_bits_valid(val, fixed0, fixed1); } -static bool nested_host_cr0_valid(struct kvm_vcpu *vcpu, unsigned long val) +static inline bool nested_host_cr0_valid(struct kvm_vcpu *vcpu, unsigned long val) { u64 fixed0 = to_vmx(vcpu)->nested.msrs.cr0_fixed0; u64 fixed1 = to_vmx(vcpu)->nested.msrs.cr0_fixed1; @@ -267,7 +279,7 @@ static bool nested_host_cr0_valid(struct kvm_vcpu *vcpu, unsigned long val) return fixed_bits_valid(val, fixed0, fixed1); } -static bool nested_cr4_valid(struct kvm_vcpu *vcpu, unsigned long val) +static inline bool nested_cr4_valid(struct kvm_vcpu *vcpu, unsigned long val) { u64 fixed0 = to_vmx(vcpu)->nested.msrs.cr4_fixed0; u64 fixed1 = to_vmx(vcpu)->nested.msrs.cr4_fixed1; diff --git a/arch/x86/kvm/vmx/pmu_intel.c b/arch/x86/kvm/vmx/pmu_intel.c index 4dea0e0e7e39..7023138b1cb0 100644 --- a/arch/x86/kvm/vmx/pmu_intel.c +++ b/arch/x86/kvm/vmx/pmu_intel.c @@ -15,6 +15,7 @@ #include "x86.h" #include "cpuid.h" #include "lapic.h" +#include "nested.h" #include "pmu.h" static struct kvm_event_hw_type_mapping intel_arch_events[] = { @@ -46,6 +47,7 @@ static void reprogram_fixed_counters(struct kvm_pmu *pmu, u64 data) if (old_ctrl == new_ctrl) continue; + __set_bit(INTEL_PMC_IDX_FIXED + i, pmu->pmc_in_use); reprogram_fixed_counter(pmc, new_ctrl, i); } @@ -111,7 +113,7 @@ static struct kvm_pmc *intel_pmc_idx_to_pmc(struct kvm_pmu *pmu, int pmc_idx) } /* returns 0 if idx's corresponding MSR exists; otherwise returns 1. */ -static int intel_is_valid_msr_idx(struct kvm_vcpu *vcpu, unsigned idx) +static int intel_is_valid_rdpmc_ecx(struct kvm_vcpu *vcpu, unsigned int idx) { struct kvm_pmu *pmu = vcpu_to_pmu(vcpu); bool fixed = idx & (1u << 30); @@ -122,8 +124,8 @@ static int intel_is_valid_msr_idx(struct kvm_vcpu *vcpu, unsigned idx) (fixed && idx >= pmu->nr_arch_fixed_counters); } -static struct kvm_pmc *intel_msr_idx_to_pmc(struct kvm_vcpu *vcpu, - unsigned idx, u64 *mask) +static struct kvm_pmc *intel_rdpmc_ecx_to_pmc(struct kvm_vcpu *vcpu, + unsigned int idx, u64 *mask) { struct kvm_pmu *pmu = vcpu_to_pmu(vcpu); bool fixed = idx & (1u << 30); @@ -162,6 +164,18 @@ static bool intel_is_valid_msr(struct kvm_vcpu *vcpu, u32 msr) return ret; } +static struct kvm_pmc *intel_msr_idx_to_pmc(struct kvm_vcpu *vcpu, u32 msr) +{ + struct kvm_pmu *pmu = vcpu_to_pmu(vcpu); + struct kvm_pmc *pmc; + + pmc = get_fixed_pmc(pmu, msr); + pmc = pmc ? pmc : get_gp_pmc(pmu, msr, MSR_P6_EVNTSEL0); + pmc = pmc ? 
pmc : get_gp_pmc(pmu, msr, MSR_IA32_PERFCTR0); + + return pmc; +} + static int intel_pmu_get_msr(struct kvm_vcpu *vcpu, u32 msr, u64 *data) { struct kvm_pmu *pmu = vcpu_to_pmu(vcpu); @@ -223,7 +237,7 @@ static int intel_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) case MSR_CORE_PERF_GLOBAL_CTRL: if (pmu->global_ctrl == data) return 0; - if (!(data & pmu->global_ctrl_mask)) { + if (kvm_valid_perf_global_ctrl(pmu, data)) { global_ctrl_changed(pmu, data); return 0; } @@ -262,6 +276,7 @@ static int intel_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) static void intel_pmu_refresh(struct kvm_vcpu *vcpu) { struct kvm_pmu *pmu = vcpu_to_pmu(vcpu); + struct x86_pmu_capability x86_pmu; struct kvm_cpuid_entry2 *entry; union cpuid10_eax eax; union cpuid10_edx edx; @@ -283,8 +298,10 @@ static void intel_pmu_refresh(struct kvm_vcpu *vcpu) if (!pmu->version) return; + perf_get_x86_pmu_capability(&x86_pmu); + pmu->nr_arch_gp_counters = min_t(int, eax.split.num_counters, - INTEL_PMC_MAX_GENERIC); + x86_pmu.num_counters_gp); pmu->counter_bitmask[KVM_PMC_GP] = ((u64)1 << eax.split.bit_width) - 1; pmu->available_event_types = ~entry->ebx & ((1ull << eax.split.mask_length) - 1); @@ -294,7 +311,7 @@ static void intel_pmu_refresh(struct kvm_vcpu *vcpu) } else { pmu->nr_arch_fixed_counters = min_t(int, edx.split.num_counters_fixed, - INTEL_PMC_MAX_FIXED); + x86_pmu.num_counters_fixed); pmu->counter_bitmask[KVM_PMC_FIXED] = ((u64)1 << edx.split.bit_width_fixed) - 1; } @@ -314,6 +331,13 @@ static void intel_pmu_refresh(struct kvm_vcpu *vcpu) (boot_cpu_has(X86_FEATURE_HLE) || boot_cpu_has(X86_FEATURE_RTM)) && (entry->ebx & (X86_FEATURE_HLE|X86_FEATURE_RTM))) pmu->reserved_bits ^= HSW_IN_TX|HSW_IN_TX_CHECKPOINTED; + + bitmap_set(pmu->all_valid_pmc_idx, + 0, pmu->nr_arch_gp_counters); + bitmap_set(pmu->all_valid_pmc_idx, + INTEL_PMC_MAX_GENERIC, pmu->nr_arch_fixed_counters); + + nested_vmx_pmu_entry_exit_ctls_update(vcpu); } static void intel_pmu_init(struct kvm_vcpu *vcpu) @@ -325,12 +349,14 @@ static void intel_pmu_init(struct kvm_vcpu *vcpu) pmu->gp_counters[i].type = KVM_PMC_GP; pmu->gp_counters[i].vcpu = vcpu; pmu->gp_counters[i].idx = i; + pmu->gp_counters[i].current_config = 0; } for (i = 0; i < INTEL_PMC_MAX_FIXED; i++) { pmu->fixed_counters[i].type = KVM_PMC_FIXED; pmu->fixed_counters[i].vcpu = vcpu; pmu->fixed_counters[i].idx = i + INTEL_PMC_IDX_FIXED; + pmu->fixed_counters[i].current_config = 0; } } @@ -363,8 +389,9 @@ struct kvm_pmu_ops intel_pmu_ops = { .find_fixed_event = intel_find_fixed_event, .pmc_is_enabled = intel_pmc_is_enabled, .pmc_idx_to_pmc = intel_pmc_idx_to_pmc, + .rdpmc_ecx_to_pmc = intel_rdpmc_ecx_to_pmc, .msr_idx_to_pmc = intel_msr_idx_to_pmc, - .is_valid_msr_idx = intel_is_valid_msr_idx, + .is_valid_rdpmc_ecx = intel_is_valid_rdpmc_ecx, .is_valid_msr = intel_is_valid_msr, .get_msr = intel_pmu_get_msr, .set_msr = intel_pmu_set_msr, diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index d4575ffb3cec..d175429c91b0 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -106,8 +106,6 @@ module_param(enable_apicv, bool, S_IRUGO); static bool __read_mostly nested = 1; module_param(nested, bool, S_IRUGO); -static u64 __read_mostly host_xss; - bool __read_mostly enable_pml = 1; module_param_named(pml, enable_pml, bool, S_IRUGO); @@ -209,6 +207,11 @@ static int vmx_setup_l1d_flush(enum vmx_l1d_flush_state l1tf) struct page *page; unsigned int i; + if (!boot_cpu_has_bug(X86_BUG_L1TF)) { + l1tf_vmx_mitigation = VMENTER_L1D_FLUSH_NOT_REQUIRED; 
+ return 0; + } + if (!enable_ept) { l1tf_vmx_mitigation = VMENTER_L1D_FLUSH_EPT_DISABLED; return 0; @@ -445,6 +448,7 @@ const u32 vmx_msr_index[] = { MSR_SYSCALL_MASK, MSR_LSTAR, MSR_CSTAR, #endif MSR_EFER, MSR_TSC_AUX, MSR_STAR, + MSR_IA32_TSX_CTRL, }; #if IS_ENABLED(CONFIG_HYPERV) @@ -633,6 +637,23 @@ struct shared_msr_entry *find_msr_entry(struct vcpu_vmx *vmx, u32 msr) return NULL; } +static int vmx_set_guest_msr(struct vcpu_vmx *vmx, struct shared_msr_entry *msr, u64 data) +{ + int ret = 0; + + u64 old_msr_data = msr->data; + msr->data = data; + if (msr - vmx->guest_msrs < vmx->save_nmsrs) { + preempt_disable(); + ret = kvm_set_shared_msr(msr->index, msr->data, + msr->mask); + preempt_enable(); + if (ret) + msr->data = old_msr_data; + } + return ret; +} + void loaded_vmcs_init(struct loaded_vmcs *loaded_vmcs) { vmcs_clear(loaded_vmcs->vmcs); @@ -721,8 +742,8 @@ static bool vmx_segment_cache_test_set(struct vcpu_vmx *vmx, unsigned seg, bool ret; u32 mask = 1 << (seg * SEG_FIELD_NR + field); - if (!(vmx->vcpu.arch.regs_avail & (1 << VCPU_EXREG_SEGMENTS))) { - vmx->vcpu.arch.regs_avail |= (1 << VCPU_EXREG_SEGMENTS); + if (!kvm_register_is_available(&vmx->vcpu, VCPU_EXREG_SEGMENTS)) { + kvm_register_mark_available(&vmx->vcpu, VCPU_EXREG_SEGMENTS); vmx->segment_cache.bitmask = 0; } ret = vmx->segment_cache.bitmask & mask; @@ -830,7 +851,7 @@ static void clear_atomic_switch_msr_special(struct vcpu_vmx *vmx, vm_exit_controls_clearbit(vmx, exit); } -static int find_msr(struct vmx_msrs *m, unsigned int msr) +int vmx_find_msr_index(struct vmx_msrs *m, u32 msr) { unsigned int i; @@ -864,7 +885,7 @@ static void clear_atomic_switch_msr(struct vcpu_vmx *vmx, unsigned msr) } break; } - i = find_msr(&m->guest, msr); + i = vmx_find_msr_index(&m->guest, msr); if (i < 0) goto skip_guest; --m->guest.nr; @@ -872,7 +893,7 @@ static void clear_atomic_switch_msr(struct vcpu_vmx *vmx, unsigned msr) vmcs_write32(VM_ENTRY_MSR_LOAD_COUNT, m->guest.nr); skip_guest: - i = find_msr(&m->host, msr); + i = vmx_find_msr_index(&m->host, msr); if (i < 0) return; @@ -931,12 +952,12 @@ static void add_atomic_switch_msr(struct vcpu_vmx *vmx, unsigned msr, wrmsrl(MSR_IA32_PEBS_ENABLE, 0); } - i = find_msr(&m->guest, msr); + i = vmx_find_msr_index(&m->guest, msr); if (!entry_only) - j = find_msr(&m->host, msr); + j = vmx_find_msr_index(&m->host, msr); - if ((i < 0 && m->guest.nr == NR_AUTOLOAD_MSRS) || - (j < 0 && m->host.nr == NR_AUTOLOAD_MSRS)) { + if ((i < 0 && m->guest.nr == NR_LOADSTORE_MSRS) || + (j < 0 && m->host.nr == NR_LOADSTORE_MSRS)) { printk_once(KERN_WARNING "Not enough msr switch entries. " "Can't add msr %x\n", msr); return; @@ -964,17 +985,9 @@ static bool update_transition_efer(struct vcpu_vmx *vmx, int efer_offset) u64 guest_efer = vmx->vcpu.arch.efer; u64 ignore_bits = 0; - if (!enable_ept) { - /* - * NX is needed to handle CR0.WP=1, CR4.SMEP=1. Testing - * host CPUID is more efficient than testing guest CPUID - * or CR4. Host SMEP is anyway a requirement for guest SMEP. - */ - if (boot_cpu_has(X86_FEATURE_SMEP)) - guest_efer |= EFER_NX; - else if (!(guest_efer & EFER_NX)) - ignore_bits |= EFER_NX; - } + /* Shadow paging assumes NX to be available. */ + if (!enable_ept) + guest_efer |= EFER_NX; /* * LMA and LME handled by hardware; SCE meaningless outside long mode. 
@@ -1271,6 +1284,18 @@ static void vmx_vcpu_pi_load(struct kvm_vcpu *vcpu, int cpu) if (!pi_test_sn(pi_desc) && vcpu->cpu == cpu) return; + /* + * If the 'nv' field is POSTED_INTR_WAKEUP_VECTOR, do not change + * PI.NDST: pi_post_block is the one expected to change PID.NDST and the + * wakeup handler expects the vCPU to be on the blocked_vcpu_list that + * matches PI.NDST. Otherwise, a vcpu may not be able to be woken up + * correctly. + */ + if (pi_desc->nv == POSTED_INTR_WAKEUP_VECTOR || vcpu->cpu == cpu) { + pi_clear_sn(pi_desc); + goto after_clear_sn; + } + /* The full case. */ do { old.control = new.control = pi_desc->control; @@ -1286,6 +1311,8 @@ static void vmx_vcpu_pi_load(struct kvm_vcpu *vcpu, int cpu) } while (cmpxchg64(&pi_desc->control, old.control, new.control) != old.control); +after_clear_sn: + /* * Clear SN before reading the bitmap. The VT-d firmware * writes the bitmap and reads SN atomically (5.2.3 in the @@ -1294,7 +1321,7 @@ static void vmx_vcpu_pi_load(struct kvm_vcpu *vcpu, int cpu) */ smp_mb__after_atomic(); - if (!bitmap_empty((unsigned long *)pi_desc->pir, NR_VECTORS)) + if (!pi_is_pir_empty(pi_desc)) pi_set_on(pi_desc); } @@ -1407,35 +1434,44 @@ static void vmx_decache_cr0_guest_bits(struct kvm_vcpu *vcpu); unsigned long vmx_get_rflags(struct kvm_vcpu *vcpu) { + struct vcpu_vmx *vmx = to_vmx(vcpu); unsigned long rflags, save_rflags; - if (!test_bit(VCPU_EXREG_RFLAGS, (ulong *)&vcpu->arch.regs_avail)) { - __set_bit(VCPU_EXREG_RFLAGS, (ulong *)&vcpu->arch.regs_avail); + if (!kvm_register_is_available(vcpu, VCPU_EXREG_RFLAGS)) { + kvm_register_mark_available(vcpu, VCPU_EXREG_RFLAGS); rflags = vmcs_readl(GUEST_RFLAGS); - if (to_vmx(vcpu)->rmode.vm86_active) { + if (vmx->rmode.vm86_active) { rflags &= RMODE_GUEST_OWNED_EFLAGS_BITS; - save_rflags = to_vmx(vcpu)->rmode.save_rflags; + save_rflags = vmx->rmode.save_rflags; rflags |= save_rflags & ~RMODE_GUEST_OWNED_EFLAGS_BITS; } - to_vmx(vcpu)->rflags = rflags; + vmx->rflags = rflags; } - return to_vmx(vcpu)->rflags; + return vmx->rflags; } void vmx_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags) { - unsigned long old_rflags = vmx_get_rflags(vcpu); + struct vcpu_vmx *vmx = to_vmx(vcpu); + unsigned long old_rflags; - __set_bit(VCPU_EXREG_RFLAGS, (ulong *)&vcpu->arch.regs_avail); - to_vmx(vcpu)->rflags = rflags; - if (to_vmx(vcpu)->rmode.vm86_active) { - to_vmx(vcpu)->rmode.save_rflags = rflags; + if (enable_unrestricted_guest) { + kvm_register_mark_available(vcpu, VCPU_EXREG_RFLAGS); + vmx->rflags = rflags; + vmcs_writel(GUEST_RFLAGS, rflags); + return; + } + + old_rflags = vmx_get_rflags(vcpu); + vmx->rflags = rflags; + if (vmx->rmode.vm86_active) { + vmx->rmode.save_rflags = rflags; rflags |= X86_EFLAGS_IOPL | X86_EFLAGS_VM; } vmcs_writel(GUEST_RFLAGS, rflags); - if ((old_rflags ^ to_vmx(vcpu)->rflags) & X86_EFLAGS_VM) - to_vmx(vcpu)->emulation_required = emulation_required(vcpu); + if ((old_rflags ^ vmx->rflags) & X86_EFLAGS_VM) + vmx->emulation_required = emulation_required(vcpu); } u32 vmx_get_interrupt_shadow(struct kvm_vcpu *vcpu) @@ -1672,6 +1708,9 @@ static void setup_msrs(struct vcpu_vmx *vmx) index = __find_msr_index(vmx, MSR_TSC_AUX); if (index >= 0 && guest_cpuid_has(&vmx->vcpu, X86_FEATURE_RDTSCP)) move_msr_up(vmx, index, save_nmsrs++); + index = __find_msr_index(vmx, MSR_IA32_TSX_CTRL); + if (index >= 0) + move_msr_up(vmx, index, save_nmsrs++); vmx->save_nmsrs = save_nmsrs; vmx->guest_msrs_ready = false; @@ -1771,6 +1810,11 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data 
*msr_info) #endif case MSR_EFER: return kvm_get_msr_common(vcpu, msr_info); + case MSR_IA32_TSX_CTRL: + if (!msr_info->host_initiated && + !(vcpu->arch.arch_capabilities & ARCH_CAP_TSX_CTRL_MSR)) + return 1; + goto find_shared_msr; case MSR_IA32_UMWAIT_CONTROL: if (!msr_info->host_initiated && !vmx_has_waitpkg(vmx)) return 1; @@ -1815,14 +1859,6 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) return 1; return vmx_get_vmx_msr(&vmx->nested.msrs, msr_info->index, &msr_info->data); - case MSR_IA32_XSS: - if (!vmx_xsaves_supported() || - (!msr_info->host_initiated && - !(guest_cpuid_has(vcpu, X86_FEATURE_XSAVE) && - guest_cpuid_has(vcpu, X86_FEATURE_XSAVES)))) - return 1; - msr_info->data = vcpu->arch.ia32_xss; - break; case MSR_IA32_RTIT_CTL: if (pt_mode != PT_MODE_HOST_GUEST) return 1; @@ -1873,8 +1909,9 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) if (!msr_info->host_initiated && !guest_cpuid_has(vcpu, X86_FEATURE_RDTSCP)) return 1; - /* Else, falls through */ + goto find_shared_msr; default: + find_shared_msr: msr = find_msr_entry(vmx, msr_info->index); if (msr) { msr_info->data = msr->data; @@ -1990,6 +2027,13 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) MSR_IA32_SPEC_CTRL, MSR_TYPE_RW); break; + case MSR_IA32_TSX_CTRL: + if (!msr_info->host_initiated && + !(vcpu->arch.arch_capabilities & ARCH_CAP_TSX_CTRL_MSR)) + return 1; + if (data & ~(TSX_CTRL_RTM_DISABLE | TSX_CTRL_CPUID_CLEAR)) + return 1; + goto find_shared_msr; case MSR_IA32_PRED_CMD: if (!msr_info->host_initiated && !guest_cpuid_has(vcpu, X86_FEATURE_SPEC_CTRL)) @@ -2058,25 +2102,6 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) if (!nested_vmx_allowed(vcpu)) return 1; return vmx_set_vmx_msr(vcpu, msr_index, data); - case MSR_IA32_XSS: - if (!vmx_xsaves_supported() || - (!msr_info->host_initiated && - !(guest_cpuid_has(vcpu, X86_FEATURE_XSAVE) && - guest_cpuid_has(vcpu, X86_FEATURE_XSAVES)))) - return 1; - /* - * The only supported bit as of Skylake is bit 8, but - * it is not supported on KVM. 
- */ - if (data != 0) - return 1; - vcpu->arch.ia32_xss = data; - if (vcpu->arch.ia32_xss != host_xss) - add_atomic_switch_msr(vmx, MSR_IA32_XSS, - vcpu->arch.ia32_xss, host_xss, false); - else - clear_atomic_switch_msr(vmx, MSR_IA32_XSS); - break; case MSR_IA32_RTIT_CTL: if ((pt_mode != PT_MODE_HOST_GUEST) || vmx_rtit_ctl_check(vcpu, data) || @@ -2141,23 +2166,15 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) /* Check reserved bit, higher 32 bits should be zero */ if ((data >> 32) != 0) return 1; - /* Else, falls through */ + goto find_shared_msr; + default: + find_shared_msr: msr = find_msr_entry(vmx, msr_index); - if (msr) { - u64 old_msr_data = msr->data; - msr->data = data; - if (msr - vmx->guest_msrs < vmx->save_nmsrs) { - preempt_disable(); - ret = kvm_set_shared_msr(msr->index, msr->data, - msr->mask); - preempt_enable(); - if (ret) - msr->data = old_msr_data; - } - break; - } - ret = kvm_set_msr_common(vcpu, msr_info); + if (msr) + ret = vmx_set_guest_msr(vmx, msr, data); + else + ret = kvm_set_msr_common(vcpu, msr_info); } return ret; @@ -2165,7 +2182,8 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) static void vmx_cache_reg(struct kvm_vcpu *vcpu, enum kvm_reg reg) { - __set_bit(reg, (unsigned long *)&vcpu->arch.regs_avail); + kvm_register_mark_available(vcpu, reg); + switch (reg) { case VCPU_REGS_RSP: vcpu->arch.regs[VCPU_REGS_RSP] = vmcs_readl(GUEST_RSP); @@ -2177,7 +2195,12 @@ static void vmx_cache_reg(struct kvm_vcpu *vcpu, enum kvm_reg reg) if (enable_ept) ept_save_pdptrs(vcpu); break; + case VCPU_EXREG_CR3: + if (enable_unrestricted_guest || (enable_ept && is_paging(vcpu))) + vcpu->arch.cr3 = vmcs_readl(GUEST_CR3); + break; default: + WARN_ON_ONCE(1); break; } } @@ -2848,13 +2871,6 @@ static void vmx_decache_cr0_guest_bits(struct kvm_vcpu *vcpu) vcpu->arch.cr0 |= vmcs_readl(GUEST_CR0) & cr0_guest_owned_bits; } -static void vmx_decache_cr3(struct kvm_vcpu *vcpu) -{ - if (enable_unrestricted_guest || (enable_ept && is_paging(vcpu))) - vcpu->arch.cr3 = vmcs_readl(GUEST_CR3); - __set_bit(VCPU_EXREG_CR3, (ulong *)&vcpu->arch.regs_avail); -} - static void vmx_decache_cr4_guest_bits(struct kvm_vcpu *vcpu) { ulong cr4_guest_owned_bits = vcpu->arch.cr4_guest_owned_bits; @@ -2867,8 +2883,7 @@ static void ept_load_pdptrs(struct kvm_vcpu *vcpu) { struct kvm_mmu *mmu = vcpu->arch.walk_mmu; - if (!test_bit(VCPU_EXREG_PDPTR, - (unsigned long *)&vcpu->arch.regs_dirty)) + if (!kvm_register_is_dirty(vcpu, VCPU_EXREG_PDPTR)) return; if (is_pae_paging(vcpu)) { @@ -2890,10 +2905,7 @@ void ept_save_pdptrs(struct kvm_vcpu *vcpu) mmu->pdptrs[3] = vmcs_read64(GUEST_PDPTR3); } - __set_bit(VCPU_EXREG_PDPTR, - (unsigned long *)&vcpu->arch.regs_avail); - __set_bit(VCPU_EXREG_PDPTR, - (unsigned long *)&vcpu->arch.regs_dirty); + kvm_register_mark_dirty(vcpu, VCPU_EXREG_PDPTR); } static void ept_update_paging_mode_cr0(unsigned long *hw_cr0, @@ -2902,8 +2914,8 @@ static void ept_update_paging_mode_cr0(unsigned long *hw_cr0, { struct vcpu_vmx *vmx = to_vmx(vcpu); - if (!test_bit(VCPU_EXREG_CR3, (ulong *)&vcpu->arch.regs_avail)) - vmx_decache_cr3(vcpu); + if (!kvm_register_is_available(vcpu, VCPU_EXREG_CR3)) + vmx_cache_reg(vcpu, VCPU_EXREG_CR3); if (!(cr0 & X86_CR0_PG)) { /* From paging/starting to nonpaging */ exec_controls_setbit(vmx, CPU_BASED_CR3_LOAD_EXITING | @@ -2984,6 +2996,7 @@ u64 construct_eptp(struct kvm_vcpu *vcpu, unsigned long root_hpa) void vmx_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3) { struct kvm *kvm = vcpu->kvm; + bool 
update_guest_cr3 = true; unsigned long guest_cr3; u64 eptp; @@ -3000,15 +3013,20 @@ void vmx_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3) spin_unlock(&to_kvm_vmx(kvm)->ept_pointer_lock); } - if (enable_unrestricted_guest || is_paging(vcpu) || - is_guest_mode(vcpu)) - guest_cr3 = kvm_read_cr3(vcpu); - else + /* Loading vmcs02.GUEST_CR3 is handled by nested VM-Enter. */ + if (is_guest_mode(vcpu)) + update_guest_cr3 = false; + else if (!enable_unrestricted_guest && !is_paging(vcpu)) guest_cr3 = to_kvm_vmx(kvm)->ept_identity_map_addr; + else if (test_bit(VCPU_EXREG_CR3, (ulong *)&vcpu->arch.regs_avail)) + guest_cr3 = vcpu->arch.cr3; + else /* vmcs01.GUEST_CR3 is already up-to-date. */ + update_guest_cr3 = false; ept_load_pdptrs(vcpu); } - vmcs_writel(GUEST_CR3, guest_cr3); + if (update_guest_cr3) + vmcs_writel(GUEST_CR3, guest_cr3); } int vmx_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) @@ -3742,7 +3760,7 @@ void pt_update_intercept_for_msr(struct vcpu_vmx *vmx) } } -static bool vmx_get_enable_apicv(struct kvm_vcpu *vcpu) +static bool vmx_get_enable_apicv(struct kvm *kvm) { return enable_apicv; } @@ -4035,6 +4053,8 @@ static void vmx_compute_secondary_exec_control(struct vcpu_vmx *vmx) guest_cpuid_has(vcpu, X86_FEATURE_XSAVE) && guest_cpuid_has(vcpu, X86_FEATURE_XSAVES); + vcpu->arch.xsaves_enabled = xsaves_enabled; + if (!xsaves_enabled) exec_control &= ~SECONDARY_EXEC_XSAVES; @@ -4147,14 +4167,13 @@ static void ept_set_mmio_spte_mask(void) #define VMX_XSS_EXIT_BITMAP 0 /* - * Sets up the vmcs for emulated real mode. + * Noting that the initialization of Guest-state Area of VMCS is in + * vmx_vcpu_reset(). */ -static void vmx_vcpu_setup(struct vcpu_vmx *vmx) +static void init_vmcs(struct vcpu_vmx *vmx) { - int i; - if (nested) - nested_vmx_vcpu_setup(); + nested_vmx_set_vmcs_shadowing_bitmap(); if (cpu_has_vmx_msr_bitmap()) vmcs_write64(MSR_BITMAP, __pa(vmx->vmcs01.msr_bitmap)); @@ -4163,7 +4182,6 @@ static void vmx_vcpu_setup(struct vcpu_vmx *vmx) /* Control */ pin_controls_set(vmx, vmx_pin_based_exec_ctrl(vmx)); - vmx->hv_deadline_tsc = -1; exec_controls_set(vmx, vmx_exec_control(vmx)); @@ -4212,21 +4230,6 @@ static void vmx_vcpu_setup(struct vcpu_vmx *vmx) if (vmcs_config.vmentry_ctrl & VM_ENTRY_LOAD_IA32_PAT) vmcs_write64(GUEST_IA32_PAT, vmx->vcpu.arch.pat); - for (i = 0; i < ARRAY_SIZE(vmx_msr_index); ++i) { - u32 index = vmx_msr_index[i]; - u32 data_low, data_high; - int j = vmx->nmsrs; - - if (rdmsr_safe(index, &data_low, &data_high) < 0) - continue; - if (wrmsr_safe(index, data_low, data_high) < 0) - continue; - vmx->guest_msrs[j].index = i; - vmx->guest_msrs[j].data = 0; - vmx->guest_msrs[j].mask = -1ull; - ++vmx->nmsrs; - } - vm_exit_controls_set(vmx, vmx_vmexit_ctrl()); /* 22.2.1, 20.8.1 */ @@ -4237,6 +4240,9 @@ static void vmx_vcpu_setup(struct vcpu_vmx *vmx) set_cr4_guest_host_mask(vmx); + if (vmx->vpid != 0) + vmcs_write16(VIRTUAL_PROCESSOR_ID, vmx->vpid); + if (vmx_xsaves_supported()) vmcs_write64(XSS_EXIT_BITMAP, VMX_XSS_EXIT_BITMAP); @@ -4339,9 +4345,6 @@ static void vmx_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event) kvm_make_request(KVM_REQ_APIC_PAGE_RELOAD, vcpu); - if (vmx->vpid != 0) - vmcs_write16(VIRTUAL_PROCESSOR_ID, vmx->vpid); - cr0 = X86_CR0_NW | X86_CR0_CD | X86_CR0_ET; vmx->vcpu.arch.cr0 = cr0; vmx_set_cr0(vcpu, cr0); /* enter rmode */ @@ -4696,7 +4699,7 @@ static int handle_exception_nmi(struct kvm_vcpu *vcpu) return 0; } -static int handle_external_interrupt(struct kvm_vcpu *vcpu) +static __always_inline int handle_external_interrupt(struct 
kvm_vcpu *vcpu) { ++vcpu->stat.irq_exits; return 1; @@ -4968,21 +4971,6 @@ static void vmx_set_dr7(struct kvm_vcpu *vcpu, unsigned long val) vmcs_writel(GUEST_DR7, val); } -static int handle_cpuid(struct kvm_vcpu *vcpu) -{ - return kvm_emulate_cpuid(vcpu); -} - -static int handle_rdmsr(struct kvm_vcpu *vcpu) -{ - return kvm_emulate_rdmsr(vcpu); -} - -static int handle_wrmsr(struct kvm_vcpu *vcpu) -{ - return kvm_emulate_wrmsr(vcpu); -} - static int handle_tpr_below_threshold(struct kvm_vcpu *vcpu) { kvm_apic_update_ppr(vcpu); @@ -4999,11 +4987,6 @@ static int handle_interrupt_window(struct kvm_vcpu *vcpu) return 1; } -static int handle_halt(struct kvm_vcpu *vcpu) -{ - return kvm_emulate_halt(vcpu); -} - static int handle_vmcall(struct kvm_vcpu *vcpu) { return kvm_emulate_hypercall(vcpu); @@ -5538,14 +5521,6 @@ static int handle_encls(struct kvm_vcpu *vcpu) return 1; } -static int handle_unexpected_vmexit(struct kvm_vcpu *vcpu) -{ - kvm_skip_emulated_instruction(vcpu); - WARN_ONCE(1, "Unexpected VM-Exit Reason = 0x%x", - vmcs_read32(VM_EXIT_REASON)); - return 1; -} - /* * The exit handlers return 1 if the exit was handled fully and guest execution * may resume. Otherwise they set the kvm_run parameter to indicate what needs @@ -5559,11 +5534,11 @@ static int (*kvm_vmx_exit_handlers[])(struct kvm_vcpu *vcpu) = { [EXIT_REASON_IO_INSTRUCTION] = handle_io, [EXIT_REASON_CR_ACCESS] = handle_cr, [EXIT_REASON_DR_ACCESS] = handle_dr, - [EXIT_REASON_CPUID] = handle_cpuid, - [EXIT_REASON_MSR_READ] = handle_rdmsr, - [EXIT_REASON_MSR_WRITE] = handle_wrmsr, + [EXIT_REASON_CPUID] = kvm_emulate_cpuid, + [EXIT_REASON_MSR_READ] = kvm_emulate_rdmsr, + [EXIT_REASON_MSR_WRITE] = kvm_emulate_wrmsr, [EXIT_REASON_PENDING_INTERRUPT] = handle_interrupt_window, - [EXIT_REASON_HLT] = handle_halt, + [EXIT_REASON_HLT] = kvm_emulate_halt, [EXIT_REASON_INVD] = handle_invd, [EXIT_REASON_INVLPG] = handle_invlpg, [EXIT_REASON_RDPMC] = handle_rdpmc, @@ -5597,15 +5572,11 @@ static int (*kvm_vmx_exit_handlers[])(struct kvm_vcpu *vcpu) = { [EXIT_REASON_INVVPID] = handle_vmx_instruction, [EXIT_REASON_RDRAND] = handle_invalid_op, [EXIT_REASON_RDSEED] = handle_invalid_op, - [EXIT_REASON_XSAVES] = handle_unexpected_vmexit, - [EXIT_REASON_XRSTORS] = handle_unexpected_vmexit, [EXIT_REASON_PML_FULL] = handle_pml_full, [EXIT_REASON_INVPCID] = handle_invpcid, [EXIT_REASON_VMFUNC] = handle_vmx_instruction, [EXIT_REASON_PREEMPTION_TIMER] = handle_preemption_timer, [EXIT_REASON_ENCLS] = handle_encls, - [EXIT_REASON_UMWAIT] = handle_unexpected_vmexit, - [EXIT_REASON_TPAUSE] = handle_unexpected_vmexit, }; static const int kvm_vmx_max_exit_handlers = @@ -5940,9 +5911,23 @@ static int vmx_handle_exit(struct kvm_vcpu *vcpu) } if (exit_reason < kvm_vmx_max_exit_handlers - && kvm_vmx_exit_handlers[exit_reason]) + && kvm_vmx_exit_handlers[exit_reason]) { +#ifdef CONFIG_RETPOLINE + if (exit_reason == EXIT_REASON_MSR_WRITE) + return kvm_emulate_wrmsr(vcpu); + else if (exit_reason == EXIT_REASON_PREEMPTION_TIMER) + return handle_preemption_timer(vcpu); + else if (exit_reason == EXIT_REASON_PENDING_INTERRUPT) + return handle_interrupt_window(vcpu); + else if (exit_reason == EXIT_REASON_EXTERNAL_INTERRUPT) + return handle_external_interrupt(vcpu); + else if (exit_reason == EXIT_REASON_HLT) + return kvm_emulate_halt(vcpu); + else if (exit_reason == EXIT_REASON_EPT_MISCONFIG) + return handle_ept_misconfig(vcpu); +#endif return kvm_vmx_exit_handlers[exit_reason](vcpu); - else { + } else { vcpu_unimpl(vcpu, "vmx: unexpected exit reason 0x%x\n", 
exit_reason); dump_vmcs(); @@ -6028,17 +6013,17 @@ static void vmx_l1d_flush(struct kvm_vcpu *vcpu) static void update_cr8_intercept(struct kvm_vcpu *vcpu, int tpr, int irr) { struct vmcs12 *vmcs12 = get_vmcs12(vcpu); + int tpr_threshold; if (is_guest_mode(vcpu) && nested_cpu_has(vmcs12, CPU_BASED_TPR_SHADOW)) return; - if (irr == -1 || tpr < irr) { - vmcs_write32(TPR_THRESHOLD, 0); - return; - } - - vmcs_write32(TPR_THRESHOLD, irr); + tpr_threshold = (irr == -1 || tpr < irr) ? 0 : irr; + if (is_guest_mode(vcpu)) + to_vmx(vcpu)->nested.l1_tpr_threshold = tpr_threshold; + else + vmcs_write32(TPR_THRESHOLD, tpr_threshold); } void vmx_set_virtual_apic_mode(struct kvm_vcpu *vcpu) @@ -6152,7 +6137,7 @@ static int vmx_sync_pir_to_irr(struct kvm_vcpu *vcpu) if (pi_test_on(&vmx->pi_desc)) { pi_clear_on(&vmx->pi_desc); /* - * IOMMU can write to PIR.ON, so the barrier matters even on UP. + * IOMMU can write to PID.ON, so the barrier matters even on UP. * But on x86 this is just a compiler barrier anyway. */ smp_mb__after_atomic(); @@ -6182,7 +6167,10 @@ static int vmx_sync_pir_to_irr(struct kvm_vcpu *vcpu) static bool vmx_dy_apicv_has_pending_interrupt(struct kvm_vcpu *vcpu) { - return pi_test_on(vcpu_to_pi_desc(vcpu)); + struct pi_desc *pi_desc = vcpu_to_pi_desc(vcpu); + + return pi_test_on(pi_desc) || + (pi_test_sn(pi_desc) && !pi_is_pir_empty(pi_desc)); } static void vmx_load_eoi_exitmap(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap) @@ -6512,9 +6500,9 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu) if (vmx->nested.need_vmcs12_to_shadow_sync) nested_sync_vmcs12_to_shadow(vcpu); - if (test_bit(VCPU_REGS_RSP, (unsigned long *)&vcpu->arch.regs_dirty)) + if (kvm_register_is_dirty(vcpu, VCPU_REGS_RSP)) vmcs_writel(GUEST_RSP, vcpu->arch.regs[VCPU_REGS_RSP]); - if (test_bit(VCPU_REGS_RIP, (unsigned long *)&vcpu->arch.regs_dirty)) + if (kvm_register_is_dirty(vcpu, VCPU_REGS_RIP)) vmcs_writel(GUEST_RIP, vcpu->arch.regs[VCPU_REGS_RIP]); cr3 = __get_current_cr3_fast(); @@ -6537,7 +6525,7 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu) if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP) vmx_set_interrupt_shadow(vcpu, 0); - kvm_load_guest_xcr0(vcpu); + kvm_load_guest_xsave_state(vcpu); if (static_cpu_has(X86_FEATURE_PKU) && kvm_read_cr4_bits(vcpu, X86_CR4_PKE) && @@ -6644,7 +6632,7 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu) __write_pkru(vmx->host_pkru); } - kvm_put_guest_xcr0(vcpu); + kvm_load_host_xsave_state(vcpu); vmx->nested.nested_run_pending = 0; vmx->idt_vectoring_info = 0; @@ -6698,7 +6686,7 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id) int err; struct vcpu_vmx *vmx; unsigned long *msr_bitmap; - int cpu; + int i, cpu; BUILD_BUG_ON_MSG(offsetof(struct vcpu_vmx, vcpu) != 0, "struct kvm_vcpu must be at offset 0 for arch usercopy region"); @@ -6750,6 +6738,34 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id) if (!vmx->guest_msrs) goto free_pml; + for (i = 0; i < ARRAY_SIZE(vmx_msr_index); ++i) { + u32 index = vmx_msr_index[i]; + u32 data_low, data_high; + int j = vmx->nmsrs; + + if (rdmsr_safe(index, &data_low, &data_high) < 0) + continue; + if (wrmsr_safe(index, data_low, data_high) < 0) + continue; + + vmx->guest_msrs[j].index = i; + vmx->guest_msrs[j].data = 0; + switch (index) { + case MSR_IA32_TSX_CTRL: + /* + * No need to pass TSX_CTRL_CPUID_CLEAR through, so + * let's avoid changing CPUID bits under the host + * kernel's feet. 
+ */ + vmx->guest_msrs[j].mask = ~(u64)TSX_CTRL_CPUID_CLEAR; + break; + default: + vmx->guest_msrs[j].mask = -1ull; + break; + } + ++vmx->nmsrs; + } + err = alloc_loaded_vmcs(&vmx->vmcs01); if (err < 0) goto free_msrs; @@ -6774,7 +6790,7 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id) cpu = get_cpu(); vmx_vcpu_load(&vmx->vcpu, cpu); vmx->vcpu.cpu = cpu; - vmx_vcpu_setup(vmx); + init_vmcs(vmx); vmx_vcpu_put(&vmx->vcpu); put_cpu(); if (cpu_need_virtualize_apic_accesses(&vmx->vcpu)) { @@ -6994,6 +7010,7 @@ static void nested_vmx_cr_fixed1_bits_update(struct kvm_vcpu *vcpu) cr4_fixed1_update(X86_CR4_SMAP, ebx, bit(X86_FEATURE_SMAP)); cr4_fixed1_update(X86_CR4_PKE, ecx, bit(X86_FEATURE_PKU)); cr4_fixed1_update(X86_CR4_UMIP, ecx, bit(X86_FEATURE_UMIP)); + cr4_fixed1_update(X86_CR4_LA57, ecx, bit(X86_FEATURE_LA57)); #undef cr4_fixed1_update } @@ -7088,6 +7105,9 @@ static void vmx_cpuid_update(struct kvm_vcpu *vcpu) { struct vcpu_vmx *vmx = to_vmx(vcpu); + /* xsaves_enabled is recomputed in vmx_compute_secondary_exec_control(). */ + vcpu->arch.xsaves_enabled = false; + if (cpu_has_secondary_exec_ctrls()) { vmx_compute_secondary_exec_control(vmx); vmcs_set_secondary_exec_control(vmx); @@ -7095,10 +7115,12 @@ static void vmx_cpuid_update(struct kvm_vcpu *vcpu) if (nested_vmx_allowed(vcpu)) to_vmx(vcpu)->msr_ia32_feature_control_valid_bits |= + FEATURE_CONTROL_VMXON_ENABLED_INSIDE_SMX | FEATURE_CONTROL_VMXON_ENABLED_OUTSIDE_SMX; else to_vmx(vcpu)->msr_ia32_feature_control_valid_bits &= - ~FEATURE_CONTROL_VMXON_ENABLED_OUTSIDE_SMX; + ~(FEATURE_CONTROL_VMXON_ENABLED_INSIDE_SMX | + FEATURE_CONTROL_VMXON_ENABLED_OUTSIDE_SMX); if (nested_vmx_allowed(vcpu)) { nested_vmx_cr_fixed1_bits_update(vcpu); @@ -7108,6 +7130,15 @@ static void vmx_cpuid_update(struct kvm_vcpu *vcpu) if (boot_cpu_has(X86_FEATURE_INTEL_PT) && guest_cpuid_has(vcpu, X86_FEATURE_INTEL_PT)) update_intel_pt_cfg(vcpu); + + if (boot_cpu_has(X86_FEATURE_RTM)) { + struct shared_msr_entry *msr; + msr = find_msr_entry(vmx, MSR_IA32_TSX_CTRL); + if (msr) { + bool enabled = guest_cpuid_has(vcpu, X86_FEATURE_RTM); + vmx_set_guest_msr(vmx, msr, enabled ? 0 : TSX_CTRL_RTM_DISABLE); + } + } } static void vmx_set_supported_cpuid(u32 func, struct kvm_cpuid_entry2 *entry) @@ -7596,9 +7627,6 @@ static __init int hardware_setup(void) WARN_ONCE(host_bndcfgs, "KVM: BNDCFGS in host will be lost"); } - if (boot_cpu_has(X86_FEATURE_XSAVES)) - rdmsrl(MSR_IA32_XSS, host_xss); - if (!cpu_has_vmx_vpid() || !cpu_has_vmx_invvpid() || !(cpu_has_vmx_invvpid_single() || cpu_has_vmx_invvpid_global())) enable_vpid = 0; @@ -7779,7 +7807,6 @@ static struct kvm_x86_ops vmx_x86_ops __ro_after_init = { .get_cpl = vmx_get_cpl, .get_cs_db_l_bits = vmx_get_cs_db_l_bits, .decache_cr0_guest_bits = vmx_decache_cr0_guest_bits, - .decache_cr3 = vmx_decache_cr3, .decache_cr4_guest_bits = vmx_decache_cr4_guest_bits, .set_cr0 = vmx_set_cr0, .set_cr3 = vmx_set_cr3, @@ -7995,12 +8022,10 @@ static int __init vmx_init(void) * contain 'auto' which will be turned into the default 'cond' * mitigation mode. 
*/ - if (boot_cpu_has(X86_BUG_L1TF)) { - r = vmx_setup_l1d_flush(vmentry_l1d_flush_param); - if (r) { - vmx_exit(); - return r; - } + r = vmx_setup_l1d_flush(vmentry_l1d_flush_param); + if (r) { + vmx_exit(); + return r; } #ifdef CONFIG_KEXEC_CORE diff --git a/arch/x86/kvm/vmx/vmx.h b/arch/x86/kvm/vmx/vmx.h index bee16687dc0b..7c1b978b2df4 100644 --- a/arch/x86/kvm/vmx/vmx.h +++ b/arch/x86/kvm/vmx/vmx.h @@ -22,11 +22,11 @@ extern u32 get_umwait_control_msr(void); #define X2APIC_MSR(r) (APIC_BASE_MSR + ((r) >> 4)) -#define NR_AUTOLOAD_MSRS 8 +#define NR_LOADSTORE_MSRS 8 struct vmx_msrs { unsigned int nr; - struct vmx_msr_entry val[NR_AUTOLOAD_MSRS]; + struct vmx_msr_entry val[NR_LOADSTORE_MSRS]; }; struct shared_msr_entry { @@ -167,6 +167,9 @@ struct nested_vmx { u64 vmcs01_debugctl; u64 vmcs01_guest_bndcfgs; + /* to migrate it to L1 if L2 writes to L1's CR8 directly */ + int l1_tpr_threshold; + u16 vpid02; u16 last_vpid; @@ -230,6 +233,10 @@ struct vcpu_vmx { struct vmx_msrs host; } msr_autoload; + struct msr_autostore { + struct vmx_msrs guest; + } msr_autostore; + struct { int vm86_active; ulong save_rflags; @@ -334,6 +341,7 @@ void vmx_set_virtual_apic_mode(struct kvm_vcpu *vcpu); struct shared_msr_entry *find_msr_entry(struct vcpu_vmx *vmx, u32 msr); void pt_update_intercept_for_msr(struct vcpu_vmx *vmx); void vmx_update_host_rsp(struct vcpu_vmx *vmx, unsigned long host_rsp); +int vmx_find_msr_index(struct vmx_msrs *m, u32 msr); #define POSTED_INTR_ON 0 #define POSTED_INTR_SN 1 @@ -355,6 +363,11 @@ static inline int pi_test_and_set_pir(int vector, struct pi_desc *pi_desc) return test_and_set_bit(vector, (unsigned long *)pi_desc->pir); } +static inline bool pi_is_pir_empty(struct pi_desc *pi_desc) +{ + return bitmap_empty((unsigned long *)pi_desc->pir, NR_VECTORS); +} + static inline void pi_set_sn(struct pi_desc *pi_desc) { set_bit(POSTED_INTR_SN, @@ -373,6 +386,12 @@ static inline void pi_clear_on(struct pi_desc *pi_desc) (unsigned long *)&pi_desc->control); } +static inline void pi_clear_sn(struct pi_desc *pi_desc) +{ + clear_bit(POSTED_INTR_SN, + (unsigned long *)&pi_desc->control); +} + static inline int pi_test_on(struct pi_desc *pi_desc) { return test_bit(POSTED_INTR_ON, diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 0ed07d8d2caa..3ed167e039e5 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -92,8 +92,8 @@ u64 __read_mostly efer_reserved_bits = ~((u64)(EFER_SCE | EFER_LME | EFER_LMA)); static u64 __read_mostly efer_reserved_bits = ~((u64)EFER_SCE); #endif -#define VM_STAT(x) offsetof(struct kvm, stat.x), KVM_STAT_VM -#define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU +#define VM_STAT(x, ...) offsetof(struct kvm, stat.x), KVM_STAT_VM, ## __VA_ARGS__ +#define VCPU_STAT(x, ...) 
offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU, ## __VA_ARGS__ #define KVM_X2APIC_API_VALID_FLAGS (KVM_X2APIC_API_USE_32BIT_IDS | \ KVM_X2APIC_API_DISABLE_BROADCAST_QUIRK) @@ -176,6 +176,8 @@ struct kvm_shared_msrs { static struct kvm_shared_msrs_global __read_mostly shared_msrs_global; static struct kvm_shared_msrs __percpu *shared_msrs; +static u64 __read_mostly host_xss; + struct kvm_stats_debugfs_item debugfs_entries[] = { { "pf_fixed", VCPU_STAT(pf_fixed) }, { "pf_guest", VCPU_STAT(pf_guest) }, @@ -212,7 +214,8 @@ struct kvm_stats_debugfs_item debugfs_entries[] = { { "mmu_cache_miss", VM_STAT(mmu_cache_miss) }, { "mmu_unsync", VM_STAT(mmu_unsync) }, { "remote_tlb_flush", VM_STAT(remote_tlb_flush) }, - { "largepages", VM_STAT(lpages) }, + { "largepages", VM_STAT(lpages, .mode = 0444) }, + { "nx_largepages_splitted", VM_STAT(nx_lpage_splits, .mode = 0444) }, { "max_mmu_page_hash_collisions", VM_STAT(max_mmu_page_hash_collisions) }, { NULL } @@ -259,23 +262,6 @@ static void kvm_on_user_return(struct user_return_notifier *urn) } } -static void shared_msr_update(unsigned slot, u32 msr) -{ - u64 value; - unsigned int cpu = smp_processor_id(); - struct kvm_shared_msrs *smsr = per_cpu_ptr(shared_msrs, cpu); - - /* only read, and nobody should modify it at this time, - * so don't need lock */ - if (slot >= shared_msrs_global.nr) { - printk(KERN_ERR "kvm: invalid MSR slot!"); - return; - } - rdmsrl_safe(msr, &value); - smsr->values[slot].host = value; - smsr->values[slot].curr = value; -} - void kvm_define_shared_msr(unsigned slot, u32 msr) { BUG_ON(slot >= KVM_NR_SHARED_MSRS); @@ -287,10 +273,16 @@ EXPORT_SYMBOL_GPL(kvm_define_shared_msr); static void kvm_shared_msr_cpu_online(void) { - unsigned i; + unsigned int cpu = smp_processor_id(); + struct kvm_shared_msrs *smsr = per_cpu_ptr(shared_msrs, cpu); + u64 value; + int i; - for (i = 0; i < shared_msrs_global.nr; ++i) - shared_msr_update(i, shared_msrs_global.msrs[i]); + for (i = 0; i < shared_msrs_global.nr; ++i) { + rdmsrl_safe(shared_msrs_global.msrs[i], &value); + smsr->values[i].host = value; + smsr->values[i].curr = value; + } } int kvm_set_shared_msr(unsigned slot, u64 value, u64 mask) @@ -299,13 +291,14 @@ int kvm_set_shared_msr(unsigned slot, u64 value, u64 mask) struct kvm_shared_msrs *smsr = per_cpu_ptr(shared_msrs, cpu); int err; - if (((value ^ smsr->values[slot].curr) & mask) == 0) + value = (value & mask) | (smsr->values[slot].host & ~mask); + if (value == smsr->values[slot].curr) return 0; - smsr->values[slot].curr = value; err = wrmsrl_safe(shared_msrs_global.msrs[slot], value); if (err) return 1; + smsr->values[slot].curr = value; if (!smsr->registered) { smsr->urn.on_user_return = kvm_on_user_return; user_return_notifier_register(&smsr->urn); @@ -360,8 +353,7 @@ EXPORT_SYMBOL_GPL(kvm_set_apic_base); asmlinkage __visible void kvm_spurious_fault(void) { /* Fault while not rebooting. We want the trace. 
*/ - if (!kvm_rebooting) - BUG(); + BUG_ON(!kvm_rebooting); } EXPORT_SYMBOL_GPL(kvm_spurious_fault); @@ -709,10 +701,8 @@ int load_pdptrs(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu, unsigned long cr3) ret = 1; memcpy(mmu->pdptrs, pdpte, sizeof(mmu->pdptrs)); - __set_bit(VCPU_EXREG_PDPTR, - (unsigned long *)&vcpu->arch.regs_avail); - __set_bit(VCPU_EXREG_PDPTR, - (unsigned long *)&vcpu->arch.regs_dirty); + kvm_register_mark_dirty(vcpu, VCPU_EXREG_PDPTR); + out: return ret; @@ -722,7 +712,6 @@ EXPORT_SYMBOL_GPL(load_pdptrs); bool pdptrs_changed(struct kvm_vcpu *vcpu) { u64 pdpte[ARRAY_SIZE(vcpu->arch.walk_mmu->pdptrs)]; - bool changed = true; int offset; gfn_t gfn; int r; @@ -730,8 +719,7 @@ bool pdptrs_changed(struct kvm_vcpu *vcpu) if (!is_pae_paging(vcpu)) return false; - if (!test_bit(VCPU_EXREG_PDPTR, - (unsigned long *)&vcpu->arch.regs_avail)) + if (!kvm_register_is_available(vcpu, VCPU_EXREG_PDPTR)) return true; gfn = (kvm_read_cr3(vcpu) & 0xffffffe0ul) >> PAGE_SHIFT; @@ -739,11 +727,9 @@ bool pdptrs_changed(struct kvm_vcpu *vcpu) r = kvm_read_nested_guest_page(vcpu, gfn, pdpte, offset, sizeof(pdpte), PFERR_USER_MASK | PFERR_WRITE_MASK); if (r < 0) - goto out; - changed = memcmp(pdpte, vcpu->arch.walk_mmu->pdptrs, sizeof(pdpte)) != 0; -out: + return true; - return changed; + return memcmp(pdpte, vcpu->arch.walk_mmu->pdptrs, sizeof(pdpte)) != 0; } EXPORT_SYMBOL_GPL(pdptrs_changed); @@ -812,27 +798,34 @@ void kvm_lmsw(struct kvm_vcpu *vcpu, unsigned long msw) } EXPORT_SYMBOL_GPL(kvm_lmsw); -void kvm_load_guest_xcr0(struct kvm_vcpu *vcpu) +void kvm_load_guest_xsave_state(struct kvm_vcpu *vcpu) { - if (kvm_read_cr4_bits(vcpu, X86_CR4_OSXSAVE) && - !vcpu->guest_xcr0_loaded) { - /* kvm_set_xcr() also depends on this */ + if (kvm_read_cr4_bits(vcpu, X86_CR4_OSXSAVE)) { + if (vcpu->arch.xcr0 != host_xcr0) xsetbv(XCR_XFEATURE_ENABLED_MASK, vcpu->arch.xcr0); - vcpu->guest_xcr0_loaded = 1; + + if (vcpu->arch.xsaves_enabled && + vcpu->arch.ia32_xss != host_xss) + wrmsrl(MSR_IA32_XSS, vcpu->arch.ia32_xss); } } -EXPORT_SYMBOL_GPL(kvm_load_guest_xcr0); +EXPORT_SYMBOL_GPL(kvm_load_guest_xsave_state); -void kvm_put_guest_xcr0(struct kvm_vcpu *vcpu) +void kvm_load_host_xsave_state(struct kvm_vcpu *vcpu) { - if (vcpu->guest_xcr0_loaded) { + if (kvm_read_cr4_bits(vcpu, X86_CR4_OSXSAVE)) { + if (vcpu->arch.xcr0 != host_xcr0) xsetbv(XCR_XFEATURE_ENABLED_MASK, host_xcr0); - vcpu->guest_xcr0_loaded = 0; + + if (vcpu->arch.xsaves_enabled && + vcpu->arch.ia32_xss != host_xss) + wrmsrl(MSR_IA32_XSS, host_xss); } + } -EXPORT_SYMBOL_GPL(kvm_put_guest_xcr0); +EXPORT_SYMBOL_GPL(kvm_load_host_xsave_state); static int __kvm_set_xcr(struct kvm_vcpu *vcpu, u32 index, u64 xcr) { @@ -885,34 +878,42 @@ int kvm_set_xcr(struct kvm_vcpu *vcpu, u32 index, u64 xcr) } EXPORT_SYMBOL_GPL(kvm_set_xcr); -int kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) +static int kvm_valid_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) { - unsigned long old_cr4 = kvm_read_cr4(vcpu); - unsigned long pdptr_bits = X86_CR4_PGE | X86_CR4_PSE | X86_CR4_PAE | - X86_CR4_SMEP | X86_CR4_SMAP | X86_CR4_PKE; - if (cr4 & CR4_RESERVED_BITS) - return 1; + return -EINVAL; if (!guest_cpuid_has(vcpu, X86_FEATURE_XSAVE) && (cr4 & X86_CR4_OSXSAVE)) - return 1; + return -EINVAL; if (!guest_cpuid_has(vcpu, X86_FEATURE_SMEP) && (cr4 & X86_CR4_SMEP)) - return 1; + return -EINVAL; if (!guest_cpuid_has(vcpu, X86_FEATURE_SMAP) && (cr4 & X86_CR4_SMAP)) - return 1; + return -EINVAL; if (!guest_cpuid_has(vcpu, X86_FEATURE_FSGSBASE) && (cr4 & X86_CR4_FSGSBASE)) - return 
1; + return -EINVAL; if (!guest_cpuid_has(vcpu, X86_FEATURE_PKU) && (cr4 & X86_CR4_PKE)) - return 1; + return -EINVAL; if (!guest_cpuid_has(vcpu, X86_FEATURE_LA57) && (cr4 & X86_CR4_LA57)) - return 1; + return -EINVAL; if (!guest_cpuid_has(vcpu, X86_FEATURE_UMIP) && (cr4 & X86_CR4_UMIP)) + return -EINVAL; + + return 0; +} + +int kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) +{ + unsigned long old_cr4 = kvm_read_cr4(vcpu); + unsigned long pdptr_bits = X86_CR4_PGE | X86_CR4_PSE | X86_CR4_PAE | + X86_CR4_SMEP | X86_CR4_SMAP | X86_CR4_PKE; + + if (kvm_valid_cr4(vcpu, cr4)) return 1; if (is_long_mode(vcpu)) { @@ -976,7 +977,7 @@ int kvm_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3) kvm_mmu_new_cr3(vcpu, cr3, skip_tlb_flush); vcpu->arch.cr3 = cr3; - __set_bit(VCPU_EXREG_CR3, (ulong *)&vcpu->arch.regs_avail); + kvm_register_mark_available(vcpu, VCPU_EXREG_CR3); return 0; } @@ -1125,13 +1126,15 @@ EXPORT_SYMBOL_GPL(kvm_rdpmc); * List of msr numbers which we expose to userspace through KVM_GET_MSRS * and KVM_SET_MSRS, and KVM_GET_MSR_INDEX_LIST. * - * This list is modified at module load time to reflect the + * The three MSR lists(msrs_to_save, emulated_msrs, msr_based_features) + * extract the supported MSRs from the related const lists. + * msrs_to_save is selected from the msrs_to_save_all to reflect the * capabilities of the host cpu. This capabilities test skips MSRs that are - * kvm-specific. Those are put in emulated_msrs; filtering of emulated_msrs + * kvm-specific. Those are put in emulated_msrs_all; filtering of emulated_msrs * may depend on host virtualization features rather than host cpu features. */ -static u32 msrs_to_save[] = { +static const u32 msrs_to_save_all[] = { MSR_IA32_SYSENTER_CS, MSR_IA32_SYSENTER_ESP, MSR_IA32_SYSENTER_EIP, MSR_STAR, #ifdef CONFIG_X86_64 @@ -1161,13 +1164,6 @@ static u32 msrs_to_save[] = { MSR_ARCH_PERFMON_PERFCTR0 + 12, MSR_ARCH_PERFMON_PERFCTR0 + 13, MSR_ARCH_PERFMON_PERFCTR0 + 14, MSR_ARCH_PERFMON_PERFCTR0 + 15, MSR_ARCH_PERFMON_PERFCTR0 + 16, MSR_ARCH_PERFMON_PERFCTR0 + 17, - MSR_ARCH_PERFMON_PERFCTR0 + 18, MSR_ARCH_PERFMON_PERFCTR0 + 19, - MSR_ARCH_PERFMON_PERFCTR0 + 20, MSR_ARCH_PERFMON_PERFCTR0 + 21, - MSR_ARCH_PERFMON_PERFCTR0 + 22, MSR_ARCH_PERFMON_PERFCTR0 + 23, - MSR_ARCH_PERFMON_PERFCTR0 + 24, MSR_ARCH_PERFMON_PERFCTR0 + 25, - MSR_ARCH_PERFMON_PERFCTR0 + 26, MSR_ARCH_PERFMON_PERFCTR0 + 27, - MSR_ARCH_PERFMON_PERFCTR0 + 28, MSR_ARCH_PERFMON_PERFCTR0 + 29, - MSR_ARCH_PERFMON_PERFCTR0 + 30, MSR_ARCH_PERFMON_PERFCTR0 + 31, MSR_ARCH_PERFMON_EVENTSEL0, MSR_ARCH_PERFMON_EVENTSEL1, MSR_ARCH_PERFMON_EVENTSEL0 + 2, MSR_ARCH_PERFMON_EVENTSEL0 + 3, MSR_ARCH_PERFMON_EVENTSEL0 + 4, MSR_ARCH_PERFMON_EVENTSEL0 + 5, @@ -1177,18 +1173,12 @@ static u32 msrs_to_save[] = { MSR_ARCH_PERFMON_EVENTSEL0 + 12, MSR_ARCH_PERFMON_EVENTSEL0 + 13, MSR_ARCH_PERFMON_EVENTSEL0 + 14, MSR_ARCH_PERFMON_EVENTSEL0 + 15, MSR_ARCH_PERFMON_EVENTSEL0 + 16, MSR_ARCH_PERFMON_EVENTSEL0 + 17, - MSR_ARCH_PERFMON_EVENTSEL0 + 18, MSR_ARCH_PERFMON_EVENTSEL0 + 19, - MSR_ARCH_PERFMON_EVENTSEL0 + 20, MSR_ARCH_PERFMON_EVENTSEL0 + 21, - MSR_ARCH_PERFMON_EVENTSEL0 + 22, MSR_ARCH_PERFMON_EVENTSEL0 + 23, - MSR_ARCH_PERFMON_EVENTSEL0 + 24, MSR_ARCH_PERFMON_EVENTSEL0 + 25, - MSR_ARCH_PERFMON_EVENTSEL0 + 26, MSR_ARCH_PERFMON_EVENTSEL0 + 27, - MSR_ARCH_PERFMON_EVENTSEL0 + 28, MSR_ARCH_PERFMON_EVENTSEL0 + 29, - MSR_ARCH_PERFMON_EVENTSEL0 + 30, MSR_ARCH_PERFMON_EVENTSEL0 + 31, }; +static u32 msrs_to_save[ARRAY_SIZE(msrs_to_save_all)]; static unsigned num_msrs_to_save; -static u32 emulated_msrs[] 
= { +static const u32 emulated_msrs_all[] = { MSR_KVM_SYSTEM_TIME, MSR_KVM_WALL_CLOCK, MSR_KVM_SYSTEM_TIME_NEW, MSR_KVM_WALL_CLOCK_NEW, HV_X64_MSR_GUEST_OS_ID, HV_X64_MSR_HYPERCALL, @@ -1227,7 +1217,7 @@ static u32 emulated_msrs[] = { * by arch/x86/kvm/vmx/nested.c based on CPUID or other MSRs. * We always support the "true" VMX control MSRs, even if the host * processor does not, so I am putting these registers here rather - * than in msrs_to_save. + * than in msrs_to_save_all. */ MSR_IA32_VMX_BASIC, MSR_IA32_VMX_TRUE_PINBASED_CTLS, @@ -1246,13 +1236,14 @@ static u32 emulated_msrs[] = { MSR_KVM_POLL_CONTROL, }; +static u32 emulated_msrs[ARRAY_SIZE(emulated_msrs_all)]; static unsigned num_emulated_msrs; /* * List of msr numbers which are used to expose MSR-based features that * can be used by a hypervisor to validate requested CPU features. */ -static u32 msr_based_features[] = { +static const u32 msr_based_features_all[] = { MSR_IA32_VMX_BASIC, MSR_IA32_VMX_TRUE_PINBASED_CTLS, MSR_IA32_VMX_PINBASED_CTLS, @@ -1277,6 +1268,7 @@ static u32 msr_based_features[] = { MSR_IA32_ARCH_CAPABILITIES, }; +static u32 msr_based_features[ARRAY_SIZE(msr_based_features_all)]; static unsigned int num_msr_based_features; static u64 kvm_get_arch_capabilities(void) @@ -1287,6 +1279,14 @@ static u64 kvm_get_arch_capabilities(void) rdmsrl(MSR_IA32_ARCH_CAPABILITIES, data); /* + * If nx_huge_pages is enabled, KVM's shadow paging will ensure that + * the nested hypervisor runs with NX huge pages. If it is not, + * L1 is anyway vulnerable to ITLB_MULTIHIT explots from other + * L1 guests, so it need not worry about its own (L2) guests. + */ + data |= ARCH_CAP_PSCHANGE_MC_NO; + + /* * If we're doing cache flushes (either "always" or "cond") * we will do one whenever the guest does a vmlaunch/vmresume. * If an outer hypervisor is doing the cache flush for us @@ -1305,6 +1305,17 @@ static u64 kvm_get_arch_capabilities(void) if (!boot_cpu_has_bug(X86_BUG_MDS)) data |= ARCH_CAP_MDS_NO; + /* + * On TAA affected systems: + * - nothing to do if TSX is disabled on the host. + * - we emulate TSX_CTRL if present on the host. + * This lets the guest use VERW to clear CPU buffers. + */ + if (!boot_cpu_has(X86_FEATURE_RTM)) + data &= ~(ARCH_CAP_TAA_NO | ARCH_CAP_TSX_CTRL_MSR); + else if (!boot_cpu_has_bug(X86_BUG_TAA)) + data |= ARCH_CAP_TAA_NO; + return data; } @@ -1451,8 +1462,8 @@ static int __kvm_set_msr(struct kvm_vcpu *vcpu, u32 index, u64 data, * Returns 0 on success, non-0 otherwise. * Assumes vcpu_load() was already called. 
*/ -static int __kvm_get_msr(struct kvm_vcpu *vcpu, u32 index, u64 *data, - bool host_initiated) +int __kvm_get_msr(struct kvm_vcpu *vcpu, u32 index, u64 *data, + bool host_initiated) { struct msr_data msr; int ret; @@ -1527,20 +1538,25 @@ static int do_set_msr(struct kvm_vcpu *vcpu, unsigned index, u64 *data) } #ifdef CONFIG_X86_64 +struct pvclock_clock { + int vclock_mode; + u64 cycle_last; + u64 mask; + u32 mult; + u32 shift; +}; + struct pvclock_gtod_data { seqcount_t seq; - struct { /* extract of a clocksource struct */ - int vclock_mode; - u64 cycle_last; - u64 mask; - u32 mult; - u32 shift; - } clock; + struct pvclock_clock clock; /* extract of a clocksource struct */ + struct pvclock_clock raw_clock; /* extract of a clocksource struct */ + u64 boot_ns_raw; u64 boot_ns; u64 nsec_base; u64 wall_time_sec; + u64 monotonic_raw_nsec; }; static struct pvclock_gtod_data pvclock_gtod_data; @@ -1548,9 +1564,10 @@ static struct pvclock_gtod_data pvclock_gtod_data; static void update_pvclock_gtod(struct timekeeper *tk) { struct pvclock_gtod_data *vdata = &pvclock_gtod_data; - u64 boot_ns; + u64 boot_ns, boot_ns_raw; boot_ns = ktime_to_ns(ktime_add(tk->tkr_mono.base, tk->offs_boot)); + boot_ns_raw = ktime_to_ns(ktime_add(tk->tkr_raw.base, tk->offs_boot)); write_seqcount_begin(&vdata->seq); @@ -1561,11 +1578,20 @@ static void update_pvclock_gtod(struct timekeeper *tk) vdata->clock.mult = tk->tkr_mono.mult; vdata->clock.shift = tk->tkr_mono.shift; + vdata->raw_clock.vclock_mode = tk->tkr_raw.clock->archdata.vclock_mode; + vdata->raw_clock.cycle_last = tk->tkr_raw.cycle_last; + vdata->raw_clock.mask = tk->tkr_raw.mask; + vdata->raw_clock.mult = tk->tkr_raw.mult; + vdata->raw_clock.shift = tk->tkr_raw.shift; + vdata->boot_ns = boot_ns; vdata->nsec_base = tk->tkr_mono.xtime_nsec; vdata->wall_time_sec = tk->xtime_sec; + vdata->boot_ns_raw = boot_ns_raw; + vdata->monotonic_raw_nsec = tk->tkr_raw.xtime_nsec; + write_seqcount_end(&vdata->seq); } #endif @@ -1989,21 +2015,21 @@ static u64 read_tsc(void) return last; } -static inline u64 vgettsc(u64 *tsc_timestamp, int *mode) +static inline u64 vgettsc(struct pvclock_clock *clock, u64 *tsc_timestamp, + int *mode) { long v; - struct pvclock_gtod_data *gtod = &pvclock_gtod_data; u64 tsc_pg_val; - switch (gtod->clock.vclock_mode) { + switch (clock->vclock_mode) { case VCLOCK_HVCLOCK: tsc_pg_val = hv_read_tsc_page_tsc(hv_get_tsc_page(), tsc_timestamp); if (tsc_pg_val != U64_MAX) { /* TSC page valid */ *mode = VCLOCK_HVCLOCK; - v = (tsc_pg_val - gtod->clock.cycle_last) & - gtod->clock.mask; + v = (tsc_pg_val - clock->cycle_last) & + clock->mask; } else { /* TSC page invalid */ *mode = VCLOCK_NONE; @@ -2012,8 +2038,8 @@ static inline u64 vgettsc(u64 *tsc_timestamp, int *mode) case VCLOCK_TSC: *mode = VCLOCK_TSC; *tsc_timestamp = read_tsc(); - v = (*tsc_timestamp - gtod->clock.cycle_last) & - gtod->clock.mask; + v = (*tsc_timestamp - clock->cycle_last) & + clock->mask; break; default: *mode = VCLOCK_NONE; @@ -2022,10 +2048,10 @@ static inline u64 vgettsc(u64 *tsc_timestamp, int *mode) if (*mode == VCLOCK_NONE) *tsc_timestamp = v = 0; - return v * gtod->clock.mult; + return v * clock->mult; } -static int do_monotonic_boot(s64 *t, u64 *tsc_timestamp) +static int do_monotonic_raw(s64 *t, u64 *tsc_timestamp) { struct pvclock_gtod_data *gtod = &pvclock_gtod_data; unsigned long seq; @@ -2034,10 +2060,10 @@ static int do_monotonic_boot(s64 *t, u64 *tsc_timestamp) do { seq = read_seqcount_begin(&gtod->seq); - ns = gtod->nsec_base; - ns += vgettsc(tsc_timestamp, &mode); +
ns = gtod->monotonic_raw_nsec; + ns += vgettsc(&gtod->raw_clock, tsc_timestamp, &mode); ns >>= gtod->clock.shift; - ns += gtod->boot_ns; + ns += gtod->boot_ns_raw; } while (unlikely(read_seqcount_retry(&gtod->seq, seq))); *t = ns; @@ -2055,7 +2081,7 @@ static int do_realtime(struct timespec64 *ts, u64 *tsc_timestamp) seq = read_seqcount_begin(&gtod->seq); ts->tv_sec = gtod->wall_time_sec; ns = gtod->nsec_base; - ns += vgettsc(tsc_timestamp, &mode); + ns += vgettsc(&gtod->clock, tsc_timestamp, &mode); ns >>= gtod->clock.shift; } while (unlikely(read_seqcount_retry(&gtod->seq, seq))); @@ -2072,7 +2098,7 @@ static bool kvm_get_time_and_clockread(s64 *kernel_ns, u64 *tsc_timestamp) if (!gtod_is_based_on_tsc(pvclock_gtod_data.clock.vclock_mode)) return false; - return gtod_is_based_on_tsc(do_monotonic_boot(kernel_ns, + return gtod_is_based_on_tsc(do_monotonic_raw(kernel_ns, tsc_timestamp)); } @@ -2543,6 +2569,7 @@ static int kvm_pv_enable_async_pf(struct kvm_vcpu *vcpu, u64 data) static void kvmclock_reset(struct kvm_vcpu *vcpu) { vcpu->arch.pv_time_enabled = false; + vcpu->arch.time = 0; } static void kvm_vcpu_flush_tlb(struct kvm_vcpu *vcpu, bool invalidate_gpa) @@ -2694,6 +2721,20 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info) case MSR_IA32_TSC: kvm_write_tsc(vcpu, msr_info); break; + case MSR_IA32_XSS: + if (!msr_info->host_initiated && + !guest_cpuid_has(vcpu, X86_FEATURE_XSAVES)) + return 1; + /* + * We do support PT if kvm_x86_ops->pt_supported(), but we do + * not support IA32_XSS[bit 8]. Guests will have to use + * RDMSR/WRMSR rather than XSAVES/XRSTORS to save/restore PT + * MSRs. + */ + if (data != 0) + return 1; + vcpu->arch.ia32_xss = data; + break; case MSR_SMI_COUNT: if (!msr_info->host_initiated) return 1; @@ -2708,8 +2749,6 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info) case MSR_KVM_SYSTEM_TIME: { struct kvm_arch *ka = &vcpu->kvm->arch; - kvmclock_reset(vcpu); - if (vcpu->vcpu_id == 0 && !msr_info->host_initiated) { bool tmp = (msr == MSR_KVM_SYSTEM_TIME); @@ -2723,14 +2762,13 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info) kvm_make_request(KVM_REQ_GLOBAL_CLOCK_UPDATE, vcpu); /* we verify if the enable bit is set... */ + vcpu->arch.pv_time_enabled = false; if (!(data & 1)) break; - if (kvm_gfn_to_hva_cache_init(vcpu->kvm, + if (!kvm_gfn_to_hva_cache_init(vcpu->kvm, &vcpu->arch.pv_time, data & ~1ULL, sizeof(struct pvclock_vcpu_time_info))) - vcpu->arch.pv_time_enabled = false; - else vcpu->arch.pv_time_enabled = true; break; @@ -3024,6 +3062,12 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info) case MSR_IA32_MC0_CTL ... MSR_IA32_MCx_CTL(KVM_MAX_MCE_BANKS) - 1: return get_msr_mce(vcpu, msr_info->index, &msr_info->data, msr_info->host_initiated); + case MSR_IA32_XSS: + if (!msr_info->host_initiated && + !guest_cpuid_has(vcpu, X86_FEATURE_XSAVES)) + return 1; + msr_info->data = vcpu->arch.ia32_xss; + break; case MSR_K7_CLK_CTL: /* * Provide expected ramp-up count for K7.
All other @@ -3801,12 +3845,13 @@ static int kvm_vcpu_ioctl_x86_set_vcpu_events(struct kvm_vcpu *vcpu, vcpu->arch.hflags |= HF_SMM_INSIDE_NMI_MASK; else vcpu->arch.hflags &= ~HF_SMM_INSIDE_NMI_MASK; - if (lapic_in_kernel(vcpu)) { - if (events->smi.latched_init) - set_bit(KVM_APIC_INIT, &vcpu->arch.apic->pending_events); - else - clear_bit(KVM_APIC_INIT, &vcpu->arch.apic->pending_events); - } + } + + if (lapic_in_kernel(vcpu)) { + if (events->smi.latched_init) + set_bit(KVM_APIC_INIT, &vcpu->arch.apic->pending_events); + else + clear_bit(KVM_APIC_INIT, &vcpu->arch.apic->pending_events); } } @@ -4397,6 +4442,7 @@ long kvm_arch_vcpu_ioctl(struct file *filp, case KVM_SET_NESTED_STATE: { struct kvm_nested_state __user *user_kvm_nested_state = argp; struct kvm_nested_state kvm_state; + int idx; r = -EINVAL; if (!kvm_x86_ops->set_nested_state) @@ -4420,7 +4466,9 @@ long kvm_arch_vcpu_ioctl(struct file *filp, && !(kvm_state.flags & KVM_STATE_NESTED_GUEST_MODE)) break; + idx = srcu_read_lock(&vcpu->kvm->srcu); r = kvm_x86_ops->set_nested_state(vcpu, user_kvm_nested_state, &kvm_state); + srcu_read_unlock(&vcpu->kvm->srcu, idx); break; } case KVM_GET_SUPPORTED_HV_CPUID: { @@ -4922,9 +4970,6 @@ set_identity_unlock: if (!irqchip_kernel(kvm)) goto set_irqchip_out; r = kvm_vm_ioctl_set_irqchip(kvm, chip); - if (r) - goto set_irqchip_out; - r = 0; set_irqchip_out: kfree(chip); break; @@ -5097,23 +5142,28 @@ out: static void kvm_init_msr_list(void) { + struct x86_pmu_capability x86_pmu; u32 dummy[2]; - unsigned i, j; + unsigned i; BUILD_BUG_ON_MSG(INTEL_PMC_MAX_FIXED != 4, - "Please update the fixed PMCs in msrs_to_save[]"); - BUILD_BUG_ON_MSG(INTEL_PMC_MAX_GENERIC != 32, - "Please update the generic perfctr/eventsel MSRs in msrs_to_save[]"); + "Please update the fixed PMCs in msrs_to_saved_all[]"); + + perf_get_x86_pmu_capability(&x86_pmu); - for (i = j = 0; i < ARRAY_SIZE(msrs_to_save); i++) { - if (rdmsr_safe(msrs_to_save[i], &dummy[0], &dummy[1]) < 0) + num_msrs_to_save = 0; + num_emulated_msrs = 0; + num_msr_based_features = 0; + + for (i = 0; i < ARRAY_SIZE(msrs_to_save_all); i++) { + if (rdmsr_safe(msrs_to_save_all[i], &dummy[0], &dummy[1]) < 0) continue; /* * Even MSRs that are valid in the host may not be exposed * to the guests in some cases. */ - switch (msrs_to_save[i]) { + switch (msrs_to_save_all[i]) { case MSR_IA32_BNDCFGS: if (!kvm_mpx_supported()) continue; @@ -5141,43 +5191,43 @@ static void kvm_init_msr_list(void) break; case MSR_IA32_RTIT_ADDR0_A ... MSR_IA32_RTIT_ADDR3_B: { if (!kvm_x86_ops->pt_supported() || - msrs_to_save[i] - MSR_IA32_RTIT_ADDR0_A >= + msrs_to_save_all[i] - MSR_IA32_RTIT_ADDR0_A >= intel_pt_validate_hw_cap(PT_CAP_num_address_ranges) * 2) continue; break; + case MSR_ARCH_PERFMON_PERFCTR0 ... MSR_ARCH_PERFMON_PERFCTR0 + 17: + if (msrs_to_save_all[i] - MSR_ARCH_PERFMON_PERFCTR0 >= + min(INTEL_PMC_MAX_GENERIC, x86_pmu.num_counters_gp)) + continue; + break; + case MSR_ARCH_PERFMON_EVENTSEL0 ... 
MSR_ARCH_PERFMON_EVENTSEL0 + 17: + if (msrs_to_save_all[i] - MSR_ARCH_PERFMON_EVENTSEL0 >= + min(INTEL_PMC_MAX_GENERIC, x86_pmu.num_counters_gp)) + continue; } default: break; } - if (j < i) - msrs_to_save[j] = msrs_to_save[i]; - j++; + msrs_to_save[num_msrs_to_save++] = msrs_to_save_all[i]; } - num_msrs_to_save = j; - for (i = j = 0; i < ARRAY_SIZE(emulated_msrs); i++) { - if (!kvm_x86_ops->has_emulated_msr(emulated_msrs[i])) + for (i = 0; i < ARRAY_SIZE(emulated_msrs_all); i++) { + if (!kvm_x86_ops->has_emulated_msr(emulated_msrs_all[i])) continue; - if (j < i) - emulated_msrs[j] = emulated_msrs[i]; - j++; + emulated_msrs[num_emulated_msrs++] = emulated_msrs_all[i]; } - num_emulated_msrs = j; - for (i = j = 0; i < ARRAY_SIZE(msr_based_features); i++) { + for (i = 0; i < ARRAY_SIZE(msr_based_features_all); i++) { struct kvm_msr_entry msr; - msr.index = msr_based_features[i]; + msr.index = msr_based_features_all[i]; if (kvm_get_msr_feature(&msr)) continue; - if (j < i) - msr_based_features[j] = msr_based_features[i]; - j++; + msr_based_features[num_msr_based_features++] = msr_based_features_all[i]; } - num_msr_based_features = j; } static int vcpu_mmio_write(struct kvm_vcpu *vcpu, gpa_t addr, int len, @@ -6107,7 +6157,7 @@ static void emulator_set_smbase(struct x86_emulate_ctxt *ctxt, u64 smbase) static int emulator_check_pmc(struct x86_emulate_ctxt *ctxt, u32 pmc) { - return kvm_pmu_is_valid_msr_idx(emul_to_vcpu(ctxt), pmc); + return kvm_pmu_is_valid_rdpmc_ecx(emul_to_vcpu(ctxt), pmc); } static int emulator_read_pmc(struct x86_emulate_ctxt *ctxt, @@ -7837,6 +7887,19 @@ static void process_smi(struct kvm_vcpu *vcpu) kvm_make_request(KVM_REQ_EVENT, vcpu); } +void kvm_make_scan_ioapic_request_mask(struct kvm *kvm, + unsigned long *vcpu_bitmap) +{ + cpumask_var_t cpus; + + zalloc_cpumask_var(&cpus, GFP_ATOMIC); + + kvm_make_vcpus_request_mask(kvm, KVM_REQ_SCAN_IOAPIC, + vcpu_bitmap, cpus); + + free_cpumask_var(cpus); +} + void kvm_make_scan_ioapic_request(struct kvm *kvm) { kvm_make_all_cpus_request(kvm, KVM_REQ_SCAN_IOAPIC); @@ -7914,7 +7977,6 @@ void kvm_vcpu_reload_apic_access_page(struct kvm_vcpu *vcpu) */ put_page(page); } -EXPORT_SYMBOL_GPL(kvm_vcpu_reload_apic_access_page); void __kvm_request_immediate_exit(struct kvm_vcpu *vcpu) { @@ -7937,8 +7999,12 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) bool req_immediate_exit = false; if (kvm_request_pending(vcpu)) { - if (kvm_check_request(KVM_REQ_GET_VMCS12_PAGES, vcpu)) - kvm_x86_ops->get_vmcs12_pages(vcpu); + if (kvm_check_request(KVM_REQ_GET_VMCS12_PAGES, vcpu)) { + if (unlikely(!kvm_x86_ops->get_vmcs12_pages(vcpu))) { + r = 0; + goto out; + } + } if (kvm_check_request(KVM_REQ_MMU_RELOAD, vcpu)) kvm_mmu_unload(vcpu); if (kvm_check_request(KVM_REQ_MIGRATE_TIMER, vcpu)) @@ -8669,8 +8735,12 @@ int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu, mp_state->mp_state != KVM_MP_STATE_RUNNABLE) goto out; - /* INITs are latched while in SMM */ - if ((is_smm(vcpu) || vcpu->arch.smi_pending) && + /* + * KVM_MP_STATE_INIT_RECEIVED means the processor is in + * INIT state; latched init should be reported using + * KVM_SET_VCPU_EVENTS, so reject it here. 
+ */ + if ((kvm_vcpu_latch_init(vcpu) || vcpu->arch.smi_pending) && (mp_state->mp_state == KVM_MP_STATE_SIPI_RECEIVED || mp_state->mp_state == KVM_MP_STATE_INIT_RECEIVED)) goto out; @@ -8714,10 +8784,6 @@ EXPORT_SYMBOL_GPL(kvm_task_switch); static int kvm_valid_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs) { - if (!guest_cpuid_has(vcpu, X86_FEATURE_XSAVE) && - (sregs->cr4 & X86_CR4_OSXSAVE)) - return -EINVAL; - if ((sregs->efer & EFER_LME) && (sregs->cr0 & X86_CR0_PG)) { /* * When EFER.LME and CR0.PG are set, the processor is in @@ -8736,7 +8802,7 @@ static int kvm_valid_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs) return -EINVAL; } - return 0; + return kvm_valid_cr4(vcpu, sregs->cr4); } static int __set_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs) @@ -8766,7 +8832,7 @@ static int __set_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs) vcpu->arch.cr2 = sregs->cr2; mmu_reset_needed |= kvm_read_cr3(vcpu) != sregs->cr3; vcpu->arch.cr3 = sregs->cr3; - __set_bit(VCPU_EXREG_CR3, (ulong *)&vcpu->arch.regs_avail); + kvm_register_mark_available(vcpu, VCPU_EXREG_CR3); kvm_set_cr8(vcpu, sregs->cr8); @@ -9293,6 +9359,9 @@ int kvm_arch_hardware_setup(void) kvm_default_tsc_scaling_ratio = 1ULL << kvm_tsc_scaling_ratio_frac_bits; } + if (boot_cpu_has(X86_FEATURE_XSAVES)) + rdmsrl(MSR_IA32_XSS, host_xss); + kvm_init_msr_list(); return 0; } @@ -9346,7 +9415,7 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu) goto fail_free_pio_data; if (irqchip_in_kernel(vcpu->kvm)) { - vcpu->arch.apicv_active = kvm_x86_ops->get_enable_apicv(vcpu); + vcpu->arch.apicv_active = kvm_x86_ops->get_enable_apicv(vcpu->kvm); r = kvm_create_lapic(vcpu, lapic_timer_advance_ns); if (r < 0) goto fail_mmu_destroy; @@ -9415,7 +9484,13 @@ void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu) { + struct kvm_pmu *pmu = vcpu_to_pmu(vcpu); + vcpu->arch.l1tf_flush_l1d = true; + if (pmu->version && unlikely(pmu->event_count)) { + pmu->need_cleanup = true; + kvm_make_request(KVM_REQ_PMU, vcpu); + } kvm_x86_ops->sched_in(vcpu, cpu); } @@ -9427,6 +9502,7 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) INIT_HLIST_HEAD(&kvm->arch.mask_notifier_list); INIT_LIST_HEAD(&kvm->arch.active_mmu_pages); INIT_LIST_HEAD(&kvm->arch.zapped_obsolete_pages); + INIT_LIST_HEAD(&kvm->arch.lpage_disallowed_mmu_pages); INIT_LIST_HEAD(&kvm->arch.assigned_dev_head); atomic_set(&kvm->arch.noncoherent_dma_count, 0); @@ -9455,6 +9531,11 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) return kvm_x86_ops->vm_init(kvm); } +int kvm_arch_post_init_vm(struct kvm *kvm) +{ + return kvm_mmu_post_init_vm(kvm); +} + static void kvm_unload_vcpu_mmu(struct kvm_vcpu *vcpu) { vcpu_load(vcpu); @@ -9556,6 +9637,11 @@ int x86_set_memory_region(struct kvm *kvm, int id, gpa_t gpa, u32 size) } EXPORT_SYMBOL_GPL(x86_set_memory_region); +void kvm_arch_pre_destroy_vm(struct kvm *kvm) +{ + kvm_mmu_pre_destroy_vm(kvm); +} + void kvm_arch_destroy_vm(struct kvm *kvm) { if (current->mm == kvm->mm) { diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h index dbf7442a822b..29391af8871d 100644 --- a/arch/x86/kvm/x86.h +++ b/arch/x86/kvm/x86.h @@ -238,8 +238,7 @@ static inline bool vcpu_match_mmio_gpa(struct kvm_vcpu *vcpu, gpa_t gpa) return false; } -static inline unsigned long kvm_register_readl(struct kvm_vcpu *vcpu, - enum kvm_reg reg) +static inline unsigned long kvm_register_readl(struct kvm_vcpu *vcpu, int reg) { unsigned long val = kvm_register_read(vcpu, reg); @@ -247,8 +246,7 @@ static inline 
unsigned long kvm_register_readl(struct kvm_vcpu *vcpu, } static inline void kvm_register_writel(struct kvm_vcpu *vcpu, - enum kvm_reg reg, - unsigned long val) + int reg, unsigned long val) { if (!is_64_bit_mode(vcpu)) val = (u32)val; @@ -260,6 +258,11 @@ static inline bool kvm_check_has_quirk(struct kvm *kvm, u64 quirk) return !(kvm->arch.disabled_quirks & quirk); } +static inline bool kvm_vcpu_latch_init(struct kvm_vcpu *vcpu) +{ + return is_smm(vcpu) || kvm_x86_ops->apic_init_signal_blocked(vcpu); +} + void kvm_set_pending_timer(struct kvm_vcpu *vcpu); void kvm_inject_realmode_interrupt(struct kvm_vcpu *vcpu, int irq, int inc_eip); @@ -366,7 +369,7 @@ static inline bool kvm_pat_valid(u64 data) return (data | ((data & 0x0202020202020202ull) << 1)) == data; } -void kvm_load_guest_xcr0(struct kvm_vcpu *vcpu); -void kvm_put_guest_xcr0(struct kvm_vcpu *vcpu); +void kvm_load_guest_xsave_state(struct kvm_vcpu *vcpu); +void kvm_load_host_xsave_state(struct kvm_vcpu *vcpu); #endif diff --git a/arch/x86/lib/delay.c b/arch/x86/lib/delay.c index b7375dc6898f..c126571e5e2e 100644 --- a/arch/x86/lib/delay.c +++ b/arch/x86/lib/delay.c @@ -113,8 +113,8 @@ static void delay_mwaitx(unsigned long __loops) __monitorx(raw_cpu_ptr(&cpu_tss_rw), 0, 0); /* - * AMD, like Intel, supports the EAX hint and EAX=0xf - * means, do not enter any deep C-state and we use it + * AMD, like Intel's MWAIT version, supports the EAX hint and + * EAX=0xf0 means, do not enter any deep C-state and we use it * here in delay() to minimize wakeup latency. */ __mwaitx(MWAITX_DISABLE_CSTATES, delay, MWAITX_ECX_TIMER_ENABLE); diff --git a/arch/x86/mm/Makefile b/arch/x86/mm/Makefile index 84373dc9b341..bbc68a54795e 100644 --- a/arch/x86/mm/Makefile +++ b/arch/x86/mm/Makefile @@ -13,7 +13,7 @@ CFLAGS_REMOVE_mem_encrypt_identity.o = -pg endif obj-y := init.o init_$(BITS).o fault.o ioremap.o extable.o pageattr.o mmap.o \ - pat.o pgtable.o physaddr.o setup_nx.o tlb.o cpu_entry_area.o + pat.o pgtable.o physaddr.o setup_nx.o tlb.o cpu_entry_area.o maccess.o # Make sure __phys_addr has no stackprotector nostackp := $(call cc-option, -fno-stack-protector) diff --git a/arch/x86/mm/kmmio.c b/arch/x86/mm/kmmio.c index 79eb55ce69a9..49d7814b59a9 100644 --- a/arch/x86/mm/kmmio.c +++ b/arch/x86/mm/kmmio.c @@ -193,8 +193,8 @@ static int arm_kmmio_fault_page(struct kmmio_fault_page *f) int ret; WARN_ONCE(f->armed, KERN_ERR pr_fmt("kmmio page already armed.\n")); if (f->armed) { - pr_warning("double-arm: addr 0x%08lx, ref %d, old %d\n", - f->addr, f->count, !!f->old_presence); + pr_warn("double-arm: addr 0x%08lx, ref %d, old %d\n", + f->addr, f->count, !!f->old_presence); } ret = clear_page_presence(f, true); WARN_ONCE(ret < 0, KERN_ERR pr_fmt("arming at 0x%08lx failed.\n"), @@ -341,8 +341,7 @@ static int post_kmmio_handler(unsigned long condition, struct pt_regs *regs) * something external causing them (f.e. 
using a debugger while * mmio tracing enabled), or erroneous behaviour */ - pr_warning("unexpected debug trap on CPU %d.\n", - smp_processor_id()); + pr_warn("unexpected debug trap on CPU %d.\n", smp_processor_id()); goto out; } diff --git a/arch/x86/mm/maccess.c b/arch/x86/mm/maccess.c new file mode 100644 index 000000000000..f5b85bdc0535 --- /dev/null +++ b/arch/x86/mm/maccess.c @@ -0,0 +1,43 @@ +// SPDX-License-Identifier: GPL-2.0-only + +#include <linux/uaccess.h> +#include <linux/kernel.h> + +#ifdef CONFIG_X86_64 +static __always_inline u64 canonical_address(u64 vaddr, u8 vaddr_bits) +{ + return ((s64)vaddr << (64 - vaddr_bits)) >> (64 - vaddr_bits); +} + +static __always_inline bool invalid_probe_range(u64 vaddr) +{ + /* + * Range covering the highest possible canonical userspace address + * as well as non-canonical address range. For the canonical range + * we also need to include the userspace guard page. + */ + return vaddr < TASK_SIZE_MAX + PAGE_SIZE || + canonical_address(vaddr, boot_cpu_data.x86_virt_bits) != vaddr; +} +#else +static __always_inline bool invalid_probe_range(u64 vaddr) +{ + return vaddr < TASK_SIZE_MAX; +} +#endif + +long probe_kernel_read_strict(void *dst, const void *src, size_t size) +{ + if (unlikely(invalid_probe_range((unsigned long)src))) + return -EFAULT; + + return __probe_kernel_read(dst, src, size); +} + +long strncpy_from_unsafe_strict(char *dst, const void *unsafe_addr, long count) +{ + if (unlikely(invalid_probe_range((unsigned long)unsafe_addr))) + return -EFAULT; + + return __strncpy_from_unsafe(dst, unsafe_addr, count); +} diff --git a/arch/x86/mm/mmio-mod.c b/arch/x86/mm/mmio-mod.c index b8ef8557d4b3..673de6063345 100644 --- a/arch/x86/mm/mmio-mod.c +++ b/arch/x86/mm/mmio-mod.c @@ -394,7 +394,7 @@ static void enter_uniprocessor(void) } out: if (num_online_cpus() > 1) - pr_warning("multiple CPUs still online, may miss events.\n"); + pr_warn("multiple CPUs still online, may miss events.\n"); } static void leave_uniprocessor(void) @@ -418,8 +418,8 @@ static void leave_uniprocessor(void) static void enter_uniprocessor(void) { if (num_online_cpus() > 1) - pr_warning("multiple CPUs are online, may miss events. " - "Suggest booting with maxcpus=1 kernel argument.\n"); + pr_warn("multiple CPUs are online, may miss events. 
" + "Suggest booting with maxcpus=1 kernel argument.\n"); } static void leave_uniprocessor(void) diff --git a/arch/x86/mm/numa_emulation.c b/arch/x86/mm/numa_emulation.c index abffa0be80da..7f1d2034df1e 100644 --- a/arch/x86/mm/numa_emulation.c +++ b/arch/x86/mm/numa_emulation.c @@ -438,7 +438,7 @@ void __init numa_emulation(struct numa_meminfo *numa_meminfo, int numa_dist_cnt) goto no_emu; if (numa_cleanup_meminfo(&ei) < 0) { - pr_warning("NUMA: Warning: constructed meminfo invalid, disabling emulation\n"); + pr_warn("NUMA: Warning: constructed meminfo invalid, disabling emulation\n"); goto no_emu; } @@ -449,7 +449,7 @@ void __init numa_emulation(struct numa_meminfo *numa_meminfo, int numa_dist_cnt) phys = memblock_find_in_range(0, PFN_PHYS(max_pfn_mapped), phys_size, PAGE_SIZE); if (!phys) { - pr_warning("NUMA: Warning: can't allocate copy of distance table, disabling emulation\n"); + pr_warn("NUMA: Warning: can't allocate copy of distance table, disabling emulation\n"); goto no_emu; } memblock_reserve(phys, phys_size); diff --git a/arch/x86/mm/testmmiotrace.c b/arch/x86/mm/testmmiotrace.c index a8bd952e136d..92153d054d6c 100644 --- a/arch/x86/mm/testmmiotrace.c +++ b/arch/x86/mm/testmmiotrace.c @@ -127,9 +127,9 @@ static int __init init(void) return -ENXIO; } - pr_warning("WARNING: mapping %lu kB @ 0x%08lx in PCI address space, " - "and writing 16 kB of rubbish in there.\n", - size >> 10, mmio_address); + pr_warn("WARNING: mapping %lu kB @ 0x%08lx in PCI address space, " + "and writing 16 kB of rubbish in there.\n", + size >> 10, mmio_address); do_test(size); do_test_bulk_ioremapping(); pr_info("All done.\n"); diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c index 991549a1c5f3..b8be18427277 100644 --- a/arch/x86/net/bpf_jit_comp.c +++ b/arch/x86/net/bpf_jit_comp.c @@ -9,9 +9,11 @@ #include <linux/filter.h> #include <linux/if_vlan.h> #include <linux/bpf.h> - +#include <linux/memory.h> +#include <asm/extable.h> #include <asm/set_memory.h> #include <asm/nospec-branch.h> +#include <asm/text-patching.h> static u8 *emit_code(u8 *ptr, u32 bytes, unsigned int len) { @@ -96,6 +98,7 @@ static int bpf_size_to_x86_bytes(int bpf_size) /* Pick a register outside of BPF range for JIT internal work */ #define AUX_REG (MAX_BPF_JIT_REG + 1) +#define X86_REG_R9 (MAX_BPF_JIT_REG + 2) /* * The following table maps BPF registers to x86-64 registers. @@ -104,8 +107,8 @@ static int bpf_size_to_x86_bytes(int bpf_size) * register in load/store instructions, it always needs an * extra byte of encoding and is callee saved. * - * Also x86-64 register R9 is unused. x86-64 register R10 is - * used for blinding (if enabled). + * x86-64 register R9 is not used by BPF programs, but can be used by BPF + * trampoline. x86-64 register R10 is used for blinding (if enabled). 
*/ static const int reg2hex[] = { [BPF_REG_0] = 0, /* RAX */ @@ -121,6 +124,20 @@ static const int reg2hex[] = { [BPF_REG_FP] = 5, /* RBP readonly */ [BPF_REG_AX] = 2, /* R10 temp register */ [AUX_REG] = 3, /* R11 temp register */ + [X86_REG_R9] = 1, /* R9 register, 6th function argument */ +}; + +static const int reg2pt_regs[] = { + [BPF_REG_0] = offsetof(struct pt_regs, ax), + [BPF_REG_1] = offsetof(struct pt_regs, di), + [BPF_REG_2] = offsetof(struct pt_regs, si), + [BPF_REG_3] = offsetof(struct pt_regs, dx), + [BPF_REG_4] = offsetof(struct pt_regs, cx), + [BPF_REG_5] = offsetof(struct pt_regs, r8), + [BPF_REG_6] = offsetof(struct pt_regs, bx), + [BPF_REG_7] = offsetof(struct pt_regs, r13), + [BPF_REG_8] = offsetof(struct pt_regs, r14), + [BPF_REG_9] = offsetof(struct pt_regs, r15), }; /* @@ -135,6 +152,7 @@ static bool is_ereg(u32 reg) BIT(BPF_REG_7) | BIT(BPF_REG_8) | BIT(BPF_REG_9) | + BIT(X86_REG_R9) | BIT(BPF_REG_AX)); } @@ -186,7 +204,10 @@ struct jit_context { #define BPF_MAX_INSN_SIZE 128 #define BPF_INSN_SAFETY 64 -#define PROLOGUE_SIZE 20 +/* Number of bytes emit_patch() needs to generate instructions */ +#define X86_PATCH_SIZE 5 + +#define PROLOGUE_SIZE 25 /* * Emit x86-64 prologue code for BPF program and check its size. @@ -195,8 +216,13 @@ struct jit_context { static void emit_prologue(u8 **pprog, u32 stack_depth, bool ebpf_from_cbpf) { u8 *prog = *pprog; - int cnt = 0; + int cnt = X86_PATCH_SIZE; + /* BPF trampoline can be made to work without these nops, + * but let's waste 5 bytes for now and optimize later + */ + memcpy(prog, ideal_nops[NOP_ATOMIC5], cnt); + prog += cnt; EMIT1(0x55); /* push rbp */ EMIT3(0x48, 0x89, 0xE5); /* mov rbp, rsp */ /* sub rsp, rounded_stack_depth */ @@ -213,6 +239,89 @@ static void emit_prologue(u8 **pprog, u32 stack_depth, bool ebpf_from_cbpf) *pprog = prog; } +static int emit_patch(u8 **pprog, void *func, void *ip, u8 opcode) +{ + u8 *prog = *pprog; + int cnt = 0; + s64 offset; + + offset = func - (ip + X86_PATCH_SIZE); + if (!is_simm32(offset)) { + pr_err("Target call %p is out of range\n", func); + return -ERANGE; + } + EMIT1_off32(opcode, offset); + *pprog = prog; + return 0; +} + +static int emit_call(u8 **pprog, void *func, void *ip) +{ + return emit_patch(pprog, func, ip, 0xE8); +} + +static int emit_jump(u8 **pprog, void *func, void *ip) +{ + return emit_patch(pprog, func, ip, 0xE9); +} + +static int __bpf_arch_text_poke(void *ip, enum bpf_text_poke_type t, + void *old_addr, void *new_addr, + const bool text_live) +{ + const u8 *nop_insn = ideal_nops[NOP_ATOMIC5]; + u8 old_insn[X86_PATCH_SIZE]; + u8 new_insn[X86_PATCH_SIZE]; + u8 *prog; + int ret; + + memcpy(old_insn, nop_insn, X86_PATCH_SIZE); + if (old_addr) { + prog = old_insn; + ret = t == BPF_MOD_CALL ? + emit_call(&prog, old_addr, ip) : + emit_jump(&prog, old_addr, ip); + if (ret) + return ret; + } + + memcpy(new_insn, nop_insn, X86_PATCH_SIZE); + if (new_addr) { + prog = new_insn; + ret = t == BPF_MOD_CALL ? 
+ emit_call(&prog, new_addr, ip) : + emit_jump(&prog, new_addr, ip); + if (ret) + return ret; + } + + ret = -EBUSY; + mutex_lock(&text_mutex); + if (memcmp(ip, old_insn, X86_PATCH_SIZE)) + goto out; + if (memcmp(ip, new_insn, X86_PATCH_SIZE)) { + if (text_live) + text_poke_bp(ip, new_insn, X86_PATCH_SIZE, NULL); + else + memcpy(ip, new_insn, X86_PATCH_SIZE); + } + ret = 0; +out: + mutex_unlock(&text_mutex); + return ret; +} + +int bpf_arch_text_poke(void *ip, enum bpf_text_poke_type t, + void *old_addr, void *new_addr) +{ + if (!is_kernel_text((long)ip) && + !is_bpf_text_address((long)ip)) + /* BPF poking in modules is not supported */ + return -EINVAL; + + return __bpf_arch_text_poke(ip, t, old_addr, new_addr, true); +} + /* * Generate the following code: * @@ -227,7 +336,7 @@ static void emit_prologue(u8 **pprog, u32 stack_depth, bool ebpf_from_cbpf) * goto *(prog->bpf_func + prologue_size); * out: */ -static void emit_bpf_tail_call(u8 **pprog) +static void emit_bpf_tail_call_indirect(u8 **pprog) { u8 *prog = *pprog; int label1, label2, label3; @@ -294,6 +403,68 @@ static void emit_bpf_tail_call(u8 **pprog) *pprog = prog; } +static void emit_bpf_tail_call_direct(struct bpf_jit_poke_descriptor *poke, + u8 **pprog, int addr, u8 *image) +{ + u8 *prog = *pprog; + int cnt = 0; + + /* + * if (tail_call_cnt > MAX_TAIL_CALL_CNT) + * goto out; + */ + EMIT2_off32(0x8B, 0x85, -36 - MAX_BPF_STACK); /* mov eax, dword ptr [rbp - 548] */ + EMIT3(0x83, 0xF8, MAX_TAIL_CALL_CNT); /* cmp eax, MAX_TAIL_CALL_CNT */ + EMIT2(X86_JA, 14); /* ja out */ + EMIT3(0x83, 0xC0, 0x01); /* add eax, 1 */ + EMIT2_off32(0x89, 0x85, -36 - MAX_BPF_STACK); /* mov dword ptr [rbp -548], eax */ + + poke->ip = image + (addr - X86_PATCH_SIZE); + poke->adj_off = PROLOGUE_SIZE; + + memcpy(prog, ideal_nops[NOP_ATOMIC5], X86_PATCH_SIZE); + prog += X86_PATCH_SIZE; + /* out: */ + + *pprog = prog; +} + +static void bpf_tail_call_direct_fixup(struct bpf_prog *prog) +{ + struct bpf_jit_poke_descriptor *poke; + struct bpf_array *array; + struct bpf_prog *target; + int i, ret; + + for (i = 0; i < prog->aux->size_poke_tab; i++) { + poke = &prog->aux->poke_tab[i]; + WARN_ON_ONCE(READ_ONCE(poke->ip_stable)); + + if (poke->reason != BPF_POKE_REASON_TAIL_CALL) + continue; + + array = container_of(poke->tail_call.map, struct bpf_array, map); + mutex_lock(&array->aux->poke_mutex); + target = array->ptrs[poke->tail_call.key]; + if (target) { + /* Plain memcpy is used when image is not live yet + * and still not locked as read-only. Once poke + * location is active (poke->ip_stable), any parallel + * bpf_arch_text_poke() might occur still on the + * read-write image until we finally locked it as + * read-only. Both modifications on the given image + * are under text_mutex to avoid interference. 
+ */ + ret = __bpf_arch_text_poke(poke->ip, BPF_MOD_JUMP, NULL, + (u8 *)target->bpf_func + + poke->adj_off, false); + BUG_ON(ret < 0); + } + WRITE_ONCE(poke->ip_stable, true); + mutex_unlock(&array->aux->poke_mutex); + } +} + static void emit_mov_imm32(u8 **pprog, bool sign_propagate, u32 dst_reg, const u32 imm32) { @@ -377,6 +548,96 @@ static void emit_mov_reg(u8 **pprog, bool is64, u32 dst_reg, u32 src_reg) *pprog = prog; } +/* LDX: dst_reg = *(u8*)(src_reg + off) */ +static void emit_ldx(u8 **pprog, u32 size, u32 dst_reg, u32 src_reg, int off) +{ + u8 *prog = *pprog; + int cnt = 0; + + switch (size) { + case BPF_B: + /* Emit 'movzx rax, byte ptr [rax + off]' */ + EMIT3(add_2mod(0x48, src_reg, dst_reg), 0x0F, 0xB6); + break; + case BPF_H: + /* Emit 'movzx rax, word ptr [rax + off]' */ + EMIT3(add_2mod(0x48, src_reg, dst_reg), 0x0F, 0xB7); + break; + case BPF_W: + /* Emit 'mov eax, dword ptr [rax+0x14]' */ + if (is_ereg(dst_reg) || is_ereg(src_reg)) + EMIT2(add_2mod(0x40, src_reg, dst_reg), 0x8B); + else + EMIT1(0x8B); + break; + case BPF_DW: + /* Emit 'mov rax, qword ptr [rax+0x14]' */ + EMIT2(add_2mod(0x48, src_reg, dst_reg), 0x8B); + break; + } + /* + * If insn->off == 0 we can save one extra byte, but + * special case of x86 R13 which always needs an offset + * is not worth the hassle + */ + if (is_imm8(off)) + EMIT2(add_2reg(0x40, src_reg, dst_reg), off); + else + EMIT1_off32(add_2reg(0x80, src_reg, dst_reg), off); + *pprog = prog; +} + +/* STX: *(u8*)(dst_reg + off) = src_reg */ +static void emit_stx(u8 **pprog, u32 size, u32 dst_reg, u32 src_reg, int off) +{ + u8 *prog = *pprog; + int cnt = 0; + + switch (size) { + case BPF_B: + /* Emit 'mov byte ptr [rax + off], al' */ + if (is_ereg(dst_reg) || is_ereg(src_reg) || + /* We have to add extra byte for x86 SIL, DIL regs */ + src_reg == BPF_REG_1 || src_reg == BPF_REG_2) + EMIT2(add_2mod(0x40, dst_reg, src_reg), 0x88); + else + EMIT1(0x88); + break; + case BPF_H: + if (is_ereg(dst_reg) || is_ereg(src_reg)) + EMIT3(0x66, add_2mod(0x40, dst_reg, src_reg), 0x89); + else + EMIT2(0x66, 0x89); + break; + case BPF_W: + if (is_ereg(dst_reg) || is_ereg(src_reg)) + EMIT2(add_2mod(0x40, dst_reg, src_reg), 0x89); + else + EMIT1(0x89); + break; + case BPF_DW: + EMIT2(add_2mod(0x48, dst_reg, src_reg), 0x89); + break; + } + if (is_imm8(off)) + EMIT2(add_2reg(0x40, dst_reg, src_reg), off); + else + EMIT1_off32(add_2reg(0x80, dst_reg, src_reg), off); + *pprog = prog; +} + +static bool ex_handler_bpf(const struct exception_table_entry *x, + struct pt_regs *regs, int trapnr, + unsigned long error_code, unsigned long fault_addr) +{ + u32 reg = x->fixup >> 8; + + /* jump over faulting load and clear dest register */ + *(unsigned long *)((void *)regs + reg) = 0; + regs->ip += x->fixup & 0xff; + return true; +} + static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image, int oldproglen, struct jit_context *ctx) { @@ -384,7 +645,7 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image, int insn_cnt = bpf_prog->len; bool seen_exit = false; u8 temp[BPF_MAX_INSN_SIZE + BPF_INSN_SAFETY]; - int i, cnt = 0; + int i, cnt = 0, excnt = 0; int proglen = 0; u8 *prog = temp; @@ -747,64 +1008,64 @@ st: if (is_imm8(insn->off)) /* STX: *(u8*)(dst_reg + off) = src_reg */ case BPF_STX | BPF_MEM | BPF_B: - /* Emit 'mov byte ptr [rax + off], al' */ - if (is_ereg(dst_reg) || is_ereg(src_reg) || - /* We have to add extra byte for x86 SIL, DIL regs */ - src_reg == BPF_REG_1 || src_reg == BPF_REG_2) - EMIT2(add_2mod(0x40, dst_reg, src_reg), 0x88); - else - 
EMIT1(0x88); - goto stx; case BPF_STX | BPF_MEM | BPF_H: - if (is_ereg(dst_reg) || is_ereg(src_reg)) - EMIT3(0x66, add_2mod(0x40, dst_reg, src_reg), 0x89); - else - EMIT2(0x66, 0x89); - goto stx; case BPF_STX | BPF_MEM | BPF_W: - if (is_ereg(dst_reg) || is_ereg(src_reg)) - EMIT2(add_2mod(0x40, dst_reg, src_reg), 0x89); - else - EMIT1(0x89); - goto stx; case BPF_STX | BPF_MEM | BPF_DW: - EMIT2(add_2mod(0x48, dst_reg, src_reg), 0x89); -stx: if (is_imm8(insn->off)) - EMIT2(add_2reg(0x40, dst_reg, src_reg), insn->off); - else - EMIT1_off32(add_2reg(0x80, dst_reg, src_reg), - insn->off); + emit_stx(&prog, BPF_SIZE(insn->code), dst_reg, src_reg, insn->off); break; /* LDX: dst_reg = *(u8*)(src_reg + off) */ case BPF_LDX | BPF_MEM | BPF_B: - /* Emit 'movzx rax, byte ptr [rax + off]' */ - EMIT3(add_2mod(0x48, src_reg, dst_reg), 0x0F, 0xB6); - goto ldx; + case BPF_LDX | BPF_PROBE_MEM | BPF_B: case BPF_LDX | BPF_MEM | BPF_H: - /* Emit 'movzx rax, word ptr [rax + off]' */ - EMIT3(add_2mod(0x48, src_reg, dst_reg), 0x0F, 0xB7); - goto ldx; + case BPF_LDX | BPF_PROBE_MEM | BPF_H: case BPF_LDX | BPF_MEM | BPF_W: - /* Emit 'mov eax, dword ptr [rax+0x14]' */ - if (is_ereg(dst_reg) || is_ereg(src_reg)) - EMIT2(add_2mod(0x40, src_reg, dst_reg), 0x8B); - else - EMIT1(0x8B); - goto ldx; + case BPF_LDX | BPF_PROBE_MEM | BPF_W: case BPF_LDX | BPF_MEM | BPF_DW: - /* Emit 'mov rax, qword ptr [rax+0x14]' */ - EMIT2(add_2mod(0x48, src_reg, dst_reg), 0x8B); -ldx: /* - * If insn->off == 0 we can save one extra byte, but - * special case of x86 R13 which always needs an offset - * is not worth the hassle - */ - if (is_imm8(insn->off)) - EMIT2(add_2reg(0x40, src_reg, dst_reg), insn->off); - else - EMIT1_off32(add_2reg(0x80, src_reg, dst_reg), - insn->off); + case BPF_LDX | BPF_PROBE_MEM | BPF_DW: + emit_ldx(&prog, BPF_SIZE(insn->code), dst_reg, src_reg, insn->off); + if (BPF_MODE(insn->code) == BPF_PROBE_MEM) { + struct exception_table_entry *ex; + u8 *_insn = image + proglen; + s64 delta; + + if (!bpf_prog->aux->extable) + break; + + if (excnt >= bpf_prog->aux->num_exentries) { + pr_err("ex gen bug\n"); + return -EFAULT; + } + ex = &bpf_prog->aux->extable[excnt++]; + + delta = _insn - (u8 *)&ex->insn; + if (!is_simm32(delta)) { + pr_err("extable->insn doesn't fit into 32-bit\n"); + return -EFAULT; + } + ex->insn = delta; + + delta = (u8 *)ex_handler_bpf - (u8 *)&ex->handler; + if (!is_simm32(delta)) { + pr_err("extable->handler doesn't fit into 32-bit\n"); + return -EFAULT; + } + ex->handler = delta; + + if (dst_reg > BPF_REG_9) { + pr_err("verifier error\n"); + return -EFAULT; + } + /* + * Compute size of x86 insn and its target dest x86 register. + * ex_handler_bpf() will use lower 8 bits to adjust + * pt_regs->ip to jump over this x86 instruction + * and upper bits to figure out which pt_regs to zero out. + * End result: x86 insn "mov rbx, qword ptr [rax+0x14]" + * of 4 bytes will be ignored and rbx will be zero inited. 
+ */ + ex->fixup = (prog - temp) | (reg2pt_regs[dst_reg] << 8); + } break; /* STX XADD: lock *(u32*)(dst_reg + off) += src_reg */ @@ -827,17 +1088,16 @@ xadd: if (is_imm8(insn->off)) /* call */ case BPF_JMP | BPF_CALL: func = (u8 *) __bpf_call_base + imm32; - jmp_offset = func - (image + addrs[i]); - if (!imm32 || !is_simm32(jmp_offset)) { - pr_err("unsupported BPF func %d addr %p image %p\n", - imm32, func, image); + if (!imm32 || emit_call(&prog, func, image + addrs[i - 1])) return -EINVAL; - } - EMIT1_off32(0xE8, jmp_offset); break; case BPF_JMP | BPF_TAIL_CALL: - emit_bpf_tail_call(&prog); + if (imm32) + emit_bpf_tail_call_direct(&bpf_prog->aux->poke_tab[imm32 - 1], + &prog, addrs[i], image); + else + emit_bpf_tail_call_indirect(&prog); break; /* cond jump */ @@ -909,6 +1169,16 @@ xadd: if (is_imm8(insn->off)) case BPF_JMP32 | BPF_JSLT | BPF_K: case BPF_JMP32 | BPF_JSGE | BPF_K: case BPF_JMP32 | BPF_JSLE | BPF_K: + /* test dst_reg, dst_reg to save one extra byte */ + if (imm32 == 0) { + if (BPF_CLASS(insn->code) == BPF_JMP) + EMIT1(add_2mod(0x48, dst_reg, dst_reg)); + else if (is_ereg(dst_reg)) + EMIT1(add_2mod(0x40, dst_reg, dst_reg)); + EMIT2(0x85, add_2reg(0xC0, dst_reg, dst_reg)); + goto emit_cond_jmp; + } + /* cmp dst_reg, imm8/32 */ if (BPF_CLASS(insn->code) == BPF_JMP) EMIT1(add_1mod(0x48, dst_reg)); @@ -1048,9 +1318,218 @@ emit_jmp: addrs[i] = proglen; prog = temp; } + + if (image && excnt != bpf_prog->aux->num_exentries) { + pr_err("extable is not populated\n"); + return -EFAULT; + } return proglen; } +static void save_regs(struct btf_func_model *m, u8 **prog, int nr_args, + int stack_size) +{ + int i; + /* Store function arguments to stack. + * For a function that accepts two pointers the sequence will be: + * mov QWORD PTR [rbp-0x10],rdi + * mov QWORD PTR [rbp-0x8],rsi + */ + for (i = 0; i < min(nr_args, 6); i++) + emit_stx(prog, bytes_to_bpf_size(m->arg_size[i]), + BPF_REG_FP, + i == 5 ? X86_REG_R9 : BPF_REG_1 + i, + -(stack_size - i * 8)); +} + +static void restore_regs(struct btf_func_model *m, u8 **prog, int nr_args, + int stack_size) +{ + int i; + + /* Restore function arguments from stack. + * For a function that accepts two pointers the sequence will be: + * EMIT4(0x48, 0x8B, 0x7D, 0xF0); mov rdi,QWORD PTR [rbp-0x10] + * EMIT4(0x48, 0x8B, 0x75, 0xF8); mov rsi,QWORD PTR [rbp-0x8] + */ + for (i = 0; i < min(nr_args, 6); i++) + emit_ldx(prog, bytes_to_bpf_size(m->arg_size[i]), + i == 5 ? 
X86_REG_R9 : BPF_REG_1 + i, + BPF_REG_FP, + -(stack_size - i * 8)); +} + +static int invoke_bpf(struct btf_func_model *m, u8 **pprog, + struct bpf_prog **progs, int prog_cnt, int stack_size) +{ + u8 *prog = *pprog; + int cnt = 0, i; + + for (i = 0; i < prog_cnt; i++) { + if (emit_call(&prog, __bpf_prog_enter, prog)) + return -EINVAL; + /* remember prog start time returned by __bpf_prog_enter */ + emit_mov_reg(&prog, true, BPF_REG_6, BPF_REG_0); + + /* arg1: lea rdi, [rbp - stack_size] */ + EMIT4(0x48, 0x8D, 0x7D, -stack_size); + /* arg2: progs[i]->insnsi for interpreter */ + if (!progs[i]->jited) + emit_mov_imm64(&prog, BPF_REG_2, + (long) progs[i]->insnsi >> 32, + (u32) (long) progs[i]->insnsi); + /* call JITed bpf program or interpreter */ + if (emit_call(&prog, progs[i]->bpf_func, prog)) + return -EINVAL; + + /* arg1: mov rdi, progs[i] */ + emit_mov_imm64(&prog, BPF_REG_1, (long) progs[i] >> 32, + (u32) (long) progs[i]); + /* arg2: mov rsi, rbx <- start time in nsec */ + emit_mov_reg(&prog, true, BPF_REG_2, BPF_REG_6); + if (emit_call(&prog, __bpf_prog_exit, prog)) + return -EINVAL; + } + *pprog = prog; + return 0; +} + +/* Example: + * __be16 eth_type_trans(struct sk_buff *skb, struct net_device *dev); + * its 'struct btf_func_model' will be nr_args=2 + * The assembly code when eth_type_trans is executing after trampoline: + * + * push rbp + * mov rbp, rsp + * sub rsp, 16 // space for skb and dev + * push rbx // temp regs to pass start time + * mov qword ptr [rbp - 16], rdi // save skb pointer to stack + * mov qword ptr [rbp - 8], rsi // save dev pointer to stack + * call __bpf_prog_enter // rcu_read_lock and preempt_disable + * mov rbx, rax // remember start time in bpf stats are enabled + * lea rdi, [rbp - 16] // R1==ctx of bpf prog + * call addr_of_jited_FENTRY_prog + * movabsq rdi, 64bit_addr_of_struct_bpf_prog // unused if bpf stats are off + * mov rsi, rbx // prog start time + * call __bpf_prog_exit // rcu_read_unlock, preempt_enable and stats math + * mov rdi, qword ptr [rbp - 16] // restore skb pointer from stack + * mov rsi, qword ptr [rbp - 8] // restore dev pointer from stack + * pop rbx + * leave + * ret + * + * eth_type_trans has 5 byte nop at the beginning. These 5 bytes will be + * replaced with 'call generated_bpf_trampoline'. When it returns + * eth_type_trans will continue executing with original skb and dev pointers. 
+ * + * The assembly code when eth_type_trans is called from trampoline: + * + * push rbp + * mov rbp, rsp + * sub rsp, 24 // space for skb, dev, return value + * push rbx // temp regs to pass start time + * mov qword ptr [rbp - 24], rdi // save skb pointer to stack + * mov qword ptr [rbp - 16], rsi // save dev pointer to stack + * call __bpf_prog_enter // rcu_read_lock and preempt_disable + * mov rbx, rax // remember start time if bpf stats are enabled + * lea rdi, [rbp - 24] // R1==ctx of bpf prog + * call addr_of_jited_FENTRY_prog // bpf prog can access skb and dev + * movabsq rdi, 64bit_addr_of_struct_bpf_prog // unused if bpf stats are off + * mov rsi, rbx // prog start time + * call __bpf_prog_exit // rcu_read_unlock, preempt_enable and stats math + * mov rdi, qword ptr [rbp - 24] // restore skb pointer from stack + * mov rsi, qword ptr [rbp - 16] // restore dev pointer from stack + * call eth_type_trans+5 // execute body of eth_type_trans + * mov qword ptr [rbp - 8], rax // save return value + * call __bpf_prog_enter // rcu_read_lock and preempt_disable + * mov rbx, rax // remember start time in bpf stats are enabled + * lea rdi, [rbp - 24] // R1==ctx of bpf prog + * call addr_of_jited_FEXIT_prog // bpf prog can access skb, dev, return value + * movabsq rdi, 64bit_addr_of_struct_bpf_prog // unused if bpf stats are off + * mov rsi, rbx // prog start time + * call __bpf_prog_exit // rcu_read_unlock, preempt_enable and stats math + * mov rax, qword ptr [rbp - 8] // restore eth_type_trans's return value + * pop rbx + * leave + * add rsp, 8 // skip eth_type_trans's frame + * ret // return to its caller + */ +int arch_prepare_bpf_trampoline(void *image, struct btf_func_model *m, u32 flags, + struct bpf_prog **fentry_progs, int fentry_cnt, + struct bpf_prog **fexit_progs, int fexit_cnt, + void *orig_call) +{ + int cnt = 0, nr_args = m->nr_args; + int stack_size = nr_args * 8; + u8 *prog; + + /* x86-64 supports up to 6 arguments. 7+ can be added in the future */ + if (nr_args > 6) + return -ENOTSUPP; + + if ((flags & BPF_TRAMP_F_RESTORE_REGS) && + (flags & BPF_TRAMP_F_SKIP_FRAME)) + return -EINVAL; + + if (flags & BPF_TRAMP_F_CALL_ORIG) + stack_size += 8; /* room for return value of orig_call */ + + if (flags & BPF_TRAMP_F_SKIP_FRAME) + /* skip patched call instruction and point orig_call to actual + * body of the kernel function. 
+ */ + orig_call += X86_PATCH_SIZE; + + prog = image; + + EMIT1(0x55); /* push rbp */ + EMIT3(0x48, 0x89, 0xE5); /* mov rbp, rsp */ + EMIT4(0x48, 0x83, 0xEC, stack_size); /* sub rsp, stack_size */ + EMIT1(0x53); /* push rbx */ + + save_regs(m, &prog, nr_args, stack_size); + + if (fentry_cnt) + if (invoke_bpf(m, &prog, fentry_progs, fentry_cnt, stack_size)) + return -EINVAL; + + if (flags & BPF_TRAMP_F_CALL_ORIG) { + if (fentry_cnt) + restore_regs(m, &prog, nr_args, stack_size); + + /* call original function */ + if (emit_call(&prog, orig_call, prog)) + return -EINVAL; + /* remember return value in a stack for bpf prog to access */ + emit_stx(&prog, BPF_DW, BPF_REG_FP, BPF_REG_0, -8); + } + + if (fexit_cnt) + if (invoke_bpf(m, &prog, fexit_progs, fexit_cnt, stack_size)) + return -EINVAL; + + if (flags & BPF_TRAMP_F_RESTORE_REGS) + restore_regs(m, &prog, nr_args, stack_size); + + if (flags & BPF_TRAMP_F_CALL_ORIG) + /* restore original return value back into RAX */ + emit_ldx(&prog, BPF_DW, BPF_REG_0, BPF_REG_FP, -8); + + EMIT1(0x5B); /* pop rbx */ + EMIT1(0xC9); /* leave */ + if (flags & BPF_TRAMP_F_SKIP_FRAME) + /* skip our return address and return to parent */ + EMIT4(0x48, 0x83, 0xC4, 8); /* add rsp, 8 */ + EMIT1(0xC3); /* ret */ + /* One half of the page has active running trampoline. + * Another half is an area for next trampoline. + * Make sure the trampoline generation logic doesn't overflow. + */ + if (WARN_ON_ONCE(prog - (u8 *)image > PAGE_SIZE / 2 - BPF_INSN_SAFETY)) + return -EFAULT; + return 0; +} + struct x64_jit_data { struct bpf_binary_header *header; int *addrs; @@ -1148,12 +1627,24 @@ out_image: break; } if (proglen == oldproglen) { - header = bpf_jit_binary_alloc(proglen, &image, - 1, jit_fill_hole); + /* + * The number of entries in extable is the number of BPF_LDX + * insns that access kernel memory via "pointer to BTF type". + * The verifier changed their opcode from LDX|MEM|size + * to LDX|PROBE_MEM|size to make JITing easier. + */ + u32 align = __alignof__(struct exception_table_entry); + u32 extable_size = prog->aux->num_exentries * + sizeof(struct exception_table_entry); + + /* allocate module memory for x86 insns and extable */ + header = bpf_jit_binary_alloc(roundup(proglen, align) + extable_size, + &image, align, jit_fill_hole); if (!header) { prog = orig_prog; goto out_addrs; } + prog->aux->extable = (void *) image + roundup(proglen, align); } oldproglen = proglen; cond_resched(); @@ -1164,6 +1655,7 @@ out_image: if (image) { if (!prog->is_func || extra_pass) { + bpf_tail_call_direct_fixup(prog); bpf_jit_binary_lock_ro(header); } else { jit_data->addrs = addrs; diff --git a/arch/x86/oprofile/op_x86_model.h b/arch/x86/oprofile/op_x86_model.h index 71e8a67337e2..276cf79b5d24 100644 --- a/arch/x86/oprofile/op_x86_model.h +++ b/arch/x86/oprofile/op_x86_model.h @@ -67,13 +67,13 @@ static inline void op_x86_warn_in_use(int counter) * cannot be monitored by any other counter, contact your * hardware or BIOS vendor. 
*/ - pr_warning("oprofile: counter #%d on cpu #%d may already be used\n", - counter, smp_processor_id()); + pr_warn("oprofile: counter #%d on cpu #%d may already be used\n", + counter, smp_processor_id()); } static inline void op_x86_warn_reserved(int counter) { - pr_warning("oprofile: counter #%d is already reserved\n", counter); + pr_warn("oprofile: counter #%d is already reserved\n", counter); } extern u64 op_x86_get_ctrl(struct op_x86_model_spec const *model, diff --git a/arch/x86/platform/efi/efi.c b/arch/x86/platform/efi/efi.c index c202e1b07e29..425e025341db 100644 --- a/arch/x86/platform/efi/efi.c +++ b/arch/x86/platform/efi/efi.c @@ -917,9 +917,6 @@ static void __init kexec_enter_virtual_mode(void) if (efi_enabled(EFI_OLD_MEMMAP) && (__supported_pte_mask & _PAGE_NX)) runtime_code_page_mkexec(); - - /* clean DUMMY object */ - efi_delete_dummy_variable(); #endif } diff --git a/arch/x86/platform/olpc/olpc-xo15-sci.c b/arch/x86/platform/olpc/olpc-xo15-sci.c index 6d193bb36021..089413cd944e 100644 --- a/arch/x86/platform/olpc/olpc-xo15-sci.c +++ b/arch/x86/platform/olpc/olpc-xo15-sci.c @@ -39,7 +39,7 @@ static int set_lid_wake_behavior(bool wake_on_close) status = acpi_execute_simple_method(NULL, "\\_SB.PCI0.LID.LIDW", wake_on_close); if (ACPI_FAILURE(status)) { - pr_warning(PFX "failed to set lid behavior\n"); + pr_warn(PFX "failed to set lid behavior\n"); return 1; } diff --git a/arch/x86/platform/sfi/sfi.c b/arch/x86/platform/sfi/sfi.c index bf6016f8db4e..6259563760f9 100644 --- a/arch/x86/platform/sfi/sfi.c +++ b/arch/x86/platform/sfi/sfi.c @@ -26,8 +26,7 @@ static unsigned long sfi_lapic_addr __initdata = APIC_DEFAULT_PHYS_BASE; static void __init mp_sfi_register_lapic(u8 id) { if (MAX_LOCAL_APIC - id <= 0) { - pr_warning("Processor #%d invalid (max %d)\n", - id, MAX_LOCAL_APIC); + pr_warn("Processor #%d invalid (max %d)\n", id, MAX_LOCAL_APIC); return; } diff --git a/arch/x86/tools/gen-insn-attr-x86.awk b/arch/x86/tools/gen-insn-attr-x86.awk index b02a36b2c14f..a42015b305f4 100644 --- a/arch/x86/tools/gen-insn-attr-x86.awk +++ b/arch/x86/tools/gen-insn-attr-x86.awk @@ -69,7 +69,7 @@ BEGIN { lprefix1_expr = "\\((66|!F3)\\)" lprefix2_expr = "\\(F3\\)" - lprefix3_expr = "\\((F2|!F3|66\\&F2)\\)" + lprefix3_expr = "\\((F2|!F3|66&F2)\\)" lprefix_expr = "\\((66|F2|F3)\\)" max_lprefix = 4 @@ -257,7 +257,7 @@ function convert_operands(count,opnd, i,j,imm,mod) return add_flags(imm, mod) } -/^[0-9a-f]+\:/ { +/^[0-9a-f]+:/ { if (NR == 1) next # get index diff --git a/arch/x86/xen/efi.c b/arch/x86/xen/efi.c index 0d3365cb64de..a04551ee5568 100644 --- a/arch/x86/xen/efi.c +++ b/arch/x86/xen/efi.c @@ -57,19 +57,7 @@ static efi_system_table_t __init *xen_efi_probe(void) return NULL; /* Here we know that Xen runs on EFI platform. 
*/ - - efi.get_time = xen_efi_get_time; - efi.set_time = xen_efi_set_time; - efi.get_wakeup_time = xen_efi_get_wakeup_time; - efi.set_wakeup_time = xen_efi_set_wakeup_time; - efi.get_variable = xen_efi_get_variable; - efi.get_next_variable = xen_efi_get_next_variable; - efi.set_variable = xen_efi_set_variable; - efi.query_variable_info = xen_efi_query_variable_info; - efi.update_capsule = xen_efi_update_capsule; - efi.query_capsule_caps = xen_efi_query_capsule_caps; - efi.get_next_high_mono_count = xen_efi_get_next_high_mono_count; - efi.reset_system = xen_efi_reset_system; + xen_efi_runtime_setup(); efi_systab_xen.tables = info->cfg.addr; efi_systab_xen.nr_tables = info->cfg.nent; diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index 750f46ad018a..205b1176084f 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -269,19 +269,41 @@ void xen_reboot(int reason) BUG(); } +static int reboot_reason = SHUTDOWN_reboot; +static bool xen_legacy_crash; void xen_emergency_restart(void) { - xen_reboot(SHUTDOWN_reboot); + xen_reboot(reboot_reason); } static int xen_panic_event(struct notifier_block *this, unsigned long event, void *ptr) { - if (!kexec_crash_loaded()) - xen_reboot(SHUTDOWN_crash); + if (!kexec_crash_loaded()) { + if (xen_legacy_crash) + xen_reboot(SHUTDOWN_crash); + + reboot_reason = SHUTDOWN_crash; + + /* + * If panic_timeout==0 then we are supposed to wait forever. + * However, to preserve original dom0 behavior we have to drop + * into hypervisor. (domU behavior is controlled by its + * config file) + */ + if (panic_timeout == 0) + panic_timeout = -1; + } return NOTIFY_DONE; } +static int __init parse_xen_legacy_crash(char *arg) +{ + xen_legacy_crash = true; + return 0; +} +early_param("xen_legacy_crash", parse_xen_legacy_crash); + static struct notifier_block xen_panic_block = { .notifier_call = xen_panic_event, .priority = INT_MIN diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c index 58f79ab32358..5bfea374a160 100644 --- a/arch/x86/xen/enlighten_pv.c +++ b/arch/x86/xen/enlighten_pv.c @@ -117,6 +117,14 @@ static void __init xen_banner(void) printk(KERN_INFO "Xen version: %d.%d%s%s\n", version >> 16, version & 0xffff, extra.extraversion, xen_feature(XENFEAT_mmu_pt_update_preserve_ad) ? " (preserve-AD)" : ""); + +#ifdef CONFIG_X86_32 + pr_warn("WARNING! WARNING! WARNING! WARNING! WARNING! WARNING! WARNING!\n" + "Support for running as 32-bit PV-guest under Xen will soon be removed\n" + "from the Linux kernel!\n" + "Please use either a 64-bit kernel or switch to HVM or PVH mode!\n" + "WARNING! WARNING! WARNING! WARNING! WARNING! WARNING! 
WARNING!\n"); +#endif } static void __init xen_pv_init_platform(void) diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c index 548d1e0a5ba1..33b0e20df7fc 100644 --- a/arch/x86/xen/setup.c +++ b/arch/x86/xen/setup.c @@ -412,7 +412,7 @@ static unsigned long __init xen_set_identity_and_remap_chunk( remap_range_size = xen_find_pfn_range(&remap_pfn); if (!remap_range_size) { - pr_warning("Unable to find available pfn range, not remapping identity pages\n"); + pr_warn("Unable to find available pfn range, not remapping identity pages\n"); xen_set_identity_and_release_chunk(cur_pfn, cur_pfn + left, nr_pages); break; diff --git a/arch/xtensa/boot/dts/virt.dts b/arch/xtensa/boot/dts/virt.dts index a9dcd87b6eb1..611b98a02a65 100644 --- a/arch/xtensa/boot/dts/virt.dts +++ b/arch/xtensa/boot/dts/virt.dts @@ -56,7 +56,7 @@ reg = <0xf0100000 0x03f00000>; // BUS_ADDRESS(3) CPU_PHYSICAL(1) SIZE(2) - ranges = <0x01000000 0x0 0xf0000000 0xf0000000 0x0 0x00010000>, + ranges = <0x01000000 0x0 0x00000000 0xf0000000 0x0 0x00010000>, <0x02000000 0x0 0xf4000000 0xf4000000 0x0 0x08000000>; // PCI_DEVICE(3) INT#(1) CONTROLLER(PHANDLE) CONTROLLER_DATA(2) diff --git a/arch/xtensa/include/asm/bitops.h b/arch/xtensa/include/asm/bitops.h index aeb15f4c755b..be8b2be5a98b 100644 --- a/arch/xtensa/include/asm/bitops.h +++ b/arch/xtensa/include/asm/bitops.h @@ -148,7 +148,7 @@ static inline void change_bit(unsigned int bit, volatile unsigned long *p) " getex %0\n" " beqz %0, 1b\n" : "=&a" (tmp) - : "a" (~mask), "a" (p) + : "a" (mask), "a" (p) : "memory"); } diff --git a/arch/xtensa/include/asm/uaccess.h b/arch/xtensa/include/asm/uaccess.h index 6792928ba84a..3f80386f1883 100644 --- a/arch/xtensa/include/asm/uaccess.h +++ b/arch/xtensa/include/asm/uaccess.h @@ -100,7 +100,7 @@ do { \ case 4: __put_user_asm(x, ptr, retval, 4, "s32i", __cb); break; \ case 8: { \ __typeof__(*ptr) __v64 = x; \ - retval = __copy_to_user(ptr, &__v64, 8); \ + retval = __copy_to_user(ptr, &__v64, 8) ? -EFAULT : 0; \ break; \ } \ default: __put_user_bad(); \ @@ -132,14 +132,14 @@ do { \ #define __check_align_1 "" #define __check_align_2 \ - " _bbci.l %3, 0, 1f \n" \ - " movi %0, %4 \n" \ + " _bbci.l %[addr], 0, 1f \n" \ + " movi %[err], %[efault] \n" \ " _j 2f \n" #define __check_align_4 \ - " _bbsi.l %3, 0, 0f \n" \ - " _bbci.l %3, 1, 1f \n" \ - "0: movi %0, %4 \n" \ + " _bbsi.l %[addr], 0, 0f \n" \ + " _bbci.l %[addr], 1, 1f \n" \ + "0: movi %[err], %[efault] \n" \ " _j 2f \n" @@ -151,40 +151,40 @@ do { \ * WARNING: If you modify this macro at all, verify that the * __check_align_* macros still work. 
*/ -#define __put_user_asm(x, addr, err, align, insn, cb) \ +#define __put_user_asm(x_, addr_, err_, align, insn, cb)\ __asm__ __volatile__( \ __check_align_##align \ - "1: "insn" %2, %3, 0 \n" \ + "1: "insn" %[x], %[addr], 0 \n" \ "2: \n" \ " .section .fixup,\"ax\" \n" \ " .align 4 \n" \ " .literal_position \n" \ "5: \n" \ - " movi %1, 2b \n" \ - " movi %0, %4 \n" \ - " jx %1 \n" \ + " movi %[tmp], 2b \n" \ + " movi %[err], %[efault] \n" \ + " jx %[tmp] \n" \ " .previous \n" \ " .section __ex_table,\"a\" \n" \ " .long 1b, 5b \n" \ " .previous" \ - :"=r" (err), "=r" (cb) \ - :"r" ((int)(x)), "r" (addr), "i" (-EFAULT), "0" (err)) + :[err] "+r"(err_), [tmp] "=r"(cb) \ + :[x] "r"(x_), [addr] "r"(addr_), [efault] "i"(-EFAULT)) #define __get_user_nocheck(x, ptr, size) \ ({ \ - long __gu_err, __gu_val; \ - __get_user_size(__gu_val, (ptr), (size), __gu_err); \ - (x) = (__force __typeof__(*(ptr)))__gu_val; \ + long __gu_err; \ + __get_user_size((x), (ptr), (size), __gu_err); \ __gu_err; \ }) #define __get_user_check(x, ptr, size) \ ({ \ - long __gu_err = -EFAULT, __gu_val = 0; \ + long __gu_err = -EFAULT; \ const __typeof__(*(ptr)) *__gu_addr = (ptr); \ - if (access_ok(__gu_addr, size)) \ - __get_user_size(__gu_val, __gu_addr, (size), __gu_err); \ - (x) = (__force __typeof__(*(ptr)))__gu_val; \ + if (access_ok(__gu_addr, size)) \ + __get_user_size((x), __gu_addr, (size), __gu_err); \ + else \ + (x) = 0; \ __gu_err; \ }) @@ -198,8 +198,17 @@ do { \ case 1: __get_user_asm(x, ptr, retval, 1, "l8ui", __cb); break;\ case 2: __get_user_asm(x, ptr, retval, 2, "l16ui", __cb); break;\ case 4: __get_user_asm(x, ptr, retval, 4, "l32i", __cb); break;\ - case 8: retval = __copy_from_user(&x, ptr, 8); break; \ - default: (x) = __get_user_bad(); \ + case 8: { \ + u64 __x; \ + if (unlikely(__copy_from_user(&__x, ptr, 8))) { \ + retval = -EFAULT; \ + (x) = 0; \ + } else { \ + (x) = *(__force __typeof__((ptr)))&__x; \ + } \ + break; \ + } \ + default: (x) = 0; __get_user_bad(); \ } \ } while (0) @@ -208,25 +217,28 @@ do { \ * WARNING: If you modify this macro at all, verify that the * __check_align_* macros still work. 
*/ -#define __get_user_asm(x, addr, err, align, insn, cb) \ -__asm__ __volatile__( \ - __check_align_##align \ - "1: "insn" %2, %3, 0 \n" \ - "2: \n" \ - " .section .fixup,\"ax\" \n" \ - " .align 4 \n" \ - " .literal_position \n" \ - "5: \n" \ - " movi %1, 2b \n" \ - " movi %2, 0 \n" \ - " movi %0, %4 \n" \ - " jx %1 \n" \ - " .previous \n" \ - " .section __ex_table,\"a\" \n" \ - " .long 1b, 5b \n" \ - " .previous" \ - :"=r" (err), "=r" (cb), "=r" (x) \ - :"r" (addr), "i" (-EFAULT), "0" (err)) +#define __get_user_asm(x_, addr_, err_, align, insn, cb) \ +do { \ + u32 __x = 0; \ + __asm__ __volatile__( \ + __check_align_##align \ + "1: "insn" %[x], %[addr], 0 \n" \ + "2: \n" \ + " .section .fixup,\"ax\" \n" \ + " .align 4 \n" \ + " .literal_position \n" \ + "5: \n" \ + " movi %[tmp], 2b \n" \ + " movi %[err], %[efault] \n" \ + " jx %[tmp] \n" \ + " .previous \n" \ + " .section __ex_table,\"a\" \n" \ + " .long 1b, 5b \n" \ + " .previous" \ + :[err] "+r"(err_), [tmp] "=r"(cb), [x] "+r"(__x) \ + :[addr] "r"(addr_), [efault] "i"(-EFAULT)); \ + (x_) = (__force __typeof__(*(addr_)))__x; \ +} while (0) /* diff --git a/arch/xtensa/kernel/xtensa_ksyms.c b/arch/xtensa/kernel/xtensa_ksyms.c index 04f19de46700..4092555828b1 100644 --- a/arch/xtensa/kernel/xtensa_ksyms.c +++ b/arch/xtensa/kernel/xtensa_ksyms.c @@ -119,13 +119,6 @@ EXPORT_SYMBOL(__invalidate_icache_range); // FIXME EXPORT_SYMBOL(screen_info); #endif -EXPORT_SYMBOL(outsb); -EXPORT_SYMBOL(outsw); -EXPORT_SYMBOL(outsl); -EXPORT_SYMBOL(insb); -EXPORT_SYMBOL(insw); -EXPORT_SYMBOL(insl); - extern long common_exception_return; EXPORT_SYMBOL(common_exception_return); |
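
A note on the new arch/x86/mm/maccess.c added above: probe_kernel_read_strict() and strncpy_from_unsafe_strict() refuse any address that invalid_probe_range() flags, i.e. on 64-bit anything below TASK_SIZE_MAX plus the guard page as well as anything non-canonical, before __probe_kernel_read()/__strncpy_from_unsafe() are attempted. The sketch below is a minimal, hypothetical user-space program (not kernel code) that reproduces the sign-extension trick canonical_address() relies on, assuming 48 significant virtual-address bits as with 4-level paging:

#include <stdint.h>
#include <stdio.h>

/* Mirror of the helper from the diff: shift the address up so that bit
 * (vaddr_bits - 1) becomes the sign bit, then shift back down with an
 * arithmetic shift. The result equals the input only for canonical values. */
static uint64_t canonical_address(uint64_t vaddr, uint8_t vaddr_bits)
{
	return (uint64_t)(((int64_t)vaddr << (64 - vaddr_bits)) >> (64 - vaddr_bits));
}

int main(void)
{
	uint64_t canon = 0xffffffff81000000ULL;	/* typical kernel text address */
	uint64_t bogus = 0x0000800000000000ULL;	/* lowest non-canonical address */

	printf("%#llx canonical: %d\n", (unsigned long long)canon,
	       canonical_address(canon, 48) == canon);
	printf("%#llx canonical: %d\n", (unsigned long long)bogus,
	       canonical_address(bogus, 48) == bogus);
	return 0;
}

Rejecting such addresses up front means a stray probe never reaches user memory or the non-canonical hole at all; effectively only the canonical kernel half of the address space is ever passed on to __probe_kernel_read().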