From 0e4a4a08cd78efcaddbc2e4c5ed86b5a5cb8a15e Mon Sep 17 00:00:00 2001 From: Michal Vokáč Date: Tue, 13 Apr 2021 16:45:57 +0200 Subject: ARM: dts: imx6dl-yapp4: Fix RGMII connection to QCA8334 switch MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The FEC does not have a PHY so it should not have a phy-handle. It is connected to the switch at RGMII level so we need a fixed-link sub-node on both ends. This was not a problem until the qca8k.c driver was converted to PHYLINK by commit b3591c2a3661 ("net: dsa: qca8k: Switch to PHYLINK instead of PHYLIB"). That commit revealed the FEC configuration was not correct. Fixes: 87489ec3a77f ("ARM: dts: imx: Add Y Soft IOTA Draco, Hydra and Ursa boards") Cc: stable@vger.kernel.org Signed-off-by: Michal Vokáč Reviewed-by: Andrew Lunn Signed-off-by: Shawn Guo --- arch/arm/boot/dts/imx6dl-yapp4-common.dtsi | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/arm/boot/dts/imx6dl-yapp4-common.dtsi b/arch/arm/boot/dts/imx6dl-yapp4-common.dtsi index 7d2c72562c73..9148a01ed6d9 100644 --- a/arch/arm/boot/dts/imx6dl-yapp4-common.dtsi +++ b/arch/arm/boot/dts/imx6dl-yapp4-common.dtsi @@ -105,9 +105,13 @@ phy-reset-gpios = <&gpio1 25 GPIO_ACTIVE_LOW>; phy-reset-duration = <20>; phy-supply = <&sw2_reg>; - phy-handle = <ðphy0>; status = "okay"; + fixed-link { + speed = <1000>; + full-duplex; + }; + mdio { #address-cells = <1>; #size-cells = <0>; -- cgit v1.2.3 From 8967b27a6c1c19251989c7ab33c058d16e4a5f53 Mon Sep 17 00:00:00 2001 From: Marek Vasut Date: Mon, 26 Apr 2021 12:23:21 +0200 Subject: ARM: dts: imx6q-dhcom: Add PU,VDD1P1,VDD2P5 regulators Per schematic, both PU and SOC regulator are supplied from LTC3676 SW1 via VDDSOC_IN rail, add the PU input. Both VDD1P1, VDD2P5 are supplied from LTC3676 SW2 via VDDHIGH_IN rail, add both inputs. While no instability or problems are currently observed, the regulators should be fully described in DT and that description should fully match the hardware, else this might lead to unforseen issues later. Fix this. Fixes: 52c7a088badd ("ARM: dts: imx6q: Add support for the DHCOM iMX6 SoM and PDK2") Reviewed-by: Fabio Estevam Signed-off-by: Marek Vasut Cc: Christoph Niedermaier Cc: Fabio Estevam Cc: Ludwig Zenz Cc: NXP Linux Team Cc: Shawn Guo Cc: stable@vger.kernel.org Reviewed-by: Christoph Niedermaier Signed-off-by: Shawn Guo --- arch/arm/boot/dts/imx6q-dhcom-som.dtsi | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'arch') diff --git a/arch/arm/boot/dts/imx6q-dhcom-som.dtsi b/arch/arm/boot/dts/imx6q-dhcom-som.dtsi index 236fc205c389..d0768ae429fa 100644 --- a/arch/arm/boot/dts/imx6q-dhcom-som.dtsi +++ b/arch/arm/boot/dts/imx6q-dhcom-som.dtsi @@ -406,6 +406,18 @@ vin-supply = <&sw1_reg>; }; +®_pu { + vin-supply = <&sw1_reg>; +}; + +®_vdd1p1 { + vin-supply = <&sw2_reg>; +}; + +®_vdd2p5 { + vin-supply = <&sw2_reg>; +}; + &uart1 { pinctrl-names = "default"; pinctrl-0 = <&pinctrl_uart1>; -- cgit v1.2.3 From df61cd9393845383adc4ea2410f2a91e1d1972b6 Mon Sep 17 00:00:00 2001 From: Tomi Valkeinen Date: Fri, 23 Apr 2021 11:31:20 +0300 Subject: arm64: dts: ti: k3-am654-base-board: remove ov5640 AM654 EVM boards are not shipped with OV5640 sensor module, it is a separate purchase. OV5640 module is also just one of the possible sensors or capture boards you can connect. However, for some reason, OV5640 has been added to the board dts file, making it cumbersome to use other sensors. Remove the OV5640 from the dts file so that it is easy to use other sensors via DT overlays. Signed-off-by: Tomi Valkeinen Acked-by: Pratyush Yadav Signed-off-by: Nishanth Menon Link: https://lore.kernel.org/r/20210423083120.73476-1-tomi.valkeinen@ideasonboard.com --- arch/arm64/boot/dts/ti/k3-am654-base-board.dts | 31 -------------------------- 1 file changed, 31 deletions(-) (limited to 'arch') diff --git a/arch/arm64/boot/dts/ti/k3-am654-base-board.dts b/arch/arm64/boot/dts/ti/k3-am654-base-board.dts index 9e87fb313a54..eddb2ffb93ca 100644 --- a/arch/arm64/boot/dts/ti/k3-am654-base-board.dts +++ b/arch/arm64/boot/dts/ti/k3-am654-base-board.dts @@ -85,12 +85,6 @@ gpios = <&wkup_gpio0 27 GPIO_ACTIVE_LOW>; }; }; - - clk_ov5640_fixed: clock { - compatible = "fixed-clock"; - #clock-cells = <0>; - clock-frequency = <24000000>; - }; }; &wkup_pmx0 { @@ -287,23 +281,6 @@ pinctrl-names = "default"; pinctrl-0 = <&main_i2c1_pins_default>; clock-frequency = <400000>; - - ov5640: camera@3c { - compatible = "ovti,ov5640"; - reg = <0x3c>; - - clocks = <&clk_ov5640_fixed>; - clock-names = "xclk"; - - port { - csi2_cam0: endpoint { - remote-endpoint = <&csi2_phy0>; - clock-lanes = <0>; - data-lanes = <1 2>; - }; - }; - }; - }; &main_i2c2 { @@ -496,14 +473,6 @@ }; }; -&csi2_0 { - csi2_phy0: endpoint { - remote-endpoint = <&csi2_cam0>; - clock-lanes = <0>; - data-lanes = <1 2>; - }; -}; - &mcu_cpsw { pinctrl-names = "default"; pinctrl-0 = <&mcu_cpsw_pins_default &mcu_mdio_pins_default>; -- cgit v1.2.3 From 52ae30f55a2a40cff549fac95de82f25403bd387 Mon Sep 17 00:00:00 2001 From: Vignesh Raghavendra Date: Mon, 10 May 2021 23:36:01 +0530 Subject: arm64: dts: ti: j7200-main: Mark Main NAVSS as dma-coherent Traffic through main NAVSS interconnect is coherent wrt ARM caches on J7200 SoC. Add missing dma-coherent property to main_navss node. Also add dma-ranges to be consistent with mcu_navss node and with AM65/J721e main_navss and mcu_navss nodes. Fixes: d361ed88455fe ("arm64: dts: ti: Add support for J7200 SoC") Signed-off-by: Vignesh Raghavendra Reviewed-by: Peter Ujfalusi Signed-off-by: Nishanth Menon Link: https://lore.kernel.org/r/20210510180601.19458-1-vigneshr@ti.com --- arch/arm64/boot/dts/ti/k3-j7200-main.dtsi | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch') diff --git a/arch/arm64/boot/dts/ti/k3-j7200-main.dtsi b/arch/arm64/boot/dts/ti/k3-j7200-main.dtsi index f86c493a44f1..a6826f1888ef 100644 --- a/arch/arm64/boot/dts/ti/k3-j7200-main.dtsi +++ b/arch/arm64/boot/dts/ti/k3-j7200-main.dtsi @@ -85,6 +85,8 @@ #size-cells = <2>; ranges = <0x00 0x30000000 0x00 0x30000000 0x00 0x0c400000>; ti,sci-dev-id = <199>; + dma-coherent; + dma-ranges; main_navss_intr: interrupt-controller1 { compatible = "ti,sci-intr"; -- cgit v1.2.3 From a0812885fa7a1074c8003484b8176ffe28d5df68 Mon Sep 17 00:00:00 2001 From: Nishanth Menon Date: Mon, 10 May 2021 09:50:30 -0500 Subject: arm64: dts: ti: k3-*: Rename the TI-SCI clocks node name We currently use clocks as the node name for the node representing TI-SCI clock nodes. This is better renamed to being clock-controller as that is a better representative of the system controller function as a clock controller for the SoC. Signed-off-by: Nishanth Menon Reviewed-by: Tero Kristo Link: https://lore.kernel.org/r/20210510145033.7426-2-nm@ti.com --- arch/arm64/boot/dts/ti/k3-am64-main.dtsi | 2 +- arch/arm64/boot/dts/ti/k3-am65-wakeup.dtsi | 2 +- arch/arm64/boot/dts/ti/k3-j7200-mcu-wakeup.dtsi | 2 +- arch/arm64/boot/dts/ti/k3-j721e-mcu-wakeup.dtsi | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) (limited to 'arch') diff --git a/arch/arm64/boot/dts/ti/k3-am64-main.dtsi b/arch/arm64/boot/dts/ti/k3-am64-main.dtsi index b2bcbf23eefd..e1216073e3df 100644 --- a/arch/arm64/boot/dts/ti/k3-am64-main.dtsi +++ b/arch/arm64/boot/dts/ti/k3-am64-main.dtsi @@ -148,7 +148,7 @@ #power-domain-cells = <2>; }; - k3_clks: clocks { + k3_clks: clock-controller { compatible = "ti,k2g-sci-clk"; #clock-cells = <2>; }; diff --git a/arch/arm64/boot/dts/ti/k3-am65-wakeup.dtsi b/arch/arm64/boot/dts/ti/k3-am65-wakeup.dtsi index ed42f13e7663..2ae1f9214b8a 100644 --- a/arch/arm64/boot/dts/ti/k3-am65-wakeup.dtsi +++ b/arch/arm64/boot/dts/ti/k3-am65-wakeup.dtsi @@ -23,7 +23,7 @@ #power-domain-cells = <2>; }; - k3_clks: clocks { + k3_clks: clock-controller { compatible = "ti,k2g-sci-clk"; #clock-cells = <2>; }; diff --git a/arch/arm64/boot/dts/ti/k3-j7200-mcu-wakeup.dtsi b/arch/arm64/boot/dts/ti/k3-j7200-mcu-wakeup.dtsi index 5e74e43822c3..9dba2df3569f 100644 --- a/arch/arm64/boot/dts/ti/k3-j7200-mcu-wakeup.dtsi +++ b/arch/arm64/boot/dts/ti/k3-j7200-mcu-wakeup.dtsi @@ -23,7 +23,7 @@ #power-domain-cells = <2>; }; - k3_clks: clocks { + k3_clks: clock-controller { compatible = "ti,k2g-sci-clk"; #clock-cells = <2>; }; diff --git a/arch/arm64/boot/dts/ti/k3-j721e-mcu-wakeup.dtsi b/arch/arm64/boot/dts/ti/k3-j721e-mcu-wakeup.dtsi index d56e3475aee7..b83801feeb10 100644 --- a/arch/arm64/boot/dts/ti/k3-j721e-mcu-wakeup.dtsi +++ b/arch/arm64/boot/dts/ti/k3-j721e-mcu-wakeup.dtsi @@ -23,7 +23,7 @@ #power-domain-cells = <2>; }; - k3_clks: clocks { + k3_clks: clock-controller { compatible = "ti,k2g-sci-clk"; #clock-cells = <2>; }; -- cgit v1.2.3 From 830454bbd628330c3779c3de637b709dae790da0 Mon Sep 17 00:00:00 2001 From: Nishanth Menon Date: Mon, 10 May 2021 09:50:31 -0500 Subject: arm64: dts: ti: k3-am65-wakeup: Add debug region to TI-SCI node Lets add the TISCI debug region to TI-SCI region in line with TI-SCI documentation[1]. While at it, lets rename the node to indicate the address usage. [1] http://downloads.ti.com/tisci/esd/latest/4_trace/trace.html Signed-off-by: Nishanth Menon Reviewed-by: Tero Kristo Link: https://lore.kernel.org/r/20210510145033.7426-3-nm@ti.com --- arch/arm64/boot/dts/ti/k3-am65-wakeup.dtsi | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/arm64/boot/dts/ti/k3-am65-wakeup.dtsi b/arch/arm64/boot/dts/ti/k3-am65-wakeup.dtsi index 2ae1f9214b8a..444842a2d556 100644 --- a/arch/arm64/boot/dts/ti/k3-am65-wakeup.dtsi +++ b/arch/arm64/boot/dts/ti/k3-am65-wakeup.dtsi @@ -6,7 +6,7 @@ */ &cbass_wakeup { - dmsc: dmsc { + dmsc: dmsc@44083000 { compatible = "ti,am654-sci"; ti,host-id = <12>; #address-cells = <1>; @@ -18,6 +18,9 @@ mboxes= <&secure_proxy_main 11>, <&secure_proxy_main 13>; + reg-names = "debug_messages"; + reg = <0x44083000 0x1000>; + k3_pds: power-controller { compatible = "ti,sci-pm-domain"; #power-domain-cells = <2>; -- cgit v1.2.3 From 421c06b8761abd7d953148f5b955b4149df9846e Mon Sep 17 00:00:00 2001 From: Nishanth Menon Date: Mon, 10 May 2021 09:50:32 -0500 Subject: arm64: dts: ti: k3-am65-wakeup: Drop un-necessary properties from dmsc node The DMSC node does'nt require any of "#address-cells", "#size-cells" or "ranges" property as the child nodes are representations of SoC's system controller itself, so align it with the bindings. Signed-off-by: Nishanth Menon Reviewed-by: Tero Kristo Link: https://lore.kernel.org/r/20210510145033.7426-4-nm@ti.com --- arch/arm64/boot/dts/ti/k3-am65-wakeup.dtsi | 3 --- 1 file changed, 3 deletions(-) (limited to 'arch') diff --git a/arch/arm64/boot/dts/ti/k3-am65-wakeup.dtsi b/arch/arm64/boot/dts/ti/k3-am65-wakeup.dtsi index 444842a2d556..80d4df775f43 100644 --- a/arch/arm64/boot/dts/ti/k3-am65-wakeup.dtsi +++ b/arch/arm64/boot/dts/ti/k3-am65-wakeup.dtsi @@ -9,9 +9,6 @@ dmsc: dmsc@44083000 { compatible = "ti,am654-sci"; ti,host-id = <12>; - #address-cells = <1>; - #size-cells = <1>; - ranges; mbox-names = "rx", "tx"; -- cgit v1.2.3 From 9d3c9378f96a95f15881ee3373d2c2f773273fc2 Mon Sep 17 00:00:00 2001 From: Nishanth Menon Date: Mon, 10 May 2021 09:50:33 -0500 Subject: arm64: dts: ti: k3-*: Rename the TI-SCI node Lets rename the node name of TI-SCI node to be system-controller as it is a better standardized name for the function that TI-SCI plays in the SoC. Signed-off-by: Nishanth Menon Reviewed-by: Tero Kristo Link: https://lore.kernel.org/r/20210510145033.7426-5-nm@ti.com --- arch/arm64/boot/dts/ti/k3-am64-main.dtsi | 2 +- arch/arm64/boot/dts/ti/k3-am65-wakeup.dtsi | 2 +- arch/arm64/boot/dts/ti/k3-j7200-mcu-wakeup.dtsi | 2 +- arch/arm64/boot/dts/ti/k3-j721e-mcu-wakeup.dtsi | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) (limited to 'arch') diff --git a/arch/arm64/boot/dts/ti/k3-am64-main.dtsi b/arch/arm64/boot/dts/ti/k3-am64-main.dtsi index e1216073e3df..dc52178d9b64 100644 --- a/arch/arm64/boot/dts/ti/k3-am64-main.dtsi +++ b/arch/arm64/boot/dts/ti/k3-am64-main.dtsi @@ -134,7 +134,7 @@ }; }; - dmsc: dmsc@44043000 { + dmsc: system-controller@44043000 { compatible = "ti,k2g-sci"; ti,host-id = <12>; mbox-names = "rx", "tx"; diff --git a/arch/arm64/boot/dts/ti/k3-am65-wakeup.dtsi b/arch/arm64/boot/dts/ti/k3-am65-wakeup.dtsi index 80d4df775f43..822f4cff1db4 100644 --- a/arch/arm64/boot/dts/ti/k3-am65-wakeup.dtsi +++ b/arch/arm64/boot/dts/ti/k3-am65-wakeup.dtsi @@ -6,7 +6,7 @@ */ &cbass_wakeup { - dmsc: dmsc@44083000 { + dmsc: system-controller@44083000 { compatible = "ti,am654-sci"; ti,host-id = <12>; diff --git a/arch/arm64/boot/dts/ti/k3-j7200-mcu-wakeup.dtsi b/arch/arm64/boot/dts/ti/k3-j7200-mcu-wakeup.dtsi index 9dba2df3569f..65f3fabda114 100644 --- a/arch/arm64/boot/dts/ti/k3-j7200-mcu-wakeup.dtsi +++ b/arch/arm64/boot/dts/ti/k3-j7200-mcu-wakeup.dtsi @@ -6,7 +6,7 @@ */ &cbass_mcu_wakeup { - dmsc: dmsc@44083000 { + dmsc: system-controller@44083000 { compatible = "ti,k2g-sci"; ti,host-id = <12>; diff --git a/arch/arm64/boot/dts/ti/k3-j721e-mcu-wakeup.dtsi b/arch/arm64/boot/dts/ti/k3-j721e-mcu-wakeup.dtsi index b83801feeb10..94e821467eaa 100644 --- a/arch/arm64/boot/dts/ti/k3-j721e-mcu-wakeup.dtsi +++ b/arch/arm64/boot/dts/ti/k3-j721e-mcu-wakeup.dtsi @@ -6,7 +6,7 @@ */ &cbass_mcu_wakeup { - dmsc: dmsc@44083000 { + dmsc: system-controller@44083000 { compatible = "ti,k2g-sci"; ti,host-id = <12>; -- cgit v1.2.3 From 9ecdb6d6b11434494af4bad11b03f0dcda1eebbd Mon Sep 17 00:00:00 2001 From: Nishanth Menon Date: Mon, 10 May 2021 09:54:29 -0500 Subject: arm64: dts: ti: k3-am65|j721e|am64: Map the dma / navigator subsystem via explicit ranges Instead of using empty ranges property, lets map explicitly the address range that is mapped onto the dma / navigator subsystems (navss/dmss). This is also exposed via the dtbs_check with dt-schema newer than 2021.03 version by throwing out following: arch/arm64/boot/dts/ti/k3-am654-base-board.dt.yaml: bus@100000: main-navss: {'type': 'object'} is not allowed for {'compatible': ['simple-mfd'], '#address-cells': [[2]], ..... This has already been correctly done for J7200, however was missed for other k3 SoCs. Fix that oversight. Signed-off-by: Nishanth Menon Reviewed-by: Tero Kristo Acked-by: Vignesh Raghavendra Link: https://lore.kernel.org/r/20210510145429.8752-1-nm@ti.com --- arch/arm64/boot/dts/ti/k3-am64-main.dtsi | 4 ++-- arch/arm64/boot/dts/ti/k3-am65-main.dtsi | 4 ++-- arch/arm64/boot/dts/ti/k3-am65-mcu.dtsi | 4 ++-- arch/arm64/boot/dts/ti/k3-j721e-main.dtsi | 4 ++-- arch/arm64/boot/dts/ti/k3-j721e-mcu-wakeup.dtsi | 4 ++-- 5 files changed, 10 insertions(+), 10 deletions(-) (limited to 'arch') diff --git a/arch/arm64/boot/dts/ti/k3-am64-main.dtsi b/arch/arm64/boot/dts/ti/k3-am64-main.dtsi index dc52178d9b64..d5dc05d4cba6 100644 --- a/arch/arm64/boot/dts/ti/k3-am64-main.dtsi +++ b/arch/arm64/boot/dts/ti/k3-am64-main.dtsi @@ -42,12 +42,12 @@ }; }; - dmss: dmss { + dmss: bus@48000000 { compatible = "simple-mfd"; #address-cells = <2>; #size-cells = <2>; dma-ranges; - ranges; + ranges = <0x00 0x48000000 0x00 0x48000000 0x00 0x06400000>; ti,sci-dev-id = <25>; diff --git a/arch/arm64/boot/dts/ti/k3-am65-main.dtsi b/arch/arm64/boot/dts/ti/k3-am65-main.dtsi index cb340d1b401f..e160f22a1518 100644 --- a/arch/arm64/boot/dts/ti/k3-am65-main.dtsi +++ b/arch/arm64/boot/dts/ti/k3-am65-main.dtsi @@ -444,11 +444,11 @@ ti,interrupt-ranges = <0 392 32>; }; - main-navss { + main_navss: bus@30800000 { compatible = "simple-mfd"; #address-cells = <2>; #size-cells = <2>; - ranges; + ranges = <0x0 0x30800000 0x0 0x30800000 0x0 0xbc00000>; dma-coherent; dma-ranges; diff --git a/arch/arm64/boot/dts/ti/k3-am65-mcu.dtsi b/arch/arm64/boot/dts/ti/k3-am65-mcu.dtsi index 0388c02c2203..f5b8ef2f5f77 100644 --- a/arch/arm64/boot/dts/ti/k3-am65-mcu.dtsi +++ b/arch/arm64/boot/dts/ti/k3-am65-mcu.dtsi @@ -116,11 +116,11 @@ }; }; - mcu-navss { + mcu_navss: bus@28380000 { compatible = "simple-mfd"; #address-cells = <2>; #size-cells = <2>; - ranges; + ranges = <0x00 0x28380000 0x00 0x28380000 0x00 0x03880000>; dma-coherent; dma-ranges; diff --git a/arch/arm64/boot/dts/ti/k3-j721e-main.dtsi b/arch/arm64/boot/dts/ti/k3-j721e-main.dtsi index c2aa45a3ac79..1a7b1cf7f794 100644 --- a/arch/arm64/boot/dts/ti/k3-j721e-main.dtsi +++ b/arch/arm64/boot/dts/ti/k3-j721e-main.dtsi @@ -87,11 +87,11 @@ ti,interrupt-ranges = <8 392 56>; }; - main-navss { + main_navss: bus@30000000 { compatible = "simple-mfd"; #address-cells = <2>; #size-cells = <2>; - ranges; + ranges = <0x00 0x30000000 0x00 0x30000000 0x00 0x0c400000>; dma-coherent; dma-ranges; diff --git a/arch/arm64/boot/dts/ti/k3-j721e-mcu-wakeup.dtsi b/arch/arm64/boot/dts/ti/k3-j721e-mcu-wakeup.dtsi index 94e821467eaa..c85f98b81987 100644 --- a/arch/arm64/boot/dts/ti/k3-j721e-mcu-wakeup.dtsi +++ b/arch/arm64/boot/dts/ti/k3-j721e-mcu-wakeup.dtsi @@ -249,11 +249,11 @@ }; }; - mcu-navss { + mcu_navss: bus@28380000 { compatible = "simple-mfd"; #address-cells = <2>; #size-cells = <2>; - ranges; + ranges = <0x00 0x28380000 0x00 0x28380000 0x00 0x03880000>; dma-coherent; dma-ranges; -- cgit v1.2.3 From cab12badfc99f93c1dccf192dd150f94b687a27c Mon Sep 17 00:00:00 2001 From: Nishanth Menon Date: Tue, 11 May 2021 14:48:21 -0500 Subject: arm64: dts: ti: k3*: Introduce reg definition for interrupt routers Interrupt routers are memory mapped peripherals, that are organized in our dts bus hierarchy to closely represents the actual hardware behavior. However, without explicitly calling out the reg property, using 2021.03+ dt-schema package, this exposes the following problem with dtbs_check: /arch/arm64/boot/dts/ti/k3-am654-base-board.dt.yaml: bus@100000: interrupt-controller0: {'type': 'object'} is not allowed for {'compatible': ['ti,sci-intr'], ..... Even though we don't use interrupt router directly via memory mapped registers and have to use it via the system controller, the hardware block is memory mapped, so describe the base address in device tree. This is a valid, comprehensive description of hardware and permitted by the existing ti,sci-intr schema. Reviewed-by: Tero Kristo Reviewed-by: Lokesh Vutla Signed-off-by: Nishanth Menon Link: https://lore.kernel.org/r/20210511194821.13919-1-nm@ti.com --- arch/arm64/boot/dts/ti/k3-am64-main.dtsi | 3 ++- arch/arm64/boot/dts/ti/k3-am64-mcu.dtsi | 3 ++- arch/arm64/boot/dts/ti/k3-am65-main.dtsi | 6 ++++-- arch/arm64/boot/dts/ti/k3-am65-wakeup.dtsi | 3 ++- arch/arm64/boot/dts/ti/k3-j7200-main.dtsi | 6 ++++-- arch/arm64/boot/dts/ti/k3-j7200-mcu-wakeup.dtsi | 3 ++- arch/arm64/boot/dts/ti/k3-j721e-main.dtsi | 6 ++++-- arch/arm64/boot/dts/ti/k3-j721e-mcu-wakeup.dtsi | 3 ++- 8 files changed, 22 insertions(+), 11 deletions(-) (limited to 'arch') diff --git a/arch/arm64/boot/dts/ti/k3-am64-main.dtsi b/arch/arm64/boot/dts/ti/k3-am64-main.dtsi index d5dc05d4cba6..ca59d1f711f8 100644 --- a/arch/arm64/boot/dts/ti/k3-am64-main.dtsi +++ b/arch/arm64/boot/dts/ti/k3-am64-main.dtsi @@ -373,8 +373,9 @@ clocks = <&k3_clks 145 0>; }; - main_gpio_intr: interrupt-controller0 { + main_gpio_intr: interrupt-controller@a00000 { compatible = "ti,sci-intr"; + reg = <0x00 0x00a00000 0x00 0x800>; ti,intr-trigger-type = <1>; interrupt-controller; interrupt-parent = <&gic500>; diff --git a/arch/arm64/boot/dts/ti/k3-am64-mcu.dtsi b/arch/arm64/boot/dts/ti/k3-am64-mcu.dtsi index 99e94dee1bd4..deb19ae5e168 100644 --- a/arch/arm64/boot/dts/ti/k3-am64-mcu.dtsi +++ b/arch/arm64/boot/dts/ti/k3-am64-mcu.dtsi @@ -74,8 +74,9 @@ clocks = <&k3_clks 148 0>; }; - mcu_gpio_intr: interrupt-controller1 { + mcu_gpio_intr: interrupt-controller@4210000 { compatible = "ti,sci-intr"; + reg = <0x00 0x04210000 0x00 0x200>; ti,intr-trigger-type = <1>; interrupt-controller; interrupt-parent = <&gic500>; diff --git a/arch/arm64/boot/dts/ti/k3-am65-main.dtsi b/arch/arm64/boot/dts/ti/k3-am65-main.dtsi index e160f22a1518..6cd3131eb9ff 100644 --- a/arch/arm64/boot/dts/ti/k3-am65-main.dtsi +++ b/arch/arm64/boot/dts/ti/k3-am65-main.dtsi @@ -433,8 +433,9 @@ #phy-cells = <0>; }; - intr_main_gpio: interrupt-controller0 { + intr_main_gpio: interrupt-controller@a00000 { compatible = "ti,sci-intr"; + reg = <0x0 0x00a00000 0x0 0x400>; ti,intr-trigger-type = <1>; interrupt-controller; interrupt-parent = <&gic500>; @@ -454,8 +455,9 @@ ti,sci-dev-id = <118>; - intr_main_navss: interrupt-controller1 { + intr_main_navss: interrupt-controller@310e0000 { compatible = "ti,sci-intr"; + reg = <0x0 0x310e0000 0x0 0x2000>; ti,intr-trigger-type = <4>; interrupt-controller; interrupt-parent = <&gic500>; diff --git a/arch/arm64/boot/dts/ti/k3-am65-wakeup.dtsi b/arch/arm64/boot/dts/ti/k3-am65-wakeup.dtsi index 822f4cff1db4..7cb864b4d74a 100644 --- a/arch/arm64/boot/dts/ti/k3-am65-wakeup.dtsi +++ b/arch/arm64/boot/dts/ti/k3-am65-wakeup.dtsi @@ -69,8 +69,9 @@ power-domains = <&k3_pds 115 TI_SCI_PD_EXCLUSIVE>; }; - intr_wkup_gpio: interrupt-controller2 { + intr_wkup_gpio: interrupt-controller@42200000 { compatible = "ti,sci-intr"; + reg = <0x42200000 0x200>; ti,intr-trigger-type = <1>; interrupt-controller; interrupt-parent = <&gic500>; diff --git a/arch/arm64/boot/dts/ti/k3-j7200-main.dtsi b/arch/arm64/boot/dts/ti/k3-j7200-main.dtsi index a6826f1888ef..19fea8adbcff 100644 --- a/arch/arm64/boot/dts/ti/k3-j7200-main.dtsi +++ b/arch/arm64/boot/dts/ti/k3-j7200-main.dtsi @@ -68,8 +68,9 @@ }; }; - main_gpio_intr: interrupt-controller0 { + main_gpio_intr: interrupt-controller@a00000 { compatible = "ti,sci-intr"; + reg = <0x00 0x00a00000 0x00 0x800>; ti,intr-trigger-type = <1>; interrupt-controller; interrupt-parent = <&gic500>; @@ -88,8 +89,9 @@ dma-coherent; dma-ranges; - main_navss_intr: interrupt-controller1 { + main_navss_intr: interrupt-controller@310e0000 { compatible = "ti,sci-intr"; + reg = <0x00 0x310e0000 0x00 0x4000>; ti,intr-trigger-type = <4>; interrupt-controller; interrupt-parent = <&gic500>; diff --git a/arch/arm64/boot/dts/ti/k3-j7200-mcu-wakeup.dtsi b/arch/arm64/boot/dts/ti/k3-j7200-mcu-wakeup.dtsi index 65f3fabda114..5663fe3ea466 100644 --- a/arch/arm64/boot/dts/ti/k3-j7200-mcu-wakeup.dtsi +++ b/arch/arm64/boot/dts/ti/k3-j7200-mcu-wakeup.dtsi @@ -96,8 +96,9 @@ clock-names = "fclk"; }; - wkup_gpio_intr: interrupt-controller2 { + wkup_gpio_intr: interrupt-controller@42200000 { compatible = "ti,sci-intr"; + reg = <0x00 0x42200000 0x00 0x400>; ti,intr-trigger-type = <1>; interrupt-controller; interrupt-parent = <&gic500>; diff --git a/arch/arm64/boot/dts/ti/k3-j721e-main.dtsi b/arch/arm64/boot/dts/ti/k3-j721e-main.dtsi index 1a7b1cf7f794..3bcafe4c1742 100644 --- a/arch/arm64/boot/dts/ti/k3-j721e-main.dtsi +++ b/arch/arm64/boot/dts/ti/k3-j721e-main.dtsi @@ -76,8 +76,9 @@ }; }; - main_gpio_intr: interrupt-controller0 { + main_gpio_intr: interrupt-controller@a00000 { compatible = "ti,sci-intr"; + reg = <0x00 0x00a00000 0x00 0x800>; ti,intr-trigger-type = <1>; interrupt-controller; interrupt-parent = <&gic500>; @@ -97,8 +98,9 @@ ti,sci-dev-id = <199>; - main_navss_intr: interrupt-controller1 { + main_navss_intr: interrupt-controller@310e0000 { compatible = "ti,sci-intr"; + reg = <0x0 0x310e0000 0x0 0x4000>; ti,intr-trigger-type = <4>; interrupt-controller; interrupt-parent = <&gic500>; diff --git a/arch/arm64/boot/dts/ti/k3-j721e-mcu-wakeup.dtsi b/arch/arm64/boot/dts/ti/k3-j721e-mcu-wakeup.dtsi index c85f98b81987..5e825e4d0306 100644 --- a/arch/arm64/boot/dts/ti/k3-j721e-mcu-wakeup.dtsi +++ b/arch/arm64/boot/dts/ti/k3-j721e-mcu-wakeup.dtsi @@ -96,8 +96,9 @@ clock-names = "fclk"; }; - wkup_gpio_intr: interrupt-controller2 { + wkup_gpio_intr: interrupt-controller@42200000 { compatible = "ti,sci-intr"; + reg = <0x00 0x42200000 0x00 0x400>; ti,intr-trigger-type = <1>; interrupt-controller; interrupt-parent = <&gic500>; -- cgit v1.2.3 From 3c4e0147c269738a19c7d70cd32395600bcc0714 Mon Sep 17 00:00:00 2001 From: Maciej Falkowski Date: Thu, 1 Apr 2021 18:11:27 +0200 Subject: ARM: OMAP1: Fix use of possibly uninitialized irq variable The current control flow of IRQ number assignment to `irq` variable allows a request of IRQ of unspecified value, generating a warning under Clang compilation with omap1_defconfig on linux-next: arch/arm/mach-omap1/pm.c:656:11: warning: variable 'irq' is used uninitialized whenever 'if' condition is false [-Wsometimes-uninitialized] else if (cpu_is_omap16xx()) ^~~~~~~~~~~~~~~~~ ./arch/arm/mach-omap1/include/mach/soc.h:123:30: note: expanded from macro 'cpu_is_omap16xx' ^~~~~~~~~~~~~ arch/arm/mach-omap1/pm.c:658:18: note: uninitialized use occurs here if (request_irq(irq, omap_wakeup_interrupt, 0, "peripheral wakeup", ^~~ arch/arm/mach-omap1/pm.c:656:7: note: remove the 'if' if its condition is always true else if (cpu_is_omap16xx()) ^~~~~~~~~~~~~~~~~~~~~~ arch/arm/mach-omap1/pm.c:611:9: note: initialize the variable 'irq' to silence this warning int irq; ^ = 0 1 warning generated. The patch provides a default value to the `irq` variable along with a validity check. Signed-off-by: Maciej Falkowski Link: https://github.com/ClangBuiltLinux/linux/issues/1324 Signed-off-by: Tony Lindgren --- arch/arm/mach-omap1/pm.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/arm/mach-omap1/pm.c b/arch/arm/mach-omap1/pm.c index 2c1e2b32b9b3..a745d64d4699 100644 --- a/arch/arm/mach-omap1/pm.c +++ b/arch/arm/mach-omap1/pm.c @@ -655,9 +655,13 @@ static int __init omap_pm_init(void) irq = INT_7XX_WAKE_UP_REQ; else if (cpu_is_omap16xx()) irq = INT_1610_WAKE_UP_REQ; - if (request_irq(irq, omap_wakeup_interrupt, 0, "peripheral wakeup", - NULL)) - pr_err("Failed to request irq %d (peripheral wakeup)\n", irq); + else + irq = -1; + + if (irq >= 0) { + if (request_irq(irq, omap_wakeup_interrupt, 0, "peripheral wakeup", NULL)) + pr_err("Failed to request irq %d (peripheral wakeup)\n", irq); + } /* Program new power ramp-up time * (0 for most boards since we don't lower voltage when in deep sleep) -- cgit v1.2.3 From 7c302314f37b44595f180198fca5ca646bce4a5f Mon Sep 17 00:00:00 2001 From: Maciej Falkowski Date: Thu, 1 Apr 2021 18:20:32 +0200 Subject: ARM: OMAP1: isp1301-omap: Add missing gpiod_add_lookup_table function The gpiod table was added without any usage making it unused as reported by Clang compilation from omap1_defconfig on linux-next: arch/arm/mach-omap1/board-h2.c:347:34: warning: unused variable 'isp1301_gpiod_table' [-Wunused-variable] static struct gpiod_lookup_table isp1301_gpiod_table = { ^ 1 warning generated. The patch adds the missing gpiod_add_lookup_table() function. Signed-off-by: Maciej Falkowski Fixes: f3ef38160e3d ("usb: isp1301-omap: Convert to use GPIO descriptors") Link: https://github.com/ClangBuiltLinux/linux/issues/1325 Signed-off-by: Tony Lindgren --- arch/arm/mach-omap1/board-h2.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/arm/mach-omap1/board-h2.c b/arch/arm/mach-omap1/board-h2.c index c40cf5ef8607..977b0b744c22 100644 --- a/arch/arm/mach-omap1/board-h2.c +++ b/arch/arm/mach-omap1/board-h2.c @@ -320,7 +320,7 @@ static int tps_setup(struct i2c_client *client, void *context) { if (!IS_BUILTIN(CONFIG_TPS65010)) return -ENOSYS; - + tps65010_config_vregs1(TPS_LDO2_ENABLE | TPS_VLDO2_3_0V | TPS_LDO1_ENABLE | TPS_VLDO1_3_0V); @@ -394,6 +394,8 @@ static void __init h2_init(void) BUG_ON(gpio_request(H2_NAND_RB_GPIO_PIN, "NAND ready") < 0); gpio_direction_input(H2_NAND_RB_GPIO_PIN); + gpiod_add_lookup_table(&isp1301_gpiod_table); + omap_cfg_reg(L3_1610_FLASH_CS2B_OE); omap_cfg_reg(M8_1610_FLASH_CS2B_WE); -- cgit v1.2.3 From 040ab72ee10ea88e1883ad143b3e2b77596abc31 Mon Sep 17 00:00:00 2001 From: Yongqiang Liu Date: Thu, 1 Apr 2021 13:15:33 +0000 Subject: ARM: OMAP2+: Fix build warning when mmc_omap is not built GCC reports the following warning with W=1: arch/arm/mach-omap2/board-n8x0.c:325:19: warning: variable 'index' set but not used [-Wunused-but-set-variable] 325 | int bit, *openp, index; | ^~~~~ Fix this by moving CONFIG_MMC_OMAP to cover the rest codes in the n8x0_mmc_callback(). Signed-off-by: Yongqiang Liu Signed-off-by: Tony Lindgren --- arch/arm/mach-omap2/board-n8x0.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/arm/mach-omap2/board-n8x0.c b/arch/arm/mach-omap2/board-n8x0.c index 418a61ecb827..5e86145db0e2 100644 --- a/arch/arm/mach-omap2/board-n8x0.c +++ b/arch/arm/mach-omap2/board-n8x0.c @@ -322,6 +322,7 @@ static int n8x0_mmc_get_cover_state(struct device *dev, int slot) static void n8x0_mmc_callback(void *data, u8 card_mask) { +#ifdef CONFIG_MMC_OMAP int bit, *openp, index; if (board_is_n800()) { @@ -339,7 +340,6 @@ static void n8x0_mmc_callback(void *data, u8 card_mask) else *openp = 0; -#ifdef CONFIG_MMC_OMAP omap_mmc_notify_cover_event(mmc_device, index, *openp); #else pr_warn("MMC: notify cover event not available\n"); -- cgit v1.2.3 From dabea675faf16e8682aa478ff3ce65dd775620bc Mon Sep 17 00:00:00 2001 From: Michael Walle Date: Thu, 8 Apr 2021 13:02:18 +0200 Subject: arm64: dts: ls1028a: fix memory node While enabling EDAC support for the LS1028A it was discovered that the memory node has a wrong endianness setting as well as a wrong interrupt assignment. Fix both. This was tested on a sl28 board. To force ECC errors, you can use the error injection supported by the controller in hardware (with CONFIG_EDAC_DEBUG enabled): # enable error injection $ echo 0x100 > /sys/devices/system/edac/mc/mc0/inject_ctrl # flip lowest bit of the data $ echo 0x1 > /sys/devices/system/edac/mc/mc0/inject_data_lo Fixes: 8897f3255c9c ("arm64: dts: Add support for NXP LS1028A SoC") Signed-off-by: Michael Walle Signed-off-by: Shawn Guo --- arch/arm64/boot/dts/freescale/fsl-ls1028a.dtsi | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/arm64/boot/dts/freescale/fsl-ls1028a.dtsi b/arch/arm64/boot/dts/freescale/fsl-ls1028a.dtsi index eca06a0c3cf8..a30249ebffa8 100644 --- a/arch/arm64/boot/dts/freescale/fsl-ls1028a.dtsi +++ b/arch/arm64/boot/dts/freescale/fsl-ls1028a.dtsi @@ -197,8 +197,8 @@ ddr: memory-controller@1080000 { compatible = "fsl,qoriq-memory-controller"; reg = <0x0 0x1080000 0x0 0x1000>; - interrupts = ; - big-endian; + interrupts = ; + little-endian; }; dcfg: syscon@1e00000 { -- cgit v1.2.3 From e98d98028989e023e0cbff539dc616c4e5036839 Mon Sep 17 00:00:00 2001 From: Lucas Stach Date: Fri, 7 May 2021 21:44:39 +0200 Subject: arm64: dts: zii-ultra: remove second GEN_3V3 regulator instance When adding the sound support a second instance of the GEN_3V3 regulator was added by accident. Remove it and point the consumers to the first instance. Fixes: 663a5b5efa51 ("arm64: dts: zii-ultra: add sound support") Signed-off-by: Lucas Stach Signed-off-by: Shawn Guo --- .../boot/dts/freescale/imx8mq-zii-ultra-rmb3.dts | 10 +++++----- arch/arm64/boot/dts/freescale/imx8mq-zii-ultra.dtsi | 19 +++++-------------- 2 files changed, 10 insertions(+), 19 deletions(-) (limited to 'arch') diff --git a/arch/arm64/boot/dts/freescale/imx8mq-zii-ultra-rmb3.dts b/arch/arm64/boot/dts/freescale/imx8mq-zii-ultra-rmb3.dts index 631e01c1b9fd..be1e7d6f0ecb 100644 --- a/arch/arm64/boot/dts/freescale/imx8mq-zii-ultra-rmb3.dts +++ b/arch/arm64/boot/dts/freescale/imx8mq-zii-ultra-rmb3.dts @@ -88,11 +88,11 @@ pinctrl-0 = <&pinctrl_codec2>; reg = <0x18>; #sound-dai-cells = <0>; - HPVDD-supply = <®_3p3v>; - SPRVDD-supply = <®_3p3v>; - SPLVDD-supply = <®_3p3v>; - AVDD-supply = <®_3p3v>; - IOVDD-supply = <®_3p3v>; + HPVDD-supply = <®_gen_3p3>; + SPRVDD-supply = <®_gen_3p3>; + SPLVDD-supply = <®_gen_3p3>; + AVDD-supply = <®_gen_3p3>; + IOVDD-supply = <®_gen_3p3>; DVDD-supply = <&vgen4_reg>; reset-gpios = <&gpio3 4 GPIO_ACTIVE_HIGH>; }; diff --git a/arch/arm64/boot/dts/freescale/imx8mq-zii-ultra.dtsi b/arch/arm64/boot/dts/freescale/imx8mq-zii-ultra.dtsi index 4dc8383478ee..1e5d34e81ab7 100644 --- a/arch/arm64/boot/dts/freescale/imx8mq-zii-ultra.dtsi +++ b/arch/arm64/boot/dts/freescale/imx8mq-zii-ultra.dtsi @@ -77,15 +77,6 @@ regulator-always-on; }; - reg_3p3v: regulator-3p3v { - compatible = "regulator-fixed"; - vin-supply = <®_3p3_main>; - regulator-name = "GEN_3V3"; - regulator-min-microvolt = <3300000>; - regulator-max-microvolt = <3300000>; - regulator-always-on; - }; - reg_usdhc2_vmmc: regulator-vsd-3v3 { pinctrl-names = "default"; pinctrl-0 = <&pinctrl_reg_usdhc2>; @@ -415,11 +406,11 @@ pinctrl-0 = <&pinctrl_codec1>; reg = <0x18>; #sound-dai-cells = <0>; - HPVDD-supply = <®_3p3v>; - SPRVDD-supply = <®_3p3v>; - SPLVDD-supply = <®_3p3v>; - AVDD-supply = <®_3p3v>; - IOVDD-supply = <®_3p3v>; + HPVDD-supply = <®_gen_3p3>; + SPRVDD-supply = <®_gen_3p3>; + SPLVDD-supply = <®_gen_3p3>; + AVDD-supply = <®_gen_3p3>; + IOVDD-supply = <®_gen_3p3>; DVDD-supply = <&vgen4_reg>; reset-gpios = <&gpio3 3 GPIO_ACTIVE_LOW>; }; -- cgit v1.2.3 From ac0cbf9d13dccfd09bebc2f8f5697b6d3ffe27c4 Mon Sep 17 00:00:00 2001 From: Lucas Stach Date: Fri, 7 May 2021 21:44:40 +0200 Subject: arm64: dts: zii-ultra: fix 12V_MAIN voltage As this is a fixed regulator on the board there was no harm in the wrong voltage being specified, apart from a confusing reporting to userspace. Fixes: 4a13b3bec3b4 ("arm64: dts: imx: add Zii Ultra board support") Signed-off-by: Lucas Stach Signed-off-by: Shawn Guo --- arch/arm64/boot/dts/freescale/imx8mq-zii-ultra.dtsi | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/arm64/boot/dts/freescale/imx8mq-zii-ultra.dtsi b/arch/arm64/boot/dts/freescale/imx8mq-zii-ultra.dtsi index 1e5d34e81ab7..a08a568c31d9 100644 --- a/arch/arm64/boot/dts/freescale/imx8mq-zii-ultra.dtsi +++ b/arch/arm64/boot/dts/freescale/imx8mq-zii-ultra.dtsi @@ -45,8 +45,8 @@ reg_12p0_main: regulator-12p0-main { compatible = "regulator-fixed"; regulator-name = "12V_MAIN"; - regulator-min-microvolt = <5000000>; - regulator-max-microvolt = <5000000>; + regulator-min-microvolt = <12000000>; + regulator-max-microvolt = <12000000>; regulator-always-on; }; -- cgit v1.2.3 From 779b56bb679767712761a79232331f8519402e75 Mon Sep 17 00:00:00 2001 From: Fabio Estevam Date: Sat, 8 May 2021 13:03:19 -0300 Subject: ARM: imx: pm-imx27: Include "common.h" Since commit 879c0e5e0ac7 ("ARM: imx: Remove i.MX27 board files") the following W=1 build warning is seen: arch/arm/mach-imx/pm-imx27.c:40:13: warning: no previous prototype for 'imx27_pm_init' [-Wmissing-prototypes] Fix it by including the "common.h" header file, which contains the prototype for imx27_pm_init(). Fixes: 879c0e5e0ac7 ("ARM: imx: Remove i.MX27 board files") Reported-by: kernel test robot Signed-off-by: Fabio Estevam Signed-off-by: Shawn Guo --- arch/arm/mach-imx/pm-imx27.c | 1 + 1 file changed, 1 insertion(+) (limited to 'arch') diff --git a/arch/arm/mach-imx/pm-imx27.c b/arch/arm/mach-imx/pm-imx27.c index 020e6deb67c8..237e8aa9fe83 100644 --- a/arch/arm/mach-imx/pm-imx27.c +++ b/arch/arm/mach-imx/pm-imx27.c @@ -12,6 +12,7 @@ #include #include +#include "common.h" #include "hardware.h" static int mx27_suspend_enter(suspend_state_t state) -- cgit v1.2.3 From 25201269c6ec3e9398426962ccdd55428261f7d0 Mon Sep 17 00:00:00 2001 From: Michael Walle Date: Fri, 14 May 2021 20:55:52 +0200 Subject: arm64: dts: freescale: sl28: var4: fix RGMII clock and voltage During hardware validation it was noticed that the clock isn't continuously enabled when there is no link. This is because the 125MHz clock is derived from the internal PLL which seems to go into some kind of power-down mode every once in a while. The LS1028A expects a contiuous clock. Thus enable the PLL all the time. Also, the RGMII pad voltage is wrong. It was configured to 2.5V (that is the VDDH regulator). The correct voltage is 1.8V, i.e. the VDDIO regulator. This fix is for the freescale/fsl-ls1028a-kontron-sl28-var4.dts. Fixes: 815364d0424e ("arm64: dts: freescale: add Kontron sl28 support") Signed-off-by: Michael Walle Signed-off-by: Shawn Guo --- arch/arm64/boot/dts/freescale/fsl-ls1028a-kontron-sl28-var4.dts | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/arm64/boot/dts/freescale/fsl-ls1028a-kontron-sl28-var4.dts b/arch/arm64/boot/dts/freescale/fsl-ls1028a-kontron-sl28-var4.dts index df212ed5bb94..e65d1c477e2c 100644 --- a/arch/arm64/boot/dts/freescale/fsl-ls1028a-kontron-sl28-var4.dts +++ b/arch/arm64/boot/dts/freescale/fsl-ls1028a-kontron-sl28-var4.dts @@ -31,11 +31,10 @@ reg = <0x4>; eee-broken-1000t; eee-broken-100tx; - qca,clk-out-frequency = <125000000>; qca,clk-out-strength = ; - - vddio-supply = <&vddh>; + qca,keep-pll-enabled; + vddio-supply = <&vddio>; vddio: vddio-regulator { regulator-name = "VDDIO"; -- cgit v1.2.3 From 52387bb9a4a75b88887383cb91d3995ae6f4044a Mon Sep 17 00:00:00 2001 From: Michael Walle Date: Fri, 14 May 2021 20:55:53 +0200 Subject: arm64: dts: freescale: sl28: var1: fix RGMII clock and voltage During hardware validation it was noticed that the clock isn't continuously enabled when there is no link. This is because the 125MHz clock is derived from the internal PLL which seems to go into some kind of power-down mode every once in a while. The LS1028A expects a contiuous clock. Thus enable the PLL all the time. Also, the RGMII pad voltage is wrong, it was configured to 2.5V (that is the VDDH regulator). The correct voltage is 1.8V, i.e. the VDDIO regulator. This fix is for the freescale/fsl-ls1028a-kontron-sl28-var1.dts. Fixes: 642856097c18 ("arm64: dts: freescale: sl28: add variant 1") Signed-off-by: Michael Walle Signed-off-by: Shawn Guo --- arch/arm64/boot/dts/freescale/fsl-ls1028a-kontron-sl28-var1.dts | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/arm64/boot/dts/freescale/fsl-ls1028a-kontron-sl28-var1.dts b/arch/arm64/boot/dts/freescale/fsl-ls1028a-kontron-sl28-var1.dts index 6c309b97587d..e8d31279b7a3 100644 --- a/arch/arm64/boot/dts/freescale/fsl-ls1028a-kontron-sl28-var1.dts +++ b/arch/arm64/boot/dts/freescale/fsl-ls1028a-kontron-sl28-var1.dts @@ -46,7 +46,8 @@ eee-broken-100tx; qca,clk-out-frequency = <125000000>; qca,clk-out-strength = ; - vddio-supply = <&vddh>; + qca,keep-pll-enabled; + vddio-supply = <&vddio>; vddio: vddio-regulator { regulator-name = "VDDIO"; -- cgit v1.2.3 From 7c8f0338cdacc90fdf6468adafa8e27952987f00 Mon Sep 17 00:00:00 2001 From: Fabio Estevam Date: Thu, 20 May 2021 18:42:12 -0300 Subject: ARM: dts: imx7d-meerkat96: Fix the 'tuning-step' property According to Documentation/devicetree/bindings/mmc/fsl-imx-esdhc.yaml, the correct name of the property is 'fsl,tuning-step'. Fix it accordingly. Signed-off-by: Fabio Estevam Fixes: ae7b3384b61b ("ARM: dts: Add support for 96Boards Meerkat96 board") Signed-off-by: Shawn Guo --- arch/arm/boot/dts/imx7d-meerkat96.dts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/arm/boot/dts/imx7d-meerkat96.dts b/arch/arm/boot/dts/imx7d-meerkat96.dts index 5339210b63d0..dd8003bd1fc0 100644 --- a/arch/arm/boot/dts/imx7d-meerkat96.dts +++ b/arch/arm/boot/dts/imx7d-meerkat96.dts @@ -193,7 +193,7 @@ pinctrl-names = "default"; pinctrl-0 = <&pinctrl_usdhc1>; keep-power-in-suspend; - tuning-step = <2>; + fsl,tuning-step = <2>; vmmc-supply = <®_3p3v>; no-1-8-v; broken-cd; -- cgit v1.2.3 From 0e2fa4959c4f44815ce33e46e4054eeb0f346053 Mon Sep 17 00:00:00 2001 From: Fabio Estevam Date: Thu, 20 May 2021 18:42:13 -0300 Subject: ARM: dts: imx7d-pico: Fix the 'tuning-step' property According to Documentation/devicetree/bindings/mmc/fsl-imx-esdhc.yaml, the correct name of the property is 'fsl,tuning-step'. Fix it accordingly. Signed-off-by: Fabio Estevam Fixes: f13f571ac8a1 ("ARM: dts: imx7d-pico: Extend peripherals support") Signed-off-by: Shawn Guo --- arch/arm/boot/dts/imx7d-pico.dtsi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/arm/boot/dts/imx7d-pico.dtsi b/arch/arm/boot/dts/imx7d-pico.dtsi index e57da0d32b98..e519897fae08 100644 --- a/arch/arm/boot/dts/imx7d-pico.dtsi +++ b/arch/arm/boot/dts/imx7d-pico.dtsi @@ -351,7 +351,7 @@ pinctrl-2 = <&pinctrl_usdhc1_200mhz>; cd-gpios = <&gpio5 0 GPIO_ACTIVE_LOW>; bus-width = <4>; - tuning-step = <2>; + fsl,tuning-step = <2>; vmmc-supply = <®_3p3v>; wakeup-source; no-1-8-v; -- cgit v1.2.3 From b73eb6b3b91ff7d76cff5f8c7ab92fe0c51e3829 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Fri, 21 May 2021 09:54:07 +0200 Subject: ARM: dts: imx: emcon-avari: Fix nxp,pca8574 #gpio-cells According to the DT bindings, #gpio-cells must be two. Fixes: 63e71fedc07c4ece ("ARM: dts: Add support for emtrion emCON-MX6 series") Signed-off-by: Geert Uytterhoeven Reviewed-by: Laurent Pinchart Signed-off-by: Shawn Guo --- arch/arm/boot/dts/imx6qdl-emcon-avari.dtsi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/arm/boot/dts/imx6qdl-emcon-avari.dtsi b/arch/arm/boot/dts/imx6qdl-emcon-avari.dtsi index 828cf3e39784..c4e146f3341b 100644 --- a/arch/arm/boot/dts/imx6qdl-emcon-avari.dtsi +++ b/arch/arm/boot/dts/imx6qdl-emcon-avari.dtsi @@ -126,7 +126,7 @@ compatible = "nxp,pca8574"; reg = <0x3a>; gpio-controller; - #gpio-cells = <1>; + #gpio-cells = <2>; }; }; -- cgit v1.2.3 From bae989c4bc53f861cc1b706aab0194703e9907a8 Mon Sep 17 00:00:00 2001 From: Maciej Falkowski Date: Thu, 1 Apr 2021 18:04:34 +0200 Subject: ARM: OMAP1: ams-delta: remove unused function ams_delta_camera_power The ams_delta_camera_power() function is unused as reports Clang compilation with omap1_defconfig on linux-next: arch/arm/mach-omap1/board-ams-delta.c:462:12: warning: unused function 'ams_delta_camera_power' [-Wunused-function] static int ams_delta_camera_power(struct device *dev, int power) ^ 1 warning generated. The soc_camera support was dropped without removing ams_delta_camera_power() function, making it unused. Fixes: ce548396a433 ("media: mach-omap1: board-ams-delta.c: remove soc_camera dependencies") Signed-off-by: Maciej Falkowski Reviewed-by: Nathan Chancellor Reviewed-by: Nick Desaulniers Signed-off-by: Tony Lindgren Link: https://github.com/ClangBuiltLinux/linux/issues/1326 --- arch/arm/mach-omap1/board-ams-delta.c | 14 -------------- 1 file changed, 14 deletions(-) (limited to 'arch') diff --git a/arch/arm/mach-omap1/board-ams-delta.c b/arch/arm/mach-omap1/board-ams-delta.c index 2ee527c00284..1026a816dcc0 100644 --- a/arch/arm/mach-omap1/board-ams-delta.c +++ b/arch/arm/mach-omap1/board-ams-delta.c @@ -458,20 +458,6 @@ static struct gpiod_lookup_table leds_gpio_table = { #ifdef CONFIG_LEDS_TRIGGERS DEFINE_LED_TRIGGER(ams_delta_camera_led_trigger); - -static int ams_delta_camera_power(struct device *dev, int power) -{ - /* - * turn on camera LED - */ - if (power) - led_trigger_event(ams_delta_camera_led_trigger, LED_FULL); - else - led_trigger_event(ams_delta_camera_led_trigger, LED_OFF); - return 0; -} -#else -#define ams_delta_camera_power NULL #endif static struct platform_device ams_delta_audio_device = { -- cgit v1.2.3 From 82123a3d1d5a306fdf50c968a474cc60fe43a80f Mon Sep 17 00:00:00 2001 From: "Naveen N. Rao" Date: Wed, 19 May 2021 16:17:17 +0530 Subject: powerpc/kprobes: Fix validation of prefixed instructions across page boundary When checking if the probed instruction is the suffix of a prefixed instruction, we access the instruction at the previous word. If the probed instruction is the very first word of a module, we can end up trying to access an invalid page. Fix this by skipping the check for all instructions at the beginning of a page. Prefixed instructions cannot cross a 64-byte boundary and as such, we don't expect to encounter a suffix as the very first word in a page for kernel text. Even if there are prefixed instructions crossing a page boundary (from a module, for instance), the instruction will be illegal, so preventing probing on the suffix of such prefix instructions isn't worthwhile. Fixes: b4657f7650ba ("powerpc/kprobes: Don't allow breakpoints on suffixes") Cc: stable@vger.kernel.org # v5.8+ Reported-by: Christophe Leroy Signed-off-by: Naveen N. Rao Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/0df9a032a05576a2fa8e97d1b769af2ff0eafbd6.1621416666.git.naveen.n.rao@linux.vnet.ibm.com --- arch/powerpc/kernel/kprobes.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/kernel/kprobes.c b/arch/powerpc/kernel/kprobes.c index 01ab2163659e..e8c2a6373157 100644 --- a/arch/powerpc/kernel/kprobes.c +++ b/arch/powerpc/kernel/kprobes.c @@ -108,7 +108,6 @@ int arch_prepare_kprobe(struct kprobe *p) int ret = 0; struct kprobe *prev; struct ppc_inst insn = ppc_inst_read((struct ppc_inst *)p->addr); - struct ppc_inst prefix = ppc_inst_read((struct ppc_inst *)(p->addr - 1)); if ((unsigned long)p->addr & 0x03) { printk("Attempt to register kprobe at an unaligned address\n"); @@ -116,7 +115,8 @@ int arch_prepare_kprobe(struct kprobe *p) } else if (IS_MTMSRD(insn) || IS_RFID(insn) || IS_RFI(insn)) { printk("Cannot register a kprobe on rfi/rfid or mtmsr[d]\n"); ret = -EINVAL; - } else if (ppc_inst_prefixed(prefix)) { + } else if ((unsigned long)p->addr & ~PAGE_MASK && + ppc_inst_prefixed(ppc_inst_read((struct ppc_inst *)(p->addr - 1)))) { printk("Cannot register a kprobe on the second word of prefixed instruction\n"); ret = -EINVAL; } -- cgit v1.2.3 From 5362a4b6ee6136018558ef6b2c4701aa15ebc602 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Wed, 26 May 2021 22:00:05 +1000 Subject: powerpc: Fix reverse map real-mode address lookup with huge vmalloc real_vmalloc_addr() does not currently work for huge vmalloc, which is what the reverse map can be allocated with for radix host, hash guest. Extract the hugepage aware equivalent from eeh code into a helper, and convert existing sites including this one to use it. Fixes: 8abddd968a30 ("powerpc/64s/radix: Enable huge vmalloc mappings") Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210526120005.3432222-1-npiggin@gmail.com --- arch/powerpc/include/asm/pte-walk.h | 29 +++++++++++++++++++++++++++++ arch/powerpc/kernel/eeh.c | 23 +---------------------- arch/powerpc/kernel/io-workarounds.c | 16 +++------------- arch/powerpc/kvm/book3s_hv_rm_mmu.c | 15 ++------------- 4 files changed, 35 insertions(+), 48 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/include/asm/pte-walk.h b/arch/powerpc/include/asm/pte-walk.h index 33fa5dd8ee6a..714a35f0d425 100644 --- a/arch/powerpc/include/asm/pte-walk.h +++ b/arch/powerpc/include/asm/pte-walk.h @@ -31,6 +31,35 @@ static inline pte_t *find_init_mm_pte(unsigned long ea, unsigned *hshift) pgd_t *pgdir = init_mm.pgd; return __find_linux_pte(pgdir, ea, NULL, hshift); } + +/* + * Convert a kernel vmap virtual address (vmalloc or ioremap space) to a + * physical address, without taking locks. This can be used in real-mode. + */ +static inline phys_addr_t ppc_find_vmap_phys(unsigned long addr) +{ + pte_t *ptep; + phys_addr_t pa; + int hugepage_shift; + + /* + * init_mm does not free page tables, and does not do THP. It may + * have huge pages from huge vmalloc / ioremap etc. + */ + ptep = find_init_mm_pte(addr, &hugepage_shift); + if (WARN_ON(!ptep)) + return 0; + + pa = PFN_PHYS(pte_pfn(*ptep)); + + if (!hugepage_shift) + hugepage_shift = PAGE_SHIFT; + + pa |= addr & ((1ul << hugepage_shift) - 1); + + return pa; +} + /* * This is what we should always use. Any other lockless page table lookup needs * careful audit against THP split. diff --git a/arch/powerpc/kernel/eeh.c b/arch/powerpc/kernel/eeh.c index f24cd53ff26e..3bbdcc86d01b 100644 --- a/arch/powerpc/kernel/eeh.c +++ b/arch/powerpc/kernel/eeh.c @@ -346,28 +346,7 @@ void eeh_slot_error_detail(struct eeh_pe *pe, int severity) */ static inline unsigned long eeh_token_to_phys(unsigned long token) { - pte_t *ptep; - unsigned long pa; - int hugepage_shift; - - /* - * We won't find hugepages here(this is iomem). Hence we are not - * worried about _PAGE_SPLITTING/collapse. Also we will not hit - * page table free, because of init_mm. - */ - ptep = find_init_mm_pte(token, &hugepage_shift); - if (!ptep) - return token; - - pa = pte_pfn(*ptep); - - /* On radix we can do hugepage mappings for io, so handle that */ - if (!hugepage_shift) - hugepage_shift = PAGE_SHIFT; - - pa <<= PAGE_SHIFT; - pa |= token & ((1ul << hugepage_shift) - 1); - return pa; + return ppc_find_vmap_phys(token); } /* diff --git a/arch/powerpc/kernel/io-workarounds.c b/arch/powerpc/kernel/io-workarounds.c index 51bbaae94ccc..c877f074d174 100644 --- a/arch/powerpc/kernel/io-workarounds.c +++ b/arch/powerpc/kernel/io-workarounds.c @@ -55,7 +55,6 @@ static struct iowa_bus *iowa_pci_find(unsigned long vaddr, unsigned long paddr) #ifdef CONFIG_PPC_INDIRECT_MMIO struct iowa_bus *iowa_mem_find_bus(const PCI_IO_ADDR addr) { - unsigned hugepage_shift; struct iowa_bus *bus; int token; @@ -65,22 +64,13 @@ struct iowa_bus *iowa_mem_find_bus(const PCI_IO_ADDR addr) bus = &iowa_busses[token - 1]; else { unsigned long vaddr, paddr; - pte_t *ptep; vaddr = (unsigned long)PCI_FIX_ADDR(addr); if (vaddr < PHB_IO_BASE || vaddr >= PHB_IO_END) return NULL; - /* - * We won't find huge pages here (iomem). Also can't hit - * a page table free due to init_mm - */ - ptep = find_init_mm_pte(vaddr, &hugepage_shift); - if (ptep == NULL) - paddr = 0; - else { - WARN_ON(hugepage_shift); - paddr = pte_pfn(*ptep) << PAGE_SHIFT; - } + + paddr = ppc_find_vmap_phys(vaddr); + bus = iowa_pci_find(vaddr, paddr); if (bus == NULL) diff --git a/arch/powerpc/kvm/book3s_hv_rm_mmu.c b/arch/powerpc/kvm/book3s_hv_rm_mmu.c index 7af7c70f1468..7a0f12404e0e 100644 --- a/arch/powerpc/kvm/book3s_hv_rm_mmu.c +++ b/arch/powerpc/kvm/book3s_hv_rm_mmu.c @@ -23,20 +23,9 @@ #include /* Translate address of a vmalloc'd thing to a linear map address */ -static void *real_vmalloc_addr(void *x) +static void *real_vmalloc_addr(void *addr) { - unsigned long addr = (unsigned long) x; - pte_t *p; - /* - * assume we don't have huge pages in vmalloc space... - * So don't worry about THP collapse/split. Called - * Only in realmode with MSR_EE = 0, hence won't need irq_save/restore. - */ - p = find_init_mm_pte(addr, NULL); - if (!p || !pte_present(*p)) - return NULL; - addr = (pte_pfn(*p) << PAGE_SHIFT) | (addr & ~PAGE_MASK); - return __va(addr); + return __va(ppc_find_vmap_phys((unsigned long)addr)); } /* Return 1 if we need to do a global tlbie, 0 if we can use tlbiel */ -- cgit v1.2.3 From 1438709e6328925ef496dafd467dbd0353137434 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Wed, 26 May 2021 22:58:51 +1000 Subject: KVM: PPC: Book3S HV: Save host FSCR in the P7/8 path Similar to commit 25edcc50d76c ("KVM: PPC: Book3S HV: Save and restore FSCR in the P9 path"), ensure the P7/8 path saves and restores the host FSCR. The logic explained in that patch actually applies there to the old path well: a context switch can be made before kvmppc_vcpu_run_hv restores the host FSCR and returns. Now both the p9 and the p7/8 paths now save and restore their FSCR, it no longer needs to be restored at the end of kvmppc_vcpu_run_hv Fixes: b005255e12a3 ("KVM: PPC: Book3S HV: Context-switch new POWER8 SPRs") Cc: stable@vger.kernel.org # v3.14+ Signed-off-by: Nicholas Piggin Reviewed-by: Fabiano Rosas Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210526125851.3436735-1-npiggin@gmail.com --- arch/powerpc/kvm/book3s_hv.c | 1 - arch/powerpc/kvm/book3s_hv_rmhandlers.S | 7 +++++++ 2 files changed, 7 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index 28a80d240b76..13728495ac66 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -4455,7 +4455,6 @@ static int kvmppc_vcpu_run_hv(struct kvm_vcpu *vcpu) mtspr(SPRN_EBBRR, ebb_regs[1]); mtspr(SPRN_BESCR, ebb_regs[2]); mtspr(SPRN_TAR, user_tar); - mtspr(SPRN_FSCR, current->thread.fscr); } mtspr(SPRN_VRSAVE, user_vrsave); diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S index 5e634db4809b..004f0d4e665f 100644 --- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S +++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S @@ -59,6 +59,7 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_300) #define STACK_SLOT_UAMOR (SFS-88) #define STACK_SLOT_DAWR1 (SFS-96) #define STACK_SLOT_DAWRX1 (SFS-104) +#define STACK_SLOT_FSCR (SFS-112) /* the following is used by the P9 short path */ #define STACK_SLOT_NVGPRS (SFS-152) /* 18 gprs */ @@ -686,6 +687,8 @@ BEGIN_FTR_SECTION std r6, STACK_SLOT_DAWR0(r1) std r7, STACK_SLOT_DAWRX0(r1) std r8, STACK_SLOT_IAMR(r1) + mfspr r5, SPRN_FSCR + std r5, STACK_SLOT_FSCR(r1) END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S) BEGIN_FTR_SECTION mfspr r6, SPRN_DAWR1 @@ -1663,6 +1666,10 @@ FTR_SECTION_ELSE ld r7, STACK_SLOT_HFSCR(r1) mtspr SPRN_HFSCR, r7 ALT_FTR_SECTION_END_IFCLR(CPU_FTR_ARCH_300) +BEGIN_FTR_SECTION + ld r5, STACK_SLOT_FSCR(r1) + mtspr SPRN_FSCR, r5 +END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S) /* * Restore various registers to 0, where non-zero values * set by the guest could disrupt the host. -- cgit v1.2.3 From 7d65f9e80646c595e8c853640a9d0768a33e204c Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 25 May 2021 13:08:41 +0200 Subject: x86/apic: Mark _all_ legacy interrupts when IO/APIC is missing PIC interrupts do not support affinity setting and they can end up on any online CPU. Therefore, it's required to mark the associated vectors as system-wide reserved. Otherwise, the corresponding irq descriptors are copied to the secondary CPUs but the vectors are not marked as assigned or reserved. This works correctly for the IO/APIC case. When the IO/APIC is disabled via config, kernel command line or lack of enumeration then all legacy interrupts are routed through the PIC, but nothing marks them as system-wide reserved vectors. As a consequence, a subsequent allocation on a secondary CPU can result in allocating one of these vectors, which triggers the BUG() in apic_update_vector() because the interrupt descriptor slot is not empty. Imran tried to work around that by marking those interrupts as allocated when a CPU comes online. But that's wrong in case that the IO/APIC is available and one of the legacy interrupts, e.g. IRQ0, has been switched to PIC mode because then marking them as allocated will fail as they are already marked as system vectors. Stay consistent and update the legacy vectors after attempting IO/APIC initialization and mark them as system vectors in case that no IO/APIC is available. Fixes: 69cde0004a4b ("x86/vector: Use matrix allocator for vector assignment") Reported-by: Imran Khan Signed-off-by: Thomas Gleixner Signed-off-by: Borislav Petkov Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/20210519233928.2157496-1-imran.f.khan@oracle.com --- arch/x86/include/asm/apic.h | 1 + arch/x86/kernel/apic/apic.c | 1 + arch/x86/kernel/apic/vector.c | 20 ++++++++++++++++++++ 3 files changed, 22 insertions(+) (limited to 'arch') diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h index 412b51e059c8..48067af94678 100644 --- a/arch/x86/include/asm/apic.h +++ b/arch/x86/include/asm/apic.h @@ -174,6 +174,7 @@ static inline int apic_is_clustered_box(void) extern int setup_APIC_eilvt(u8 lvt_off, u8 vector, u8 msg_type, u8 mask); extern void lapic_assign_system_vectors(void); extern void lapic_assign_legacy_vector(unsigned int isairq, bool replace); +extern void lapic_update_legacy_vectors(void); extern void lapic_online(void); extern void lapic_offline(void); extern bool apic_needs_pit(void); diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index 4a39fb429f15..d262811ce14b 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c @@ -2604,6 +2604,7 @@ static void __init apic_bsp_setup(bool upmode) end_local_APIC_setup(); irq_remap_enable_fault_handling(); setup_IO_APIC(); + lapic_update_legacy_vectors(); } #ifdef CONFIG_UP_LATE_INIT diff --git a/arch/x86/kernel/apic/vector.c b/arch/x86/kernel/apic/vector.c index 6dbdc7c22bb7..fb67ed5e7e6a 100644 --- a/arch/x86/kernel/apic/vector.c +++ b/arch/x86/kernel/apic/vector.c @@ -738,6 +738,26 @@ void lapic_assign_legacy_vector(unsigned int irq, bool replace) irq_matrix_assign_system(vector_matrix, ISA_IRQ_VECTOR(irq), replace); } +void __init lapic_update_legacy_vectors(void) +{ + unsigned int i; + + if (IS_ENABLED(CONFIG_X86_IO_APIC) && nr_ioapics > 0) + return; + + /* + * If the IO/APIC is disabled via config, kernel command line or + * lack of enumeration then all legacy interrupts are routed + * through the PIC. Make sure that they are marked as legacy + * vectors. PIC_CASCADE_IRQ has already been marked in + * lapic_assign_system_vectors(). + */ + for (i = 0; i < nr_legacy_irqs(); i++) { + if (i != PIC_CASCADE_IR) + lapic_assign_legacy_vector(i, true); + } +} + void __init lapic_assign_system_vectors(void) { unsigned int i, vector = 0; -- cgit v1.2.3 From ec3a5cb61146c91f0f7dcec8b7e7157a4879a9ee Mon Sep 17 00:00:00 2001 From: Khem Raj Date: Fri, 14 May 2021 14:37:41 -0700 Subject: riscv: Use -mno-relax when using lld linker lld does not implement the RISCV relaxation optimizations like GNU ld therefore disable it when building with lld, Also pass it to assembler when using external GNU assembler ( LLVM_IAS != 1 ), this ensures that relevant assembler option is also enabled along. if these options are not used then we see following relocations in objects 0000000000000000 R_RISCV_ALIGN *ABS*+0x0000000000000002 These are then rejected by lld ld.lld: error: capability.c:(.fixup+0x0): relocation R_RISCV_ALIGN requires unimplemented linker relaxation; recompile with -mno-relax but the .o is already compiled with -mno-relax Signed-off-by: Khem Raj Reviewed-by: Nathan Chancellor Signed-off-by: Palmer Dabbelt --- arch/riscv/Makefile | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'arch') diff --git a/arch/riscv/Makefile b/arch/riscv/Makefile index 3eb9590a0775..4be020695428 100644 --- a/arch/riscv/Makefile +++ b/arch/riscv/Makefile @@ -38,6 +38,15 @@ else KBUILD_LDFLAGS += -melf32lriscv endif +ifeq ($(CONFIG_LD_IS_LLD),y) + KBUILD_CFLAGS += -mno-relax + KBUILD_AFLAGS += -mno-relax +ifneq ($(LLVM_IAS),1) + KBUILD_CFLAGS += -Wa,-mno-relax + KBUILD_AFLAGS += -Wa,-mno-relax +endif +endif + # ISA string setting riscv-march-$(CONFIG_ARCH_RV32I) := rv32ima riscv-march-$(CONFIG_ARCH_RV64I) := rv64ima -- cgit v1.2.3 From 4cce442ffe5448ef572adc8b3abe7001b398e709 Mon Sep 17 00:00:00 2001 From: Jerome Brunet Date: Thu, 29 Apr 2021 10:38:23 +0200 Subject: arm64: meson: select COMMON_CLK This fix the recent removal of clock drivers selection. While it is not necessary to select the clock drivers themselves, we need to select a proper implementation of the clock API, which for the meson, is CCF Fixes: ba66a25536dd ("arm64: meson: ship only the necessary clock controllers") Reviewed-by: Neil Armstrong Signed-off-by: Jerome Brunet Reviewed-by: Martin Blumenstingl Signed-off-by: Kevin Hilman Signed-off-by: Neil Armstrong Link: https://lore.kernel.org/r/20210429083823.59546-1-jbrunet@baylibre.com --- arch/arm64/Kconfig.platforms | 1 + 1 file changed, 1 insertion(+) (limited to 'arch') diff --git a/arch/arm64/Kconfig.platforms b/arch/arm64/Kconfig.platforms index 6409b47b73e4..7336c1fd0dda 100644 --- a/arch/arm64/Kconfig.platforms +++ b/arch/arm64/Kconfig.platforms @@ -165,6 +165,7 @@ config ARCH_MEDIATEK config ARCH_MESON bool "Amlogic Platforms" + select COMMON_CLK select MESON_IRQ_GPIO help This enables support for the arm64 based Amlogic SoCs -- cgit v1.2.3 From 4a0e3ff30980b7601b13dd3b7ee275212b852843 Mon Sep 17 00:00:00 2001 From: Kan Liang Date: Wed, 26 May 2021 06:58:47 -0700 Subject: perf/x86/intel/uncore: Fix a kernel WARNING triggered by maxcpus=1 A kernel WARNING may be triggered when setting maxcpus=1. The uncore counters are Die-scope. When probing a PCI device, only the BUS information can be retrieved. The uncore driver has to maintain a mapping table used to calculate the logical Die ID from a given BUS#. Before the patch ba9506be4e40, the mapping table stores the mapping information from the BUS# -> a Physical Socket ID. To calculate the logical die ID, perf does, - In snbep_pci2phy_map_init(), retrieve the BUS# -> a Physical Socket ID from the UBOX PCI configure space. - Calculate the mapping information (a BUS# -> a Physical Socket ID) for the other PCI BUS. - In the uncore_pci_probe(), get the physical Socket ID from a given BUS and the mapping table. - Calculate the logical Die ID Since only the logical Die ID is required, with the patch ba9506be4e40, the mapping table stores the mapping information from the BUS# -> a logical Die ID. Now perf does, - In snbep_pci2phy_map_init(), retrieve the BUS# -> a Physical Socket ID from the UBOX PCI configure space. - Calculate the logical Die ID - Calculate the mapping information (a BUS# -> a logical Die ID) for the other PCI BUS. - In the uncore_pci_probe(), get the logical die ID from a given BUS and the mapping table. When calculating the logical Die ID, -1 may be returned, especially when maxcpus=1. Here, -1 means the logical Die ID is not found. But when calculating the mapping information for the other PCI BUS, -1 indicates that it's the other PCI BUS that requires the calculation of the mapping. The driver will mistakenly do the calculation. Uses the -ENODEV to indicate the case which the logical Die ID is not found. The driver will not mess up the mapping table anymore. Fixes: ba9506be4e40 ("perf/x86/intel/uncore: Store the logical die id instead of the physical die id.") Reported-by: John Donnelly Signed-off-by: Kan Liang Signed-off-by: Peter Zijlstra (Intel) Acked-by: John Donnelly Tested-by: John Donnelly Link: https://lkml.kernel.org/r/1622037527-156028-1-git-send-email-kan.liang@linux.intel.com --- arch/x86/events/intel/uncore_snbep.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/x86/events/intel/uncore_snbep.c b/arch/x86/events/intel/uncore_snbep.c index 63f097289a84..1587d3289743 100644 --- a/arch/x86/events/intel/uncore_snbep.c +++ b/arch/x86/events/intel/uncore_snbep.c @@ -1406,6 +1406,8 @@ static int snbep_pci2phy_map_init(int devid, int nodeid_loc, int idmap_loc, bool die_id = i; else die_id = topology_phys_to_logical_pkg(i); + if (die_id < 0) + die_id = -ENODEV; map->pbus_to_dieid[bus] = die_id; break; } @@ -1452,14 +1454,14 @@ static int snbep_pci2phy_map_init(int devid, int nodeid_loc, int idmap_loc, bool i = -1; if (reverse) { for (bus = 255; bus >= 0; bus--) { - if (map->pbus_to_dieid[bus] >= 0) + if (map->pbus_to_dieid[bus] != -1) i = map->pbus_to_dieid[bus]; else map->pbus_to_dieid[bus] = i; } } else { for (bus = 0; bus <= 255; bus++) { - if (map->pbus_to_dieid[bus] >= 0) + if (map->pbus_to_dieid[bus] != -1) i = map->pbus_to_dieid[bus]; else map->pbus_to_dieid[bus] = i; -- cgit v1.2.3 From 9a90ed065a155d13db0d0ffeaad5cc54e51c90c6 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Thu, 27 May 2021 11:02:26 +0200 Subject: x86/thermal: Fix LVT thermal setup for SMI delivery mode There are machines out there with added value crap^WBIOS which provide an SMI handler for the local APIC thermal sensor interrupt. Out of reset, the BSP on those machines has something like 0x200 in that APIC register (timestamps left in because this whole issue is timing sensitive): [ 0.033858] read lvtthmr: 0x330, val: 0x200 which means: - bit 16 - the interrupt mask bit is clear and thus that interrupt is enabled - bits [10:8] have 010b which means SMI delivery mode. Now, later during boot, when the kernel programs the local APIC, it soft-disables it temporarily through the spurious vector register: setup_local_APIC: ... /* * If this comes from kexec/kcrash the APIC might be enabled in * SPIV. Soft disable it before doing further initialization. */ value = apic_read(APIC_SPIV); value &= ~APIC_SPIV_APIC_ENABLED; apic_write(APIC_SPIV, value); which means (from the SDM): "10.4.7.2 Local APIC State After It Has Been Software Disabled ... * The mask bits for all the LVT entries are set. Attempts to reset these bits will be ignored." And this happens too: [ 0.124111] APIC: Switch to symmetric I/O mode setup [ 0.124117] lvtthmr 0x200 before write 0xf to APIC 0xf0 [ 0.124118] lvtthmr 0x10200 after write 0xf to APIC 0xf0 This results in CPU 0 soft lockups depending on the placement in time when the APIC soft-disable happens. Those soft lockups are not 100% reproducible and the reason for that can only be speculated as no one tells you what SMM does. Likely, it confuses the SMM code that the APIC is disabled and the thermal interrupt doesn't doesn't fire at all, leading to CPU 0 stuck in SMM forever... Now, before 4f432e8bb15b ("x86/mce: Get rid of mcheck_intel_therm_init()") due to how the APIC_LVTTHMR was read before APIC initialization in mcheck_intel_therm_init(), it would read the value with the mask bit 16 clear and then intel_init_thermal() would replicate it onto the APs and all would be peachy - the thermal interrupt would remain enabled. But that commit moved that reading to a later moment in intel_init_thermal(), resulting in reading APIC_LVTTHMR on the BSP too late and with its interrupt mask bit set. Thus, revert back to the old behavior of reading the thermal LVT register before the APIC gets initialized. Fixes: 4f432e8bb15b ("x86/mce: Get rid of mcheck_intel_therm_init()") Reported-by: James Feeney Signed-off-by: Borislav Petkov Cc: Cc: Zhang Rui Cc: Srinivas Pandruvada Link: https://lkml.kernel.org/r/YKIqDdFNaXYd39wz@zn.tnic --- arch/x86/include/asm/thermal.h | 4 +++- arch/x86/kernel/setup.c | 9 +++++++++ drivers/thermal/intel/therm_throt.c | 15 +++++++++++---- 3 files changed, 23 insertions(+), 5 deletions(-) (limited to 'arch') diff --git a/arch/x86/include/asm/thermal.h b/arch/x86/include/asm/thermal.h index ddbdefd5b94f..91a7b6687c3b 100644 --- a/arch/x86/include/asm/thermal.h +++ b/arch/x86/include/asm/thermal.h @@ -3,11 +3,13 @@ #define _ASM_X86_THERMAL_H #ifdef CONFIG_X86_THERMAL_VECTOR +void therm_lvt_init(void); void intel_init_thermal(struct cpuinfo_x86 *c); bool x86_thermal_enabled(void); void intel_thermal_interrupt(void); #else -static inline void intel_init_thermal(struct cpuinfo_x86 *c) { } +static inline void therm_lvt_init(void) { } +static inline void intel_init_thermal(struct cpuinfo_x86 *c) { } #endif #endif /* _ASM_X86_THERMAL_H */ diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index 72920af0b3c0..ff653d608d5f 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -44,6 +44,7 @@ #include #include #include +#include #include #include #include @@ -1226,6 +1227,14 @@ void __init setup_arch(char **cmdline_p) x86_init.timers.wallclock_init(); + /* + * This needs to run before setup_local_APIC() which soft-disables the + * local APIC temporarily and that masks the thermal LVT interrupt, + * leading to softlockups on machines which have configured SMI + * interrupt delivery. + */ + therm_lvt_init(); + mcheck_init(); register_refined_jiffies(CLOCK_TICK_RATE); diff --git a/drivers/thermal/intel/therm_throt.c b/drivers/thermal/intel/therm_throt.c index f8e882592ba5..99abdc03c44c 100644 --- a/drivers/thermal/intel/therm_throt.c +++ b/drivers/thermal/intel/therm_throt.c @@ -621,6 +621,17 @@ bool x86_thermal_enabled(void) return atomic_read(&therm_throt_en); } +void __init therm_lvt_init(void) +{ + /* + * This function is only called on boot CPU. Save the init thermal + * LVT value on BSP and use that value to restore APs' thermal LVT + * entry BIOS programmed later + */ + if (intel_thermal_supported(&boot_cpu_data)) + lvtthmr_init = apic_read(APIC_LVTTHMR); +} + void intel_init_thermal(struct cpuinfo_x86 *c) { unsigned int cpu = smp_processor_id(); @@ -630,10 +641,6 @@ void intel_init_thermal(struct cpuinfo_x86 *c) if (!intel_thermal_supported(c)) return; - /* On the BSP? */ - if (c == &boot_cpu_data) - lvtthmr_init = apic_read(APIC_LVTTHMR); - /* * First check if its enabled already, in which case there might * be some SMM goo which handles it, so we can't even put a handler -- cgit v1.2.3 From 59cc84c802eb923805e7bba425976a3df5ce35d8 Mon Sep 17 00:00:00 2001 From: Frederic Barrat Date: Wed, 26 May 2021 16:45:40 +0200 Subject: Revert "powerpc/kernel/iommu: Align size for IOMMU_PAGE_SIZE() to save TCEs" This reverts commit 3c0468d4451eb6b4f6604370639f163f9637a479. That commit was breaking alignment guarantees for the DMA address when allocating coherent mappings, as described in Documentation/core-api/dma-api-howto.rst It was also noticed by Mellanox' driver: [ 1515.763621] mlx5_core c002:01:00.0: mlx5_frag_buf_alloc_node:146:(pid 13402): unexpected map alignment: 0x0800000000c61000, page_shift=16 [ 1515.763635] mlx5_core c002:01:00.0: mlx5_cqwq_create:181:(pid 13402): mlx5_frag_buf_alloc_node() failed, -12 Fixes: 3c0468d4451e ("powerpc/kernel/iommu: Align size for IOMMU_PAGE_SIZE() to save TCEs") Signed-off-by: Frederic Barrat Reviewed-by: Alexey Kardashevskiy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210526144540.117795-1-fbarrat@linux.ibm.com --- arch/powerpc/kernel/iommu.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/kernel/iommu.c b/arch/powerpc/kernel/iommu.c index 57d6b85e9b96..2af89a5e379f 100644 --- a/arch/powerpc/kernel/iommu.c +++ b/arch/powerpc/kernel/iommu.c @@ -898,7 +898,6 @@ void *iommu_alloc_coherent(struct device *dev, struct iommu_table *tbl, unsigned int order; unsigned int nio_pages, io_order; struct page *page; - size_t size_io = size; size = PAGE_ALIGN(size); order = get_order(size); @@ -925,9 +924,8 @@ void *iommu_alloc_coherent(struct device *dev, struct iommu_table *tbl, memset(ret, 0, size); /* Set up tces to cover the allocated range */ - size_io = IOMMU_PAGE_ALIGN(size_io, tbl); - nio_pages = size_io >> tbl->it_page_shift; - io_order = get_iommu_order(size_io, tbl); + nio_pages = size >> tbl->it_page_shift; + io_order = get_iommu_order(size, tbl); mapping = iommu_alloc(dev, tbl, ret, nio_pages, DMA_BIDIRECTIONAL, mask >> tbl->it_page_shift, io_order, 0); if (mapping == DMA_MAPPING_ERROR) { @@ -942,9 +940,10 @@ void iommu_free_coherent(struct iommu_table *tbl, size_t size, void *vaddr, dma_addr_t dma_handle) { if (tbl) { - size_t size_io = IOMMU_PAGE_ALIGN(size, tbl); - unsigned int nio_pages = size_io >> tbl->it_page_shift; + unsigned int nio_pages; + size = PAGE_ALIGN(size); + nio_pages = size >> tbl->it_page_shift; iommu_free(tbl, dma_handle, nio_pages); size = PAGE_ALIGN(size); free_pages((unsigned long)vaddr, get_order(size)); -- cgit v1.2.3 From 848ff3768684701a4ce73a2ec0e5d438d4e2b0da Mon Sep 17 00:00:00 2001 From: Kan Liang Date: Tue, 1 Jun 2021 06:09:03 -0700 Subject: perf/x86/intel/uncore: Fix M2M event umask for Ice Lake server Perf tool errors out with the latest event list for the Ice Lake server. event syntax error: 'unc_m2m_imc_reads.to_pmm' \___ value too big for format, maximum is 255 The same as the Snow Ridge server, the M2M uncore unit in the Ice Lake server has the unit mask extension field as well. Fixes: 2b3b76b5ec67 ("perf/x86/intel/uncore: Add Ice Lake server uncore support") Reported-by: Jin Yao Signed-off-by: Kan Liang Signed-off-by: Peter Zijlstra (Intel) Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/1622552943-119174-1-git-send-email-kan.liang@linux.intel.com --- arch/x86/events/intel/uncore_snbep.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/events/intel/uncore_snbep.c b/arch/x86/events/intel/uncore_snbep.c index 1587d3289743..3a75a2c601c2 100644 --- a/arch/x86/events/intel/uncore_snbep.c +++ b/arch/x86/events/intel/uncore_snbep.c @@ -5099,9 +5099,10 @@ static struct intel_uncore_type icx_uncore_m2m = { .perf_ctr = SNR_M2M_PCI_PMON_CTR0, .event_ctl = SNR_M2M_PCI_PMON_CTL0, .event_mask = SNBEP_PMON_RAW_EVENT_MASK, + .event_mask_ext = SNR_M2M_PCI_PMON_UMASK_EXT, .box_ctl = SNR_M2M_PCI_PMON_BOX_CTL, .ops = &snr_m2m_uncore_pci_ops, - .format_group = &skx_uncore_format_group, + .format_group = &snr_m2m_uncore_format_group, }; static struct attribute *icx_upi_uncore_formats_attr[] = { -- cgit v1.2.3 From 8a4102a0cf07cc76a18f373f6b49485258cc6af4 Mon Sep 17 00:00:00 2001 From: Jisheng Zhang Date: Sun, 16 May 2021 17:00:38 +0800 Subject: riscv: mm: Fix W+X mappings at boot When the kernel mapping was moved the last 2GB of the address space, (__va(PFN_PHYS(max_low_pfn))) is much smaller than the .data section start address, the last set_memory_nx() in protect_kernel_text_data() will fail, thus the .data section is still mapped as W+X. This results in below W+X mapping waring at boot. Fix it by passing the correct .data section page num to the set_memory_nx(). [ 0.396516] ------------[ cut here ]------------ [ 0.396889] riscv/mm: Found insecure W+X mapping at address (____ptrval____)/0xffffffff80c00000 [ 0.398347] WARNING: CPU: 0 PID: 1 at arch/riscv/mm/ptdump.c:258 note_page+0x244/0x24a [ 0.398964] Modules linked in: [ 0.399459] CPU: 0 PID: 1 Comm: swapper/0 Not tainted 5.13.0-rc1+ #14 [ 0.400003] Hardware name: riscv-virtio,qemu (DT) [ 0.400591] epc : note_page+0x244/0x24a [ 0.401368] ra : note_page+0x244/0x24a [ 0.401772] epc : ffffffff80007c86 ra : ffffffff80007c86 sp : ffffffe000e7bc30 [ 0.402304] gp : ffffffff80caae88 tp : ffffffe000e70000 t0 : ffffffff80cb80cf [ 0.402800] t1 : ffffffff80cb80c0 t2 : 0000000000000000 s0 : ffffffe000e7bc80 [ 0.403310] s1 : ffffffe000e7bde8 a0 : 0000000000000053 a1 : ffffffff80c83ff0 [ 0.403805] a2 : 0000000000000010 a3 : 0000000000000000 a4 : 6c7e7a5137233100 [ 0.404298] a5 : 6c7e7a5137233100 a6 : 0000000000000030 a7 : ffffffffffffffff [ 0.404849] s2 : ffffffff80e00000 s3 : 0000000040000000 s4 : 0000000000000000 [ 0.405393] s5 : 0000000000000000 s6 : 0000000000000003 s7 : ffffffe000e7bd48 [ 0.405935] s8 : ffffffff81000000 s9 : ffffffffc0000000 s10: ffffffe000e7bd48 [ 0.406476] s11: 0000000000001000 t3 : 0000000000000072 t4 : ffffffffffffffff [ 0.407016] t5 : 0000000000000002 t6 : ffffffe000e7b978 [ 0.407435] status: 0000000000000120 badaddr: 0000000000000000 cause: 0000000000000003 [ 0.408052] Call Trace: [ 0.408343] [] note_page+0x244/0x24a [ 0.408855] [] ptdump_hole+0x14/0x1e [ 0.409263] [] walk_pgd_range+0x2a0/0x376 [ 0.409690] [] walk_page_range_novma+0x4e/0x6e [ 0.410146] [] ptdump_walk_pgd+0x48/0x78 [ 0.410570] [] ptdump_check_wx+0xb4/0xf8 [ 0.410990] [] mark_rodata_ro+0x26/0x2e [ 0.411407] [] kernel_init+0x44/0x108 [ 0.411814] [] ret_from_exception+0x0/0xc [ 0.412309] ---[ end trace 7ec3459f2547ea83 ]--- [ 0.413141] Checked W+X mappings: failed, 512 W+X pages found Fixes: 2bfc6cd81bd17e43 ("riscv: Move kernel mapping outside of linear mapping") Signed-off-by: Jisheng Zhang Signed-off-by: Palmer Dabbelt --- arch/riscv/mm/init.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c index 4faf8bd157ea..4c4c92ce0bb8 100644 --- a/arch/riscv/mm/init.c +++ b/arch/riscv/mm/init.c @@ -746,14 +746,18 @@ void __init protect_kernel_text_data(void) unsigned long init_data_start = (unsigned long)__init_data_begin; unsigned long rodata_start = (unsigned long)__start_rodata; unsigned long data_start = (unsigned long)_data; - unsigned long max_low = (unsigned long)(__va(PFN_PHYS(max_low_pfn))); +#if defined(CONFIG_64BIT) && defined(CONFIG_MMU) + unsigned long end_va = kernel_virt_addr + load_sz; +#else + unsigned long end_va = (unsigned long)(__va(PFN_PHYS(max_low_pfn))); +#endif set_memory_ro(text_start, (init_text_start - text_start) >> PAGE_SHIFT); set_memory_ro(init_text_start, (init_data_start - init_text_start) >> PAGE_SHIFT); set_memory_nx(init_data_start, (rodata_start - init_data_start) >> PAGE_SHIFT); /* rodata section is marked readonly in mark_rodata_ro */ set_memory_nx(rodata_start, (data_start - rodata_start) >> PAGE_SHIFT); - set_memory_nx(data_start, (max_low - data_start) >> PAGE_SHIFT); + set_memory_nx(data_start, (end_va - data_start) >> PAGE_SHIFT); } void mark_rodata_ro(void) -- cgit v1.2.3 From b75db25c416b9f0edae7cd86c4901c216a52e7a0 Mon Sep 17 00:00:00 2001 From: Vincent Date: Sat, 22 May 2021 07:40:15 +0800 Subject: riscv: skip errata_cip_453.o if CONFIG_ERRATA_SIFIVE_CIP_453 is disabled The errata_cip_453.o should be built only when the Kconfig CONFIG_ERRATA_SIFIVE_CIP_453 is enabled. Reported-by: kernel test robot Signed-off-by: Vincent Fixes: 0e0d4992517f ("riscv: enable SiFive errata CIP-453 and CIP-1200 Kconfig only if CONFIG_64BIT=y") Signed-off-by: Palmer Dabbelt --- arch/riscv/errata/sifive/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/riscv/errata/sifive/Makefile b/arch/riscv/errata/sifive/Makefile index bdd5fc843b8e..2fde48db0619 100644 --- a/arch/riscv/errata/sifive/Makefile +++ b/arch/riscv/errata/sifive/Makefile @@ -1,2 +1,2 @@ -obj-y += errata_cip_453.o +obj-$(CONFIG_ERRATA_SIFIVE_CIP_453) += errata_cip_453.o obj-y += errata.o -- cgit v1.2.3 From da2d48808fbd1eddefefe245c6c0e92a9195df8b Mon Sep 17 00:00:00 2001 From: Wende Tan Date: Sat, 22 May 2021 17:49:51 +0000 Subject: RISC-V: Fix memblock_free() usages in init_resources() `memblock_free()` takes a physical address as its first argument. Fix the wrong usages in `init_resources()`. Fixes: ffe0e526126884cf036a6f724220f1f9b4094fd2 ("RISC-V: Improve init_resources()") Fixes: 797f0375dd2ef5cdc68ac23450cbae9a5c67a74e ("RISC-V: Do not allocate memblock while iterating reserved memblocks") Signed-off-by: Wende Tan Signed-off-by: Palmer Dabbelt --- arch/riscv/kernel/setup.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/riscv/kernel/setup.c b/arch/riscv/kernel/setup.c index 03901d3a8b02..9a1b7a0603b2 100644 --- a/arch/riscv/kernel/setup.c +++ b/arch/riscv/kernel/setup.c @@ -231,13 +231,13 @@ static void __init init_resources(void) /* Clean-up any unused pre-allocated resources */ mem_res_sz = (num_resources - res_idx + 1) * sizeof(*mem_res); - memblock_free((phys_addr_t) mem_res, mem_res_sz); + memblock_free(__pa(mem_res), mem_res_sz); return; error: /* Better an empty resource tree than an inconsistent one */ release_child_resources(&iomem_resource); - memblock_free((phys_addr_t) mem_res, mem_res_sz); + memblock_free(__pa(mem_res), mem_res_sz); } -- cgit v1.2.3 From d94b93a9101573eb75b819dee94b1417acff631b Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 30 Dec 2020 16:54:56 +0100 Subject: ARM: cpuidle: Avoid orphan section warning Since commit 83109d5d5fba ("x86/build: Warn on orphan section placement"), we get a warning for objects in orphan sections. The cpuidle implementation for OMAP causes this when CONFIG_CPU_IDLE is disabled: arm-linux-gnueabi-ld: warning: orphan section `__cpuidle_method_of_table' from `arch/arm/mach-omap2/pm33xx-core.o' being placed in section `__cpuidle_method_of_table' arm-linux-gnueabi-ld: warning: orphan section `__cpuidle_method_of_table' from `arch/arm/mach-omap2/pm33xx-core.o' being placed in section `__cpuidle_method_of_table' arm-linux-gnueabi-ld: warning: orphan section `__cpuidle_method_of_table' from `arch/arm/mach-omap2/pm33xx-core.o' being placed in section `__cpuidle_method_of_table' Change the definition of CPUIDLE_METHOD_OF_DECLARE() to silently drop the table and all code referenced from it when CONFIG_CPU_IDLE is disabled. Fixes: 06ee7a950b6a ("ARM: OMAP2+: pm33xx-core: Add cpuidle_ops for am335x/am437x") Signed-off-by: Arnd Bergmann Reviewed-by: Miguel Ojeda Reviewed-by: Nick Desaulniers Signed-off-by: Kees Cook Link: https://lore.kernel.org/r/20201230155506.1085689-1-arnd@kernel.org --- arch/arm/include/asm/cpuidle.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/arm/include/asm/cpuidle.h b/arch/arm/include/asm/cpuidle.h index 0d67ed682e07..bc4ffa7ca04c 100644 --- a/arch/arm/include/asm/cpuidle.h +++ b/arch/arm/include/asm/cpuidle.h @@ -7,9 +7,11 @@ #ifdef CONFIG_CPU_IDLE extern int arm_cpuidle_simple_enter(struct cpuidle_device *dev, struct cpuidle_driver *drv, int index); +#define __cpuidle_method_section __used __section("__cpuidle_method_of_table") #else static inline int arm_cpuidle_simple_enter(struct cpuidle_device *dev, struct cpuidle_driver *drv, int index) { return -ENODEV; } +#define __cpuidle_method_section __maybe_unused /* drop silently */ #endif /* Common ARM WFI state */ @@ -42,8 +44,7 @@ struct of_cpuidle_method { #define CPUIDLE_METHOD_OF_DECLARE(name, _method, _ops) \ static const struct of_cpuidle_method __cpuidle_method_of_table_##name \ - __used __section("__cpuidle_method_of_table") \ - = { .method = _method, .ops = _ops } + __cpuidle_method_section = { .method = _method, .ops = _ops } extern int arm_cpuidle_suspend(int index); -- cgit v1.2.3 From 9bfecd05833918526cc7357d55e393393440c5fa Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sat, 29 May 2021 11:17:30 +0200 Subject: x86/cpufeatures: Force disable X86_FEATURE_ENQCMD and remove update_pasid() While digesting the XSAVE-related horrors which got introduced with the supervisor/user split, the recent addition of ENQCMD-related functionality got on the radar and turned out to be similarly broken. update_pasid(), which is only required when X86_FEATURE_ENQCMD is available, is invoked from two places: 1) From switch_to() for the incoming task 2) Via a SMP function call from the IOMMU/SMV code #1 is half-ways correct as it hacks around the brokenness of get_xsave_addr() by enforcing the state to be 'present', but all the conditionals in that code are completely pointless for that. Also the invocation is just useless overhead because at that point it's guaranteed that TIF_NEED_FPU_LOAD is set on the incoming task and all of this can be handled at return to user space. #2 is broken beyond repair. The comment in the code claims that it is safe to invoke this in an IPI, but that's just wishful thinking. FPU state of a running task is protected by fregs_lock() which is nothing else than a local_bh_disable(). As BH-disabled regions run usually with interrupts enabled the IPI can hit a code section which modifies FPU state and there is absolutely no guarantee that any of the assumptions which are made for the IPI case is true. Also the IPI is sent to all CPUs in mm_cpumask(mm), but the IPI is invoked with a NULL pointer argument, so it can hit a completely unrelated task and unconditionally force an update for nothing. Worse, it can hit a kernel thread which operates on a user space address space and set a random PASID for it. The offending commit does not cleanly revert, but it's sufficient to force disable X86_FEATURE_ENQCMD and to remove the broken update_pasid() code to make this dysfunctional all over the place. Anything more complex would require more surgery and none of the related functions outside of the x86 core code are blatantly wrong, so removing those would be overkill. As nothing enables the PASID bit in the IA32_XSS MSR yet, which is required to make this actually work, this cannot result in a regression except for related out of tree train-wrecks, but they are broken already today. Fixes: 20f0afd1fb3d ("x86/mmu: Allocate/free a PASID") Signed-off-by: Thomas Gleixner Signed-off-by: Borislav Petkov Acked-by: Andy Lutomirski Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/87mtsd6gr9.ffs@nanos.tec.linutronix.de --- arch/x86/include/asm/disabled-features.h | 7 ++-- arch/x86/include/asm/fpu/api.h | 6 +--- arch/x86/include/asm/fpu/internal.h | 7 ---- arch/x86/kernel/fpu/xstate.c | 57 -------------------------------- 4 files changed, 3 insertions(+), 74 deletions(-) (limited to 'arch') diff --git a/arch/x86/include/asm/disabled-features.h b/arch/x86/include/asm/disabled-features.h index b7dd944dc867..8f28fafa98b3 100644 --- a/arch/x86/include/asm/disabled-features.h +++ b/arch/x86/include/asm/disabled-features.h @@ -56,11 +56,8 @@ # define DISABLE_PTI (1 << (X86_FEATURE_PTI & 31)) #endif -#ifdef CONFIG_IOMMU_SUPPORT -# define DISABLE_ENQCMD 0 -#else -# define DISABLE_ENQCMD (1 << (X86_FEATURE_ENQCMD & 31)) -#endif +/* Force disable because it's broken beyond repair */ +#define DISABLE_ENQCMD (1 << (X86_FEATURE_ENQCMD & 31)) #ifdef CONFIG_X86_SGX # define DISABLE_SGX 0 diff --git a/arch/x86/include/asm/fpu/api.h b/arch/x86/include/asm/fpu/api.h index ed33a14188f6..23bef08a8388 100644 --- a/arch/x86/include/asm/fpu/api.h +++ b/arch/x86/include/asm/fpu/api.h @@ -106,10 +106,6 @@ extern int cpu_has_xfeatures(u64 xfeatures_mask, const char **feature_name); */ #define PASID_DISABLED 0 -#ifdef CONFIG_IOMMU_SUPPORT -/* Update current's PASID MSR/state by mm's PASID. */ -void update_pasid(void); -#else static inline void update_pasid(void) { } -#endif + #endif /* _ASM_X86_FPU_API_H */ diff --git a/arch/x86/include/asm/fpu/internal.h b/arch/x86/include/asm/fpu/internal.h index 8d33ad80704f..ceeba9f63172 100644 --- a/arch/x86/include/asm/fpu/internal.h +++ b/arch/x86/include/asm/fpu/internal.h @@ -584,13 +584,6 @@ static inline void switch_fpu_finish(struct fpu *new_fpu) pkru_val = pk->pkru; } __write_pkru(pkru_val); - - /* - * Expensive PASID MSR write will be avoided in update_pasid() because - * TIF_NEED_FPU_LOAD was set. And the PASID state won't be updated - * unless it's different from mm->pasid to reduce overhead. - */ - update_pasid(); } #endif /* _ASM_X86_FPU_INTERNAL_H */ diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c index a85c64000218..d0eef963aad1 100644 --- a/arch/x86/kernel/fpu/xstate.c +++ b/arch/x86/kernel/fpu/xstate.c @@ -1402,60 +1402,3 @@ int proc_pid_arch_status(struct seq_file *m, struct pid_namespace *ns, return 0; } #endif /* CONFIG_PROC_PID_ARCH_STATUS */ - -#ifdef CONFIG_IOMMU_SUPPORT -void update_pasid(void) -{ - u64 pasid_state; - u32 pasid; - - if (!cpu_feature_enabled(X86_FEATURE_ENQCMD)) - return; - - if (!current->mm) - return; - - pasid = READ_ONCE(current->mm->pasid); - /* Set the valid bit in the PASID MSR/state only for valid pasid. */ - pasid_state = pasid == PASID_DISABLED ? - pasid : pasid | MSR_IA32_PASID_VALID; - - /* - * No need to hold fregs_lock() since the task's fpstate won't - * be changed by others (e.g. ptrace) while the task is being - * switched to or is in IPI. - */ - if (!test_thread_flag(TIF_NEED_FPU_LOAD)) { - /* The MSR is active and can be directly updated. */ - wrmsrl(MSR_IA32_PASID, pasid_state); - } else { - struct fpu *fpu = ¤t->thread.fpu; - struct ia32_pasid_state *ppasid_state; - struct xregs_state *xsave; - - /* - * The CPU's xstate registers are not currently active. Just - * update the PASID state in the memory buffer here. The - * PASID MSR will be loaded when returning to user mode. - */ - xsave = &fpu->state.xsave; - xsave->header.xfeatures |= XFEATURE_MASK_PASID; - ppasid_state = get_xsave_addr(xsave, XFEATURE_PASID); - /* - * Since XFEATURE_MASK_PASID is set in xfeatures, ppasid_state - * won't be NULL and no need to check its value. - * - * Only update the task's PASID state when it's different - * from the mm's pasid. - */ - if (ppasid_state->pasid != pasid_state) { - /* - * Invalid fpregs so that state restoring will pick up - * the PASID state. - */ - __fpu_invalidate_fpregs_state(fpu); - ppasid_state->pasid = pasid_state; - } - } -} -#endif /* CONFIG_IOMMU_SUPPORT */ -- cgit v1.2.3 From 2b31e8ed96b260ce2c22bd62ecbb9458399e3b62 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Tue, 1 Jun 2021 17:51:22 +0200 Subject: x86/alternative: Optimize single-byte NOPs at an arbitrary position Up until now the assumption was that an alternative patching site would have some instructions at the beginning and trailing single-byte NOPs (0x90) padding. Therefore, the patching machinery would go and optimize those single-byte NOPs into longer ones. However, this assumption is broken on 32-bit when code like hv_do_hypercall() in hyperv_init() would use the ratpoline speculation killer CALL_NOSPEC. The 32-bit version of that macro would align certain insns to 16 bytes, leading to the compiler issuing a one or more single-byte NOPs, depending on the holes it needs to fill for alignment. That would lead to the warning in optimize_nops() to fire: ------------[ cut here ]------------ Not a NOP at 0xc27fb598 WARNING: CPU: 0 PID: 0 at arch/x86/kernel/alternative.c:211 optimize_nops.isra.13 due to that function verifying whether all of the following bytes really are single-byte NOPs. Therefore, carve out the NOP padding into a separate function and call it for each NOP range beginning with a single-byte NOP. Fixes: 23c1ad538f4f ("x86/alternatives: Optimize optimize_nops()") Reported-by: Richard Narron Signed-off-by: Borislav Petkov Acked-by: Peter Zijlstra (Intel) Link: https://bugzilla.kernel.org/show_bug.cgi?id=213301 Link: https://lkml.kernel.org/r/20210601212125.17145-1-bp@alien8.de --- arch/x86/kernel/alternative.c | 64 +++++++++++++++++++++++++++++++------------ 1 file changed, 46 insertions(+), 18 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c index 6974b5174495..6fe5b44fcbc9 100644 --- a/arch/x86/kernel/alternative.c +++ b/arch/x86/kernel/alternative.c @@ -182,42 +182,70 @@ done: n_dspl, (unsigned long)orig_insn + n_dspl + repl_len); } +/* + * optimize_nops_range() - Optimize a sequence of single byte NOPs (0x90) + * + * @instr: instruction byte stream + * @instrlen: length of the above + * @off: offset within @instr where the first NOP has been detected + * + * Return: number of NOPs found (and replaced). + */ +static __always_inline int optimize_nops_range(u8 *instr, u8 instrlen, int off) +{ + unsigned long flags; + int i = off, nnops; + + while (i < instrlen) { + if (instr[i] != 0x90) + break; + + i++; + } + + nnops = i - off; + + if (nnops <= 1) + return nnops; + + local_irq_save(flags); + add_nops(instr + off, nnops); + local_irq_restore(flags); + + DUMP_BYTES(instr, instrlen, "%px: [%d:%d) optimized NOPs: ", instr, off, i); + + return nnops; +} + /* * "noinline" to cause control flow change and thus invalidate I$ and * cause refetch after modification. */ static void __init_or_module noinline optimize_nops(struct alt_instr *a, u8 *instr) { - unsigned long flags; struct insn insn; - int nop, i = 0; + int i = 0; /* - * Jump over the non-NOP insns, the remaining bytes must be single-byte - * NOPs, optimize them. + * Jump over the non-NOP insns and optimize single-byte NOPs into bigger + * ones. */ for (;;) { if (insn_decode_kernel(&insn, &instr[i])) return; + /* + * See if this and any potentially following NOPs can be + * optimized. + */ if (insn.length == 1 && insn.opcode.bytes[0] == 0x90) - break; - - if ((i += insn.length) >= a->instrlen) - return; - } + i += optimize_nops_range(instr, a->instrlen, i); + else + i += insn.length; - for (nop = i; i < a->instrlen; i++) { - if (WARN_ONCE(instr[i] != 0x90, "Not a NOP at 0x%px\n", &instr[i])) + if (i >= a->instrlen) return; } - - local_irq_save(flags); - add_nops(instr + nop, i - nop); - local_irq_restore(flags); - - DUMP_BYTES(instr, a->instrlen, "%px: [%d:%d) optimized NOPs: ", - instr, nop, a->instrlen); } /* -- cgit v1.2.3 From f1d4d47c5851b348b7713007e152bc68b94d728b Mon Sep 17 00:00:00 2001 From: Mike Rapoport Date: Tue, 1 Jun 2021 10:53:52 +0300 Subject: x86/setup: Always reserve the first 1M of RAM There are BIOSes that are known to corrupt the memory under 1M, or more precisely under 640K because the memory above 640K is anyway reserved for the EGA/VGA frame buffer and BIOS. To prevent usage of the memory that will be potentially clobbered by the kernel, the beginning of the memory is always reserved. The exact size of the reserved area is determined by CONFIG_X86_RESERVE_LOW build time and the "reservelow=" command line option. The reserved range may be from 4K to 640K with the default of 64K. There are also configurations that reserve the entire 1M range, like machines with SandyBridge graphic devices or systems that enable crash kernel. In addition to the potentially clobbered memory, EBDA of unknown size may be as low as 128K and the memory above that EBDA start is also reserved early. It would have been possible to reserve the entire range under 1M unless for the real mode trampoline that must reside in that area. To accommodate placement of the real mode trampoline and keep the memory safe from being clobbered by BIOS, reserve the first 64K of RAM before memory allocations are possible and then, after the real mode trampoline is allocated, reserve the entire range from 0 to 1M. Update trim_snb_memory() and reserve_real_mode() to avoid redundant reservations of the same memory range. Also make sure the memory under 1M is not getting freed by efi_free_boot_services(). [ bp: Massage commit message and comments. ] Fixes: a799c2bd29d1 ("x86/setup: Consolidate early memory reservations") Signed-off-by: Mike Rapoport Signed-off-by: Borislav Petkov Tested-by: Hugh Dickins Link: https://bugzilla.kernel.org/show_bug.cgi?id=213177 Link: https://lkml.kernel.org/r/20210601075354.5149-2-rppt@kernel.org --- arch/x86/kernel/setup.c | 35 +++++++++++++++++++++-------------- arch/x86/platform/efi/quirks.c | 12 ++++++++++++ arch/x86/realmode/init.c | 14 ++++++++------ 3 files changed, 41 insertions(+), 20 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index ff653d608d5f..1e720626069a 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -638,11 +638,11 @@ static void __init trim_snb_memory(void) * them from accessing certain memory ranges, namely anything below * 1M and in the pages listed in bad_pages[] above. * - * To avoid these pages being ever accessed by SNB gfx devices - * reserve all memory below the 1 MB mark and bad_pages that have - * not already been reserved at boot time. + * To avoid these pages being ever accessed by SNB gfx devices reserve + * bad_pages that have not already been reserved at boot time. + * All memory below the 1 MB mark is anyway reserved later during + * setup_arch(), so there is no need to reserve it here. */ - memblock_reserve(0, 1<<20); for (i = 0; i < ARRAY_SIZE(bad_pages); i++) { if (memblock_reserve(bad_pages[i], PAGE_SIZE)) @@ -734,14 +734,14 @@ static void __init early_reserve_memory(void) * The first 4Kb of memory is a BIOS owned area, but generally it is * not listed as such in the E820 table. * - * Reserve the first memory page and typically some additional - * memory (64KiB by default) since some BIOSes are known to corrupt - * low memory. See the Kconfig help text for X86_RESERVE_LOW. + * Reserve the first 64K of memory since some BIOSes are known to + * corrupt low memory. After the real mode trampoline is allocated the + * rest of the memory below 640k is reserved. * * In addition, make sure page 0 is always reserved because on * systems with L1TF its contents can be leaked to user processes. */ - memblock_reserve(0, ALIGN(reserve_low, PAGE_SIZE)); + memblock_reserve(0, SZ_64K); early_reserve_initrd(); @@ -752,6 +752,7 @@ static void __init early_reserve_memory(void) reserve_ibft_region(); reserve_bios_regions(); + trim_snb_memory(); } /* @@ -1082,14 +1083,20 @@ void __init setup_arch(char **cmdline_p) (max_pfn_mapped< Date: Tue, 1 Jun 2021 16:52:03 +0800 Subject: x86/fault: Don't send SIGSEGV twice on SEGV_PKUERR __bad_area_nosemaphore() calls both force_sig_pkuerr() and force_sig_fault() when handling SEGV_PKUERR. This does not cause problems because the second signal is filtered by the legacy_queue() check in __send_signal() because in both cases, the signal is SIGSEGV, the second one seeing that the first one is already pending. This causes the kernel to do unnecessary work so send the signal only once for SEGV_PKUERR. [ bp: Massage commit message. ] Fixes: 9db812dbb29d ("signal/x86: Call force_sig_pkuerr from __bad_area_nosemaphore") Suggested-by: "Eric W. Biederman" Signed-off-by: Jiashuo Liang Signed-off-by: Borislav Petkov Acked-by: "Eric W. Biederman" Link: https://lkml.kernel.org/r/20210601085203.40214-1-liangjs@pku.edu.cn --- arch/x86/mm/fault.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index 1c548ad00752..6bda7f67d737 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c @@ -836,8 +836,8 @@ __bad_area_nosemaphore(struct pt_regs *regs, unsigned long error_code, if (si_code == SEGV_PKUERR) force_sig_pkuerr((void __user *)address, pkey); - - force_sig_fault(SIGSEGV, si_code, (void __user *)address); + else + force_sig_fault(SIGSEGV, si_code, (void __user *)address); local_irq_disable(); } -- cgit v1.2.3 From 009767dbf42ac0dbe3cf48c1ee224f6b778aa85a Mon Sep 17 00:00:00 2001 From: Pu Wen Date: Wed, 2 Jun 2021 15:02:07 +0800 Subject: x86/sev: Check SME/SEV support in CPUID first The first two bits of the CPUID leaf 0x8000001F EAX indicate whether SEV or SME is supported, respectively. It's better to check whether SEV or SME is actually supported before accessing the MSR_AMD64_SEV to check whether SEV or SME is enabled. This is both a bare-metal issue and a guest/VM issue. Since the first generation Hygon Dhyana CPU doesn't support the MSR_AMD64_SEV, reading that MSR results in a #GP - either directly from hardware in the bare-metal case or via the hypervisor (because the RDMSR is actually intercepted) in the guest/VM case, resulting in a failed boot. And since this is very early in the boot phase, rdmsrl_safe()/native_read_msr_safe() can't be used. So check the CPUID bits first, before accessing the MSR. [ tlendacky: Expand and improve commit message. ] [ bp: Massage commit message. ] Fixes: eab696d8e8b9 ("x86/sev: Do not require Hypervisor CPUID bit for SEV guests") Signed-off-by: Pu Wen Signed-off-by: Borislav Petkov Acked-by: Tom Lendacky Cc: # v5.10+ Link: https://lkml.kernel.org/r/20210602070207.2480-1-puwen@hygon.cn --- arch/x86/mm/mem_encrypt_identity.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) (limited to 'arch') diff --git a/arch/x86/mm/mem_encrypt_identity.c b/arch/x86/mm/mem_encrypt_identity.c index a9639f663d25..470b20208430 100644 --- a/arch/x86/mm/mem_encrypt_identity.c +++ b/arch/x86/mm/mem_encrypt_identity.c @@ -504,10 +504,6 @@ void __init sme_enable(struct boot_params *bp) #define AMD_SME_BIT BIT(0) #define AMD_SEV_BIT BIT(1) - /* Check the SEV MSR whether SEV or SME is enabled */ - sev_status = __rdmsr(MSR_AMD64_SEV); - feature_mask = (sev_status & MSR_AMD64_SEV_ENABLED) ? AMD_SEV_BIT : AMD_SME_BIT; - /* * Check for the SME/SEV feature: * CPUID Fn8000_001F[EAX] @@ -519,11 +515,16 @@ void __init sme_enable(struct boot_params *bp) eax = 0x8000001f; ecx = 0; native_cpuid(&eax, &ebx, &ecx, &edx); - if (!(eax & feature_mask)) + /* Check whether SEV or SME is supported */ + if (!(eax & (AMD_SEV_BIT | AMD_SME_BIT))) return; me_mask = 1UL << (ebx & 0x3f); + /* Check the SEV MSR whether SEV or SME is enabled */ + sev_status = __rdmsr(MSR_AMD64_SEV); + feature_mask = (sev_status & MSR_AMD64_SEV_ENABLED) ? AMD_SEV_BIT : AMD_SME_BIT; + /* Check if memory encryption is enabled */ if (feature_mask == AMD_SME_BIT) { /* -- cgit v1.2.3 From 50c25ee97cf6ab011542167ab590c17012cea4ed Mon Sep 17 00:00:00 2001 From: Thomas Bogendoerfer Date: Fri, 4 Jun 2021 20:01:08 -0700 Subject: Revert "MIPS: make userspace mapping young by default" This reverts commit f685a533a7fab35c5d069dcd663f59c8e4171a75. The MIPS cache flush logic needs to know whether the mapping was already established to decide how to flush caches. This is done by checking the valid bit in the PTE. The commit above breaks this logic by setting the valid in the PTE in new mappings, which causes kernel crashes. Link: https://lkml.kernel.org/r/20210526094335.92948-1-tsbogend@alpha.franken.de Fixes: f685a533a7f ("MIPS: make userspace mapping young by default") Reported-by: Zhou Yanjie Signed-off-by: Thomas Bogendoerfer Cc: Huang Pei Cc: Nicholas Piggin Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/mips/mm/cache.c | 30 ++++++++++++++---------------- include/linux/pgtable.h | 8 ++++++++ mm/memory.c | 4 ++++ 3 files changed, 26 insertions(+), 16 deletions(-) (limited to 'arch') diff --git a/arch/mips/mm/cache.c b/arch/mips/mm/cache.c index a7bf0c80371c..830ab91e574f 100644 --- a/arch/mips/mm/cache.c +++ b/arch/mips/mm/cache.c @@ -158,31 +158,29 @@ unsigned long _page_cachable_default; EXPORT_SYMBOL(_page_cachable_default); #define PM(p) __pgprot(_page_cachable_default | (p)) -#define PVA(p) PM(_PAGE_VALID | _PAGE_ACCESSED | (p)) static inline void setup_protection_map(void) { protection_map[0] = PM(_PAGE_PRESENT | _PAGE_NO_EXEC | _PAGE_NO_READ); - protection_map[1] = PVA(_PAGE_PRESENT | _PAGE_NO_EXEC); - protection_map[2] = PVA(_PAGE_PRESENT | _PAGE_NO_EXEC | _PAGE_NO_READ); - protection_map[3] = PVA(_PAGE_PRESENT | _PAGE_NO_EXEC); - protection_map[4] = PVA(_PAGE_PRESENT); - protection_map[5] = PVA(_PAGE_PRESENT); - protection_map[6] = PVA(_PAGE_PRESENT); - protection_map[7] = PVA(_PAGE_PRESENT); + protection_map[1] = PM(_PAGE_PRESENT | _PAGE_NO_EXEC); + protection_map[2] = PM(_PAGE_PRESENT | _PAGE_NO_EXEC | _PAGE_NO_READ); + protection_map[3] = PM(_PAGE_PRESENT | _PAGE_NO_EXEC); + protection_map[4] = PM(_PAGE_PRESENT); + protection_map[5] = PM(_PAGE_PRESENT); + protection_map[6] = PM(_PAGE_PRESENT); + protection_map[7] = PM(_PAGE_PRESENT); protection_map[8] = PM(_PAGE_PRESENT | _PAGE_NO_EXEC | _PAGE_NO_READ); - protection_map[9] = PVA(_PAGE_PRESENT | _PAGE_NO_EXEC); - protection_map[10] = PVA(_PAGE_PRESENT | _PAGE_NO_EXEC | _PAGE_WRITE | + protection_map[9] = PM(_PAGE_PRESENT | _PAGE_NO_EXEC); + protection_map[10] = PM(_PAGE_PRESENT | _PAGE_NO_EXEC | _PAGE_WRITE | _PAGE_NO_READ); - protection_map[11] = PVA(_PAGE_PRESENT | _PAGE_NO_EXEC | _PAGE_WRITE); - protection_map[12] = PVA(_PAGE_PRESENT); - protection_map[13] = PVA(_PAGE_PRESENT); - protection_map[14] = PVA(_PAGE_PRESENT); - protection_map[15] = PVA(_PAGE_PRESENT); + protection_map[11] = PM(_PAGE_PRESENT | _PAGE_NO_EXEC | _PAGE_WRITE); + protection_map[12] = PM(_PAGE_PRESENT); + protection_map[13] = PM(_PAGE_PRESENT); + protection_map[14] = PM(_PAGE_PRESENT | _PAGE_WRITE); + protection_map[15] = PM(_PAGE_PRESENT | _PAGE_WRITE); } -#undef _PVA #undef PM void cpu_cache_init(void) diff --git a/include/linux/pgtable.h b/include/linux/pgtable.h index 46b13780c2c8..a43047b1030d 100644 --- a/include/linux/pgtable.h +++ b/include/linux/pgtable.h @@ -432,6 +432,14 @@ static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long addres * To be differentiate with macro pte_mkyoung, this macro is used on platforms * where software maintains page access bit. */ +#ifndef pte_sw_mkyoung +static inline pte_t pte_sw_mkyoung(pte_t pte) +{ + return pte; +} +#define pte_sw_mkyoung pte_sw_mkyoung +#endif + #ifndef pte_savedwrite #define pte_savedwrite pte_write #endif diff --git a/mm/memory.c b/mm/memory.c index 730daa00952b..f3ffab9b9e39 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -2939,6 +2939,7 @@ static vm_fault_t wp_page_copy(struct vm_fault *vmf) } flush_cache_page(vma, vmf->address, pte_pfn(vmf->orig_pte)); entry = mk_pte(new_page, vma->vm_page_prot); + entry = pte_sw_mkyoung(entry); entry = maybe_mkwrite(pte_mkdirty(entry), vma); /* @@ -3602,6 +3603,7 @@ static vm_fault_t do_anonymous_page(struct vm_fault *vmf) __SetPageUptodate(page); entry = mk_pte(page, vma->vm_page_prot); + entry = pte_sw_mkyoung(entry); if (vma->vm_flags & VM_WRITE) entry = pte_mkwrite(pte_mkdirty(entry)); @@ -3786,6 +3788,8 @@ void do_set_pte(struct vm_fault *vmf, struct page *page, unsigned long addr) if (prefault && arch_wants_old_prefaulted_pte()) entry = pte_mkold(entry); + else + entry = pte_sw_mkyoung(entry); if (write) entry = maybe_mkwrite(pte_mkdirty(entry), vma); -- cgit v1.2.3 From 8e11d62e2e8769fe29d1ae98b44b23c7233eb8a2 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Sat, 5 Jun 2021 08:56:09 +0000 Subject: powerpc/mem: Add back missing header to fix 'no previous prototype' error Commit b26e8f27253a ("powerpc/mem: Move cache flushing functions into mm/cacheflush.c") removed asm/sparsemem.h which is required when CONFIG_MEMORY_HOTPLUG is selected to get the declaration of create_section_mapping(). Add it back. Fixes: b26e8f27253a ("powerpc/mem: Move cache flushing functions into mm/cacheflush.c") Reported-by: kernel test robot Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/3e5b63bb3daab54a1eb9c20221c2e9528c4db9b3.1622883330.git.christophe.leroy@csgroup.eu --- arch/powerpc/mm/mem.c | 1 + 1 file changed, 1 insertion(+) (limited to 'arch') diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c index 043bbeaf407c..a6b36a40897a 100644 --- a/arch/powerpc/mm/mem.c +++ b/arch/powerpc/mm/mem.c @@ -20,6 +20,7 @@ #include #include #include +#include #include #include -- cgit v1.2.3 From 5bcbe3285fb614c49db6b238253f7daff7e66312 Mon Sep 17 00:00:00 2001 From: Alexander Gordeev Date: Mon, 17 May 2021 08:18:11 +0200 Subject: s390/mcck: fix calculation of SIE critical section size The size of SIE critical section is calculated wrongly as result of a missed subtraction in commit 0b0ed657fe00 ("s390: remove critical section cleanup from entry.S") Fixes: 0b0ed657fe00 ("s390: remove critical section cleanup from entry.S") Cc: Signed-off-by: Alexander Gordeev Reviewed-by: Christian Borntraeger Signed-off-by: Heiko Carstens Signed-off-by: Vasily Gorbik --- arch/s390/kernel/entry.S | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S index 12de7a9c85b3..3a81e38c95e7 100644 --- a/arch/s390/kernel/entry.S +++ b/arch/s390/kernel/entry.S @@ -651,7 +651,7 @@ ENDPROC(stack_overflow) .Lcleanup_sie_mcck: larl %r13,.Lsie_entry slgr %r9,%r13 - larl %r13,.Lsie_skip + lghi %r13,.Lsie_skip - .Lsie_entry clgr %r9,%r13 jh .Lcleanup_sie_int oi __LC_CPU_FLAGS+7, _CIF_MCCK_GUEST -- cgit v1.2.3 From 1874cb13d5d7cafa61ce93a760093ebc5485b6ab Mon Sep 17 00:00:00 2001 From: Alexander Gordeev Date: Mon, 17 May 2021 08:18:12 +0200 Subject: s390/mcck: fix invalid KVM guest condition check Wrong condition check is used to decide if a machine check hit while in KVM guest. As result of this check the instruction following the SIE critical section might be considered as still in KVM guest and _CIF_MCCK_GUEST CPU flag mistakenly set as result. Fixes: c929500d7a5a ("s390/nmi: s390: New low level handling for machine check happening in guest") Cc: Signed-off-by: Alexander Gordeev Reviewed-by: Christian Borntraeger Signed-off-by: Heiko Carstens Signed-off-by: Vasily Gorbik --- arch/s390/kernel/entry.S | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S index 3a81e38c95e7..9cc71ca9a88f 100644 --- a/arch/s390/kernel/entry.S +++ b/arch/s390/kernel/entry.S @@ -653,7 +653,7 @@ ENDPROC(stack_overflow) slgr %r9,%r13 lghi %r13,.Lsie_skip - .Lsie_entry clgr %r9,%r13 - jh .Lcleanup_sie_int + jhe .Lcleanup_sie_int oi __LC_CPU_FLAGS+7, _CIF_MCCK_GUEST .Lcleanup_sie_int: BPENTER __SF_SIE_FLAGS(%r15),(_TIF_ISOLATE_BP|_TIF_ISOLATE_BP_GUEST) -- cgit v1.2.3 From 8d651ee9c71bb12fc0c8eb2786b66cbe5aa3e43b Mon Sep 17 00:00:00 2001 From: Tom Lendacky Date: Tue, 8 Jun 2021 11:54:33 +0200 Subject: x86/ioremap: Map EFI-reserved memory as encrypted for SEV Some drivers require memory that is marked as EFI boot services data. In order for this memory to not be re-used by the kernel after ExitBootServices(), efi_mem_reserve() is used to preserve it by inserting a new EFI memory descriptor and marking it with the EFI_MEMORY_RUNTIME attribute. Under SEV, memory marked with the EFI_MEMORY_RUNTIME attribute needs to be mapped encrypted by Linux, otherwise the kernel might crash at boot like below: EFI Variables Facility v0.08 2004-May-17 general protection fault, probably for non-canonical address 0x3597688770a868b2: 0000 [#1] SMP NOPTI CPU: 13 PID: 1 Comm: swapper/0 Not tainted 5.12.4-2-default #1 openSUSE Tumbleweed Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 0.0.0 02/06/2015 RIP: 0010:efi_mokvar_entry_next [...] Call Trace: efi_mokvar_sysfs_init ? efi_mokvar_table_init do_one_initcall ? __kmalloc kernel_init_freeable ? rest_init kernel_init ret_from_fork Expand the __ioremap_check_other() function to additionally check for this other type of boot data reserved at runtime and indicate that it should be mapped encrypted for an SEV guest. [ bp: Massage commit message. ] Fixes: 58c909022a5a ("efi: Support for MOK variable config table") Reported-by: Joerg Roedel Signed-off-by: Tom Lendacky Signed-off-by: Joerg Roedel Signed-off-by: Borislav Petkov Tested-by: Joerg Roedel Cc: # 5.10+ Link: https://lkml.kernel.org/r/20210608095439.12668-2-joro@8bytes.org --- arch/x86/mm/ioremap.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c index 12c686c65ea9..60ade7dd71bd 100644 --- a/arch/x86/mm/ioremap.c +++ b/arch/x86/mm/ioremap.c @@ -118,7 +118,9 @@ static void __ioremap_check_other(resource_size_t addr, struct ioremap_desc *des if (!IS_ENABLED(CONFIG_EFI)) return; - if (efi_mem_type(addr) == EFI_RUNTIME_SERVICES_DATA) + if (efi_mem_type(addr) == EFI_RUNTIME_SERVICES_DATA || + (efi_mem_type(addr) == EFI_BOOT_SERVICES_DATA && + efi_mem_attributes(addr) & EFI_MEMORY_RUNTIME)) desc->flags |= IORES_MAP_ENCRYPTED; } -- cgit v1.2.3 From 484cea4f362e1eeb5c869abbfb5f90eae6421b38 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 8 Jun 2021 16:36:18 +0200 Subject: x86/fpu: Prevent state corruption in __fpu__restore_sig() The non-compacted slowpath uses __copy_from_user() and copies the entire user buffer into the kernel buffer, verbatim. This means that the kernel buffer may now contain entirely invalid state on which XRSTOR will #GP. validate_user_xstate_header() can detect some of that corruption, but that leaves the onus on callers to clear the buffer. Prior to XSAVES support, it was possible just to reinitialize the buffer, completely, but with supervisor states that is not longer possible as the buffer clearing code split got it backwards. Fixing that is possible but not corrupting the state in the first place is more robust. Avoid corruption of the kernel XSAVE buffer by using copy_user_to_xstate() which validates the XSAVE header contents before copying the actual states to the kernel. copy_user_to_xstate() was previously only called for compacted-format kernel buffers, but it works for both compacted and non-compacted forms. Using it for the non-compacted form is slower because of multiple __copy_from_user() operations, but that cost is less important than robust code in an already slow path. [ Changelog polished by Dave Hansen ] Fixes: b860eb8dce59 ("x86/fpu/xstate: Define new functions for clearing fpregs and xstates") Reported-by: syzbot+2067e764dbcd10721e2e@syzkaller.appspotmail.com Signed-off-by: Thomas Gleixner Signed-off-by: Borislav Petkov Reviewed-by: Borislav Petkov Acked-by: Dave Hansen Acked-by: Rik van Riel Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/20210608144345.611833074@linutronix.de --- arch/x86/kernel/fpu/signal.c | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/fpu/signal.c b/arch/x86/kernel/fpu/signal.c index a4ec65317a7f..d5bc96a536c2 100644 --- a/arch/x86/kernel/fpu/signal.c +++ b/arch/x86/kernel/fpu/signal.c @@ -405,14 +405,7 @@ static int __fpu__restore_sig(void __user *buf, void __user *buf_fx, int size) if (use_xsave() && !fx_only) { u64 init_bv = xfeatures_mask_user() & ~user_xfeatures; - if (using_compacted_format()) { - ret = copy_user_to_xstate(&fpu->state.xsave, buf_fx); - } else { - ret = __copy_from_user(&fpu->state.xsave, buf_fx, state_size); - - if (!ret && state_size > offsetof(struct xregs_state, header)) - ret = validate_user_xstate_header(&fpu->state.xsave.header); - } + ret = copy_user_to_xstate(&fpu->state.xsave, buf_fx); if (ret) goto err_out; -- cgit v1.2.3 From d8778e393afa421f1f117471144f8ce6deb6953a Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Tue, 8 Jun 2021 16:36:19 +0200 Subject: x86/fpu: Invalidate FPU state after a failed XRSTOR from a user buffer Both Intel and AMD consider it to be architecturally valid for XRSTOR to fail with #PF but nonetheless change the register state. The actual conditions under which this might occur are unclear [1], but it seems plausible that this might be triggered if one sibling thread unmaps a page and invalidates the shared TLB while another sibling thread is executing XRSTOR on the page in question. __fpu__restore_sig() can execute XRSTOR while the hardware registers are preserved on behalf of a different victim task (using the fpu_fpregs_owner_ctx mechanism), and, in theory, XRSTOR could fail but modify the registers. If this happens, then there is a window in which __fpu__restore_sig() could schedule out and the victim task could schedule back in without reloading its own FPU registers. This would result in part of the FPU state that __fpu__restore_sig() was attempting to load leaking into the victim task's user-visible state. Invalidate preserved FPU registers on XRSTOR failure to prevent this situation from corrupting any state. [1] Frequent readers of the errata lists might imagine "complex microarchitectural conditions". Fixes: 1d731e731c4c ("x86/fpu: Add a fastpath to __fpu__restore_sig()") Signed-off-by: Andy Lutomirski Signed-off-by: Thomas Gleixner Signed-off-by: Borislav Petkov Acked-by: Dave Hansen Acked-by: Rik van Riel Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/20210608144345.758116583@linutronix.de --- arch/x86/kernel/fpu/signal.c | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) (limited to 'arch') diff --git a/arch/x86/kernel/fpu/signal.c b/arch/x86/kernel/fpu/signal.c index d5bc96a536c2..4ab9aeb9a963 100644 --- a/arch/x86/kernel/fpu/signal.c +++ b/arch/x86/kernel/fpu/signal.c @@ -369,6 +369,25 @@ static int __fpu__restore_sig(void __user *buf, void __user *buf_fx, int size) fpregs_unlock(); return 0; } + + /* + * The above did an FPU restore operation, restricted to + * the user portion of the registers, and failed, but the + * microcode might have modified the FPU registers + * nevertheless. + * + * If the FPU registers do not belong to current, then + * invalidate the FPU register state otherwise the task might + * preempt current and return to user space with corrupted + * FPU registers. + * + * In case current owns the FPU registers then no further + * action is required. The fixup below will handle it + * correctly. + */ + if (test_thread_flag(TIF_NEED_FPU_LOAD)) + __cpu_invalidate_fpregs_state(); + fpregs_unlock(); } else { /* -- cgit v1.2.3 From 12f7764ac61200e32c916f038bdc08f884b0b604 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 8 Jun 2021 16:36:20 +0200 Subject: x86/process: Check PF_KTHREAD and not current->mm for kernel threads switch_fpu_finish() checks current->mm as indicator for kernel threads. That's wrong because kernel threads can temporarily use a mm of a user process via kthread_use_mm(). Check the task flags for PF_KTHREAD instead. Fixes: 0cecca9d03c9 ("x86/fpu: Eager switch PKRU state") Signed-off-by: Thomas Gleixner Signed-off-by: Borislav Petkov Acked-by: Dave Hansen Acked-by: Rik van Riel Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/20210608144345.912645927@linutronix.de --- arch/x86/include/asm/fpu/internal.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/include/asm/fpu/internal.h b/arch/x86/include/asm/fpu/internal.h index ceeba9f63172..18382ac1ecc4 100644 --- a/arch/x86/include/asm/fpu/internal.h +++ b/arch/x86/include/asm/fpu/internal.h @@ -578,7 +578,7 @@ static inline void switch_fpu_finish(struct fpu *new_fpu) * PKRU state is switched eagerly because it needs to be valid before we * return to userland e.g. for a copy_to_user() operation. */ - if (current->mm) { + if (!(current->flags & PF_KTHREAD)) { pk = get_xsave_addr(&new_fpu->state.xsave, XFEATURE_PKRU); if (pk) pkru_val = pk->pkru; -- cgit v1.2.3 From 510b80a6a0f1a0d114c6e33bcea64747d127973c Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 8 Jun 2021 16:36:21 +0200 Subject: x86/pkru: Write hardware init value to PKRU when xstate is init When user space brings PKRU into init state, then the kernel handling is broken: T1 user space xsave(state) state.header.xfeatures &= ~XFEATURE_MASK_PKRU; xrstor(state) T1 -> kernel schedule() XSAVE(S) -> T1->xsave.header.xfeatures[PKRU] == 0 T1->flags |= TIF_NEED_FPU_LOAD; wrpkru(); schedule() ... pk = get_xsave_addr(&T1->fpu->state.xsave, XFEATURE_PKRU); if (pk) wrpkru(pk->pkru); else wrpkru(DEFAULT_PKRU); Because the xfeatures bit is 0 and therefore the value in the xsave storage is not valid, get_xsave_addr() returns NULL and switch_to() writes the default PKRU. -> FAIL #1! So that wrecks any copy_to/from_user() on the way back to user space which hits memory which is protected by the default PKRU value. Assumed that this does not fail (pure luck) then T1 goes back to user space and because TIF_NEED_FPU_LOAD is set it ends up in switch_fpu_return() __fpregs_load_activate() if (!fpregs_state_valid()) { load_XSTATE_from_task(); } But if nothing touched the FPU between T1 scheduling out and back in, then the fpregs_state is still valid which means switch_fpu_return() does nothing and just clears TIF_NEED_FPU_LOAD. Back to user space with DEFAULT_PKRU loaded. -> FAIL #2! The fix is simple: if get_xsave_addr() returns NULL then set the PKRU value to 0 instead of the restrictive default PKRU value in init_pkru_value. [ bp: Massage in minor nitpicks from folks. ] Fixes: 0cecca9d03c9 ("x86/fpu: Eager switch PKRU state") Signed-off-by: Thomas Gleixner Signed-off-by: Borislav Petkov Acked-by: Dave Hansen Acked-by: Rik van Riel Tested-by: Babu Moger Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/20210608144346.045616965@linutronix.de --- arch/x86/include/asm/fpu/internal.h | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/x86/include/asm/fpu/internal.h b/arch/x86/include/asm/fpu/internal.h index 18382ac1ecc4..fdee23ea4e17 100644 --- a/arch/x86/include/asm/fpu/internal.h +++ b/arch/x86/include/asm/fpu/internal.h @@ -579,9 +579,16 @@ static inline void switch_fpu_finish(struct fpu *new_fpu) * return to userland e.g. for a copy_to_user() operation. */ if (!(current->flags & PF_KTHREAD)) { + /* + * If the PKRU bit in xsave.header.xfeatures is not set, + * then the PKRU component was in init state, which means + * XRSTOR will set PKRU to 0. If the bit is not set then + * get_xsave_addr() will return NULL because the PKRU value + * in memory is not valid. This means pkru_val has to be + * set to 0 and not to init_pkru_value. + */ pk = get_xsave_addr(&new_fpu->state.xsave, XFEATURE_PKRU); - if (pk) - pkru_val = pk->pkru; + pkru_val = pk ? pk->pkru : 0; } __write_pkru(pkru_val); } -- cgit v1.2.3 From efa165504943f2128d50f63de0c02faf6dcceb0d Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 9 Jun 2021 21:18:00 +0200 Subject: x86/fpu: Reset state for all signal restore failures If access_ok() or fpregs_soft_set() fails in __fpu__restore_sig() then the function just returns but does not clear the FPU state as it does for all other fatal failures. Clear the FPU state for these failures as well. Fixes: 72a671ced66d ("x86, fpu: Unify signal handling code paths for x86 and x86_64 kernels") Signed-off-by: Thomas Gleixner Signed-off-by: Borislav Petkov Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/87mtryyhhz.ffs@nanos.tec.linutronix.de --- arch/x86/kernel/fpu/signal.c | 26 +++++++++++++++----------- 1 file changed, 15 insertions(+), 11 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/fpu/signal.c b/arch/x86/kernel/fpu/signal.c index 4ab9aeb9a963..ec3ae3054792 100644 --- a/arch/x86/kernel/fpu/signal.c +++ b/arch/x86/kernel/fpu/signal.c @@ -307,13 +307,17 @@ static int __fpu__restore_sig(void __user *buf, void __user *buf_fx, int size) return 0; } - if (!access_ok(buf, size)) - return -EACCES; + if (!access_ok(buf, size)) { + ret = -EACCES; + goto out; + } - if (!static_cpu_has(X86_FEATURE_FPU)) - return fpregs_soft_set(current, NULL, - 0, sizeof(struct user_i387_ia32_struct), - NULL, buf) != 0; + if (!static_cpu_has(X86_FEATURE_FPU)) { + ret = fpregs_soft_set(current, NULL, 0, + sizeof(struct user_i387_ia32_struct), + NULL, buf); + goto out; + } if (use_xsave()) { struct _fpx_sw_bytes fx_sw_user; @@ -396,7 +400,7 @@ static int __fpu__restore_sig(void __user *buf, void __user *buf_fx, int size) */ ret = __copy_from_user(&env, buf, sizeof(env)); if (ret) - goto err_out; + goto out; envp = &env; } @@ -426,7 +430,7 @@ static int __fpu__restore_sig(void __user *buf, void __user *buf_fx, int size) ret = copy_user_to_xstate(&fpu->state.xsave, buf_fx); if (ret) - goto err_out; + goto out; sanitize_restored_user_xstate(&fpu->state, envp, user_xfeatures, fx_only); @@ -446,7 +450,7 @@ static int __fpu__restore_sig(void __user *buf, void __user *buf_fx, int size) ret = __copy_from_user(&fpu->state.fxsave, buf_fx, state_size); if (ret) { ret = -EFAULT; - goto err_out; + goto out; } sanitize_restored_user_xstate(&fpu->state, envp, user_xfeatures, @@ -464,7 +468,7 @@ static int __fpu__restore_sig(void __user *buf, void __user *buf_fx, int size) } else { ret = __copy_from_user(&fpu->state.fsave, buf_fx, state_size); if (ret) - goto err_out; + goto out; fpregs_lock(); ret = copy_kernel_to_fregs_err(&fpu->state.fsave); @@ -475,7 +479,7 @@ static int __fpu__restore_sig(void __user *buf, void __user *buf_fx, int size) fpregs_deactivate(fpu); fpregs_unlock(); -err_out: +out: if (ret) fpu__clear_user_states(fpu); return ret; -- cgit v1.2.3 From a8383dfb2138742a1bb77b481ada047aededa2ba Mon Sep 17 00:00:00 2001 From: CodyYao-oc Date: Mon, 7 Jun 2021 10:53:35 +0800 Subject: x86/nmi_watchdog: Fix old-style NMI watchdog regression on old Intel CPUs The following commit: 3a4ac121c2ca ("x86/perf: Add hardware performance events support for Zhaoxin CPU.") Got the old-style NMI watchdog logic wrong and broke it for basically every Intel CPU where it was active. Which is only truly old CPUs, so few people noticed. On CPUs with perf events support we turn off the old-style NMI watchdog, so it was pretty pointless to add the logic for X86_VENDOR_ZHAOXIN to begin with ... :-/ Anyway, the fix is to restore the old logic and add a 'break'. [ mingo: Wrote a new changelog. ] Fixes: 3a4ac121c2ca ("x86/perf: Add hardware performance events support for Zhaoxin CPU.") Signed-off-by: CodyYao-oc Signed-off-by: Ingo Molnar Signed-off-by: Peter Zijlstra (Intel) Link: https://lore.kernel.org/r/20210607025335.9643-1-CodyYao-oc@zhaoxin.com --- arch/x86/kernel/cpu/perfctr-watchdog.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/cpu/perfctr-watchdog.c b/arch/x86/kernel/cpu/perfctr-watchdog.c index 3ef5868ac588..7aecb2fc3186 100644 --- a/arch/x86/kernel/cpu/perfctr-watchdog.c +++ b/arch/x86/kernel/cpu/perfctr-watchdog.c @@ -63,7 +63,7 @@ static inline unsigned int nmi_perfctr_msr_to_bit(unsigned int msr) case 15: return msr - MSR_P4_BPU_PERFCTR0; } - fallthrough; + break; case X86_VENDOR_ZHAOXIN: case X86_VENDOR_CENTAUR: return msr - MSR_ARCH_PERFMON_PERFCTR0; @@ -96,7 +96,7 @@ static inline unsigned int nmi_evntsel_msr_to_bit(unsigned int msr) case 15: return msr - MSR_P4_BSU_ESCR0; } - fallthrough; + break; case X86_VENDOR_ZHAOXIN: case X86_VENDOR_CENTAUR: return msr - MSR_ARCH_PERFMON_EVENTSEL0; -- cgit v1.2.3 From 5e63215c2f64079fbd011df5005c8bea63f149c2 Mon Sep 17 00:00:00 2001 From: Vitaly Wool Date: Mon, 31 May 2021 11:53:42 +0300 Subject: riscv: xip: support runtime trap patching RISCV_ERRATA_ALTERNATIVE patches text at runtime which is currently not possible when the kernel is executed from the flash in XIP mode. Since runtime patching concerns only traps at the moment, let's just have all the traps reside in RAM anyway if RISCV_ERRATA_ALTERNATIVE is set. Thus, these functions will be patch-able even when the .text section is in flash. Signed-off-by: Vitaly Wool Signed-off-by: Palmer Dabbelt --- arch/riscv/kernel/traps.c | 13 +++++++++---- arch/riscv/kernel/vmlinux-xip.lds.S | 15 ++++++++++++++- 2 files changed, 23 insertions(+), 5 deletions(-) (limited to 'arch') diff --git a/arch/riscv/kernel/traps.c b/arch/riscv/kernel/traps.c index 0721b9798595..7bc88d8aab97 100644 --- a/arch/riscv/kernel/traps.c +++ b/arch/riscv/kernel/traps.c @@ -86,8 +86,13 @@ static void do_trap_error(struct pt_regs *regs, int signo, int code, } } +#if defined (CONFIG_XIP_KERNEL) && defined (CONFIG_RISCV_ERRATA_ALTERNATIVE) +#define __trap_section __section(".xip.traps") +#else +#define __trap_section +#endif #define DO_ERROR_INFO(name, signo, code, str) \ -asmlinkage __visible void name(struct pt_regs *regs) \ +asmlinkage __visible __trap_section void name(struct pt_regs *regs) \ { \ do_trap_error(regs, signo, code, regs->epc, "Oops - " str); \ } @@ -111,7 +116,7 @@ DO_ERROR_INFO(do_trap_store_misaligned, int handle_misaligned_load(struct pt_regs *regs); int handle_misaligned_store(struct pt_regs *regs); -asmlinkage void do_trap_load_misaligned(struct pt_regs *regs) +asmlinkage void __trap_section do_trap_load_misaligned(struct pt_regs *regs) { if (!handle_misaligned_load(regs)) return; @@ -119,7 +124,7 @@ asmlinkage void do_trap_load_misaligned(struct pt_regs *regs) "Oops - load address misaligned"); } -asmlinkage void do_trap_store_misaligned(struct pt_regs *regs) +asmlinkage void __trap_section do_trap_store_misaligned(struct pt_regs *regs) { if (!handle_misaligned_store(regs)) return; @@ -146,7 +151,7 @@ static inline unsigned long get_break_insn_length(unsigned long pc) return GET_INSN_LENGTH(insn); } -asmlinkage __visible void do_trap_break(struct pt_regs *regs) +asmlinkage __visible __trap_section void do_trap_break(struct pt_regs *regs) { #ifdef CONFIG_KPROBES if (kprobe_single_step_handler(regs)) diff --git a/arch/riscv/kernel/vmlinux-xip.lds.S b/arch/riscv/kernel/vmlinux-xip.lds.S index 4b29b9917f99..a3ff09c4c3f9 100644 --- a/arch/riscv/kernel/vmlinux-xip.lds.S +++ b/arch/riscv/kernel/vmlinux-xip.lds.S @@ -99,9 +99,22 @@ SECTIONS } PERCPU_SECTION(L1_CACHE_BYTES) - . = ALIGN(PAGE_SIZE); + . = ALIGN(8); + .alternative : { + __alt_start = .; + *(.alternative) + __alt_end = .; + } __init_end = .; + . = ALIGN(16); + .xip.traps : { + __xip_traps_start = .; + *(.xip.traps) + __xip_traps_end = .; + } + + . = ALIGN(PAGE_SIZE); .sdata : { __global_pointer$ = . + 0x800; *(.sdata*) -- cgit v1.2.3 From 42e0e0b453bc6ead49c573ed512502069627546b Mon Sep 17 00:00:00 2001 From: Jisheng Zhang Date: Tue, 11 May 2021 00:28:38 +0800 Subject: riscv: code patching only works on !XIP_KERNEL Some features which need code patching such as KPROBES, DYNAMIC_FTRACE KGDB can only work on !XIP_KERNEL. Add dependencies for these features that rely on code patching. Signed-off-by: Jisheng Zhang Signed-off-by: Palmer Dabbelt --- arch/riscv/Kconfig | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) (limited to 'arch') diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig index c5914e70a0fd..18ec0f9bb8d5 100644 --- a/arch/riscv/Kconfig +++ b/arch/riscv/Kconfig @@ -61,11 +61,11 @@ config RISCV select GENERIC_TIME_VSYSCALL if MMU && 64BIT select HANDLE_DOMAIN_IRQ select HAVE_ARCH_AUDITSYSCALL - select HAVE_ARCH_JUMP_LABEL - select HAVE_ARCH_JUMP_LABEL_RELATIVE + select HAVE_ARCH_JUMP_LABEL if !XIP_KERNEL + select HAVE_ARCH_JUMP_LABEL_RELATIVE if !XIP_KERNEL select HAVE_ARCH_KASAN if MMU && 64BIT select HAVE_ARCH_KASAN_VMALLOC if MMU && 64BIT - select HAVE_ARCH_KGDB + select HAVE_ARCH_KGDB if !XIP_KERNEL select HAVE_ARCH_KGDB_QXFER_PKT select HAVE_ARCH_MMAP_RND_BITS if MMU select HAVE_ARCH_SECCOMP_FILTER @@ -80,9 +80,9 @@ config RISCV select HAVE_GCC_PLUGINS select HAVE_GENERIC_VDSO if MMU && 64BIT select HAVE_IRQ_TIME_ACCOUNTING - select HAVE_KPROBES - select HAVE_KPROBES_ON_FTRACE - select HAVE_KRETPROBES + select HAVE_KPROBES if !XIP_KERNEL + select HAVE_KPROBES_ON_FTRACE if !XIP_KERNEL + select HAVE_KRETPROBES if !XIP_KERNEL select HAVE_PCI select HAVE_PERF_EVENTS select HAVE_PERF_REGS @@ -231,11 +231,11 @@ config ARCH_RV64I bool "RV64I" select 64BIT select ARCH_SUPPORTS_INT128 if CC_HAS_INT128 && GCC_VERSION >= 50000 - select HAVE_DYNAMIC_FTRACE if MMU && $(cc-option,-fpatchable-function-entry=8) + select HAVE_DYNAMIC_FTRACE if !XIP_KERNEL && MMU && $(cc-option,-fpatchable-function-entry=8) select HAVE_DYNAMIC_FTRACE_WITH_REGS if HAVE_DYNAMIC_FTRACE - select HAVE_FTRACE_MCOUNT_RECORD + select HAVE_FTRACE_MCOUNT_RECORD if !XIP_KERNEL select HAVE_FUNCTION_GRAPH_TRACER - select HAVE_FUNCTION_TRACER + select HAVE_FUNCTION_TRACER if !XIP_KERNEL select SWIOTLB if MMU endchoice -- cgit v1.2.3 From 96f1b00138cb8f04c742c82d0a7c460b2202e887 Mon Sep 17 00:00:00 2001 From: Vineet Gupta Date: Tue, 8 Jun 2021 19:39:25 -0700 Subject: ARCv2: save ABI registers across signal handling ARCv2 has some configuration dependent registers (r30, r58, r59) which could be targetted by the compiler. To keep the ABI stable, these were unconditionally part of the glibc ABI (sysdeps/unix/sysv/linux/arc/sys/ucontext.h:mcontext_t) however we missed populating them (by saving/restoring them across signal handling). This patch fixes the issue by - adding arcv2 ABI regs to kernel struct sigcontext - populating them during signal handling Change to struct sigcontext might seem like a glibc ABI change (although it primarily uses ucontext_t:mcontext_t) but the fact is - it has only been extended (existing fields are not touched) - the old sigcontext was ABI incomplete to begin with anyways Fixes: https://github.com/foss-for-synopsys-dwc-arc-processors/linux/issues/53 Cc: Tested-by: kernel test robot Reported-by: Vladimir Isaev Signed-off-by: Vineet Gupta --- arch/arc/include/uapi/asm/sigcontext.h | 1 + arch/arc/kernel/signal.c | 43 ++++++++++++++++++++++++++++++++++ 2 files changed, 44 insertions(+) (limited to 'arch') diff --git a/arch/arc/include/uapi/asm/sigcontext.h b/arch/arc/include/uapi/asm/sigcontext.h index 95f8a4380e11..7a5449dfcb29 100644 --- a/arch/arc/include/uapi/asm/sigcontext.h +++ b/arch/arc/include/uapi/asm/sigcontext.h @@ -18,6 +18,7 @@ */ struct sigcontext { struct user_regs_struct regs; + struct user_regs_arcv2 v2abi; }; #endif /* _ASM_ARC_SIGCONTEXT_H */ diff --git a/arch/arc/kernel/signal.c b/arch/arc/kernel/signal.c index b3ccb9e5ffe4..cb2f88502baf 100644 --- a/arch/arc/kernel/signal.c +++ b/arch/arc/kernel/signal.c @@ -61,6 +61,41 @@ struct rt_sigframe { unsigned int sigret_magic; }; +static int save_arcv2_regs(struct sigcontext *mctx, struct pt_regs *regs) +{ + int err = 0; +#ifndef CONFIG_ISA_ARCOMPACT + struct user_regs_arcv2 v2abi; + + v2abi.r30 = regs->r30; +#ifdef CONFIG_ARC_HAS_ACCL_REGS + v2abi.r58 = regs->r58; + v2abi.r59 = regs->r59; +#else + v2abi.r58 = v2abi.r59 = 0; +#endif + err = __copy_to_user(&mctx->v2abi, &v2abi, sizeof(v2abi)); +#endif + return err; +} + +static int restore_arcv2_regs(struct sigcontext *mctx, struct pt_regs *regs) +{ + int err = 0; +#ifndef CONFIG_ISA_ARCOMPACT + struct user_regs_arcv2 v2abi; + + err = __copy_from_user(&v2abi, &mctx->v2abi, sizeof(v2abi)); + + regs->r30 = v2abi.r30; +#ifdef CONFIG_ARC_HAS_ACCL_REGS + regs->r58 = v2abi.r58; + regs->r59 = v2abi.r59; +#endif +#endif + return err; +} + static int stash_usr_regs(struct rt_sigframe __user *sf, struct pt_regs *regs, sigset_t *set) @@ -94,6 +129,10 @@ stash_usr_regs(struct rt_sigframe __user *sf, struct pt_regs *regs, err = __copy_to_user(&(sf->uc.uc_mcontext.regs.scratch), &uregs.scratch, sizeof(sf->uc.uc_mcontext.regs.scratch)); + + if (is_isa_arcv2()) + err |= save_arcv2_regs(&(sf->uc.uc_mcontext), regs); + err |= __copy_to_user(&sf->uc.uc_sigmask, set, sizeof(sigset_t)); return err ? -EFAULT : 0; @@ -109,6 +148,10 @@ static int restore_usr_regs(struct pt_regs *regs, struct rt_sigframe __user *sf) err |= __copy_from_user(&uregs.scratch, &(sf->uc.uc_mcontext.regs.scratch), sizeof(sf->uc.uc_mcontext.regs.scratch)); + + if (is_isa_arcv2()) + err |= restore_arcv2_regs(&(sf->uc.uc_mcontext), regs); + if (err) return -EFAULT; -- cgit v1.2.3 From 110febc0148f8ab867344061d5cf95ee1e1ebb3e Mon Sep 17 00:00:00 2001 From: Vineet Gupta Date: Fri, 26 Feb 2021 10:35:39 -0800 Subject: ARC: fix CONFIG_HARDENED_USERCOPY Currently enabling this triggers a warning | usercopy: Kernel memory overwrite attempt detected to kernel text (offset 155633, size 11)! | usercopy: BUG: failure at mm/usercopy.c:99/usercopy_abort()! | |gcc generated __builtin_trap |Path: /bin/busybox |CPU: 0 PID: 84 Comm: init Not tainted 5.4.22 | |[ECR ]: 0x00090005 => gcc generated __builtin_trap |[EFA ]: 0x9024fcaa |[BLINK ]: usercopy_abort+0x8a/0x8c |[ERET ]: memfd_fcntl+0x0/0x470 |[STAT32]: 0x80080802 : IE K |... |... |Stack Trace: | memfd_fcntl+0x0/0x470 | usercopy_abort+0x8a/0x8c | __check_object_size+0x10e/0x138 | copy_strings+0x1f4/0x38c | __do_execve_file+0x352/0x848 | EV_Trap+0xcc/0xd0 The issue is triggered by an allocation in "init reclaimed" region. ARC _stext emcompasses the init region (for historical reasons we wanted the init.text to be under .text as well). This however trips up __check_object_size()->check_kernel_text_object() which treats this as object bleeding into kernel text. Fix that by rezoning _stext to start from regular kernel .text and leave out .init altogether. Fixes: https://github.com/foss-for-synopsys-dwc-arc-processors/linux/issues/15 Reported-by: Evgeniy Didin Reviewed-by: Kees Cook Signed-off-by: Vineet Gupta --- arch/arc/kernel/vmlinux.lds.S | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/arc/kernel/vmlinux.lds.S b/arch/arc/kernel/vmlinux.lds.S index 33ce59d91461..e2146a8da195 100644 --- a/arch/arc/kernel/vmlinux.lds.S +++ b/arch/arc/kernel/vmlinux.lds.S @@ -57,7 +57,6 @@ SECTIONS .init.ramfs : { INIT_RAM_FS } . = ALIGN(PAGE_SIZE); - _stext = .; HEAD_TEXT_SECTION INIT_TEXT_SECTION(L1_CACHE_BYTES) @@ -83,6 +82,7 @@ SECTIONS .text : { _text = .; + _stext = .; TEXT_TEXT SCHED_TEXT CPUIDLE_TEXT -- cgit v1.2.3 From 858cf860494fab545abfa206d17efcb8bee73e36 Mon Sep 17 00:00:00 2001 From: Vitaly Wool Date: Mon, 31 May 2021 12:33:10 +0300 Subject: riscv: alternative: fix typo in macro name alternative-macros.h defines ALT_NEW_CONTENT in its assembly part and ALT_NEW_CONSTENT in the C part. Most likely it is the latter that is wrong. Fixes: 6f4eea90465ad (riscv: Introduce alternative mechanism to apply errata solution) Signed-off-by: Vitaly Wool Signed-off-by: Palmer Dabbelt --- arch/riscv/include/asm/alternative-macros.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/riscv/include/asm/alternative-macros.h b/arch/riscv/include/asm/alternative-macros.h index 88c08705f64a..67406c376389 100644 --- a/arch/riscv/include/asm/alternative-macros.h +++ b/arch/riscv/include/asm/alternative-macros.h @@ -51,7 +51,7 @@ REG_ASM " " newlen "\n" \ ".word " errata_id "\n" -#define ALT_NEW_CONSTENT(vendor_id, errata_id, enable, new_c) \ +#define ALT_NEW_CONTENT(vendor_id, errata_id, enable, new_c) \ ".if " __stringify(enable) " == 1\n" \ ".pushsection .alternative, \"a\"\n" \ ALT_ENTRY("886b", "888f", __stringify(vendor_id), __stringify(errata_id), "889f - 888f") \ @@ -69,7 +69,7 @@ "886 :\n" \ old_c "\n" \ "887 :\n" \ - ALT_NEW_CONSTENT(vendor_id, errata_id, enable, new_c) + ALT_NEW_CONTENT(vendor_id, errata_id, enable, new_c) #define _ALTERNATIVE_CFG(old_c, new_c, vendor_id, errata_id, CONFIG_k) \ __ALTERNATIVE_CFG(old_c, new_c, vendor_id, errata_id, IS_ENABLED(CONFIG_k)) -- cgit v1.2.3 From 2398ce80152aae33b9501ef54452e09e8e8d4262 Mon Sep 17 00:00:00 2001 From: Tor Vic Date: Thu, 10 Jun 2021 20:58:06 +0000 Subject: x86, lto: Pass -stack-alignment only on LLD < 13.0.0 Since LLVM commit 3787ee4, the '-stack-alignment' flag has been dropped [1], leading to the following error message when building a LTO kernel with Clang-13 and LLD-13: ld.lld: error: -plugin-opt=-: ld.lld: Unknown command line argument '-stack-alignment=8'. Try 'ld.lld --help' ld.lld: Did you mean '--stackrealign=8'? It also appears that the '-code-model' flag is not necessary anymore starting with LLVM-9 [2]. Drop '-code-model' and make '-stack-alignment' conditional on LLD < 13.0.0. These flags were necessary because these flags were not encoded in the IR properly, so the link would restart optimizations without them. Now there are properly encoded in the IR, and these flags exposing implementation details are no longer necessary. [1] https://reviews.llvm.org/D103048 [2] https://reviews.llvm.org/D52322 Cc: stable@vger.kernel.org Link: https://github.com/ClangBuiltLinux/linux/issues/1377 Signed-off-by: Tor Vic Reviewed-by: Nathan Chancellor Tested-by: Nathan Chancellor Signed-off-by: Kees Cook Link: https://lore.kernel.org/r/f2c018ee-5999-741e-58d4-e482d5246067@mailbox.org --- arch/x86/Makefile | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/x86/Makefile b/arch/x86/Makefile index 307529417021..cb5e8d39cac1 100644 --- a/arch/x86/Makefile +++ b/arch/x86/Makefile @@ -200,8 +200,9 @@ endif KBUILD_LDFLAGS += -m elf_$(UTS_MACHINE) ifdef CONFIG_LTO_CLANG -KBUILD_LDFLAGS += -plugin-opt=-code-model=kernel \ - -plugin-opt=-stack-alignment=$(if $(CONFIG_X86_32),4,8) +ifeq ($(shell test $(CONFIG_LLD_VERSION) -lt 130000; echo $$?),0) +KBUILD_LDFLAGS += -plugin-opt=-stack-alignment=$(if $(CONFIG_X86_32),4,8) +endif endif ifdef CONFIG_X86_NEED_RELOCS -- cgit v1.2.3 From 0ddd7eaffa644baa78e247bbd220ab7195b1eed6 Mon Sep 17 00:00:00 2001 From: Alexandre Ghiti Date: Fri, 4 Jun 2021 14:06:39 +0200 Subject: riscv: Fix BUILTIN_DTB for sifive and microchip soc Fix BUILTIN_DTB config which resulted in a dtb that was actually not built into the Linux image: in the same manner as Canaan soc does, create an object file from the dtb file that will get linked into the Linux image. Signed-off-by: Alexandre Ghiti Signed-off-by: Palmer Dabbelt --- arch/riscv/boot/dts/microchip/Makefile | 1 + arch/riscv/boot/dts/sifive/Makefile | 1 + 2 files changed, 2 insertions(+) (limited to 'arch') diff --git a/arch/riscv/boot/dts/microchip/Makefile b/arch/riscv/boot/dts/microchip/Makefile index 622b12771fd3..855c1502d912 100644 --- a/arch/riscv/boot/dts/microchip/Makefile +++ b/arch/riscv/boot/dts/microchip/Makefile @@ -1,2 +1,3 @@ # SPDX-License-Identifier: GPL-2.0 dtb-$(CONFIG_SOC_MICROCHIP_POLARFIRE) += microchip-mpfs-icicle-kit.dtb +obj-$(CONFIG_BUILTIN_DTB) += $(addsuffix .o, $(dtb-y)) diff --git a/arch/riscv/boot/dts/sifive/Makefile b/arch/riscv/boot/dts/sifive/Makefile index 74c47fe9fc22..d90e4eb0ade8 100644 --- a/arch/riscv/boot/dts/sifive/Makefile +++ b/arch/riscv/boot/dts/sifive/Makefile @@ -1,3 +1,4 @@ # SPDX-License-Identifier: GPL-2.0 dtb-$(CONFIG_SOC_SIFIVE) += hifive-unleashed-a00.dtb \ hifive-unmatched-a00.dtb +obj-$(CONFIG_BUILTIN_DTB) += $(addsuffix .o, $(dtb-y)) -- cgit v1.2.3 From 5d2388dbf84adebeb6d9742164be8d32728e4269 Mon Sep 17 00:00:00 2001 From: Khem Raj Date: Sun, 6 Jun 2021 15:09:40 -0700 Subject: riscv32: Use medany C model for modules When CONFIG_CMODEL_MEDLOW is used it ends up generating riscv_hi20_rela relocations in modules which are not resolved during runtime and following errors would be seen [ 4.802714] virtio_input: target 00000000c1539090 can not be addressed by the 32-bit offset from PC = 39148b7b [ 4.854800] virtio_input: target 00000000c1539090 can not be addressed by the 32-bit offset from PC = 9774456d Signed-off-by: Khem Raj Signed-off-by: Palmer Dabbelt --- arch/riscv/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/riscv/Makefile b/arch/riscv/Makefile index 4be020695428..99ecd8bcfd77 100644 --- a/arch/riscv/Makefile +++ b/arch/riscv/Makefile @@ -16,7 +16,7 @@ ifeq ($(CONFIG_DYNAMIC_FTRACE),y) CC_FLAGS_FTRACE := -fpatchable-function-entry=8 endif -ifeq ($(CONFIG_64BIT)$(CONFIG_CMODEL_MEDLOW),yy) +ifeq ($(CONFIG_CMODEL_MEDLOW),y) KBUILD_CFLAGS_MODULE += -mcmodel=medany endif -- cgit v1.2.3 From 01f5315dd7327b53a5f538b74a2338a651b1832d Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Sat, 22 May 2021 14:20:36 -0700 Subject: riscv: sifive: fix Kconfig errata warning The SOC_SIFIVE Kconfig entry unconditionally selects ERRATA_SIFIVE. However, ERRATA_SIFIVE depends on RISCV_ERRATA_ALTERNATIVE, which is not set, so SOC_SIFIVE should either depend on or select RISCV_ERRATA_ALTERNATIVE. Use 'select' here to quieten the Kconfig warning. WARNING: unmet direct dependencies detected for ERRATA_SIFIVE Depends on [n]: RISCV_ERRATA_ALTERNATIVE [=n] Selected by [y]: - SOC_SIFIVE [=y] Fixes: 1a0e5dbd3723 ("riscv: sifive: Add SiFive alternative ports") Signed-off-by: Randy Dunlap Cc: Paul Walmsley Cc: Palmer Dabbelt Cc: Albert Ou Cc: linux-riscv@lists.infradead.org Cc: Vincent Chen Signed-off-by: Palmer Dabbelt --- arch/riscv/Kconfig.socs | 1 + 1 file changed, 1 insertion(+) (limited to 'arch') diff --git a/arch/riscv/Kconfig.socs b/arch/riscv/Kconfig.socs index ed963761fbd2..30676ebb16eb 100644 --- a/arch/riscv/Kconfig.socs +++ b/arch/riscv/Kconfig.socs @@ -14,6 +14,7 @@ config SOC_SIFIVE select CLK_SIFIVE select CLK_SIFIVE_PRCI select SIFIVE_PLIC + select RISCV_ERRATA_ALTERNATIVE select ERRATA_SIFIVE help This enables support for SiFive SoC platform hardware. -- cgit v1.2.3 From e41d6c3f4f9b4804e53ca87aba8ee11ada606c77 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Tue, 8 Jun 2021 23:46:05 +1000 Subject: powerpc/signal64: Copy siginfo before changing regs->nip In commit 96d7a4e06fab ("powerpc/signal64: Rewrite handle_rt_signal64() to minimise uaccess switches") the 64-bit signal code was rearranged to use user_write_access_begin/end(). As part of that change the call to copy_siginfo_to_user() was moved later in the function, so that it could be done after the user_write_access_end(). In particular it was moved after we modify regs->nip to point to the signal trampoline. That means if copy_siginfo_to_user() fails we exit handle_rt_signal64() with an error but with regs->nip modified, whereas previously we would not modify regs->nip until the copy succeeded. Returning an error from signal delivery but with regs->nip updated leaves the process in a sort of half-delivered state. We do immediately force a SEGV in signal_setup_done(), called from do_signal(), so the process should never run in the half-delivered state. However that SEGV is not delivered until we've gone around to do_notify_resume() again, so it's possible some tracing could observe the half-delivered state. There are other cases where we fail signal delivery with regs partly updated, eg. the write to newsp and SA_SIGINFO, but the latter at least is very unlikely to fail as it reads back from the frame we just wrote to. Looking at other arches they seem to be more careful about leaving regs unchanged until the copy operations have succeeded, and in general that seems like good hygenie. So although the current behaviour is not cleary buggy, it's also not clearly correct. So move the call to copy_siginfo_to_user() up prior to the modification of regs->nip, which is closer to the old behaviour, and easier to reason about. Fixes: 96d7a4e06fab ("powerpc/signal64: Rewrite handle_rt_signal64() to minimise uaccess switches") Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210608134605.2783677-1-mpe@ellerman.id.au --- arch/powerpc/kernel/signal_64.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/kernel/signal_64.c b/arch/powerpc/kernel/signal_64.c index dca66481d0c2..f9e1f5428b9e 100644 --- a/arch/powerpc/kernel/signal_64.c +++ b/arch/powerpc/kernel/signal_64.c @@ -902,6 +902,10 @@ int handle_rt_signal64(struct ksignal *ksig, sigset_t *set, unsafe_copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set), badframe_block); user_write_access_end(); + /* Save the siginfo outside of the unsafe block. */ + if (copy_siginfo_to_user(&frame->info, &ksig->info)) + goto badframe; + /* Make sure signal handler doesn't get spurious FP exceptions */ tsk->thread.fp_state.fpscr = 0; @@ -915,11 +919,6 @@ int handle_rt_signal64(struct ksignal *ksig, sigset_t *set, regs->nip = (unsigned long) &frame->tramp[0]; } - - /* Save the siginfo outside of the unsafe block. */ - if (copy_siginfo_to_user(&frame->info, &ksig->info)) - goto badframe; - /* Allocate a dummy caller frame for the signal handler. */ newsp = ((unsigned long)frame) - __SIGNAL_FRAMESIZE; err |= put_user(regs->gpr[1], (unsigned long __user *)newsp); -- cgit v1.2.3 From 478036c4cd1a16e613a2f883d79c03cf187faacb Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Mon, 14 Jun 2021 23:14:40 +1000 Subject: powerpc: Fix initrd corruption with relative jump labels Commit b0b3b2c78ec0 ("powerpc: Switch to relative jump labels") switched us to using relative jump labels. That involves changing the code, target and key members in struct jump_entry to be relative to the address of the jump_entry, rather than absolute addresses. We have two static inlines that create a struct jump_entry, arch_static_branch() and arch_static_branch_jump(), as well as an asm macro ARCH_STATIC_BRANCH, which is used by the pseries-only hypervisor tracing code. Unfortunately we missed updating the key to be a relative reference in ARCH_STATIC_BRANCH. That causes a pseries kernel to have a handful of jump_entry structs with bad key values. Instead of being a relative reference they instead hold the full address of the key. However the code doesn't expect that, it still adds the key value to the address of the jump_entry (see jump_entry_key()) expecting to get a pointer to a key somewhere in kernel data. The table of jump_entry structs sits in rodata, which comes after the kernel text. In a typical build this will be somewhere around 15MB. The address of the key will be somewhere in data, typically around 20MB. Adding the two values together gets us a pointer somewhere around 45MB. We then call static_key_set_entries() with that bad pointer and modify some members of the struct static_key we think we are pointing at. A pseries kernel is typically ~30MB in size, so writing to ~45MB won't corrupt the kernel itself. However if we're booting with an initrd, depending on the size and exact location of the initrd, we can corrupt the initrd. Depending on how exactly we corrupt the initrd it can either cause the system to not boot, or just corrupt one of the files in the initrd. The fix is simply to make the key value relative to the jump_entry struct in the ARCH_STATIC_BRANCH macro. Fixes: b0b3b2c78ec0 ("powerpc: Switch to relative jump labels") Reported-by: Anastasia Kovaleva Reported-by: Roman Bolshakov Reported-by: Greg Kurz Reported-by: Daniel Axtens Signed-off-by: Michael Ellerman Tested-by: Daniel Axtens Tested-by: Greg Kurz Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210614131440.312360-1-mpe@ellerman.id.au --- arch/powerpc/include/asm/jump_label.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/powerpc/include/asm/jump_label.h b/arch/powerpc/include/asm/jump_label.h index 2d5c6bec2b4f..93ce3ec25387 100644 --- a/arch/powerpc/include/asm/jump_label.h +++ b/arch/powerpc/include/asm/jump_label.h @@ -50,7 +50,7 @@ l_yes: 1098: nop; \ .pushsection __jump_table, "aw"; \ .long 1098b - ., LABEL - .; \ - FTR_ENTRY_LONG KEY; \ + FTR_ENTRY_LONG KEY - .; \ .popsection #endif -- cgit v1.2.3 From 4692bc775d2180a937335ccba0edce557103d44a Mon Sep 17 00:00:00 2001 From: Kai Huang Date: Tue, 15 Jun 2021 22:16:39 +1200 Subject: x86/sgx: Add missing xa_destroy() when virtual EPC is destroyed xa_destroy() needs to be called to destroy a virtual EPC's page array before calling kfree() to free the virtual EPC. Currently it is not called so add the missing xa_destroy(). Fixes: 540745ddbc70 ("x86/sgx: Introduce virtual EPC for use by KVM guests") Signed-off-by: Kai Huang Signed-off-by: Borislav Petkov Acked-by: Dave Hansen Tested-by: Yang Zhong Link: https://lkml.kernel.org/r/20210615101639.291929-1-kai.huang@intel.com --- arch/x86/kernel/cpu/sgx/virt.c | 1 + 1 file changed, 1 insertion(+) (limited to 'arch') diff --git a/arch/x86/kernel/cpu/sgx/virt.c b/arch/x86/kernel/cpu/sgx/virt.c index 6ad165a5c0cc..64511c4a5200 100644 --- a/arch/x86/kernel/cpu/sgx/virt.c +++ b/arch/x86/kernel/cpu/sgx/virt.c @@ -212,6 +212,7 @@ static int sgx_vepc_release(struct inode *inode, struct file *file) list_splice_tail(&secs_pages, &zombie_secs_pages); mutex_unlock(&zombie_secs_pages_lock); + xa_destroy(&vepc->page_array); kfree(vepc); return 0; -- cgit v1.2.3 From 60b7ed54a41b550d50caf7f2418db4a7e75b5bdc Mon Sep 17 00:00:00 2001 From: Athira Rajeev Date: Thu, 17 Jun 2021 13:55:06 -0400 Subject: powerpc/perf: Fix crash in perf_instruction_pointer() when ppmu is not set On systems without any specific PMU driver support registered, running perf record causes Oops. The relevant portion from call trace: BUG: Kernel NULL pointer dereference on read at 0x00000040 Faulting instruction address: 0xc0021f0c Oops: Kernel access of bad area, sig: 11 [#1] BE PAGE_SIZE=4K PREEMPT CMPCPRO SAF3000 DIE NOTIFICATION CPU: 0 PID: 442 Comm: null_syscall Not tainted 5.13.0-rc6-s3k-dev-01645-g7649ee3d2957 #5164 NIP: c0021f0c LR: c00e8ad8 CTR: c00d8a5c NIP perf_instruction_pointer+0x10/0x60 LR perf_prepare_sample+0x344/0x674 Call Trace: perf_prepare_sample+0x7c/0x674 (unreliable) perf_event_output_forward+0x3c/0x94 __perf_event_overflow+0x74/0x14c perf_swevent_hrtimer+0xf8/0x170 __hrtimer_run_queues.constprop.0+0x160/0x318 hrtimer_interrupt+0x148/0x3b0 timer_interrupt+0xc4/0x22c Decrementer_virt+0xb8/0xbc During perf record session, perf_instruction_pointer() is called to capture the sample IP. This function in core-book3s accesses ppmu->flags. If a platform specific PMU driver is not registered, ppmu is set to NULL and accessing its members results in a crash. Fix this crash by checking if ppmu is set. Fixes: 2ca13a4cc56c ("powerpc/perf: Use regs->nip when SIAR is zero") Cc: stable@vger.kernel.org # v5.11+ Reported-by: Christophe Leroy Signed-off-by: Athira Rajeev Tested-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/1623952506-1431-1-git-send-email-atrajeev@linux.vnet.ibm.com --- arch/powerpc/perf/core-book3s.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c index 16d4d1b6a1ff..51622411a7cc 100644 --- a/arch/powerpc/perf/core-book3s.c +++ b/arch/powerpc/perf/core-book3s.c @@ -2254,7 +2254,7 @@ unsigned long perf_instruction_pointer(struct pt_regs *regs) bool use_siar = regs_use_siar(regs); unsigned long siar = mfspr(SPRN_SIAR); - if (ppmu->flags & PPMU_P10_DD1) { + if (ppmu && (ppmu->flags & PPMU_P10_DD1)) { if (siar) return siar; else -- cgit v1.2.3 From cacf994a91d3a55c0c2f853d6429cd7b86113915 Mon Sep 17 00:00:00 2001 From: Mikel Rychliski Date: Fri, 11 Jun 2021 17:48:23 -0400 Subject: PCI: Add AMD RS690 quirk to enable 64-bit DMA Although the AMD RS690 chipset has 64-bit DMA support, BIOS implementations sometimes fail to configure the memory limit registers correctly. The Acer F690GVM mainboard uses this chipset and a Marvell 88E8056 NIC. The sky2 driver programs the NIC to use 64-bit DMA, which will not work: sky2 0000:02:00.0: error interrupt status=0x8 sky2 0000:02:00.0 eth0: tx timeout sky2 0000:02:00.0 eth0: transmit ring 0 .. 22 report=0 done=0 Other drivers required by this mainboard either don't support 64-bit DMA, or have it disabled using driver specific quirks. For example, the ahci driver has quirks to enable or disable 64-bit DMA depending on the BIOS version (see ahci_sb600_enable_64bit() in ahci.c). This ahci quirk matches against the SB600 SATA controller, but the real issue is almost certainly with the RS690 PCI host that it was commonly attached to. To avoid this issue in all drivers with 64-bit DMA support, fix the configuration of the PCI host. If the kernel is aware of physical memory above 4GB, but the BIOS never configured the PCI host with this information, update the registers with our values. [bhelgaas: drop PCI_DEVICE_ID_ATI_RS690 definition] Link: https://lore.kernel.org/r/20210611214823.4898-1-mikel@mikelr.com Signed-off-by: Mikel Rychliski Signed-off-by: Bjorn Helgaas --- arch/x86/pci/fixup.c | 44 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 44 insertions(+) (limited to 'arch') diff --git a/arch/x86/pci/fixup.c b/arch/x86/pci/fixup.c index 02dc64625e64..2edd86649468 100644 --- a/arch/x86/pci/fixup.c +++ b/arch/x86/pci/fixup.c @@ -779,4 +779,48 @@ DECLARE_PCI_FIXUP_RESUME(PCI_VENDOR_ID_AMD, 0x1571, pci_amd_enable_64bit_bar); DECLARE_PCI_FIXUP_RESUME(PCI_VENDOR_ID_AMD, 0x15b1, pci_amd_enable_64bit_bar); DECLARE_PCI_FIXUP_RESUME(PCI_VENDOR_ID_AMD, 0x1601, pci_amd_enable_64bit_bar); +#define RS690_LOWER_TOP_OF_DRAM2 0x30 +#define RS690_LOWER_TOP_OF_DRAM2_VALID 0x1 +#define RS690_UPPER_TOP_OF_DRAM2 0x31 +#define RS690_HTIU_NB_INDEX 0xA8 +#define RS690_HTIU_NB_INDEX_WR_ENABLE 0x100 +#define RS690_HTIU_NB_DATA 0xAC + +/* + * Some BIOS implementations support RAM above 4GB, but do not configure the + * PCI host to respond to bus master accesses for these addresses. These + * implementations set the TOP_OF_DRAM_SLOT1 register correctly, so PCI DMA + * works as expected for addresses below 4GB. + * + * Reference: "AMD RS690 ASIC Family Register Reference Guide" (pg. 2-57) + * https://www.amd.com/system/files/TechDocs/43372_rs690_rrg_3.00o.pdf + */ +static void rs690_fix_64bit_dma(struct pci_dev *pdev) +{ + u32 val = 0; + phys_addr_t top_of_dram = __pa(high_memory - 1) + 1; + + if (top_of_dram <= (1ULL << 32)) + return; + + pci_write_config_dword(pdev, RS690_HTIU_NB_INDEX, + RS690_LOWER_TOP_OF_DRAM2); + pci_read_config_dword(pdev, RS690_HTIU_NB_DATA, &val); + + if (val) + return; + + pci_info(pdev, "Adjusting top of DRAM to %pa for 64-bit DMA support\n", &top_of_dram); + + pci_write_config_dword(pdev, RS690_HTIU_NB_INDEX, + RS690_UPPER_TOP_OF_DRAM2 | RS690_HTIU_NB_INDEX_WR_ENABLE); + pci_write_config_dword(pdev, RS690_HTIU_NB_DATA, top_of_dram >> 32); + + pci_write_config_dword(pdev, RS690_HTIU_NB_INDEX, + RS690_LOWER_TOP_OF_DRAM2 | RS690_HTIU_NB_INDEX_WR_ENABLE); + pci_write_config_dword(pdev, RS690_HTIU_NB_DATA, + top_of_dram | RS690_LOWER_TOP_OF_DRAM2_VALID); +} +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_ATI, 0x7910, rs690_fix_64bit_dma); + #endif -- cgit v1.2.3 From 28e5e44aa3f4e0e0370864ed008fb5e2d85f4dc8 Mon Sep 17 00:00:00 2001 From: Fan Du Date: Thu, 17 Jun 2021 12:46:57 -0700 Subject: x86/mm: Avoid truncating memblocks for SGX memory tl;dr: Several SGX users reported seeing the following message on NUMA systems: sgx: [Firmware Bug]: Unable to map EPC section to online node. Fallback to the NUMA node 0. This turned out to be the memblock code mistakenly throwing away SGX memory. === Full Changelog === The 'max_pfn' variable represents the highest known RAM address. It can be used, for instance, to quickly determine for which physical addresses there is mem_map[] space allocated. The numa_meminfo code makes an effort to throw out ("trim") all memory blocks which are above 'max_pfn'. SGX memory is not considered RAM (it is marked as "Reserved" in the e820) and is not taken into account by max_pfn. Despite this, SGX memory areas have NUMA affinity and are enumerated in the ACPI SRAT table. The existing SGX code uses the numa_meminfo mechanism to look up the NUMA affinity for its memory areas. In cases where SGX memory was above max_pfn (usually just the one EPC section in the last highest NUMA node), the numa_memblock is truncated at 'max_pfn', which is below the SGX memory. When the SGX code tries to look up the affinity of this memory, it fails and produces an error message: sgx: [Firmware Bug]: Unable to map EPC section to online node. Fallback to the NUMA node 0. and assigns the memory to NUMA node 0. Instead of silently truncating the memory block at 'max_pfn' and dropping the SGX memory, add the truncated portion to 'numa_reserved_meminfo'. This allows the SGX code to later determine the NUMA affinity of its 'Reserved' area. Before, numa_meminfo looked like this (from 'crash'): blk = { start = 0x0, end = 0x2080000000, nid = 0x0 } { start = 0x2080000000, end = 0x4000000000, nid = 0x1 } numa_reserved_meminfo is empty. With this, numa_meminfo looks like this: blk = { start = 0x0, end = 0x2080000000, nid = 0x0 } { start = 0x2080000000, end = 0x4000000000, nid = 0x1 } and numa_reserved_meminfo has an entry for node 1's SGX memory: blk = { start = 0x4000000000, end = 0x4080000000, nid = 0x1 } [ daveh: completely rewrote/reworked changelog ] Fixes: 5d30f92e7631 ("x86/NUMA: Provide a range-to-target_node lookup facility") Reported-by: Reinette Chatre Signed-off-by: Fan Du Signed-off-by: Dave Hansen Signed-off-by: Borislav Petkov Reviewed-by: Jarkko Sakkinen Reviewed-by: Dan Williams Reviewed-by: Dave Hansen Cc: Link: https://lkml.kernel.org/r/20210617194657.0A99CB22@viggo.jf.intel.com --- arch/x86/mm/numa.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/mm/numa.c b/arch/x86/mm/numa.c index 5eb4dc2b97da..e94da744386f 100644 --- a/arch/x86/mm/numa.c +++ b/arch/x86/mm/numa.c @@ -254,7 +254,13 @@ int __init numa_cleanup_meminfo(struct numa_meminfo *mi) /* make sure all non-reserved blocks are inside the limits */ bi->start = max(bi->start, low); - bi->end = min(bi->end, high); + + /* preserve info for non-RAM areas above 'max_pfn': */ + if (bi->end > high) { + numa_add_memblk_to(bi->nid, high, bi->end, + &numa_reserved_meminfo); + bi->end = high; + } /* and there's no empty block */ if (bi->start >= bi->end) -- cgit v1.2.3 From 314b781706e337b8cbde98cfefd3975863e032f2 Mon Sep 17 00:00:00 2001 From: Jisheng Zhang Date: Fri, 18 Jun 2021 22:01:36 +0800 Subject: riscv: kasan: Fix MODULES_VADDR evaluation due to local variables' name commit 2bfc6cd81bd1 ("riscv: Move kernel mapping outside of linear mapping") makes use of MODULES_VADDR to populate kernel, BPF, modules mapping. Currently, MODULES_VADDR is defined as below for RV64: | #define MODULES_VADDR (PFN_ALIGN((unsigned long)&_end) - SZ_2G) But kasan_init() has two local variables which are also named as _start, _end, so MODULES_VADDR is evaluated with the local variable _end rather than the global "_end" as we expected. Fix this issue by renaming the two local variables. Fixes: 2bfc6cd81bd1 ("riscv: Move kernel mapping outside of linear mapping") Signed-off-by: Jisheng Zhang Signed-off-by: Palmer Dabbelt --- arch/riscv/mm/kasan_init.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'arch') diff --git a/arch/riscv/mm/kasan_init.c b/arch/riscv/mm/kasan_init.c index 9daacae93e33..a0d9e4ace331 100644 --- a/arch/riscv/mm/kasan_init.c +++ b/arch/riscv/mm/kasan_init.c @@ -169,7 +169,7 @@ static void __init kasan_shallow_populate(void *start, void *end) void __init kasan_init(void) { - phys_addr_t _start, _end; + phys_addr_t p_start, p_end; u64 i; /* @@ -189,9 +189,9 @@ void __init kasan_init(void) (void *)kasan_mem_to_shadow((void *)VMALLOC_END)); /* Populate the linear mapping */ - for_each_mem_range(i, &_start, &_end) { - void *start = (void *)__va(_start); - void *end = (void *)__va(_end); + for_each_mem_range(i, &p_start, &p_end) { + void *start = (void *)__va(p_start); + void *end = (void *)__va(p_end); if (start >= end) break; -- cgit v1.2.3 From 3a02764c372c50ff7917fde5c6961f6cdb81d9d5 Mon Sep 17 00:00:00 2001 From: Jisheng Zhang Date: Fri, 18 Jun 2021 22:09:13 +0800 Subject: riscv: Ensure BPF_JIT_REGION_START aligned with PMD size Andreas reported commit fc8504765ec5 ("riscv: bpf: Avoid breaking W^X") breaks booting with one kind of defconfig, I reproduced a kernel panic with the defconfig: [ 0.138553] Unable to handle kernel paging request at virtual address ffffffff81201220 [ 0.139159] Oops [#1] [ 0.139303] Modules linked in: [ 0.139601] CPU: 0 PID: 1 Comm: swapper/0 Not tainted 5.13.0-rc5-default+ #1 [ 0.139934] Hardware name: riscv-virtio,qemu (DT) [ 0.140193] epc : __memset+0xc4/0xfc [ 0.140416] ra : skb_flow_dissector_init+0x1e/0x82 [ 0.140609] epc : ffffffff8029806c ra : ffffffff8033be78 sp : ffffffe001647da0 [ 0.140878] gp : ffffffff81134b08 tp : ffffffe001654380 t0 : ffffffff81201158 [ 0.141156] t1 : 0000000000000002 t2 : 0000000000000154 s0 : ffffffe001647dd0 [ 0.141424] s1 : ffffffff80a43250 a0 : ffffffff81201220 a1 : 0000000000000000 [ 0.141654] a2 : 000000000000003c a3 : ffffffff81201258 a4 : 0000000000000064 [ 0.141893] a5 : ffffffff8029806c a6 : 0000000000000040 a7 : ffffffffffffffff [ 0.142126] s2 : ffffffff81201220 s3 : 0000000000000009 s4 : ffffffff81135088 [ 0.142353] s5 : ffffffff81135038 s6 : ffffffff8080ce80 s7 : ffffffff80800438 [ 0.142584] s8 : ffffffff80bc6578 s9 : 0000000000000008 s10: ffffffff806000ac [ 0.142810] s11: 0000000000000000 t3 : fffffffffffffffc t4 : 0000000000000000 [ 0.143042] t5 : 0000000000000155 t6 : 00000000000003ff [ 0.143220] status: 0000000000000120 badaddr: ffffffff81201220 cause: 000000000000000f [ 0.143560] [] __memset+0xc4/0xfc [ 0.143859] [] init_default_flow_dissectors+0x22/0x60 [ 0.144092] [] do_one_initcall+0x3e/0x168 [ 0.144278] [] kernel_init_freeable+0x1c8/0x224 [ 0.144479] [] kernel_init+0x12/0x110 [ 0.144658] [] ret_from_exception+0x0/0xc [ 0.145124] ---[ end trace f1e9643daa46d591 ]--- After some investigation, I think I found the root cause: commit 2bfc6cd81bd ("move kernel mapping outside of linear mapping") moves BPF JIT region after the kernel: | #define BPF_JIT_REGION_START PFN_ALIGN((unsigned long)&_end) The &_end is unlikely aligned with PMD size, so the front bpf jit region sits with part of kernel .data section in one PMD size mapping. But kernel is mapped in PMD SIZE, when bpf_jit_binary_lock_ro() is called to make the first bpf jit prog ROX, we will make part of kernel .data section RO too, so when we write to, for example memset the .data section, MMU will trigger a store page fault. To fix the issue, we need to ensure the BPF JIT region is PMD size aligned. This patch acchieve this goal by restoring the BPF JIT region to original position, I.E the 128MB before kernel .text section. The modification to kasan_init.c is inspired by Alexandre. Fixes: fc8504765ec5 ("riscv: bpf: Avoid breaking W^X") Reported-by: Andreas Schwab Signed-off-by: Jisheng Zhang Signed-off-by: Palmer Dabbelt --- Documentation/riscv/vm-layout.rst | 4 ++-- arch/riscv/include/asm/pgtable.h | 5 ++--- arch/riscv/mm/kasan_init.c | 2 +- 3 files changed, 5 insertions(+), 6 deletions(-) (limited to 'arch') diff --git a/Documentation/riscv/vm-layout.rst b/Documentation/riscv/vm-layout.rst index 329d32098af4..b7f98930d38d 100644 --- a/Documentation/riscv/vm-layout.rst +++ b/Documentation/riscv/vm-layout.rst @@ -58,6 +58,6 @@ RISC-V Linux Kernel SV39 | ____________________________________________________________|____________________________________________________________ | | | | - ffffffff00000000 | -4 GB | ffffffff7fffffff | 2 GB | modules - ffffffff80000000 | -2 GB | ffffffffffffffff | 2 GB | kernel, BPF + ffffffff00000000 | -4 GB | ffffffff7fffffff | 2 GB | modules, BPF + ffffffff80000000 | -2 GB | ffffffffffffffff | 2 GB | kernel __________________|____________|__________________|_________|____________________________________________________________ diff --git a/arch/riscv/include/asm/pgtable.h b/arch/riscv/include/asm/pgtable.h index 9469f464e71a..380cd3a7e548 100644 --- a/arch/riscv/include/asm/pgtable.h +++ b/arch/riscv/include/asm/pgtable.h @@ -30,9 +30,8 @@ #define BPF_JIT_REGION_SIZE (SZ_128M) #ifdef CONFIG_64BIT -/* KASLR should leave at least 128MB for BPF after the kernel */ -#define BPF_JIT_REGION_START PFN_ALIGN((unsigned long)&_end) -#define BPF_JIT_REGION_END (BPF_JIT_REGION_START + BPF_JIT_REGION_SIZE) +#define BPF_JIT_REGION_START (BPF_JIT_REGION_END - BPF_JIT_REGION_SIZE) +#define BPF_JIT_REGION_END (MODULES_END) #else #define BPF_JIT_REGION_START (PAGE_OFFSET - BPF_JIT_REGION_SIZE) #define BPF_JIT_REGION_END (VMALLOC_END) diff --git a/arch/riscv/mm/kasan_init.c b/arch/riscv/mm/kasan_init.c index a0d9e4ace331..d7189c8714a9 100644 --- a/arch/riscv/mm/kasan_init.c +++ b/arch/riscv/mm/kasan_init.c @@ -201,7 +201,7 @@ void __init kasan_init(void) /* Populate kernel, BPF, modules mapping */ kasan_populate(kasan_mem_to_shadow((const void *)MODULES_VADDR), - kasan_mem_to_shadow((const void *)BPF_JIT_REGION_END)); + kasan_mem_to_shadow((const void *)MODULES_VADDR + SZ_2G)); for (i = 0; i < PTRS_PER_PTE; i++) set_pte(&kasan_early_shadow_pte[i], -- cgit v1.2.3 From 7ede12b01b59dc67bef2e2035297dd2da5bfe427 Mon Sep 17 00:00:00 2001 From: David Abdurachmanov Date: Sat, 12 Jun 2021 17:43:57 -0700 Subject: riscv: dts: fu740: fix cache-controller interrupts The order of interrupt numbers is incorrect. The order for FU740 is: DirError, DataError, DataFail, DirFail From SiFive FU740-C000 Manual: 19 - L2 Cache DirError 20 - L2 Cache DirFail 21 - L2 Cache DataError 22 - L2 Cache DataFail Signed-off-by: David Abdurachmanov Signed-off-by: Palmer Dabbelt --- arch/riscv/boot/dts/sifive/fu740-c000.dtsi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/riscv/boot/dts/sifive/fu740-c000.dtsi b/arch/riscv/boot/dts/sifive/fu740-c000.dtsi index 8eef82e4199f..abbb960f90a0 100644 --- a/arch/riscv/boot/dts/sifive/fu740-c000.dtsi +++ b/arch/riscv/boot/dts/sifive/fu740-c000.dtsi @@ -273,7 +273,7 @@ cache-size = <2097152>; cache-unified; interrupt-parent = <&plic0>; - interrupts = <19 20 21 22>; + interrupts = <19 21 22 20>; reg = <0x0 0x2010000 0x0 0x1000>; }; gpio: gpio@10060000 { -- cgit v1.2.3