From 0764e365dacd0b8f75c1736f9236be280649bd18 Mon Sep 17 00:00:00 2001 From: Clément Bœsch Date: Sun, 5 Sep 2021 02:20:27 +0200 Subject: arm64: dts: allwinner: h5: NanoPI Neo 2: Fix ethernet node MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit RX and TX delay are provided by ethernet PHY. Reflect that in ethernet node. Fixes: 44a94c7ef989 ("arm64: dts: allwinner: H5: Restore EMAC changes") Signed-off-by: Clément Bœsch Reviewed-by: Jernej Skrabec Reviewed-by: Andrew Lunn Signed-off-by: Maxime Ripard Link: https://lore.kernel.org/r/20210905002027.171984-1-u@pkh.me --- arch/arm64/boot/dts/allwinner/sun50i-h5-nanopi-neo2.dts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/boot/dts/allwinner/sun50i-h5-nanopi-neo2.dts b/arch/arm64/boot/dts/allwinner/sun50i-h5-nanopi-neo2.dts index 02f8e72f0cad..05486cccee1c 100644 --- a/arch/arm64/boot/dts/allwinner/sun50i-h5-nanopi-neo2.dts +++ b/arch/arm64/boot/dts/allwinner/sun50i-h5-nanopi-neo2.dts @@ -75,7 +75,7 @@ pinctrl-0 = <&emac_rgmii_pins>; phy-supply = <®_gmac_3v3>; phy-handle = <&ext_rgmii_phy>; - phy-mode = "rgmii"; + phy-mode = "rgmii-id"; status = "okay"; }; -- cgit v1.2.3 From 55dd7e059098ce4bd0a55c251cb78e74604abb57 Mon Sep 17 00:00:00 2001 From: Bastien Roucariès Date: Thu, 16 Sep 2021 08:17:21 +0000 Subject: ARM: dts: sun7i: A20-olinuxino-lime2: Fix ethernet phy-mode MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Commit bbc4d71d6354 ("net: phy: realtek: fix rtl8211e rx/tx delay config") sets the RX/TX delay according to the phy-mode property in the device tree. For the A20-olinuxino-lime2 board this is "rgmii", which is the wrong setting. Following the example of a900cac3750b ("ARM: dts: sun7i: a20: bananapro: Fix ethernet phy-mode") the phy-mode is changed to "rgmii-id" which gets the Ethernet working again on this board. Signed-off-by: Bastien Roucariès Signed-off-by: Maxime Ripard Link: https://lore.kernel.org/r/20210916081721.237137-1-rouca@debian.org --- arch/arm/boot/dts/sun7i-a20-olinuxino-lime2.dts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/boot/dts/sun7i-a20-olinuxino-lime2.dts b/arch/arm/boot/dts/sun7i-a20-olinuxino-lime2.dts index 8077f1716fbc..ecb91fb899ff 100644 --- a/arch/arm/boot/dts/sun7i-a20-olinuxino-lime2.dts +++ b/arch/arm/boot/dts/sun7i-a20-olinuxino-lime2.dts @@ -112,7 +112,7 @@ pinctrl-names = "default"; pinctrl-0 = <&gmac_rgmii_pins>; phy-handle = <&phy1>; - phy-mode = "rgmii"; + phy-mode = "rgmii-id"; status = "okay"; }; -- cgit v1.2.3 From 636707e593120c9fa35f6a908c0d052f6154910d Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 30 Sep 2021 13:11:20 +0200 Subject: mac80211: mesh: fix HE operation element length check The length check here was bad, if the length doesn't at least include the length of the fixed part, we cannot call ieee80211_he_oper_size() to determine the total. Fix this, and convert to cfg80211_find_ext_elem() while at it. 
Cc: stable@vger.kernel.org Fixes: 70debba3ab7d ("mac80211: save HE oper info in BSS config for mesh") Link: https://lore.kernel.org/r/20210930131120.b0f940976c56.I954e1be55e9f87cc303165bff5c906afe1e54648@changeid Signed-off-by: Johannes Berg --- net/mac80211/mesh.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/net/mac80211/mesh.c b/net/mac80211/mesh.c index 97095b7c9c64..5dcfd53a4ab6 100644 --- a/net/mac80211/mesh.c +++ b/net/mac80211/mesh.c @@ -672,7 +672,7 @@ ieee80211_mesh_update_bss_params(struct ieee80211_sub_if_data *sdata, u8 *ie, u8 ie_len) { struct ieee80211_supported_band *sband; - const u8 *cap; + const struct element *cap; const struct ieee80211_he_operation *he_oper = NULL; sband = ieee80211_get_sband(sdata); @@ -687,9 +687,10 @@ ieee80211_mesh_update_bss_params(struct ieee80211_sub_if_data *sdata, sdata->vif.bss_conf.he_support = true; - cap = cfg80211_find_ext_ie(WLAN_EID_EXT_HE_OPERATION, ie, ie_len); - if (cap && cap[1] >= ieee80211_he_oper_size(&cap[3])) - he_oper = (void *)(cap + 3); + cap = cfg80211_find_ext_elem(WLAN_EID_EXT_HE_OPERATION, ie, ie_len); + if (cap && cap->datalen >= 1 + sizeof(*he_oper) && + cap->datalen >= 1 + ieee80211_he_oper_size(cap->data + 1)) + he_oper = (void *)(cap->data + 1); if (he_oper) sdata->vif.bss_conf.he_oper.params = -- cgit v1.2.3 From a2083eeb119fb9307258baea9b7c243ca9a2e0b6 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 30 Sep 2021 13:11:21 +0200 Subject: cfg80211: scan: fix RCU in cfg80211_add_nontrans_list() The SSID pointer is pointing to RCU protected data, so we need to have it under rcu_read_lock() for the entire use. Fix this. Cc: stable@vger.kernel.org Fixes: 0b8fb8235be8 ("cfg80211: Parsing of Multiple BSSID information in scanning") Link: https://lore.kernel.org/r/20210930131120.6ddfc603aa1d.I2137344c4e2426525b1a8e4ce5fca82f8ecbfe7e@changeid Signed-off-by: Johannes Berg --- net/wireless/scan.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/net/wireless/scan.c b/net/wireless/scan.c index 11c68b159324..adc0d14cfd86 100644 --- a/net/wireless/scan.c +++ b/net/wireless/scan.c @@ -418,14 +418,17 @@ cfg80211_add_nontrans_list(struct cfg80211_bss *trans_bss, } ssid_len = ssid[1]; ssid = ssid + 2; - rcu_read_unlock(); /* check if nontrans_bss is in the list */ list_for_each_entry(bss, &trans_bss->nontrans_list, nontrans_list) { - if (is_bss(bss, nontrans_bss->bssid, ssid, ssid_len)) + if (is_bss(bss, nontrans_bss->bssid, ssid, ssid_len)) { + rcu_read_unlock(); return 0; + } } + rcu_read_unlock(); + /* add to the list */ list_add_tail(&nontrans_bss->nontrans_list, &trans_bss->nontrans_list); return 0; -- cgit v1.2.3 From f33eb7f29c16ba78db3221ee02346fd832274cdd Mon Sep 17 00:00:00 2001 From: Jim Quinlan Date: Tue, 14 Sep 2021 15:11:21 -0700 Subject: reset: brcmstb-rescal: fix incorrect polarity of status bit The readl_poll_timeout() should complete when the status bit is a 1, not 0. 
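As background (not part of the original patch): readl_poll_timeout(addr, val, cond, sleep_us, timeout_us) re-reads the register into val until cond evaluates true or the timeout expires, returning 0 on success and -ETIMEDOUT otherwise, so the condition must describe the state being waited for. A minimal sketch of the corrected wait, reusing the register and bit names from the driver but otherwise illustrative:

	u32 reg;
	int ret;

	/* Wait for the RESCAL status bit to be set, i.e. completion. */
	ret = readl_poll_timeout(base + BRCM_RESCAL_STATUS, reg,
				 reg & BRCM_RESCAL_STATUS_BIT, 100, 1000);
	if (ret)
		return ret; /* -ETIMEDOUT: the bit never went high */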
Fixes: 4cf176e52397 ("reset: Add Broadcom STB RESCAL reset controller") Signed-off-by: Jim Quinlan Signed-off-by: Florian Fainelli Link: https://lore.kernel.org/r/20210914221122.62315-1-f.fainelli@gmail.com Signed-off-by: Philipp Zabel --- drivers/reset/reset-brcmstb-rescal.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/reset/reset-brcmstb-rescal.c b/drivers/reset/reset-brcmstb-rescal.c index b6f074d6a65f..433fa0c40e47 100644 --- a/drivers/reset/reset-brcmstb-rescal.c +++ b/drivers/reset/reset-brcmstb-rescal.c @@ -38,7 +38,7 @@ static int brcm_rescal_reset_set(struct reset_controller_dev *rcdev, } ret = readl_poll_timeout(base + BRCM_RESCAL_STATUS, reg, - !(reg & BRCM_RESCAL_STATUS_BIT), 100, 1000); + (reg & BRCM_RESCAL_STATUS_BIT), 100, 1000); if (ret) { dev_err(data->dev, "time out on SATA/PCIe rescal\n"); return ret; -- cgit v1.2.3 From 4af160707d7197d671a5b0ad9899383b6cb7c067 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Tue, 14 Sep 2021 11:15:59 +0200 Subject: reset: pistachio: Re-enable driver selection After the retirement of MACH_PISTACHIO, the Pistachio Reset Driver is no longer auto-enabled when building a kernel for Pistachio systems. Worse, the driver cannot be enabled by the user at all (unless compile-testing), as the config symbol is invisible. Fix this partially by making the symbol visible again when compiling for MIPS, and dropping the useless default. The user still has to enable the driver manually when building a kernel for Pistachio systems, though. Fixes: 104f942b2832ab13 ("MIPS: Retire MACH_PISTACHIO") Signed-off-by: Geert Uytterhoeven Reviewed-by: Rahul Bedarkar Reviewed-by: Jiaxun Yang Link: https://lore.kernel.org/r/2c399e52540536df9c4006e46ef93fbccdde88db.1631610825.git.geert+renesas@glider.be Signed-off-by: Philipp Zabel --- drivers/reset/Kconfig | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/reset/Kconfig b/drivers/reset/Kconfig index be799a5abf8a..b0056ae5d463 100644 --- a/drivers/reset/Kconfig +++ b/drivers/reset/Kconfig @@ -147,8 +147,8 @@ config RESET_OXNAS bool config RESET_PISTACHIO - bool "Pistachio Reset Driver" if COMPILE_TEST - default MACH_PISTACHIO + bool "Pistachio Reset Driver" + depends on MIPS || COMPILE_TEST help This enables the reset driver for ImgTec Pistachio SoCs. -- cgit v1.2.3 From c045ceb5a145d2a9a4bf33cbc55185ddf99f60ab Mon Sep 17 00:00:00 2001 From: Mikko Perttunen Date: Wed, 15 Sep 2021 11:55:14 +0300 Subject: reset: tegra-bpmp: Handle errors in BPMP response The return value from tegra_bpmp_transfer indicates the success or failure of the IPC transaction with BPMP. If the transaction succeeded, we also need to check the actual command's result code. Add code to do this. 
Signed-off-by: Mikko Perttunen Link: https://lore.kernel.org/r/20210915085517.1669675-2-mperttunen@nvidia.com Signed-off-by: Philipp Zabel --- drivers/reset/tegra/reset-bpmp.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/drivers/reset/tegra/reset-bpmp.c b/drivers/reset/tegra/reset-bpmp.c index 24d3395964cc..4c5bba52b105 100644 --- a/drivers/reset/tegra/reset-bpmp.c +++ b/drivers/reset/tegra/reset-bpmp.c @@ -20,6 +20,7 @@ static int tegra_bpmp_reset_common(struct reset_controller_dev *rstc, struct tegra_bpmp *bpmp = to_tegra_bpmp(rstc); struct mrq_reset_request request; struct tegra_bpmp_message msg; + int err; memset(&request, 0, sizeof(request)); request.cmd = command; @@ -30,7 +31,13 @@ static int tegra_bpmp_reset_common(struct reset_controller_dev *rstc, msg.tx.data = &request; msg.tx.size = sizeof(request); - return tegra_bpmp_transfer(bpmp, &msg); + err = tegra_bpmp_transfer(bpmp, &msg); + if (err) + return err; + if (msg.rx.ret) + return -EINVAL; + + return 0; } static int tegra_bpmp_reset_module(struct reset_controller_dev *rstc, -- cgit v1.2.3 From 3ad60b4b3570937f3278509fe6797a5093ce53f8 Mon Sep 17 00:00:00 2001 From: Paweł Anikiel Date: Mon, 20 Sep 2021 14:41:41 +0200 Subject: reset: socfpga: add empty driver allowing consumers to probe MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The early reset driver doesn't ever probe, which causes consuming devices to be unable to probe. Add an empty driver to set this device as available, allowing consumers to probe. Signed-off-by: Paweł Anikiel Link: https://lore.kernel.org/r/20210920124141.1166544-4-pan@semihalf.com Signed-off-by: Philipp Zabel --- drivers/reset/reset-socfpga.c | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/drivers/reset/reset-socfpga.c b/drivers/reset/reset-socfpga.c index 2a72f861f798..8c6492e5693c 100644 --- a/drivers/reset/reset-socfpga.c +++ b/drivers/reset/reset-socfpga.c @@ -92,3 +92,29 @@ void __init socfpga_reset_init(void) for_each_matching_node(np, socfpga_early_reset_dt_ids) a10_reset_init(np); } + +/* + * The early driver is problematic, because it doesn't register + * itself as a driver. This causes certain device links to prevent + * consumer devices from probing. The hacky solution is to register + * an empty driver, whose only job is to attach itself to the reset + * manager and call probe. + */ +static const struct of_device_id socfpga_reset_dt_ids[] = { + { .compatible = "altr,rst-mgr", }, + { /* sentinel */ }, +}; + +static int reset_simple_probe(struct platform_device *pdev) +{ + return 0; +} + +static struct platform_driver reset_socfpga_driver = { + .probe = reset_simple_probe, + .driver = { + .name = "socfpga-reset", + .of_match_table = socfpga_reset_dt_ids, + }, +}; +builtin_platform_driver(reset_socfpga_driver); -- cgit v1.2.3 From e93c7d8e8c4cf80c6afe56e71c83c1cd31b4fce1 Mon Sep 17 00:00:00 2001 From: Shiraz Saleem Date: Tue, 5 Oct 2021 13:23:02 -0500 Subject: RDMA/irdma: Process extended CQ entries correctly The valid bit for extended CQE's written by HW is retrieved from the incorrect quad-word. This leads to missed completions for any UD traffic particularly after a wrap-around. Get the valid bit for extended CQE's from the correct quad-word in the descriptor. 
Fixes: 551c46edc769 ("RDMA/irdma: Add user/kernel shared libraries") Link: https://lore.kernel.org/r/20211005182302.374-1-shiraz.saleem@intel.com Signed-off-by: Shiraz Saleem Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/irdma/uk.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/hw/irdma/uk.c b/drivers/infiniband/hw/irdma/uk.c index 5fb92de1f015..9b544a3b1288 100644 --- a/drivers/infiniband/hw/irdma/uk.c +++ b/drivers/infiniband/hw/irdma/uk.c @@ -1092,12 +1092,12 @@ irdma_uk_cq_poll_cmpl(struct irdma_cq_uk *cq, struct irdma_cq_poll_info *info) if (cq->avoid_mem_cflct) { ext_cqe = (__le64 *)((u8 *)cqe + 32); get_64bit_val(ext_cqe, 24, &qword7); - polarity = (u8)FIELD_GET(IRDMA_CQ_VALID, qword3); + polarity = (u8)FIELD_GET(IRDMA_CQ_VALID, qword7); } else { peek_head = (cq->cq_ring.head + 1) % cq->cq_ring.size; ext_cqe = cq->cq_base[peek_head].buf; get_64bit_val(ext_cqe, 24, &qword7); - polarity = (u8)FIELD_GET(IRDMA_CQ_VALID, qword3); + polarity = (u8)FIELD_GET(IRDMA_CQ_VALID, qword7); if (!peek_head) polarity ^= 1; } -- cgit v1.2.3 From 1ab52ac1e9bc9391f592c9fa8340a6e3e9c36286 Mon Sep 17 00:00:00 2001 From: Patrisious Haddad Date: Wed, 6 Oct 2021 12:31:53 +0300 Subject: RDMA/mlx5: Set user priority for DCT Currently, the driver doesn't set the PCP-based priority for DCT, hence DCT response packets are transmitted without user priority. Fix it by setting user provided priority in the eth_prio field in the DCT context, which in turn sets the value in the transmitted packet. Fixes: 776a3906b692 ("IB/mlx5: Add support for DC target QP") Link: https://lore.kernel.org/r/5fd2d94a13f5742d8803c218927322257d53205c.1633512672.git.leonro@nvidia.com Signed-off-by: Patrisious Haddad Reviewed-by: Maor Gottlieb Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/mlx5/qp.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c index b2fca110346c..e5abbcfc1d57 100644 --- a/drivers/infiniband/hw/mlx5/qp.c +++ b/drivers/infiniband/hw/mlx5/qp.c @@ -4458,6 +4458,8 @@ static int mlx5_ib_modify_dct(struct ib_qp *ibqp, struct ib_qp_attr *attr, MLX5_SET(dctc, dctc, mtu, attr->path_mtu); MLX5_SET(dctc, dctc, my_addr_index, attr->ah_attr.grh.sgid_index); MLX5_SET(dctc, dctc, hop_limit, attr->ah_attr.grh.hop_limit); + if (attr->ah_attr.type == RDMA_AH_ATTR_TYPE_ROCE) + MLX5_SET(dctc, dctc, eth_prio, attr->ah_attr.sl & 0x7); err = mlx5_core_create_dct(dev, &qp->dct.mdct, qp->dct.in, MLX5_ST_SZ_BYTES(create_dct_in), out, -- cgit v1.2.3 From 4217d07b9fb328751f877d3bd9550122014860a2 Mon Sep 17 00:00:00 2001 From: Shawn Guo Date: Mon, 4 Oct 2021 10:49:35 +0800 Subject: mmc: sdhci: Map more voltage level to SDHCI_POWER_330 On Thundercomm TurboX CM2290, the eMMC OCR reports vdd = 23 (3.5 ~ 3.6 V), which is being treated as an invalid value by sdhci_set_power_noreg(). And thus eMMC is totally broken on the platform. 
[ 1.436599] ------------[ cut here ]------------ [ 1.436606] mmc0: Invalid vdd 0x17 [ 1.436640] WARNING: CPU: 2 PID: 69 at drivers/mmc/host/sdhci.c:2048 sdhci_set_power_noreg+0x168/0x2b4 [ 1.436655] Modules linked in: [ 1.436662] CPU: 2 PID: 69 Comm: kworker/u8:1 Tainted: G W 5.15.0-rc1+ #137 [ 1.436669] Hardware name: Thundercomm TurboX CM2290 (DT) [ 1.436674] Workqueue: events_unbound async_run_entry_fn [ 1.436685] pstate: 60000005 (nZCv daif -PAN -UAO -TCO -DIT -SSBS BTYPE=--) [ 1.436692] pc : sdhci_set_power_noreg+0x168/0x2b4 [ 1.436698] lr : sdhci_set_power_noreg+0x168/0x2b4 [ 1.436703] sp : ffff800010803a60 [ 1.436705] x29: ffff800010803a60 x28: ffff6a9102465f00 x27: ffff6a9101720a70 [ 1.436715] x26: ffff6a91014de1c0 x25: ffff6a91014de010 x24: ffff6a91016af280 [ 1.436724] x23: ffffaf7b1b276640 x22: 0000000000000000 x21: ffff6a9101720000 [ 1.436733] x20: ffff6a9101720370 x19: ffff6a9101720580 x18: 0000000000000020 [ 1.436743] x17: 0000000000000000 x16: 0000000000000004 x15: ffffffffffffffff [ 1.436751] x14: 0000000000000000 x13: 00000000fffffffd x12: ffffaf7b1b84b0bc [ 1.436760] x11: ffffaf7b1b720d10 x10: 000000000000000a x9 : ffff800010803a60 [ 1.436769] x8 : 000000000000000a x7 : 000000000000000f x6 : 00000000fffff159 [ 1.436778] x5 : 0000000000000000 x4 : 0000000000000000 x3 : 00000000ffffffff [ 1.436787] x2 : 0000000000000000 x1 : 0000000000000000 x0 : ffff6a9101718d80 [ 1.436797] Call trace: [ 1.436800] sdhci_set_power_noreg+0x168/0x2b4 [ 1.436805] sdhci_set_ios+0xa0/0x7fc [ 1.436811] mmc_power_up.part.0+0xc4/0x164 [ 1.436818] mmc_start_host+0xa0/0xb0 [ 1.436824] mmc_add_host+0x60/0x90 [ 1.436830] __sdhci_add_host+0x174/0x330 [ 1.436836] sdhci_msm_probe+0x7c0/0x920 [ 1.436842] platform_probe+0x68/0xe0 [ 1.436850] really_probe.part.0+0x9c/0x31c [ 1.436857] __driver_probe_device+0x98/0x144 [ 1.436863] driver_probe_device+0xc8/0x15c [ 1.436869] __device_attach_driver+0xb4/0x120 [ 1.436875] bus_for_each_drv+0x78/0xd0 [ 1.436881] __device_attach_async_helper+0xac/0xd0 [ 1.436888] async_run_entry_fn+0x34/0x110 [ 1.436895] process_one_work+0x1d0/0x354 [ 1.436903] worker_thread+0x13c/0x470 [ 1.436910] kthread+0x150/0x160 [ 1.436915] ret_from_fork+0x10/0x20 [ 1.436923] ---[ end trace fcfac44cb045c3a8 ]--- Fix the issue by mapping MMC_VDD_35_36 (and MMC_VDD_34_35) to SDHCI_POWER_330 as well. Signed-off-by: Shawn Guo Acked-by: Adrian Hunter Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20211004024935.15326-1-shawn.guo@linaro.org Signed-off-by: Ulf Hansson --- drivers/mmc/host/sdhci.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/mmc/host/sdhci.c b/drivers/mmc/host/sdhci.c index 8eefa7d5fe85..2d80a04e11d8 100644 --- a/drivers/mmc/host/sdhci.c +++ b/drivers/mmc/host/sdhci.c @@ -2042,6 +2042,12 @@ void sdhci_set_power_noreg(struct sdhci_host *host, unsigned char mode, break; case MMC_VDD_32_33: case MMC_VDD_33_34: + /* + * 3.4 ~ 3.6V are valid only for those platforms where it's + * known that the voltage range is supported by hardware. 
+ */ + case MMC_VDD_34_35: + case MMC_VDD_35_36: pwr = SDHCI_POWER_330; break; default: -- cgit v1.2.3 From 55e6d8037805b3400096d621091dfbf713f97e83 Mon Sep 17 00:00:00 2001 From: Yang Yingliang Date: Tue, 12 Oct 2021 10:37:35 +0800 Subject: regmap: Fix possible double-free in regcache_rbtree_exit() In regcache_rbtree_insert_to_block(), when 'present' realloc failed, the 'blk' which is supposed to assign to 'rbnode->block' will be freed, so 'rbnode->block' points a freed memory, in the error handling path of regcache_rbtree_init(), 'rbnode->block' will be freed again in regcache_rbtree_exit(), KASAN will report double-free as follows: BUG: KASAN: double-free or invalid-free in kfree+0xce/0x390 Call Trace: slab_free_freelist_hook+0x10d/0x240 kfree+0xce/0x390 regcache_rbtree_exit+0x15d/0x1a0 regcache_rbtree_init+0x224/0x2c0 regcache_init+0x88d/0x1310 __regmap_init+0x3151/0x4a80 __devm_regmap_init+0x7d/0x100 madera_spi_probe+0x10f/0x333 [madera_spi] spi_probe+0x183/0x210 really_probe+0x285/0xc30 To fix this, moving up the assignment of rbnode->block to immediately after the reallocation has succeeded so that the data structure stays valid even if the second reallocation fails. Reported-by: Hulk Robot Fixes: 3f4ff561bc88b ("regmap: rbtree: Make cache_present bitmap per node") Signed-off-by: Yang Yingliang Link: https://lore.kernel.org/r/20211012023735.1632786-1-yangyingliang@huawei.com Signed-off-by: Mark Brown --- drivers/base/regmap/regcache-rbtree.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/drivers/base/regmap/regcache-rbtree.c b/drivers/base/regmap/regcache-rbtree.c index cfa29dc89bbf..fabf87058d80 100644 --- a/drivers/base/regmap/regcache-rbtree.c +++ b/drivers/base/regmap/regcache-rbtree.c @@ -281,14 +281,14 @@ static int regcache_rbtree_insert_to_block(struct regmap *map, if (!blk) return -ENOMEM; + rbnode->block = blk; + if (BITS_TO_LONGS(blklen) > BITS_TO_LONGS(rbnode->blklen)) { present = krealloc(rbnode->cache_present, BITS_TO_LONGS(blklen) * sizeof(*present), GFP_KERNEL); - if (!present) { - kfree(blk); + if (!present) return -ENOMEM; - } memset(present + BITS_TO_LONGS(rbnode->blklen), 0, (BITS_TO_LONGS(blklen) - BITS_TO_LONGS(rbnode->blklen)) @@ -305,7 +305,6 @@ static int regcache_rbtree_insert_to_block(struct regmap *map, } /* update the rbnode block, its size and the base register */ - rbnode->block = blk; rbnode->blklen = blklen; rbnode->base_reg = base_reg; rbnode->cache_present = present; -- cgit v1.2.3 From d39bf40e55e666b5905fdbd46a0dced030ce87be Mon Sep 17 00:00:00 2001 From: Mike Marciniszyn Date: Tue, 12 Oct 2021 13:55:19 -0400 Subject: IB/qib: Protect from buffer overflow in struct qib_user_sdma_pkt fields Overflowing either addrlimit or bytes_togo can allow userspace to trigger a buffer overflow of kernel memory. Check for overflows in all the places doing math on user controlled buffers. 
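The fix below is built around the kernel's check_add_overflow() helper from <linux/overflow.h>, which stores the sum in the third argument and returns true if the addition wrapped, and type_max(typeof(x)), which gives the largest value the destination field can hold. A minimal sketch of the pattern (the function and variable names here are illustrative, not the driver's):

	/* Accumulate a user-controlled length, rejecting arithmetic wrap-around. */
	static int accumulate_user_len(size_t *total, size_t user_len, u16 dest_field)
	{
		if (check_add_overflow(*total, user_len, total))
			return -EINVAL;	/* the addition overflowed */
		if (*total > type_max(typeof(dest_field)))
			return -EINVAL;	/* would not fit the narrower destination field */
		return 0;
	}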
Fixes: f931551bafe1 ("IB/qib: Add new qib driver for QLogic PCIe InfiniBand adapters") Link: https://lore.kernel.org/r/20211012175519.7298.77738.stgit@awfm-01.cornelisnetworks.com Reported-by: Ilja Van Sprundel Reviewed-by: Dennis Dalessandro Signed-off-by: Mike Marciniszyn Signed-off-by: Dennis Dalessandro Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/qib/qib_user_sdma.c | 33 +++++++++++++++++++++---------- 1 file changed, 23 insertions(+), 10 deletions(-) diff --git a/drivers/infiniband/hw/qib/qib_user_sdma.c b/drivers/infiniband/hw/qib/qib_user_sdma.c index a67599b5a550..ac11943a5ddb 100644 --- a/drivers/infiniband/hw/qib/qib_user_sdma.c +++ b/drivers/infiniband/hw/qib/qib_user_sdma.c @@ -602,7 +602,7 @@ done: /* * How many pages in this iovec element? */ -static int qib_user_sdma_num_pages(const struct iovec *iov) +static size_t qib_user_sdma_num_pages(const struct iovec *iov) { const unsigned long addr = (unsigned long) iov->iov_base; const unsigned long len = iov->iov_len; @@ -658,7 +658,7 @@ static void qib_user_sdma_free_pkt_frag(struct device *dev, static int qib_user_sdma_pin_pages(const struct qib_devdata *dd, struct qib_user_sdma_queue *pq, struct qib_user_sdma_pkt *pkt, - unsigned long addr, int tlen, int npages) + unsigned long addr, int tlen, size_t npages) { struct page *pages[8]; int i, j; @@ -722,7 +722,7 @@ static int qib_user_sdma_pin_pkt(const struct qib_devdata *dd, unsigned long idx; for (idx = 0; idx < niov; idx++) { - const int npages = qib_user_sdma_num_pages(iov + idx); + const size_t npages = qib_user_sdma_num_pages(iov + idx); const unsigned long addr = (unsigned long) iov[idx].iov_base; ret = qib_user_sdma_pin_pages(dd, pq, pkt, addr, @@ -824,8 +824,8 @@ static int qib_user_sdma_queue_pkts(const struct qib_devdata *dd, unsigned pktnw; unsigned pktnwc; int nfrags = 0; - int npages = 0; - int bytes_togo = 0; + size_t npages = 0; + size_t bytes_togo = 0; int tiddma = 0; int cfur; @@ -885,7 +885,11 @@ static int qib_user_sdma_queue_pkts(const struct qib_devdata *dd, npages += qib_user_sdma_num_pages(&iov[idx]); - bytes_togo += slen; + if (check_add_overflow(bytes_togo, slen, &bytes_togo) || + bytes_togo > type_max(typeof(pkt->bytes_togo))) { + ret = -EINVAL; + goto free_pbc; + } pktnwc += slen >> 2; idx++; nfrags++; @@ -904,8 +908,7 @@ static int qib_user_sdma_queue_pkts(const struct qib_devdata *dd, } if (frag_size) { - int tidsmsize, n; - size_t pktsize; + size_t tidsmsize, n, pktsize, sz, addrlimit; n = npages*((2*PAGE_SIZE/frag_size)+1); pktsize = struct_size(pkt, addr, n); @@ -923,14 +926,24 @@ static int qib_user_sdma_queue_pkts(const struct qib_devdata *dd, else tidsmsize = 0; - pkt = kmalloc(pktsize+tidsmsize, GFP_KERNEL); + if (check_add_overflow(pktsize, tidsmsize, &sz)) { + ret = -EINVAL; + goto free_pbc; + } + pkt = kmalloc(sz, GFP_KERNEL); if (!pkt) { ret = -ENOMEM; goto free_pbc; } pkt->largepkt = 1; pkt->frag_size = frag_size; - pkt->addrlimit = n + ARRAY_SIZE(pkt->addr); + if (check_add_overflow(n, ARRAY_SIZE(pkt->addr), + &addrlimit) || + addrlimit > type_max(typeof(pkt->addrlimit))) { + ret = -EINVAL; + goto free_pbc; + } + pkt->addrlimit = addrlimit; if (tiddma) { char *tidsm = (char *)pkt + pktsize; -- cgit v1.2.3 From 13bac861952a78664907a0f927d3e874e9a59034 Mon Sep 17 00:00:00 2001 From: Mike Marciniszyn Date: Wed, 13 Oct 2021 10:18:52 -0400 Subject: IB/hfi1: Fix abba locking issue with sc_disable() sc_disable() after having disabled the send context wakes up any waiters by calling hfi1_qp_wakeup() while holding the waitlock for 
the sc. This is contrary to the model for all other calls to hfi1_qp_wakeup() where the waitlock is dropped and a local is used to drive calls to hfi1_qp_wakeup(). Fix by moving the sc->piowait into a local list and driving the wakeup calls from the list. Fixes: 099a884ba4c0 ("IB/hfi1: Handle wakeup of orphaned QPs for pio") Link: https://lore.kernel.org/r/20211013141852.128104.2682.stgit@awfm-01.cornelisnetworks.com Signed-off-by: Mike Marciniszyn Reported-by: TOTE Robot Signed-off-by: Dennis Dalessandro Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hfi1/pio.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/drivers/infiniband/hw/hfi1/pio.c b/drivers/infiniband/hw/hfi1/pio.c index 489b436f19bb..3d42bd2b36bd 100644 --- a/drivers/infiniband/hw/hfi1/pio.c +++ b/drivers/infiniband/hw/hfi1/pio.c @@ -878,6 +878,7 @@ void sc_disable(struct send_context *sc) { u64 reg; struct pio_buf *pbuf; + LIST_HEAD(wake_list); if (!sc) return; @@ -912,19 +913,21 @@ void sc_disable(struct send_context *sc) spin_unlock(&sc->release_lock); write_seqlock(&sc->waitlock); - while (!list_empty(&sc->piowait)) { + if (!list_empty(&sc->piowait)) + list_move(&sc->piowait, &wake_list); + write_sequnlock(&sc->waitlock); + while (!list_empty(&wake_list)) { struct iowait *wait; struct rvt_qp *qp; struct hfi1_qp_priv *priv; - wait = list_first_entry(&sc->piowait, struct iowait, list); + wait = list_first_entry(&wake_list, struct iowait, list); qp = iowait_to_qp(wait); priv = qp->priv; list_del_init(&priv->s_iowait.list); priv->s_iowait.lock = NULL; hfi1_qp_wakeup(qp, RVT_S_WAIT_PIO | HFI1_S_WAIT_PIO_DRAIN); } - write_sequnlock(&sc->waitlock); spin_unlock_irq(&sc->alloc_lock); } -- cgit v1.2.3 From 663991f32857b3b63c94c97de9dbb0ec8600144f Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Wed, 13 Oct 2021 11:06:45 +0300 Subject: RDMA/rdmavt: Fix error code in rvt_create_qp() Return negative -ENOMEM instead of positive ENOMEM. Returning a positive value will cause an Oops because it becomes an ERR_PTR() in the create_qp() function. Fixes: 514aee660df4 ("RDMA: Globally allocate and release QP memory") Link: https://lore.kernel.org/r/20211013080645.GD6010@kili Signed-off-by: Dan Carpenter Acked-by: Dennis Dalessandro Signed-off-by: Jason Gunthorpe --- drivers/infiniband/sw/rdmavt/qp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/sw/rdmavt/qp.c b/drivers/infiniband/sw/rdmavt/qp.c index 49bdd78ac664..3305f2744bfa 100644 --- a/drivers/infiniband/sw/rdmavt/qp.c +++ b/drivers/infiniband/sw/rdmavt/qp.c @@ -1223,7 +1223,7 @@ int rvt_create_qp(struct ib_qp *ibqp, struct ib_qp_init_attr *init_attr, spin_lock(&rdi->n_qps_lock); if (rdi->n_qps_allocated == rdi->dparms.props.max_qp) { spin_unlock(&rdi->n_qps_lock); - ret = ENOMEM; + ret = -ENOMEM; goto bail_ip; } -- cgit v1.2.3 From 6ab4e2eb5e956a61e4d53cea3ab8c866ba79a830 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Wed, 13 Oct 2021 23:17:18 +0300 Subject: mmc: sdhci-pci: Read card detect from ACPI for Intel Merrifield The Intel Merrifield platform has been converted to use ACPI enumeration. However, the driver missed an update to retrieve the card detect GPIO. Fix it here. Unfortunately we can't rely on the CD GPIO state because there are two different PCB designs in the wild that are using the opposite card detection sense and there is no way to distinguish those platforms, that's why we ignore the CD GPIO completely and use it only as an event. 
Fixes: 4590d98f5a4f ("sfi: Remove framework for deprecated firmware") BugLink: https://github.com/edison-fw/meta-intel-edison/issues/135 Signed-off-by: Andy Shevchenko Acked-by: Adrian Hunter Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20211013201723.52212-2-andriy.shevchenko@linux.intel.com Signed-off-by: Ulf Hansson --- drivers/mmc/host/sdhci-pci-core.c | 29 ++++++++++++++++++++++++----- 1 file changed, 24 insertions(+), 5 deletions(-) diff --git a/drivers/mmc/host/sdhci-pci-core.c b/drivers/mmc/host/sdhci-pci-core.c index be19785227fe..d0f2edfe296c 100644 --- a/drivers/mmc/host/sdhci-pci-core.c +++ b/drivers/mmc/host/sdhci-pci-core.c @@ -616,16 +616,12 @@ static int intel_select_drive_strength(struct mmc_card *card, return intel_host->drv_strength; } -static int bxt_get_cd(struct mmc_host *mmc) +static int sdhci_get_cd_nogpio(struct mmc_host *mmc) { - int gpio_cd = mmc_gpio_get_cd(mmc); struct sdhci_host *host = mmc_priv(mmc); unsigned long flags; int ret = 0; - if (!gpio_cd) - return 0; - spin_lock_irqsave(&host->lock, flags); if (host->flags & SDHCI_DEVICE_DEAD) @@ -638,6 +634,21 @@ out: return ret; } +static int bxt_get_cd(struct mmc_host *mmc) +{ + int gpio_cd = mmc_gpio_get_cd(mmc); + + if (!gpio_cd) + return 0; + + return sdhci_get_cd_nogpio(mmc); +} + +static int mrfld_get_cd(struct mmc_host *mmc) +{ + return sdhci_get_cd_nogpio(mmc); +} + #define SDHCI_INTEL_PWR_TIMEOUT_CNT 20 #define SDHCI_INTEL_PWR_TIMEOUT_UDELAY 100 @@ -1341,6 +1352,14 @@ static int intel_mrfld_mmc_probe_slot(struct sdhci_pci_slot *slot) MMC_CAP_1_8V_DDR; break; case INTEL_MRFLD_SD: + slot->cd_idx = 0; + slot->cd_override_level = true; + /* + * There are two PCB designs of SD card slot with the opposite + * card detection sense. Quirk this out by ignoring GPIO state + * completely in the custom ->get_cd() callback. + */ + slot->host->mmc_host_ops.get_cd = mrfld_get_cd; slot->host->quirks2 |= SDHCI_QUIRK2_NO_1_8_V; break; case INTEL_MRFLD_SDIO: -- cgit v1.2.3 From 82a4f329b133ad0de66bee12c0be5c67bb8aa188 Mon Sep 17 00:00:00 2001 From: Frieder Schrempf Date: Fri, 15 Oct 2021 14:48:36 +0200 Subject: arm64: dts: imx8mm-kontron: Make sure SOC and DRAM supply voltages are correct It looks like the voltages for the SOC and DRAM supply weren't properly validated before. The datasheet and uboot-imx code tells us that VDD_SOC should be 800 mV in suspend and 850 mV in run mode. VDD_DRAM should be 950 mV for DDR clock frequencies of up to 1.5 GHz. Let's fix these values to make sure the voltages are within the required range. 
Fixes: 8668d8b2e67f ("arm64: dts: Add the Kontron i.MX8M Mini SoMs and baseboards") Cc: stable@vger.kernel.org Signed-off-by: Frieder Schrempf Signed-off-by: Shawn Guo --- arch/arm64/boot/dts/freescale/imx8mm-kontron-n801x-som.dtsi | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/arch/arm64/boot/dts/freescale/imx8mm-kontron-n801x-som.dtsi b/arch/arm64/boot/dts/freescale/imx8mm-kontron-n801x-som.dtsi index 9db9b90bf2bc..7cf60c17c29b 100644 --- a/arch/arm64/boot/dts/freescale/imx8mm-kontron-n801x-som.dtsi +++ b/arch/arm64/boot/dts/freescale/imx8mm-kontron-n801x-som.dtsi @@ -91,10 +91,12 @@ reg_vdd_soc: BUCK1 { regulator-name = "buck1"; regulator-min-microvolt = <800000>; - regulator-max-microvolt = <900000>; + regulator-max-microvolt = <850000>; regulator-boot-on; regulator-always-on; regulator-ramp-delay = <3125>; + nxp,dvs-run-voltage = <850000>; + nxp,dvs-standby-voltage = <800000>; }; reg_vdd_arm: BUCK2 { @@ -111,7 +113,7 @@ reg_vdd_dram: BUCK3 { regulator-name = "buck3"; regulator-min-microvolt = <850000>; - regulator-max-microvolt = <900000>; + regulator-max-microvolt = <950000>; regulator-boot-on; regulator-always-on; }; -- cgit v1.2.3 From 256a24eba7f897c817fb0103dac73467d3789202 Mon Sep 17 00:00:00 2001 From: Frieder Schrempf Date: Fri, 15 Oct 2021 14:48:37 +0200 Subject: arm64: dts: imx8mm-kontron: Set lower limit of VDD_SNVS to 800 mV According to the datasheet the typical value for VDD_SNVS should be 800 mV, so let's make sure that this is within the range of the regulator. Fixes: 8668d8b2e67f ("arm64: dts: Add the Kontron i.MX8M Mini SoMs and baseboards") Cc: stable@vger.kernel.org Signed-off-by: Frieder Schrempf Signed-off-by: Shawn Guo --- arch/arm64/boot/dts/freescale/imx8mm-kontron-n801x-som.dtsi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/boot/dts/freescale/imx8mm-kontron-n801x-som.dtsi b/arch/arm64/boot/dts/freescale/imx8mm-kontron-n801x-som.dtsi index 7cf60c17c29b..42bbbb3f532b 100644 --- a/arch/arm64/boot/dts/freescale/imx8mm-kontron-n801x-som.dtsi +++ b/arch/arm64/boot/dts/freescale/imx8mm-kontron-n801x-som.dtsi @@ -152,7 +152,7 @@ reg_vdd_snvs: LDO2 { regulator-name = "ldo2"; - regulator-min-microvolt = <850000>; + regulator-min-microvolt = <800000>; regulator-max-microvolt = <900000>; regulator-boot-on; regulator-always-on; -- cgit v1.2.3 From 6562d6e350284307e33ea10c7f46a6661ff22770 Mon Sep 17 00:00:00 2001 From: Frieder Schrempf Date: Fri, 15 Oct 2021 14:48:38 +0200 Subject: arm64: dts: imx8mm-kontron: Fix polarity of reg_rst_eth2 The regulator reg_rst_eth2 should keep the reset signal of the USB ethernet adapter deasserted anytime. Fix the polarity and mark it as always-on. Anyway, using the regulator is only a workaround for the missing support of specifying a reset GPIO for USB devices in a generic way. As we don't have a solution for this at the moment, at least fix the current workaround. 
Fixes: 8668d8b2e67f ("arm64: dts: Add the Kontron i.MX8M Mini SoMs and baseboards") Cc: stable@vger.kernel.org Signed-off-by: Frieder Schrempf Signed-off-by: Shawn Guo --- arch/arm64/boot/dts/freescale/imx8mm-kontron-n801x-s.dts | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/arch/arm64/boot/dts/freescale/imx8mm-kontron-n801x-s.dts b/arch/arm64/boot/dts/freescale/imx8mm-kontron-n801x-s.dts index d17abb515835..3912ac2446d7 100644 --- a/arch/arm64/boot/dts/freescale/imx8mm-kontron-n801x-s.dts +++ b/arch/arm64/boot/dts/freescale/imx8mm-kontron-n801x-s.dts @@ -70,7 +70,9 @@ regulator-name = "rst-usb-eth2"; pinctrl-names = "default"; pinctrl-0 = <&pinctrl_usb_eth2>; - gpio = <&gpio3 2 GPIO_ACTIVE_LOW>; + gpio = <&gpio3 2 GPIO_ACTIVE_HIGH>; + enable-active-high; + regulator-always-on; }; reg_vdd_5v: regulator-5v { -- cgit v1.2.3 From ca6f9d85d5944046a241b325700c1ca395651c28 Mon Sep 17 00:00:00 2001 From: Frieder Schrempf Date: Fri, 15 Oct 2021 14:48:39 +0200 Subject: arm64: dts: imx8mm-kontron: Fix CAN SPI clock frequency The MCP2515 can be used with an SPI clock of up to 10 MHz. Set the limit accordingly to prevent any performance issues caused by the really low clock speed of 100 kHz. This removes the arbitrarily low limit on the SPI frequency, that was caused by a typo in the original dts. Without this change, receiving CAN messages on the board beyond a certain bitrate will cause overrun errors (see 'ip -det -stat link show can0'). With this fix, receiving messages on the bus works without any overrun errors for bitrates up to 1 MBit. Fixes: 8668d8b2e67f ("arm64: dts: Add the Kontron i.MX8M Mini SoMs and baseboards") Cc: stable@vger.kernel.org Signed-off-by: Frieder Schrempf Signed-off-by: Shawn Guo --- arch/arm64/boot/dts/freescale/imx8mm-kontron-n801x-s.dts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/boot/dts/freescale/imx8mm-kontron-n801x-s.dts b/arch/arm64/boot/dts/freescale/imx8mm-kontron-n801x-s.dts index 3912ac2446d7..a52cdfb0920e 100644 --- a/arch/arm64/boot/dts/freescale/imx8mm-kontron-n801x-s.dts +++ b/arch/arm64/boot/dts/freescale/imx8mm-kontron-n801x-s.dts @@ -97,7 +97,7 @@ clocks = <&osc_can>; interrupt-parent = <&gpio4>; interrupts = <28 IRQ_TYPE_EDGE_FALLING>; - spi-max-frequency = <100000>; + spi-max-frequency = <10000000>; vdd-supply = <®_vdd_3v3>; xceiver-supply = <®_vdd_5v>; }; -- cgit v1.2.3 From 0b28c41e3c951ea3d4f012cfa9da5ebd6512cf6e Mon Sep 17 00:00:00 2001 From: Frieder Schrempf Date: Fri, 15 Oct 2021 14:48:40 +0200 Subject: arm64: dts: imx8mm-kontron: Fix connection type for VSC8531 RGMII PHY Previously we falsely relied on the PHY driver to unconditionally enable the internal RX delay. Since the following fix for the PHY driver this is not the case anymore: commit 7b005a1742be ("net: phy: mscc: configure both RX and TX internal delays for RGMII") In order to enable the delay we need to set the connection type to "rgmii-rxid". Without the RX delay the ethernet is not functional at all. 
Fixes: 8668d8b2e67f ("arm64: dts: Add the Kontron i.MX8M Mini SoMs and baseboards") Cc: stable@vger.kernel.org Signed-off-by: Frieder Schrempf Signed-off-by: Shawn Guo --- arch/arm64/boot/dts/freescale/imx8mm-kontron-n801x-s.dts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/boot/dts/freescale/imx8mm-kontron-n801x-s.dts b/arch/arm64/boot/dts/freescale/imx8mm-kontron-n801x-s.dts index a52cdfb0920e..e99e7644ff39 100644 --- a/arch/arm64/boot/dts/freescale/imx8mm-kontron-n801x-s.dts +++ b/arch/arm64/boot/dts/freescale/imx8mm-kontron-n801x-s.dts @@ -113,7 +113,7 @@ &fec1 { pinctrl-names = "default"; pinctrl-0 = <&pinctrl_enet>; - phy-connection-type = "rgmii"; + phy-connection-type = "rgmii-rxid"; phy-handle = <ðphy>; status = "okay"; -- cgit v1.2.3 From 675c496d0f92b481ebe4abf4fb06eadad7789de6 Mon Sep 17 00:00:00 2001 From: Martin Blumenstingl Date: Sat, 16 Oct 2021 12:50:21 +0200 Subject: clk: composite: Also consider .determine_rate for rate + mux composites Commit 69a00fb3d69706 ("clk: divider: Implement and wire up .determine_rate by default") switches clk_divider_ops to implement .determine_rate by default. This breaks composite clocks with multiple parents because clk-composite.c does not use the special handling for mux + divider combinations anymore (that was restricted to rate clocks which only implement .round_rate, but not .determine_rate). Alex reports: This breaks lot of clocks for Rockchip which intensively uses composites, i.e. those clocks will always stay at the initial parent, which in some cases is the XTAL clock and I strongly guess it is the same for other platforms, which use composite clocks having more than one parent (e.g. mediatek, ti ...) Example (RK3399) clk_sdio is set (initialized) with XTAL (24 MHz) as parent in u-boot. It will always stay at this parent, even if the mmc driver sets a rate of 200 MHz (fails, as the nature of things), which should switch it to any of its possible parent PLLs defined in mux_pll_src_cpll_gpll_npll_ppll_upll_24m_p (see clk-rk3399.c) - which never happens. Restore the original behavior by changing the priority of the conditions inside clk-composite.c. Now the special rate + mux case (with rate_ops having a .round_rate - which is still the case for the default clk_divider_ops) is preferred over rate_ops which have .determine_rate defined (and not further considering the mux). 
Fixes: 69a00fb3d69706 ("clk: divider: Implement and wire up .determine_rate by default") Reported-by: Alex Bee Signed-off-by: Martin Blumenstingl Link: https://lore.kernel.org/r/20211016105022.303413-2-martin.blumenstingl@googlemail.com Tested-by: Alex Bee Tested-by: Chen-Yu Tsai Signed-off-by: Stephen Boyd --- drivers/clk/clk-composite.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/clk/clk-composite.c b/drivers/clk/clk-composite.c index 0506046a5f4b..510a9965633b 100644 --- a/drivers/clk/clk-composite.c +++ b/drivers/clk/clk-composite.c @@ -58,11 +58,8 @@ static int clk_composite_determine_rate(struct clk_hw *hw, long rate; int i; - if (rate_hw && rate_ops && rate_ops->determine_rate) { - __clk_hw_set_clk(rate_hw, hw); - return rate_ops->determine_rate(rate_hw, req); - } else if (rate_hw && rate_ops && rate_ops->round_rate && - mux_hw && mux_ops && mux_ops->set_parent) { + if (rate_hw && rate_ops && rate_ops->round_rate && + mux_hw && mux_ops && mux_ops->set_parent) { req->best_parent_hw = NULL; if (clk_hw_get_flags(hw) & CLK_SET_RATE_NO_REPARENT) { @@ -107,6 +104,9 @@ static int clk_composite_determine_rate(struct clk_hw *hw, req->rate = best_rate; return 0; + } else if (rate_hw && rate_ops && rate_ops->determine_rate) { + __clk_hw_set_clk(rate_hw, hw); + return rate_ops->determine_rate(rate_hw, req); } else if (mux_hw && mux_ops && mux_ops->determine_rate) { __clk_hw_set_clk(mux_hw, hw); return mux_ops->determine_rate(mux_hw, req); -- cgit v1.2.3 From 59be177a909ac320e5f4b2a461ac09e20f35b2d8 Mon Sep 17 00:00:00 2001 From: José Roberto de Souza Date: Tue, 12 Oct 2021 18:00:46 -0700 Subject: drm/i915: Remove memory frequency calculation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The memory frequency calculated here is only used to check whether it is zero, which is not useful as it will never actually be zero. The calculation is also wrong: we should be checking another bit to select the appropriate frequency multiplier, while this code is stuck with a fixed multiplier. So drop it here as a whole. 
v2: - Also remove memory frequency calculation for gen9 LP platforms Cc: Yakui Zhao Cc: Matt Roper Fixes: 5d0c938ec9cc ("drm/i915/gen11+: Only load DRAM information from pcode") Signed-off-by: José Roberto de Souza Reviewed-by: Matt Roper Link: https://patchwork.freedesktop.org/patch/msgid/20211013010046.91858-1-jose.souza@intel.com (cherry picked from commit 83f52364b15265aec47d07e02b0fbf4093ab8554) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/i915_reg.h | 8 -------- drivers/gpu/drm/i915/intel_dram.c | 30 ++---------------------------- 2 files changed, 2 insertions(+), 36 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index 4037030f0984..9023d4ecf3b3 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -11048,12 +11048,6 @@ enum skl_power_gate { #define DC_STATE_DEBUG_MASK_CORES (1 << 0) #define DC_STATE_DEBUG_MASK_MEMORY_UP (1 << 1) -#define BXT_P_CR_MC_BIOS_REQ_0_0_0 _MMIO(MCHBAR_MIRROR_BASE_SNB + 0x7114) -#define BXT_REQ_DATA_MASK 0x3F -#define BXT_DRAM_CHANNEL_ACTIVE_SHIFT 12 -#define BXT_DRAM_CHANNEL_ACTIVE_MASK (0xF << 12) -#define BXT_MEMORY_FREQ_MULTIPLIER_HZ 133333333 - #define BXT_D_CR_DRP0_DUNIT8 0x1000 #define BXT_D_CR_DRP0_DUNIT9 0x1200 #define BXT_D_CR_DRP0_DUNIT_START 8 @@ -11084,9 +11078,7 @@ enum skl_power_gate { #define BXT_DRAM_TYPE_LPDDR4 (0x2 << 22) #define BXT_DRAM_TYPE_DDR4 (0x4 << 22) -#define SKL_MEMORY_FREQ_MULTIPLIER_HZ 266666666 #define SKL_MC_BIOS_DATA_0_0_0_MCHBAR_PCU _MMIO(MCHBAR_MIRROR_BASE_SNB + 0x5E04) -#define SKL_REQ_DATA_MASK (0xF << 0) #define DG1_GEAR_TYPE REG_BIT(16) #define SKL_MAD_INTER_CHANNEL_0_0_0_MCHBAR_MCMAIN _MMIO(MCHBAR_MIRROR_BASE_SNB + 0x5000) diff --git a/drivers/gpu/drm/i915/intel_dram.c b/drivers/gpu/drm/i915/intel_dram.c index 91866520c173..7acce64b0941 100644 --- a/drivers/gpu/drm/i915/intel_dram.c +++ b/drivers/gpu/drm/i915/intel_dram.c @@ -244,7 +244,6 @@ static int skl_get_dram_info(struct drm_i915_private *i915) { struct dram_info *dram_info = &i915->dram_info; - u32 mem_freq_khz, val; int ret; dram_info->type = skl_get_dram_type(i915); @@ -255,17 +254,6 @@ skl_get_dram_info(struct drm_i915_private *i915) if (ret) return ret; - val = intel_uncore_read(&i915->uncore, - SKL_MC_BIOS_DATA_0_0_0_MCHBAR_PCU); - mem_freq_khz = DIV_ROUND_UP((val & SKL_REQ_DATA_MASK) * - SKL_MEMORY_FREQ_MULTIPLIER_HZ, 1000); - - if (dram_info->num_channels * mem_freq_khz == 0) { - drm_info(&i915->drm, - "Couldn't get system memory bandwidth\n"); - return -EINVAL; - } - return 0; } @@ -350,24 +338,10 @@ static void bxt_get_dimm_info(struct dram_dimm_info *dimm, u32 val) static int bxt_get_dram_info(struct drm_i915_private *i915) { struct dram_info *dram_info = &i915->dram_info; - u32 dram_channels; - u32 mem_freq_khz, val; - u8 num_active_channels, valid_ranks = 0; + u32 val; + u8 valid_ranks = 0; int i; - val = intel_uncore_read(&i915->uncore, BXT_P_CR_MC_BIOS_REQ_0_0_0); - mem_freq_khz = DIV_ROUND_UP((val & BXT_REQ_DATA_MASK) * - BXT_MEMORY_FREQ_MULTIPLIER_HZ, 1000); - - dram_channels = val & BXT_DRAM_CHANNEL_ACTIVE_MASK; - num_active_channels = hweight32(dram_channels); - - if (mem_freq_khz * num_active_channels == 0) { - drm_info(&i915->drm, - "Couldn't get system memory bandwidth\n"); - return -EINVAL; - } - /* * Now read each DUNIT8/9/10/11 to check the rank of each dimms. 
*/ -- cgit v1.2.3 From 9af372dc70e9fdcbb70939dac75365e7b88580b4 Mon Sep 17 00:00:00 2001 From: Haibo Chen Date: Fri, 15 Oct 2021 10:00:36 +0800 Subject: mmc: sdhci-esdhc-imx: clear the buffer_read_ready to reset standard tuning circuit To reset the standard tuning circuit completely, after clearing ESDHC_MIX_CTRL_EXE_TUNE we also need to clear the buffer_read_ready bit. This operation finally clears the USDHC IP internal logic flag execute_tuning_with_clr_buf and makes sure the following normal data transfers are not impacted by the standard tuning logic used before. This issue was found during a quick SD card insert/remove stress test. During the standard tuning procedure, if the SD card is removed, the USDHC standard tuning logic can't clear the internal flag execute_tuning_with_clr_buf. The next time an SD card is inserted, no data-related command gets any data-related interrupt (data transfer complete, data timeout, data CRC, data end bit), which always triggers a software timeout. Even resetting the USDHC through the bits in register SYS_CTRL (0x2C: bit 28 reset tuning, bit 26 reset data, bit 25 reset command, bit 24 reset all) can't recover this. From the user's point of view, the USDHC is stuck and the SD card can't be recognized any more. Fixes: d9370424c948 ("mmc: sdhci-esdhc-imx: reset tuning circuit when power on mmc card") Signed-off-by: Haibo Chen Acked-by: Adrian Hunter Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/1634263236-6111-1-git-send-email-haibo.chen@nxp.com Signed-off-by: Ulf Hansson --- drivers/mmc/host/sdhci-esdhc-imx.c | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/drivers/mmc/host/sdhci-esdhc-imx.c b/drivers/mmc/host/sdhci-esdhc-imx.c index f18d169bc8ff..e658f0174242 100644 --- a/drivers/mmc/host/sdhci-esdhc-imx.c +++ b/drivers/mmc/host/sdhci-esdhc-imx.c @@ -1187,6 +1187,7 @@ static void esdhc_reset_tuning(struct sdhci_host *host) struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host); struct pltfm_imx_data *imx_data = sdhci_pltfm_priv(pltfm_host); u32 ctrl; + int ret; /* Reset the tuning circuit */ if (esdhc_is_usdhc(imx_data)) { @@ -1199,7 +1200,22 @@ static void esdhc_reset_tuning(struct sdhci_host *host) } else if (imx_data->socdata->flags & ESDHC_FLAG_STD_TUNING) { ctrl = readl(host->ioaddr + SDHCI_AUTO_CMD_STATUS); ctrl &= ~ESDHC_MIX_CTRL_SMPCLK_SEL; + ctrl &= ~ESDHC_MIX_CTRL_EXE_TUNE; writel(ctrl, host->ioaddr + SDHCI_AUTO_CMD_STATUS); + /* Make sure ESDHC_MIX_CTRL_EXE_TUNE cleared */ + ret = readl_poll_timeout(host->ioaddr + SDHCI_AUTO_CMD_STATUS, + ctrl, !(ctrl & ESDHC_MIX_CTRL_EXE_TUNE), 1, 50); + if (ret == -ETIMEDOUT) + dev_warn(mmc_dev(host->mmc), + "Warning! clear execute tuning bit failed\n"); + /* + * SDHCI_INT_DATA_AVAIL is W1C bit, set this bit will clear the + * usdhc IP internal logic flag execute_tuning_with_clr_buf, which + * will finally make sure the normal data transfer logic correct. + */ + ctrl = readl(host->ioaddr + SDHCI_INT_STATUS); + ctrl |= SDHCI_INT_DATA_AVAIL; + writel(ctrl, host->ioaddr + SDHCI_INT_STATUS); } } } -- cgit v1.2.3 From 162079f2dccd02cb4b6654defd32ca387dd6d4d4 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Sun, 17 Oct 2021 10:59:49 -0700 Subject: mmc: winbond: don't build on M68K The Winbond MMC driver fails to build on ARCH=m68k, so prevent that build configuration. 
Silences these build errors: ../drivers/mmc/host/wbsd.c: In function 'wbsd_request_end': ../drivers/mmc/host/wbsd.c:212:28: error: implicit declaration of function 'claim_dma_lock' [-Werror=implicit-function-declaration] 212 | dmaflags = claim_dma_lock(); ../drivers/mmc/host/wbsd.c:215:17: error: implicit declaration of function 'release_dma_lock'; did you mean 'release_task'? [-Werror=implicit-function-declaration] 215 | release_dma_lock(dmaflags); Signed-off-by: Randy Dunlap Cc: Pierre Ossman Cc: Geert Uytterhoeven Cc: Arnd Bergmann Link: https://lore.kernel.org/r/20211017175949.23838-1-rdunlap@infradead.org Signed-off-by: Ulf Hansson --- drivers/mmc/host/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/mmc/host/Kconfig b/drivers/mmc/host/Kconfig index 95b3511b0560..ccc148cdb5ee 100644 --- a/drivers/mmc/host/Kconfig +++ b/drivers/mmc/host/Kconfig @@ -506,7 +506,7 @@ config MMC_OMAP_HS config MMC_WBSD tristate "Winbond W83L51xD SD/MMC Card Interface support" - depends on ISA_DMA_API + depends on ISA_DMA_API && !M68K help This selects the Winbond(R) W83L51xD Secure digital and Multimedia card Interface. -- cgit v1.2.3 From 60fab1076636493cfa2686e712446595fe43bc16 Mon Sep 17 00:00:00 2001 From: Prabhakar Kushwaha Date: Tue, 19 Oct 2021 11:22:12 +0300 Subject: rdma/qedr: Fix crash due to redundant release of device's qp memory The device's QP memory should only be allocated and released by the IB layer. This patch removes the redundant release of the device's qp memory and uses completion APIs to make sure that .destroy_qp() only returns when the qp reference count becomes 0. Fixes: 514aee660df4 ("RDMA: Globally allocate and release QP memory") Link: https://lore.kernel.org/r/20211019082212.7052-1-pkushwaha@marvell.com Acked-by: Michal Kalderon Signed-off-by: Ariel Elior Signed-off-by: Shai Malin Signed-off-by: Alok Prasad Signed-off-by: Prabhakar Kushwaha Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/qedr/qedr.h | 1 + drivers/infiniband/hw/qedr/qedr_iw_cm.c | 2 +- drivers/infiniband/hw/qedr/verbs.c | 5 ++++- 3 files changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/hw/qedr/qedr.h b/drivers/infiniband/hw/qedr/qedr.h index 3cb4febaad0f..8def88cfa300 100644 --- a/drivers/infiniband/hw/qedr/qedr.h +++ b/drivers/infiniband/hw/qedr/qedr.h @@ -455,6 +455,7 @@ struct qedr_qp { /* synchronization objects used with iwarp ep */ struct kref refcnt; struct completion iwarp_cm_comp; + struct completion qp_rel_comp; unsigned long iwarp_cm_flags; /* enum iwarp_cm_flags */ }; diff --git a/drivers/infiniband/hw/qedr/qedr_iw_cm.c b/drivers/infiniband/hw/qedr/qedr_iw_cm.c index 1715fbe0719d..a51fc6854984 100644 --- a/drivers/infiniband/hw/qedr/qedr_iw_cm.c +++ b/drivers/infiniband/hw/qedr/qedr_iw_cm.c @@ -83,7 +83,7 @@ static void qedr_iw_free_qp(struct kref *ref) { struct qedr_qp *qp = container_of(ref, struct qedr_qp, refcnt); - kfree(qp); + complete(&qp->qp_rel_comp); } static void diff --git a/drivers/infiniband/hw/qedr/verbs.c b/drivers/infiniband/hw/qedr/verbs.c index 3fbf172dbbef..dcb3653db72d 100644 --- a/drivers/infiniband/hw/qedr/verbs.c +++ b/drivers/infiniband/hw/qedr/verbs.c @@ -1357,6 +1357,7 @@ static void qedr_set_common_qp_params(struct qedr_dev *dev, if (rdma_protocol_iwarp(&dev->ibdev, 1)) { kref_init(&qp->refcnt); init_completion(&qp->iwarp_cm_comp); + init_completion(&qp->qp_rel_comp); } qp->pd = pd; @@ -2857,8 +2858,10 @@ int qedr_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata) qedr_free_qp_resources(dev, qp, udata); - if 
(rdma_protocol_iwarp(&dev->ibdev, 1)) + if (rdma_protocol_iwarp(&dev->ibdev, 1)) { qedr_iw_qp_rem_ref(&qp->ibqp); + wait_for_completion(&qp->qp_rel_comp); + } return 0; } -- cgit v1.2.3 From 5508546631a0f555d7088203dec2614e41b5106e Mon Sep 17 00:00:00 2001 From: Aharon Landau Date: Tue, 19 Oct 2021 14:30:10 +0300 Subject: RDMA/mlx5: Initialize the ODP xarray when creating an ODP MR Normally the zero fill would hide the missing initialization, but an errant set to desc_size in reg_create() causes a crash: BUG: unable to handle page fault for address: 0000000800000000 PGD 0 P4D 0 Oops: 0000 [#1] SMP PTI CPU: 5 PID: 890 Comm: ib_write_bw Not tainted 5.15.0-rc4+ #47 Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.13.0-0-gf21b5a4aeb02-prebuilt.qemu.org 04/01/2014 RIP: 0010:mlx5_ib_dereg_mr+0x14/0x3b0 [mlx5_ib] Code: 48 63 cd 4c 89 f7 48 89 0c 24 e8 37 30 03 e1 48 8b 0c 24 eb a0 90 0f 1f 44 00 00 41 56 41 55 41 54 55 53 48 89 fb 48 83 ec 30 <48> 8b 2f 65 48 8b 04 25 28 00 00 00 48 89 44 24 28 31 c0 8b 87 c8 RSP: 0018:ffff88811afa3a60 EFLAGS: 00010286 RAX: 000000000000001c RBX: 0000000800000000 RCX: 0000000000000000 RDX: 0000000000000000 RSI: 0000000000000000 RDI: 0000000800000000 RBP: 0000000800000000 R08: 0000000000000000 R09: c0000000fffff7ff R10: ffff88811afa38f8 R11: ffff88811afa38f0 R12: ffffffffa02c7ac0 R13: 0000000000000000 R14: ffff88811afa3cd8 R15: ffff88810772fa00 FS: 00007f47b9080740(0000) GS:ffff88852cd40000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000800000000 CR3: 000000010761e003 CR4: 0000000000370ea0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: mlx5_ib_free_odp_mr+0x95/0xc0 [mlx5_ib] mlx5_ib_dereg_mr+0x128/0x3b0 [mlx5_ib] ib_dereg_mr_user+0x45/0xb0 [ib_core] ? xas_load+0x8/0x80 destroy_hw_idr_uobject+0x1a/0x50 [ib_uverbs] uverbs_destroy_uobject+0x2f/0x150 [ib_uverbs] uobj_destroy+0x3c/0x70 [ib_uverbs] ib_uverbs_cmd_verbs+0x467/0xb00 [ib_uverbs] ? uverbs_finalize_object+0x60/0x60 [ib_uverbs] ? ttwu_queue_wakelist+0xa9/0xe0 ? pty_write+0x85/0x90 ? file_tty_write.isra.33+0x214/0x330 ? process_echoes+0x60/0x60 ib_uverbs_ioctl+0xa7/0x110 [ib_uverbs] __x64_sys_ioctl+0x10d/0x8e0 ? vfs_write+0x17f/0x260 do_syscall_64+0x3c/0x80 entry_SYSCALL_64_after_hwframe+0x44/0xae Add the missing xarray initialization and remove the desc_size set. 
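For reference (not part of the original patch): an XArray must be initialized with xa_init() before the first xa_load()/xa_store() on it, which is exactly what was missing for the implicit-ODP children of the MR. A minimal sketch of the idiom, with the structure reduced to the one relevant field:

	#include <linux/xarray.h>

	struct example_mr {
		struct xarray implicit_children; /* children of an implicit ODP MR */
	};

	static void example_mr_init(struct example_mr *mr)
	{
		/* Empty XArray, ready for xa_store()/xa_load()/xa_for_each(). */
		xa_init(&mr->implicit_children);
	}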
Fixes: a639e66703ee ("RDMA/mlx5: Zero out ODP related items in the mlx5_ib_mr") Link: https://lore.kernel.org/r/a4846a11c9de834663e521770da895007f9f0d30.1634642730.git.leonro@nvidia.com Signed-off-by: Aharon Landau Reviewed-by: Maor Gottlieb Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/mlx5/mr.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c index 3be36ebbf67a..22e2f4d79743 100644 --- a/drivers/infiniband/hw/mlx5/mr.c +++ b/drivers/infiniband/hw/mlx5/mr.c @@ -1339,7 +1339,6 @@ static struct mlx5_ib_mr *reg_create(struct ib_pd *pd, struct ib_umem *umem, goto err_2; } mr->mmkey.type = MLX5_MKEY_MR; - mr->desc_size = sizeof(struct mlx5_mtt); mr->umem = umem; set_mr_fields(dev, mr, umem->length, access_flags); kvfree(in); @@ -1533,6 +1532,7 @@ static struct ib_mr *create_user_odp_mr(struct ib_pd *pd, u64 start, u64 length, ib_umem_release(&odp->umem); return ERR_CAST(mr); } + xa_init(&mr->implicit_children); odp->private = mr; err = mlx5r_store_odp_mkey(dev, &mr->mmkey); -- cgit v1.2.3 From cc07b73ef11d11d4359fb104d0199b22451dd3d8 Mon Sep 17 00:00:00 2001 From: Mustafa Ismail Date: Tue, 19 Oct 2021 10:16:53 -0500 Subject: RDMA/irdma: Set VLAN in UD work completion correctly Currently, the VLAN is reported in the UD work completion even when the VLAN id is zero, i.e. the no-VLAN case. Report the VLAN in the UD work completion only when the VLAN id is non-zero. Fixes: b48c24c2d710 ("RDMA/irdma: Implement device supported verb APIs") Link: https://lore.kernel.org/r/20211019151654.1943-1-shiraz.saleem@intel.com Signed-off-by: Mustafa Ismail Signed-off-by: Shiraz Saleem Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/irdma/verbs.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/hw/irdma/verbs.c b/drivers/infiniband/hw/irdma/verbs.c index 7110ebf834f9..102dc9342f2a 100644 --- a/drivers/infiniband/hw/irdma/verbs.c +++ b/drivers/infiniband/hw/irdma/verbs.c @@ -3399,9 +3399,13 @@ static void irdma_process_cqe(struct ib_wc *entry, } if (cq_poll_info->ud_vlan_valid) { - entry->vlan_id = cq_poll_info->ud_vlan & VLAN_VID_MASK; - entry->wc_flags |= IB_WC_WITH_VLAN; + u16 vlan = cq_poll_info->ud_vlan & VLAN_VID_MASK; + entry->sl = cq_poll_info->ud_vlan >> VLAN_PRIO_SHIFT; + if (vlan) { + entry->vlan_id = vlan; + entry->wc_flags |= IB_WC_WITH_VLAN; + } } else { entry->sl = 0; } -- cgit v1.2.3 From 2dace185caa580720c7cd67fec9efc5ee26108ac Mon Sep 17 00:00:00 2001 From: Mustafa Ismail Date: Tue, 19 Oct 2021 10:16:54 -0500 Subject: RDMA/irdma: Do not hold qos mutex twice on QP resume When irdma_ws_add fails, irdma_ws_remove is used to clean up the leaf node. This led to holding the qos mutex twice in the QP resume path. Fix this by avoiding the call to irdma_ws_remove and unwinding the error in irdma_ws_add. This skips the call to the irdma_tc_in_use() function, which is not needed in the error unwind cases. 
Fixes: 3ae331c75128 ("RDMA/irdma: Add QoS definitions") Link: https://lore.kernel.org/r/20211019151654.1943-2-shiraz.saleem@intel.com Signed-off-by: Mustafa Ismail Signed-off-by: Shiraz Saleem Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/irdma/ws.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/drivers/infiniband/hw/irdma/ws.c b/drivers/infiniband/hw/irdma/ws.c index b68c575eb78e..b0d6ee0739f5 100644 --- a/drivers/infiniband/hw/irdma/ws.c +++ b/drivers/infiniband/hw/irdma/ws.c @@ -330,8 +330,10 @@ enum irdma_status_code irdma_ws_add(struct irdma_sc_vsi *vsi, u8 user_pri) tc_node->enable = true; ret = irdma_ws_cqp_cmd(vsi, tc_node, IRDMA_OP_WS_MODIFY_NODE); - if (ret) + if (ret) { + vsi->unregister_qset(vsi, tc_node); goto reg_err; + } } ibdev_dbg(to_ibdev(vsi->dev), "WS: Using node %d which represents VSI %d TC %d\n", @@ -350,6 +352,10 @@ enum irdma_status_code irdma_ws_add(struct irdma_sc_vsi *vsi, u8 user_pri) } goto exit; +reg_err: + irdma_ws_cqp_cmd(vsi, tc_node, IRDMA_OP_WS_DELETE_NODE); + list_del(&tc_node->siblings); + irdma_free_node(vsi, tc_node); leaf_add_err: if (list_empty(&vsi_node->child_list_head)) { if (irdma_ws_cqp_cmd(vsi, vsi_node, IRDMA_OP_WS_DELETE_NODE)) @@ -369,11 +375,6 @@ vsi_add_err: exit: mutex_unlock(&vsi->dev->ws_mutex); return ret; - -reg_err: - mutex_unlock(&vsi->dev->ws_mutex); - irdma_ws_remove(vsi, user_pri); - return ret; } /** -- cgit v1.2.3 From f09f6dfef8ce7b70a240cf83811e2b1909c3e47b Mon Sep 17 00:00:00 2001 From: Russ Weight Date: Mon, 18 Oct 2021 17:24:01 -0700 Subject: spi: altera: Change to dynamic allocation of spi id The spi-altera driver has two flavors: platform and dfl. I'm seeing a case where I have both device types in the same machine, and they are conflicting on the SPI ID: ... kernel: couldn't get idr ... kernel: WARNING: CPU: 28 PID: 912 at drivers/spi/spi.c:2920 spi_register_controller.cold+0x84/0xc0a Both the platform and dfl drivers use the parent's driver ID as the SPI ID. In the error case, the parent devices are dfl_dev.4 and subdev_spi_altera.4.auto. When the second spi-master is created, the failure occurs because the SPI ID of 4 has already been allocated. Change the ID allocation to dynamic (by initializing bus_num to -1) to avoid duplicate SPI IDs. Signed-off-by: Russ Weight Link: https://lore.kernel.org/r/20211019002401.24041-1-russell.h.weight@intel.com Signed-off-by: Mark Brown --- drivers/spi/spi-altera-dfl.c | 2 +- drivers/spi/spi-altera-platform.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/spi/spi-altera-dfl.c b/drivers/spi/spi-altera-dfl.c index 44fc9ee13fc7..ca40923258af 100644 --- a/drivers/spi/spi-altera-dfl.c +++ b/drivers/spi/spi-altera-dfl.c @@ -134,7 +134,7 @@ static int dfl_spi_altera_probe(struct dfl_device *dfl_dev) if (!master) return -ENOMEM; - master->bus_num = dfl_dev->id; + master->bus_num = -1; hw = spi_master_get_devdata(master); diff --git a/drivers/spi/spi-altera-platform.c b/drivers/spi/spi-altera-platform.c index f7a7c14e3679..65147aae82a1 100644 --- a/drivers/spi/spi-altera-platform.c +++ b/drivers/spi/spi-altera-platform.c @@ -48,7 +48,7 @@ static int altera_spi_probe(struct platform_device *pdev) return err; /* setup the master state. 
*/ - master->bus_num = pdev->id; + master->bus_num = -1; if (pdata) { if (pdata->num_chipselect > ALTERA_SPI_MAX_CS) { -- cgit v1.2.3 From 9b57e9d5010bbed7c0d9d445085840f7025e6f9a Mon Sep 17 00:00:00 2001 From: Halil Pasic Date: Tue, 19 Oct 2021 19:53:59 +0200 Subject: KVM: s390: clear kicked_mask before sleeping again The idea behind kicked mask is that we should not re-kick a vcpu that is already in the "kick" process, i.e. that was kicked and is is about to be dispatched if certain conditions are met. The problem with the current implementation is, that it assumes the kicked vcpu is going to enter SIE shortly. But under certain circumstances, the vcpu we just kicked will be deemed non-runnable and will remain in wait state. This can happen, if the interrupt(s) this vcpu got kicked to deal with got already cleared (because the interrupts got delivered to another vcpu). In this case kvm_arch_vcpu_runnable() would return false, and the vcpu would remain in kvm_vcpu_block(), but this time with its kicked_mask bit set. So next time around we wouldn't kick the vcpu form __airqs_kick_single_vcpu(), but would assume that we just kicked it. Let us make sure the kicked_mask is cleared before we give up on re-dispatching the vcpu. Fixes: 9f30f6216378 ("KVM: s390: add gib_alert_irq_handler()") Reported-by: Matthew Rosato Signed-off-by: Halil Pasic Reviewed-by: Christian Borntraeger Reviewed-by: Michael Mueller Reviewed-by: Claudio Imbrenda Link: https://lore.kernel.org/r/20211019175401.3757927-2-pasic@linux.ibm.com Signed-off-by: Christian Borntraeger --- arch/s390/kvm/kvm-s390.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 6a6dd5e1daf6..1c97493d21e1 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -3363,6 +3363,7 @@ out_free_sie_block: int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu) { + clear_bit(vcpu->vcpu_idx, vcpu->kvm->arch.gisa_int.kicked_mask); return kvm_s390_vcpu_has_irq(vcpu, 0); } -- cgit v1.2.3 From 0e9ff65f455dfd0a8aea5e7843678ab6fe097e21 Mon Sep 17 00:00:00 2001 From: Halil Pasic Date: Tue, 19 Oct 2021 19:54:00 +0200 Subject: KVM: s390: preserve deliverable_mask in __airqs_kick_single_vcpu Changing the deliverable mask in __airqs_kick_single_vcpu() is a bug. If one idle vcpu can't take the interrupts we want to deliver, we should look for another vcpu that can, instead of saying that we don't want to deliver these interrupts by clearing the bits from the deliverable_mask. 
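Reduced to its core, the fix replaces a destructive mask update with a read-only test, so that later candidates in the loop still see every deliverable ISC. A simplified sketch with placeholder helpers, not the actual s390 routine:

    /* Simplified illustration; not the real __airqs_kick_single_vcpu(). */
    static u8 candidate_isc_mask(int i);    /* placeholder accessor */
    static void kick_candidate(int i);      /* placeholder kick */

    static void pick_target(u8 deliverable_mask, int nr_candidates)
    {
            int i;

            for (i = 0; i < nr_candidates; i++) {
                    u8 accepts = candidate_isc_mask(i);

                    /*
                     * Buggy form: deliverable_mask &= accepts; this clears
                     * bits that a later candidate might still accept.
                     */
                    if (deliverable_mask & accepts) {  /* non-destructive */
                            kick_candidate(i);
                            return;
                    }
            }
    }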
Fixes: 9f30f6216378 ("KVM: s390: add gib_alert_irq_handler()") Signed-off-by: Halil Pasic Reviewed-by: Christian Borntraeger Reviewed-by: Michael Mueller Reviewed-by: Claudio Imbrenda Link: https://lore.kernel.org/r/20211019175401.3757927-3-pasic@linux.ibm.com Signed-off-by: Christian Borntraeger --- arch/s390/kvm/interrupt.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c index 10722455fd02..2245f4b8d362 100644 --- a/arch/s390/kvm/interrupt.c +++ b/arch/s390/kvm/interrupt.c @@ -3053,13 +3053,14 @@ static void __airqs_kick_single_vcpu(struct kvm *kvm, u8 deliverable_mask) int vcpu_idx, online_vcpus = atomic_read(&kvm->online_vcpus); struct kvm_s390_gisa_interrupt *gi = &kvm->arch.gisa_int; struct kvm_vcpu *vcpu; + u8 vcpu_isc_mask; for_each_set_bit(vcpu_idx, kvm->arch.idle_mask, online_vcpus) { vcpu = kvm_get_vcpu(kvm, vcpu_idx); if (psw_ioint_disabled(vcpu)) continue; - deliverable_mask &= (u8)(vcpu->arch.sie_block->gcr[6] >> 24); - if (deliverable_mask) { + vcpu_isc_mask = (u8)(vcpu->arch.sie_block->gcr[6] >> 24); + if (deliverable_mask & vcpu_isc_mask) { /* lately kicked but not yet running */ if (test_and_set_bit(vcpu_idx, gi->kicked_mask)) return; -- cgit v1.2.3 From 5afa7898ab7a0ec9c28556a91df714bf3c2f725e Mon Sep 17 00:00:00 2001 From: Thelford Williams Date: Wed, 13 Oct 2021 16:04:13 -0400 Subject: drm/amdgpu: fix out of bounds write Size can be any value and is user controlled resulting in overwriting the 40 byte array wr_buf with an arbitrary length of data from buf. Signed-off-by: Thelford Williams Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c index 87daa78a32b8..17f2756a64dc 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c @@ -263,7 +263,7 @@ static ssize_t dp_link_settings_write(struct file *f, const char __user *buf, if (!wr_buf) return -ENOSPC; - if (parse_write_buffer_into_params(wr_buf, size, + if (parse_write_buffer_into_params(wr_buf, wr_buf_size, (long *)param, buf, max_param_num, ¶m_nums)) { -- cgit v1.2.3 From c21b105380cf86e829c68586ca1315cfc253ad8c Mon Sep 17 00:00:00 2001 From: Nikola Cornij Date: Tue, 28 Sep 2021 22:43:52 -0400 Subject: drm/amd/display: Limit display scaling to up to true 4k for DCN 3.1 [why] The requirement is that image width up to 4096 shall be supported Reviewed-by: Aric Cyr Acked-by: Agustin Gutierrez Sanchez Signed-off-by: Nikola Cornij Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.c b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.c index 0006bbac466c..1e3d50da1f93 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.c @@ -928,7 +928,7 @@ static const struct dc_debug_options debug_defaults_drv = { .disable_dcc = DCC_ENABLE, .vsr_support = true, .performance_trace = false, - .max_downscale_src_width = 3840,/*upto 4K*/ + .max_downscale_src_width = 4096,/*upto true 4K*/ .disable_pplib_wm_range = false, .scl_reset_length10 = true, .sanity_checks = 
false, -- cgit v1.2.3 From c938aed88f8259dc913b717a32319101c66e87a9 Mon Sep 17 00:00:00 2001 From: Nicholas Kazlauskas Date: Wed, 29 Sep 2021 11:37:33 -0400 Subject: drm/amd/display: Fix prefetch bandwidth calculation for DCN3.1 [Why] Prefetch BW calculated is lower than the DML reference because of a porting error that's excluding cursor and row bandwidth from the pixel data bandwidth. [How] Change the dml_max4 to dml_max3 and include cursor and row bandwidth in the same calculation as the rest of the pixel data during vactive. Reviewed-by: Dmytro Laktyushkin Acked-by: Agustin Gutierrez Sanchez Signed-off-by: Nicholas Kazlauskas Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/display/dc/dml/dcn31/display_mode_vba_31.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn31/display_mode_vba_31.c b/drivers/gpu/drm/amd/display/dc/dml/dcn31/display_mode_vba_31.c index ce55c9caf9a2..d58925cff420 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn31/display_mode_vba_31.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn31/display_mode_vba_31.c @@ -5398,9 +5398,9 @@ void dml31_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l v->MaximumReadBandwidthWithPrefetch = v->MaximumReadBandwidthWithPrefetch - + dml_max4( - v->VActivePixelBandwidth[i][j][k], - v->VActiveCursorBandwidth[i][j][k] + + dml_max3( + v->VActivePixelBandwidth[i][j][k] + + v->VActiveCursorBandwidth[i][j][k] + v->NoOfDPP[i][j][k] * (v->meta_row_bandwidth[i][j][k] + v->dpte_row_bandwidth[i][j][k]), -- cgit v1.2.3 From 672437486ee9da3ed0e774937e6d0dd570921b39 Mon Sep 17 00:00:00 2001 From: Nicholas Kazlauskas Date: Wed, 29 Sep 2021 16:22:53 -0400 Subject: drm/amd/display: Require immediate flip support for DCN3.1 planes [Why] Immediate flip can be enabled dynamically and has higher BW requirements when validating which voltage mode to use. If we validate when it's not set then potentially DCFCLK will be too low and we will underflow. [How] DM always requires support so always require it as part of DML input parameters. This can't be enabled unconditionally on older ASIC because it blocks some expected modes so only target DCN3.1 for now. Reviewed-by: Dmytro Laktyushkin Acked-by: Agustin Gutierrez Sanchez Signed-off-by: Nicholas Kazlauskas Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.c b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.c index 1e3d50da1f93..df6abe2b1f9e 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.c @@ -1590,6 +1590,13 @@ static int dcn31_populate_dml_pipes_from_context( pipe = &res_ctx->pipe_ctx[i]; timing = &pipe->stream->timing; + /* + * Immediate flip can be set dynamically after enabling the plane. + * We need to require support for immediate flip or underflow can be + * intermittently experienced depending on peak b/w requirements. 
+ */ + pipes[pipe_cnt].pipe.src.immediate_flip = true; + pipes[pipe_cnt].pipe.src.unbounded_req_mode = false; pipes[pipe_cnt].pipe.src.gpuvm = true; pipes[pipe_cnt].pipe.src.dcc_fraction_of_zs_req_luma = 0; -- cgit v1.2.3 From 4835ea6c173a8d8dfbfdbb21c4cd987d12681610 Mon Sep 17 00:00:00 2001 From: Eric Yang Date: Thu, 30 Sep 2021 13:46:45 -0400 Subject: drm/amd/display: increase Z9 latency to workaround underflow in Z9 [Why] Z9 latency is higher than when we originally tuned the watermark parameters, causing underflow. Increasing the value until the latency issues is resolved. Reviewed-by: Nicholas Kazlauskas Acked-by: Agustin Gutierrez Sanchez Signed-off-by: Eric Yang Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.c b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.c index df6abe2b1f9e..79e92ecca96c 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.c @@ -217,8 +217,8 @@ struct _vcs_dpi_soc_bounding_box_st dcn3_1_soc = { .num_states = 5, .sr_exit_time_us = 9.0, .sr_enter_plus_exit_time_us = 11.0, - .sr_exit_z8_time_us = 402.0, - .sr_enter_plus_exit_z8_time_us = 520.0, + .sr_exit_z8_time_us = 442.0, + .sr_enter_plus_exit_z8_time_us = 560.0, .writeback_latency_us = 12.0, .dram_channel_width_bytes = 4, .round_trip_ping_latency_dcfclk_cycles = 106, -- cgit v1.2.3 From dd8cb18906d97b2916fde42d32d915ae363c7e55 Mon Sep 17 00:00:00 2001 From: Nikola Cornij Date: Fri, 1 Oct 2021 13:26:05 -0400 Subject: drm/amd/display: Increase watermark latencies for DCN3.1 [why] The original latencies were causing underflow in some modes [how] Replace with the up-to-date watermark values based on new measurments Reviewed-by: Ahmad Othman Acked-by: Agustin Gutierrez Sanchez Signed-off-by: Nikola Cornij Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- .../gpu/drm/amd/display/dc/clk_mgr/dcn31/dcn31_clk_mgr.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn31/dcn31_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn31/dcn31_clk_mgr.c index 4a4894e9d9c9..035ba0ef6369 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn31/dcn31_clk_mgr.c +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn31/dcn31_clk_mgr.c @@ -366,32 +366,32 @@ static struct wm_table lpddr5_wm_table = { .wm_inst = WM_A, .wm_type = WM_TYPE_PSTATE_CHG, .pstate_latency_us = 11.65333, - .sr_exit_time_us = 5.32, - .sr_enter_plus_exit_time_us = 6.38, + .sr_exit_time_us = 11.5, + .sr_enter_plus_exit_time_us = 14.5, .valid = true, }, { .wm_inst = WM_B, .wm_type = WM_TYPE_PSTATE_CHG, .pstate_latency_us = 11.65333, - .sr_exit_time_us = 9.82, - .sr_enter_plus_exit_time_us = 11.196, + .sr_exit_time_us = 11.5, + .sr_enter_plus_exit_time_us = 14.5, .valid = true, }, { .wm_inst = WM_C, .wm_type = WM_TYPE_PSTATE_CHG, .pstate_latency_us = 11.65333, - .sr_exit_time_us = 9.89, - .sr_enter_plus_exit_time_us = 11.24, + .sr_exit_time_us = 11.5, + .sr_enter_plus_exit_time_us = 14.5, .valid = true, }, { .wm_inst = WM_D, .wm_type = WM_TYPE_PSTATE_CHG, .pstate_latency_us = 11.65333, - .sr_exit_time_us = 9.748, - .sr_enter_plus_exit_time_us = 11.102, + .sr_exit_time_us = 11.5, + .sr_enter_plus_exit_time_us = 14.5, .valid = true, }, } -- cgit v1.2.3 From 
2ef8ea23942f4c2569930c34e7689a0cb1b232cc Mon Sep 17 00:00:00 2001 From: Jake Wang Date: Fri, 1 Oct 2021 17:14:21 -0400 Subject: drm/amd/display: Moved dccg init to after bios golden init [Why] bios_golden_init will override dccg_init during init_hw. [How] Move dccg_init to after bios_golden_init. Reviewed-by: Aric Cyr Reviewed-by: Eric Yang Acked-by: Agustin Gutierrez Sanchez Signed-off-by: Jake Wang Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/display/dc/dcn31/dcn31_hwseq.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_hwseq.c b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_hwseq.c index 3f2333ec67e2..3afa1159a5f7 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_hwseq.c @@ -76,10 +76,6 @@ void dcn31_init_hw(struct dc *dc) if (dc->clk_mgr && dc->clk_mgr->funcs->init_clocks) dc->clk_mgr->funcs->init_clocks(dc->clk_mgr); - // Initialize the dccg - if (res_pool->dccg->funcs->dccg_init) - res_pool->dccg->funcs->dccg_init(res_pool->dccg); - if (IS_FPGA_MAXIMUS_DC(dc->ctx->dce_environment)) { REG_WRITE(REFCLK_CNTL, 0); @@ -106,6 +102,9 @@ void dcn31_init_hw(struct dc *dc) hws->funcs.bios_golden_init(dc); hws->funcs.disable_vga(dc->hwseq); } + // Initialize the dccg + if (res_pool->dccg->funcs->dccg_init) + res_pool->dccg->funcs->dccg_init(res_pool->dccg); if (dc->debug.enable_mem_low_power.bits.dmcu) { // Force ERAM to shutdown if DMCU is not enabled -- cgit v1.2.3 From 53c2ff8bcb06acd07e24a62e7f5a0247bd7c6f67 Mon Sep 17 00:00:00 2001 From: Aaron Liu Date: Tue, 19 Oct 2021 11:13:25 +0800 Subject: drm/amdgpu: support B0&B1 external revision id for yellow carp B0 internal rev_id is 0x01, B1 internal rev_id is 0x02. The external rev_id for B0 and B1 is 0x20. The original expression is not suitable for B1. v2: squash in fix for display code (Alex) Signed-off-by: Aaron Liu Reviewed-by: Huang Rui Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdgpu/nv.c | 2 +- drivers/gpu/drm/amd/display/include/dal_asic_id.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/nv.c b/drivers/gpu/drm/amd/amdgpu/nv.c index ff80786e3918..01efda4398e5 100644 --- a/drivers/gpu/drm/amd/amdgpu/nv.c +++ b/drivers/gpu/drm/amd/amdgpu/nv.c @@ -1257,7 +1257,7 @@ static int nv_common_early_init(void *handle) AMD_PG_SUPPORT_VCN_DPG | AMD_PG_SUPPORT_JPEG; if (adev->pdev->device == 0x1681) - adev->external_rev_id = adev->rev_id + 0x19; + adev->external_rev_id = 0x20; else adev->external_rev_id = adev->rev_id + 0x01; break; diff --git a/drivers/gpu/drm/amd/display/include/dal_asic_id.h b/drivers/gpu/drm/amd/display/include/dal_asic_id.h index 5adc471bef57..3d2f0817e40a 100644 --- a/drivers/gpu/drm/amd/display/include/dal_asic_id.h +++ b/drivers/gpu/drm/amd/display/include/dal_asic_id.h @@ -227,7 +227,7 @@ enum { #define FAMILY_YELLOW_CARP 146 #define YELLOW_CARP_A0 0x01 -#define YELLOW_CARP_B0 0x1A +#define YELLOW_CARP_B0 0x20 #define YELLOW_CARP_UNKNOWN 0xFF #ifndef ASICREV_IS_YELLOW_CARP -- cgit v1.2.3 From e20f80b9b163dc402dca115eed0affba6df5ebb5 Mon Sep 17 00:00:00 2001 From: Brian King Date: Tue, 19 Oct 2021 10:21:29 -0500 Subject: scsi: ibmvfc: Fix up duplicate response detection Commit a264cf5e81c7 ("scsi: ibmvfc: Fix command state accounting and stale response detection") introduced a regression in detecting duplicate responses. 
This was observed in test where a command was sent to the VIOS and completed before ibmvfc_send_event() set the active flag to 1, which resulted in the atomic_dec_if_positive() call in ibmvfc_handle_crq() thinking this was a duplicate response, which resulted in scsi_done() not getting called, so we then hit a SCSI command timeout for this command once the timeout expires. This simply ensures the active flag gets set prior to making the hcall to send the command to the VIOS, in order to close this window. Link: https://lore.kernel.org/r/20211019152129.16558-1-brking@linux.vnet.ibm.com Fixes: a264cf5e81c7 ("scsi: ibmvfc: Fix command state accounting and stale response detection") Cc: stable@vger.kernel.org Acked-by: Tyrel Datwyler Signed-off-by: Brian King Signed-off-by: Martin K. Petersen --- drivers/scsi/ibmvscsi/ibmvfc.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/scsi/ibmvscsi/ibmvfc.c b/drivers/scsi/ibmvscsi/ibmvfc.c index 1f1586ad48fe..01f79991bf4a 100644 --- a/drivers/scsi/ibmvscsi/ibmvfc.c +++ b/drivers/scsi/ibmvscsi/ibmvfc.c @@ -1696,6 +1696,7 @@ static int ibmvfc_send_event(struct ibmvfc_event *evt, spin_lock_irqsave(&evt->queue->l_lock, flags); list_add_tail(&evt->queue_list, &evt->queue->sent); + atomic_set(&evt->active, 1); mb(); @@ -1710,6 +1711,7 @@ static int ibmvfc_send_event(struct ibmvfc_event *evt, be64_to_cpu(crq_as_u64[1])); if (rc) { + atomic_set(&evt->active, 0); list_del(&evt->queue_list); spin_unlock_irqrestore(&evt->queue->l_lock, flags); del_timer(&evt->timer); @@ -1737,7 +1739,6 @@ static int ibmvfc_send_event(struct ibmvfc_event *evt, evt->done(evt); } else { - atomic_set(&evt->active, 1); spin_unlock_irqrestore(&evt->queue->l_lock, flags); ibmvfc_trc_start(evt); } -- cgit v1.2.3 From 282da7cef078a87b6d5e8ceba8b17e428cf0e37c Mon Sep 17 00:00:00 2001 From: Chanho Park Date: Mon, 18 Oct 2021 15:28:41 +0900 Subject: scsi: ufs: ufs-exynos: Correct timeout value setting registers PA_PWRMODEUSERDATA0 -> DL_FC0PROTTIMEOUTVAL PA_PWRMODEUSERDATA1 -> DL_TC0REPLAYTIMEOUTVAL PA_PWRMODEUSERDATA2 -> DL_AFC0REQTIMEOUTVAL Link: https://lore.kernel.org/r/20211018062841.18226-1-chanho61.park@samsung.com Fixes: a967ddb22d94 ("scsi: ufs: ufs-exynos: Apply vendor-specific values for three timeouts") Cc: Alim Akhtar Cc: Kiwoong Kim Cc: Krzysztof Kozlowski Reviewed-by: Alim Akhtar Reviewed-by: Avri Altman Signed-off-by: Chanho Park Signed-off-by: Martin K. 
Petersen --- drivers/scsi/ufs/ufs-exynos.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/scsi/ufs/ufs-exynos.c b/drivers/scsi/ufs/ufs-exynos.c index a14dd8ce56d4..bb2dd79a1bcd 100644 --- a/drivers/scsi/ufs/ufs-exynos.c +++ b/drivers/scsi/ufs/ufs-exynos.c @@ -642,9 +642,9 @@ static int exynos_ufs_pre_pwr_mode(struct ufs_hba *hba, } /* setting for three timeout values for traffic class #0 */ - ufshcd_dme_set(hba, UIC_ARG_MIB(PA_PWRMODEUSERDATA0), 8064); - ufshcd_dme_set(hba, UIC_ARG_MIB(PA_PWRMODEUSERDATA1), 28224); - ufshcd_dme_set(hba, UIC_ARG_MIB(PA_PWRMODEUSERDATA2), 20160); + ufshcd_dme_set(hba, UIC_ARG_MIB(DL_FC0PROTTIMEOUTVAL), 8064); + ufshcd_dme_set(hba, UIC_ARG_MIB(DL_TC0REPLAYTIMEOUTVAL), 28224); + ufshcd_dme_set(hba, UIC_ARG_MIB(DL_AFC0REQTIMEOUTVAL), 20160); return 0; out: -- cgit v1.2.3 From 0db55f9a1bafbe3dac750ea669de9134922389b5 Mon Sep 17 00:00:00 2001 From: Christian König Date: Wed, 20 Oct 2021 19:19:46 +0200 Subject: drm/ttm: fix memleak in ttm_transfered_destroy MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We need to cleanup the fences for ghost objects as well. Signed-off-by: Christian König Reported-by: Erhard F. Tested-by: Erhard F. Reviewed-by: Huang Rui Bug: https://bugzilla.kernel.org/show_bug.cgi?id=214029 Bug: https://bugzilla.kernel.org/show_bug.cgi?id=214447 CC: Link: https://patchwork.freedesktop.org/patch/msgid/20211020173211.2247-1-christian.koenig@amd.com --- drivers/gpu/drm/ttm/ttm_bo_util.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/ttm/ttm_bo_util.c b/drivers/gpu/drm/ttm/ttm_bo_util.c index 1c5ffe2935af..abf2d7a4fdf1 100644 --- a/drivers/gpu/drm/ttm/ttm_bo_util.c +++ b/drivers/gpu/drm/ttm/ttm_bo_util.c @@ -190,6 +190,7 @@ static void ttm_transfered_destroy(struct ttm_buffer_object *bo) struct ttm_transfer_obj *fbo; fbo = container_of(bo, struct ttm_transfer_obj, base); + dma_resv_fini(&fbo->base.base._resv); ttm_bo_put(fbo->bo); kfree(fbo); } -- cgit v1.2.3 From def0c3697287f6e85d5ac68b21302966c95474f9 Mon Sep 17 00:00:00 2001 From: Bryant Mairs Date: Tue, 19 Oct 2021 09:24:33 -0500 Subject: drm: panel-orientation-quirks: Add quirk for Aya Neo 2021 Fixes screen orientation for the Aya Neo 2021 handheld gaming console. 
Signed-off-by: Bryant Mairs Signed-off-by: Sam Ravnborg Link: https://patchwork.freedesktop.org/patch/msgid/20211019142433.4295-1-bryant@mai.rs --- drivers/gpu/drm/drm_panel_orientation_quirks.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/gpu/drm/drm_panel_orientation_quirks.c b/drivers/gpu/drm/drm_panel_orientation_quirks.c index f6bdec7fa925..30c17a76f49a 100644 --- a/drivers/gpu/drm/drm_panel_orientation_quirks.c +++ b/drivers/gpu/drm/drm_panel_orientation_quirks.c @@ -134,6 +134,12 @@ static const struct dmi_system_id orientation_data[] = { DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "T103HAF"), }, .driver_data = (void *)&lcd800x1280_rightside_up, + }, { /* AYA NEO 2021 */ + .matches = { + DMI_EXACT_MATCH(DMI_SYS_VENDOR, "AYADEVICE"), + DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "AYA NEO 2021"), + }, + .driver_data = (void *)&lcd800x1280_rightside_up, }, { /* GPD MicroPC (generic strings, also match on bios date) */ .matches = { DMI_EXACT_MATCH(DMI_SYS_VENDOR, "Default string"), -- cgit v1.2.3 From ee71fb6c4d99c51f2d82a32c503c872b7e40e7f7 Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Thu, 21 Oct 2021 22:20:48 +0200 Subject: drm/i915/selftests: Properly reset mock object propers for each test MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit I forgot to do this properly in commit 6f11f37459d8f9f74ff1c299c0bedd50b458057a Author: Daniel Vetter Date: Fri Jul 23 10:34:55 2021 +0200 drm/plane: remove drm_helper_get_plane_damage_clips intel-gfx CI didn't spot this because we run each selftest in each own invocations, which means reloading i915.ko. But if you just run all the selftests in one go at boot-up, then it falls apart and eventually we cross over the hardcoded limited of how many properties can be attached to a single object. Fix this by resetting the property count. Nothing else to clean up since it's all static storage anyway. Reported-and-tested-by: Sebastian Andrzej Siewior Cc: Sebastian Andrzej Siewior Fixes: 6f11f37459d8 ("drm/plane: remove drm_helper_get_plane_damage_clips") Cc: José Roberto de Souza Cc: Ville Syrjälä Cc: Gwan-gyeong Mun Cc: Hans de Goede Cc: Daniel Vetter Cc: Maarten Lankhorst Cc: Maxime Ripard Cc: Thomas Zimmermann Acked-by: Thomas Zimmermann Signed-off-by: Daniel Vetter Link: https://patchwork.freedesktop.org/patch/msgid/20211021202048.2638668-1-daniel.vetter@ffwll.ch --- drivers/gpu/drm/selftests/test-drm_damage_helper.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/selftests/test-drm_damage_helper.c b/drivers/gpu/drm/selftests/test-drm_damage_helper.c index 1c19a5d3eefb..8d8d8e214c28 100644 --- a/drivers/gpu/drm/selftests/test-drm_damage_helper.c +++ b/drivers/gpu/drm/selftests/test-drm_damage_helper.c @@ -30,6 +30,7 @@ static void mock_setup(struct drm_plane_state *state) mock_device.driver = &mock_driver; mock_device.mode_config.prop_fb_damage_clips = &mock_prop; mock_plane.dev = &mock_device; + mock_obj_props.count = 0; mock_plane.base.properties = &mock_obj_props; mock_prop.base.id = 1; /* 0 is an invalid id */ mock_prop.dev = &mock_device; -- cgit v1.2.3 From 1394103fd72ce9c67d20f882a93d59403c8da057 Mon Sep 17 00:00:00 2001 From: Xie Yongji Date: Thu, 23 Sep 2021 15:57:22 +0800 Subject: vduse: Disallow injecting interrupt before DRIVER_OK is set The interrupt callback should not be triggered before DRIVER_OK is set. Otherwise, it might break the virtio device driver. So let's add a check to avoid the unexpected behavior. 
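The added guard is an ordering check against the virtio status byte: injecting configuration or virtqueue interrupts only makes sense once the driver has completed the handshake and set DRIVER_OK. A reduced fragment of the gating idea (workqueue names abridged, not the complete vduse ioctl handler):

    /* Reduced fragment of the gate; names abridged from the patch below. */
    if (!(dev->status & VIRTIO_CONFIG_S_DRIVER_OK))
            return -EINVAL;         /* handshake not finished, reject inject */

    queue_work(irq_wq, irq_work);   /* safe to raise the interrupt now */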
Fixes: c8a6153b6c59 ("vduse: Introduce VDUSE - vDPA Device in Userspace") Signed-off-by: Xie Yongji Link: https://lore.kernel.org/r/20210923075722.98-1-xieyongji@bytedance.com Signed-off-by: Michael S. Tsirkin --- drivers/vdpa/vdpa_user/vduse_dev.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/vdpa/vdpa_user/vduse_dev.c b/drivers/vdpa/vdpa_user/vduse_dev.c index 26e3d90d1e7c..cefb301b2ee4 100644 --- a/drivers/vdpa/vdpa_user/vduse_dev.c +++ b/drivers/vdpa/vdpa_user/vduse_dev.c @@ -966,6 +966,10 @@ static long vduse_dev_ioctl(struct file *file, unsigned int cmd, break; } case VDUSE_DEV_INJECT_CONFIG_IRQ: + ret = -EINVAL; + if (!(dev->status & VIRTIO_CONFIG_S_DRIVER_OK)) + break; + ret = 0; queue_work(vduse_irq_wq, &dev->inject); break; @@ -1045,6 +1049,10 @@ static long vduse_dev_ioctl(struct file *file, unsigned int cmd, case VDUSE_VQ_INJECT_IRQ: { u32 index; + ret = -EINVAL; + if (!(dev->status & VIRTIO_CONFIG_S_DRIVER_OK)) + break; + ret = -EFAULT; if (get_user(index, (u32 __user *)argp)) break; -- cgit v1.2.3 From 0943aacf5ae10471b68a702c022b42c89e91ba9e Mon Sep 17 00:00:00 2001 From: Xie Yongji Date: Wed, 29 Sep 2021 16:30:50 +0800 Subject: vduse: Fix race condition between resetting and irq injecting The interrupt might be triggered after a reset since there is no synchronization between resetting and irq injecting. And it might break something if the interrupt is delayed until a new round of device initialization. Fixes: c8a6153b6c59 ("vduse: Introduce VDUSE - vDPA Device in Userspace") Signed-off-by: Xie Yongji Link: https://lore.kernel.org/r/20210929083050.88-1-xieyongji@bytedance.com Signed-off-by: Michael S. Tsirkin --- drivers/vdpa/vdpa_user/vduse_dev.c | 37 +++++++++++++++++++++++++------------ 1 file changed, 25 insertions(+), 12 deletions(-) diff --git a/drivers/vdpa/vdpa_user/vduse_dev.c b/drivers/vdpa/vdpa_user/vduse_dev.c index cefb301b2ee4..841667a896dd 100644 --- a/drivers/vdpa/vdpa_user/vduse_dev.c +++ b/drivers/vdpa/vdpa_user/vduse_dev.c @@ -80,6 +80,7 @@ struct vduse_dev { struct vdpa_callback config_cb; struct work_struct inject; spinlock_t irq_lock; + struct rw_semaphore rwsem; int minor; bool broken; bool connected; @@ -410,6 +411,8 @@ static void vduse_dev_reset(struct vduse_dev *dev) if (domain->bounce_map) vduse_domain_reset_bounce_map(domain); + down_write(&dev->rwsem); + dev->status = 0; dev->driver_features = 0; dev->generation++; @@ -443,6 +446,8 @@ static void vduse_dev_reset(struct vduse_dev *dev) flush_work(&vq->inject); flush_work(&vq->kick); } + + up_write(&dev->rwsem); } static int vduse_vdpa_set_vq_address(struct vdpa_device *vdpa, u16 idx, @@ -885,6 +890,23 @@ static void vduse_vq_irq_inject(struct work_struct *work) spin_unlock_irq(&vq->irq_lock); } +static int vduse_dev_queue_irq_work(struct vduse_dev *dev, + struct work_struct *irq_work) +{ + int ret = -EINVAL; + + down_read(&dev->rwsem); + if (!(dev->status & VIRTIO_CONFIG_S_DRIVER_OK)) + goto unlock; + + ret = 0; + queue_work(vduse_irq_wq, irq_work); +unlock: + up_read(&dev->rwsem); + + return ret; +} + static long vduse_dev_ioctl(struct file *file, unsigned int cmd, unsigned long arg) { @@ -966,12 +988,7 @@ static long vduse_dev_ioctl(struct file *file, unsigned int cmd, break; } case VDUSE_DEV_INJECT_CONFIG_IRQ: - ret = -EINVAL; - if (!(dev->status & VIRTIO_CONFIG_S_DRIVER_OK)) - break; - - ret = 0; - queue_work(vduse_irq_wq, &dev->inject); + ret = vduse_dev_queue_irq_work(dev, &dev->inject); break; case VDUSE_VQ_SETUP: { struct vduse_vq_config config; @@ -1049,10 +1066,6 @@ 
static long vduse_dev_ioctl(struct file *file, unsigned int cmd, case VDUSE_VQ_INJECT_IRQ: { u32 index; - ret = -EINVAL; - if (!(dev->status & VIRTIO_CONFIG_S_DRIVER_OK)) - break; - ret = -EFAULT; if (get_user(index, (u32 __user *)argp)) break; @@ -1061,9 +1074,8 @@ static long vduse_dev_ioctl(struct file *file, unsigned int cmd, if (index >= dev->vq_num) break; - ret = 0; index = array_index_nospec(index, dev->vq_num); - queue_work(vduse_irq_wq, &dev->vqs[index].inject); + ret = vduse_dev_queue_irq_work(dev, &dev->vqs[index].inject); break; } default: @@ -1144,6 +1156,7 @@ static struct vduse_dev *vduse_dev_create(void) INIT_LIST_HEAD(&dev->send_list); INIT_LIST_HEAD(&dev->recv_list); spin_lock_init(&dev->irq_lock); + init_rwsem(&dev->rwsem); INIT_WORK(&dev->inject, vduse_dev_irq_inject); init_waitqueue_head(&dev->waitq); -- cgit v1.2.3 From f8690a4b5a1b64f74ae5c4f7c4ea880d8a8e1a0d Mon Sep 17 00:00:00 2001 From: Tianjia Zhang Date: Fri, 15 Oct 2021 11:47:33 +0800 Subject: crypto: x86/sm4 - Fix invalid section entry size This fixes the following warning: vmlinux.o: warning: objtool: elf_update: invalid section entry size The size of the rodata section is 164 bytes, directly using the entry_size of 164 bytes will cause errors in some versions of the gcc compiler, while using 16 bytes directly will cause errors in the clang compiler. This patch correct it by filling the size of rodata to a 16-byte boundary. Fixes: a7ee22ee1445 ("crypto: x86/sm4 - add AES-NI/AVX/x86_64 implementation") Fixes: 5b2efa2bb865 ("crypto: x86/sm4 - add AES-NI/AVX2/x86_64 implementation") Reported-by: Peter Zijlstra Reported-by: Abaci Robot Signed-off-by: Tianjia Zhang Tested-by: Heyuan Shi Signed-off-by: Herbert Xu --- arch/x86/crypto/sm4-aesni-avx-asm_64.S | 6 +++++- arch/x86/crypto/sm4-aesni-avx2-asm_64.S | 6 +++++- 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/arch/x86/crypto/sm4-aesni-avx-asm_64.S b/arch/x86/crypto/sm4-aesni-avx-asm_64.S index 18d2f5199194..1cc72b4804fa 100644 --- a/arch/x86/crypto/sm4-aesni-avx-asm_64.S +++ b/arch/x86/crypto/sm4-aesni-avx-asm_64.S @@ -78,7 +78,7 @@ vpxor tmp0, x, x; -.section .rodata.cst164, "aM", @progbits, 164 +.section .rodata.cst16, "aM", @progbits, 16 .align 16 /* @@ -133,6 +133,10 @@ .L0f0f0f0f: .long 0x0f0f0f0f +/* 12 bytes, only for padding */ +.Lpadding_deadbeef: + .long 0xdeadbeef, 0xdeadbeef, 0xdeadbeef + .text .align 16 diff --git a/arch/x86/crypto/sm4-aesni-avx2-asm_64.S b/arch/x86/crypto/sm4-aesni-avx2-asm_64.S index d2ffd7f76ee2..9c5d3f3ad45a 100644 --- a/arch/x86/crypto/sm4-aesni-avx2-asm_64.S +++ b/arch/x86/crypto/sm4-aesni-avx2-asm_64.S @@ -93,7 +93,7 @@ vpxor tmp0, x, x; -.section .rodata.cst164, "aM", @progbits, 164 +.section .rodata.cst16, "aM", @progbits, 16 .align 16 /* @@ -148,6 +148,10 @@ .L0f0f0f0f: .long 0x0f0f0f0f +/* 12 bytes, only for padding */ +.Lpadding_deadbeef: + .long 0xdeadbeef, 0xdeadbeef, 0xdeadbeef + .text .align 16 -- cgit v1.2.3 From 7f678def99d29c520418607509bb19c7fc96a6db Mon Sep 17 00:00:00 2001 From: Vasily Averin Date: Fri, 22 Oct 2021 13:28:37 +0300 Subject: skb_expand_head() adjust skb->truesize incorrectly Christoph Paasch reports [1] about incorrect skb->truesize after skb_expand_head() call in ip6_xmit. This may happen because of two reasons: - skb_set_owner_w() for newly cloned skb is called too early, before pskb_expand_head() where truesize is adjusted for (!skb-sk) case. - pskb_expand_head() does not adjust truesize in (skb->sk) case. In this case sk->sk_wmem_alloc should be adjusted too. 
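The accounting rule being restored is that an skb charged to a socket must keep sk->sk_wmem_alloc in step with its truesize whenever the underlying buffer grows. In condensed sketch form (error handling elided; the real code additionally checks the skb destructor via is_skb_wmem(), see the patch below):

    /* Condensed sketch of the accounting step, simplified from the patch. */
    int osize = skb_end_offset(skb);        /* size before the expansion */

    /* ... pskb_expand_head() grows the head ... */

    if (sk) {
            int delta = skb_end_offset(skb) - osize;

            refcount_add(delta, &sk->sk_wmem_alloc);    /* charge the socket */
            skb->truesize += delta;                     /* keep skb in sync */
    }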
[1] https://lkml.org/lkml/2021/8/20/1082 Fixes: f1260ff15a71 ("skbuff: introduce skb_expand_head()") Fixes: 2d85a1b31dde ("ipv6: ip6_finish_output2: set sk into newly allocated nskb") Reported-by: Christoph Paasch Signed-off-by: Vasily Averin Reviewed-by: Eric Dumazet Link: https://lore.kernel.org/r/644330dd-477e-0462-83bf-9f514c41edd1@virtuozzo.com Signed-off-by: Jakub Kicinski --- net/core/skbuff.c | 36 +++++++++++++++++++++++------------- net/core/sock_destructor.h | 12 ++++++++++++ 2 files changed, 35 insertions(+), 13 deletions(-) create mode 100644 net/core/sock_destructor.h diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 2170bea2c7de..fe9358437380 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -80,6 +80,7 @@ #include #include "datagram.h" +#include "sock_destructor.h" struct kmem_cache *skbuff_head_cache __ro_after_init; static struct kmem_cache *skbuff_fclone_cache __ro_after_init; @@ -1804,30 +1805,39 @@ EXPORT_SYMBOL(skb_realloc_headroom); struct sk_buff *skb_expand_head(struct sk_buff *skb, unsigned int headroom) { int delta = headroom - skb_headroom(skb); + int osize = skb_end_offset(skb); + struct sock *sk = skb->sk; if (WARN_ONCE(delta <= 0, "%s is expecting an increase in the headroom", __func__)) return skb; - /* pskb_expand_head() might crash, if skb is shared */ - if (skb_shared(skb)) { + delta = SKB_DATA_ALIGN(delta); + /* pskb_expand_head() might crash, if skb is shared. */ + if (skb_shared(skb) || !is_skb_wmem(skb)) { struct sk_buff *nskb = skb_clone(skb, GFP_ATOMIC); - if (likely(nskb)) { - if (skb->sk) - skb_set_owner_w(nskb, skb->sk); - consume_skb(skb); - } else { - kfree_skb(skb); - } + if (unlikely(!nskb)) + goto fail; + + if (sk) + skb_set_owner_w(nskb, sk); + consume_skb(skb); skb = nskb; } - if (skb && - pskb_expand_head(skb, SKB_DATA_ALIGN(delta), 0, GFP_ATOMIC)) { - kfree_skb(skb); - skb = NULL; + if (pskb_expand_head(skb, delta, 0, GFP_ATOMIC)) + goto fail; + + if (sk && is_skb_wmem(skb)) { + delta = skb_end_offset(skb) - osize; + refcount_add(delta, &sk->sk_wmem_alloc); + skb->truesize += delta; } return skb; + +fail: + kfree_skb(skb); + return NULL; } EXPORT_SYMBOL(skb_expand_head); diff --git a/net/core/sock_destructor.h b/net/core/sock_destructor.h new file mode 100644 index 000000000000..2f396e6bfba5 --- /dev/null +++ b/net/core/sock_destructor.h @@ -0,0 +1,12 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +#ifndef _NET_CORE_SOCK_DESTRUCTOR_H +#define _NET_CORE_SOCK_DESTRUCTOR_H +#include + +static inline bool is_skb_wmem(const struct sk_buff *skb) +{ + return skb->destructor == sock_wfree || + skb->destructor == __sock_wfree || + (IS_ENABLED(CONFIG_INET) && skb->destructor == tcp_wfree); +} +#endif -- cgit v1.2.3 From 4f7019c7eb33967eb87766e0e4602b5576873680 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Wed, 20 Oct 2021 07:42:41 -0400 Subject: sctp: use init_tag from inithdr for ABORT chunk Currently Linux SCTP uses the verification tag of the existing SCTP asoc when failing to process and sending the packet with the ABORT chunk. This will result in the peer accepting the ABORT chunk and removing the SCTP asoc. One could exploit this to terminate a SCTP asoc. This patch is to fix it by always using the initiate tag of the received INIT chunk for the ABORT chunk to be sent. 
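Concretely, an ABORT sent in reply to an INIT must carry the Initiate Tag proposed in that INIT, since that is the only tag the peer will accept. A schematic of the tag selection (field and case names abridged, not the exact sctp_ootb_pkt_new() code):

    /* Schematic only -- abridged names, not the exact kernel code. */
    switch (chunk_type) {
    case INIT:
    case INIT_ACK:
            /* the reply must echo the peer's proposed Initiate Tag */
            vtag = ntohl(init_hdr->init_tag);
            break;
    default:
            /* otherwise reflect the vtag from the received packet header */
            vtag = ntohl(packet_hdr->vtag);
            break;
    }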
Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Xin Long Acked-by: Marcelo Ricardo Leitner Signed-off-by: Jakub Kicinski --- net/sctp/sm_statefuns.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c index 32df65f68c12..7f8306968c39 100644 --- a/net/sctp/sm_statefuns.c +++ b/net/sctp/sm_statefuns.c @@ -6348,6 +6348,7 @@ static struct sctp_packet *sctp_ootb_pkt_new( * yet. */ switch (chunk->chunk_hdr->type) { + case SCTP_CID_INIT: case SCTP_CID_INIT_ACK: { struct sctp_initack_chunk *initack; -- cgit v1.2.3 From eae5783908042a762c24e1bd11876edb91d314b1 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Wed, 20 Oct 2021 07:42:42 -0400 Subject: sctp: fix the processing for INIT chunk This patch fixes the problems below: 1. In non-shutdown_ack_sent states: in sctp_sf_do_5_1B_init() and sctp_sf_do_5_2_2_dupinit(): chunk length check should be done before any checks that may cause to send abort, as making packet for abort will access the init_tag from init_hdr in sctp_ootb_pkt_new(). 2. In shutdown_ack_sent state: in sctp_sf_do_9_2_reshutack(): The same checks as does in sctp_sf_do_5_2_2_dupinit() is needed for sctp_sf_do_9_2_reshutack(). Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Xin Long Acked-by: Marcelo Ricardo Leitner Signed-off-by: Jakub Kicinski --- net/sctp/sm_statefuns.c | 72 +++++++++++++++++++++++++++++++------------------ 1 file changed, 46 insertions(+), 26 deletions(-) diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c index 7f8306968c39..9bfa8cca9974 100644 --- a/net/sctp/sm_statefuns.c +++ b/net/sctp/sm_statefuns.c @@ -156,6 +156,12 @@ static enum sctp_disposition __sctp_sf_do_9_1_abort( void *arg, struct sctp_cmd_seq *commands); +static enum sctp_disposition +__sctp_sf_do_9_2_reshutack(struct net *net, const struct sctp_endpoint *ep, + const struct sctp_association *asoc, + const union sctp_subtype type, void *arg, + struct sctp_cmd_seq *commands); + /* Small helper function that checks if the chunk length * is of the appropriate length. The 'required_length' argument * is set to be the size of a specific chunk we are testing. @@ -337,6 +343,14 @@ enum sctp_disposition sctp_sf_do_5_1B_init(struct net *net, if (!chunk->singleton) return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands); + /* Make sure that the INIT chunk has a valid length. + * Normally, this would cause an ABORT with a Protocol Violation + * error, but since we don't have an association, we'll + * just discard the packet. + */ + if (!sctp_chunk_length_valid(chunk, sizeof(struct sctp_init_chunk))) + return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands); + /* If the packet is an OOTB packet which is temporarily on the * control endpoint, respond with an ABORT. */ @@ -351,14 +365,6 @@ enum sctp_disposition sctp_sf_do_5_1B_init(struct net *net, if (chunk->sctp_hdr->vtag != 0) return sctp_sf_tabort_8_4_8(net, ep, asoc, type, arg, commands); - /* Make sure that the INIT chunk has a valid length. - * Normally, this would cause an ABORT with a Protocol Violation - * error, but since we don't have an association, we'll - * just discard the packet. - */ - if (!sctp_chunk_length_valid(chunk, sizeof(struct sctp_init_chunk))) - return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands); - /* If the INIT is coming toward a closing socket, we'll send back * and ABORT. Essentially, this catches the race of INIT being * backloged to the socket at the same time as the user issues close(). 
@@ -1524,20 +1530,16 @@ static enum sctp_disposition sctp_sf_do_unexpected_init( if (!chunk->singleton) return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands); + /* Make sure that the INIT chunk has a valid length. */ + if (!sctp_chunk_length_valid(chunk, sizeof(struct sctp_init_chunk))) + return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands); + /* 3.1 A packet containing an INIT chunk MUST have a zero Verification * Tag. */ if (chunk->sctp_hdr->vtag != 0) return sctp_sf_tabort_8_4_8(net, ep, asoc, type, arg, commands); - /* Make sure that the INIT chunk has a valid length. - * In this case, we generate a protocol violation since we have - * an association established. - */ - if (!sctp_chunk_length_valid(chunk, sizeof(struct sctp_init_chunk))) - return sctp_sf_violation_chunklen(net, ep, asoc, type, arg, - commands); - if (SCTP_INPUT_CB(chunk->skb)->encap_port != chunk->transport->encap_port) return sctp_sf_new_encap_port(net, ep, asoc, type, arg, commands); @@ -1882,9 +1884,9 @@ static enum sctp_disposition sctp_sf_do_dupcook_a( * its peer. */ if (sctp_state(asoc, SHUTDOWN_ACK_SENT)) { - disposition = sctp_sf_do_9_2_reshutack(net, ep, asoc, - SCTP_ST_CHUNK(chunk->chunk_hdr->type), - chunk, commands); + disposition = __sctp_sf_do_9_2_reshutack(net, ep, asoc, + SCTP_ST_CHUNK(chunk->chunk_hdr->type), + chunk, commands); if (SCTP_DISPOSITION_NOMEM == disposition) goto nomem; @@ -2970,13 +2972,11 @@ enum sctp_disposition sctp_sf_do_9_2_shut_ctsn( * that belong to this association, it should discard the INIT chunk and * retransmit the SHUTDOWN ACK chunk. */ -enum sctp_disposition sctp_sf_do_9_2_reshutack( - struct net *net, - const struct sctp_endpoint *ep, - const struct sctp_association *asoc, - const union sctp_subtype type, - void *arg, - struct sctp_cmd_seq *commands) +static enum sctp_disposition +__sctp_sf_do_9_2_reshutack(struct net *net, const struct sctp_endpoint *ep, + const struct sctp_association *asoc, + const union sctp_subtype type, void *arg, + struct sctp_cmd_seq *commands) { struct sctp_chunk *chunk = arg; struct sctp_chunk *reply; @@ -3010,6 +3010,26 @@ nomem: return SCTP_DISPOSITION_NOMEM; } +enum sctp_disposition +sctp_sf_do_9_2_reshutack(struct net *net, const struct sctp_endpoint *ep, + const struct sctp_association *asoc, + const union sctp_subtype type, void *arg, + struct sctp_cmd_seq *commands) +{ + struct sctp_chunk *chunk = arg; + + if (!chunk->singleton) + return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands); + + if (!sctp_chunk_length_valid(chunk, sizeof(struct sctp_init_chunk))) + return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands); + + if (chunk->sctp_hdr->vtag != 0) + return sctp_sf_tabort_8_4_8(net, ep, asoc, type, arg, commands); + + return __sctp_sf_do_9_2_reshutack(net, ep, asoc, type, arg, commands); +} + /* * sctp_sf_do_ecn_cwr * -- cgit v1.2.3 From 438b95a7c98f77d51cbf4db021f41b602d750a3f Mon Sep 17 00:00:00 2001 From: Xin Long Date: Wed, 20 Oct 2021 07:42:43 -0400 Subject: sctp: fix the processing for INIT_ACK chunk Currently INIT_ACK chunk in non-cookie_echoed state is processed in sctp_sf_discard_chunk() to send an abort with the existent asoc's vtag if the chunk length is not valid. But the vtag in the chunk's sctphdr is not verified, which may be exploited by one to cook a malicious chunk to terminal a SCTP asoc. sctp_sf_discard_chunk() also is called in many other places to send an abort, and most of those have this problem. 
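For reference, the missing step is the standard RFC 4960, Section 8.5 rule: a packet may only be answered with the association's own tag if the Verification Tag in its common header matches the tag this endpoint assigned; otherwise it has to be handled as an out-of-the-blue packet. As a conceptual check (generic names and types, not the kernel's sctp_vtag_verify() internals):

    /* Conceptual check only -- generic names and types. */
    static bool vtag_ok(u32 pkt_vtag, u32 local_vtag)
    {
            /* the sender must echo the tag we handed out for this asoc */
            return pkt_vtag == local_vtag;
    }

    /*
     * Only when vtag_ok() holds is it safe to send an ABORT stamped with
     * the existing asoc's tag; otherwise the packet is treated as OOTB.
     */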
This patch is to fix it by sending abort with the existent asoc's vtag only if the vtag from the chunk's sctphdr is verified in sctp_sf_discard_chunk(). Note on sctp_sf_do_9_1_abort() and sctp_sf_shutdown_pending_abort(), the chunk length has been verified before sctp_sf_discard_chunk(), so replace it with sctp_sf_discard(). On sctp_sf_do_asconf_ack() and sctp_sf_do_asconf(), move the sctp_chunk_length_valid check ahead of sctp_sf_discard_chunk(), then replace it with sctp_sf_discard(). Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Xin Long Acked-by: Marcelo Ricardo Leitner Signed-off-by: Jakub Kicinski --- net/sctp/sm_statefuns.c | 37 +++++++++++++++++++------------------ 1 file changed, 19 insertions(+), 18 deletions(-) diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c index 9bfa8cca9974..672e5308839b 100644 --- a/net/sctp/sm_statefuns.c +++ b/net/sctp/sm_statefuns.c @@ -2343,7 +2343,7 @@ enum sctp_disposition sctp_sf_shutdown_pending_abort( */ if (SCTP_ADDR_DEL == sctp_bind_addr_state(&asoc->base.bind_addr, &chunk->dest)) - return sctp_sf_discard_chunk(net, ep, asoc, type, arg, commands); + return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands); if (!sctp_err_chunk_valid(chunk)) return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands); @@ -2389,7 +2389,7 @@ enum sctp_disposition sctp_sf_shutdown_sent_abort( */ if (SCTP_ADDR_DEL == sctp_bind_addr_state(&asoc->base.bind_addr, &chunk->dest)) - return sctp_sf_discard_chunk(net, ep, asoc, type, arg, commands); + return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands); if (!sctp_err_chunk_valid(chunk)) return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands); @@ -2659,7 +2659,7 @@ enum sctp_disposition sctp_sf_do_9_1_abort( */ if (SCTP_ADDR_DEL == sctp_bind_addr_state(&asoc->base.bind_addr, &chunk->dest)) - return sctp_sf_discard_chunk(net, ep, asoc, type, arg, commands); + return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands); if (!sctp_err_chunk_valid(chunk)) return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands); @@ -3865,6 +3865,11 @@ enum sctp_disposition sctp_sf_do_asconf(struct net *net, return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands); } + /* Make sure that the ASCONF ADDIP chunk has a valid length. */ + if (!sctp_chunk_length_valid(chunk, sizeof(struct sctp_addip_chunk))) + return sctp_sf_violation_chunklen(net, ep, asoc, type, arg, + commands); + /* ADD-IP: Section 4.1.1 * This chunk MUST be sent in an authenticated way by using * the mechanism defined in [I-D.ietf-tsvwg-sctp-auth]. If this chunk @@ -3873,13 +3878,7 @@ enum sctp_disposition sctp_sf_do_asconf(struct net *net, */ if (!asoc->peer.asconf_capable || (!net->sctp.addip_noauth && !chunk->auth)) - return sctp_sf_discard_chunk(net, ep, asoc, type, arg, - commands); - - /* Make sure that the ASCONF ADDIP chunk has a valid length. */ - if (!sctp_chunk_length_valid(chunk, sizeof(struct sctp_addip_chunk))) - return sctp_sf_violation_chunklen(net, ep, asoc, type, arg, - commands); + return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands); hdr = (struct sctp_addiphdr *)chunk->skb->data; serial = ntohl(hdr->serial); @@ -4008,6 +4007,12 @@ enum sctp_disposition sctp_sf_do_asconf_ack(struct net *net, return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands); } + /* Make sure that the ADDIP chunk has a valid length. 
*/ + if (!sctp_chunk_length_valid(asconf_ack, + sizeof(struct sctp_addip_chunk))) + return sctp_sf_violation_chunklen(net, ep, asoc, type, arg, + commands); + /* ADD-IP, Section 4.1.2: * This chunk MUST be sent in an authenticated way by using * the mechanism defined in [I-D.ietf-tsvwg-sctp-auth]. If this chunk @@ -4016,14 +4021,7 @@ enum sctp_disposition sctp_sf_do_asconf_ack(struct net *net, */ if (!asoc->peer.asconf_capable || (!net->sctp.addip_noauth && !asconf_ack->auth)) - return sctp_sf_discard_chunk(net, ep, asoc, type, arg, - commands); - - /* Make sure that the ADDIP chunk has a valid length. */ - if (!sctp_chunk_length_valid(asconf_ack, - sizeof(struct sctp_addip_chunk))) - return sctp_sf_violation_chunklen(net, ep, asoc, type, arg, - commands); + return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands); addip_hdr = (struct sctp_addiphdr *)asconf_ack->skb->data; rcvd_serial = ntohl(addip_hdr->serial); @@ -4595,6 +4593,9 @@ enum sctp_disposition sctp_sf_discard_chunk(struct net *net, { struct sctp_chunk *chunk = arg; + if (asoc && !sctp_vtag_verify(chunk, asoc)) + return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands); + /* Make sure that the chunk has a valid length. * Since we don't know the chunk type, we use a general * chunkhdr structure to make a comparison. -- cgit v1.2.3 From a64b341b8695e1c744dd972b39868371b4f68f83 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Wed, 20 Oct 2021 07:42:44 -0400 Subject: sctp: fix the processing for COOKIE_ECHO chunk 1. In closed state: in sctp_sf_do_5_1D_ce(): When asoc is NULL, making packet for abort will use chunk's vtag in sctp_ootb_pkt_new(). But when asoc exists, vtag from the chunk should be verified before using peer.i.init_tag to make packet for abort in sctp_ootb_pkt_new(), and just discard it if vtag is not correct. 2. In the other states: in sctp_sf_do_5_2_4_dupcook(): asoc always exists, but duplicate cookie_echo's vtag will be handled by sctp_tietags_compare() and then take actions, so before that we only verify the vtag for the abort sent for invalid chunk length. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Xin Long Acked-by: Marcelo Ricardo Leitner Signed-off-by: Jakub Kicinski --- net/sctp/sm_statefuns.c | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c index 672e5308839b..96a069d725e9 100644 --- a/net/sctp/sm_statefuns.c +++ b/net/sctp/sm_statefuns.c @@ -710,6 +710,9 @@ enum sctp_disposition sctp_sf_do_5_1D_ce(struct net *net, struct sock *sk; int error = 0; + if (asoc && !sctp_vtag_verify(chunk, asoc)) + return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands); + /* If the packet is an OOTB packet which is temporarily on the * control endpoint, respond with an ABORT. */ @@ -724,7 +727,8 @@ enum sctp_disposition sctp_sf_do_5_1D_ce(struct net *net, * in sctp_unpack_cookie(). */ if (!sctp_chunk_length_valid(chunk, sizeof(struct sctp_chunkhdr))) - return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands); + return sctp_sf_violation_chunklen(net, ep, asoc, type, arg, + commands); /* If the endpoint is not listening or if the number of associations * on the TCP-style socket exceed the max backlog, respond with an @@ -2204,9 +2208,11 @@ enum sctp_disposition sctp_sf_do_5_2_4_dupcook( * enough for the chunk header. Cookie length verification is * done later. 
*/ - if (!sctp_chunk_length_valid(chunk, sizeof(struct sctp_chunkhdr))) - return sctp_sf_violation_chunklen(net, ep, asoc, type, arg, - commands); + if (!sctp_chunk_length_valid(chunk, sizeof(struct sctp_chunkhdr))) { + if (!sctp_vtag_verify(chunk, asoc)) + asoc = NULL; + return sctp_sf_violation_chunklen(net, ep, asoc, type, arg, commands); + } /* "Decode" the chunk. We have no optional parameters so we * are in good shape. -- cgit v1.2.3 From aa0f697e45286a6b5f0ceca9418acf54b9099d99 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Wed, 20 Oct 2021 07:42:45 -0400 Subject: sctp: add vtag check in sctp_sf_violation sctp_sf_violation() is called when processing HEARTBEAT_ACK chunk in cookie_wait state, and some other places are also using it. The vtag in the chunk's sctphdr should be verified, otherwise, as later in chunk length check, it may send abort with the existent asoc's vtag, which can be exploited by one to cook a malicious chunk to terminate a SCTP asoc. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Xin Long Acked-by: Marcelo Ricardo Leitner Signed-off-by: Jakub Kicinski --- net/sctp/sm_statefuns.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c index 96a069d725e9..36328ab88bdd 100644 --- a/net/sctp/sm_statefuns.c +++ b/net/sctp/sm_statefuns.c @@ -4669,6 +4669,9 @@ enum sctp_disposition sctp_sf_violation(struct net *net, { struct sctp_chunk *chunk = arg; + if (!sctp_vtag_verify(chunk, asoc)) + return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands); + /* Make sure that the chunk has a valid length. */ if (!sctp_chunk_length_valid(chunk, sizeof(struct sctp_chunkhdr))) return sctp_sf_violation_chunklen(net, ep, asoc, type, arg, -- cgit v1.2.3 From ef16b1734f0a176277b7bb9c71a6d977a6ef3998 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Wed, 20 Oct 2021 07:42:46 -0400 Subject: sctp: add vtag check in sctp_sf_do_8_5_1_E_sa sctp_sf_do_8_5_1_E_sa() is called when processing SHUTDOWN_ACK chunk in cookie_wait and cookie_echoed state. The vtag in the chunk's sctphdr should be verified, otherwise, as later in chunk length check, it may send abort with the existent asoc's vtag, which can be exploited by one to cook a malicious chunk to terminate a SCTP asoc. Note that when fails to verify the vtag from SHUTDOWN-ACK chunk, SHUTDOWN COMPLETE message will still be sent back to peer, but with the vtag from SHUTDOWN-ACK chunk, as said in 5) of rfc4960#section-8.4. While at it, also remove the unnecessary chunk length check from sctp_sf_shut_8_4_5(), as it's already done in both places where it calls sctp_sf_shut_8_4_5(). Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Xin Long Acked-by: Marcelo Ricardo Leitner Signed-off-by: Jakub Kicinski --- net/sctp/sm_statefuns.c | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c index 36328ab88bdd..a3545498a038 100644 --- a/net/sctp/sm_statefuns.c +++ b/net/sctp/sm_statefuns.c @@ -3803,12 +3803,6 @@ static enum sctp_disposition sctp_sf_shut_8_4_5( SCTP_INC_STATS(net, SCTP_MIB_OUTCTRLCHUNKS); - /* If the chunk length is invalid, we don't want to process - * the reset of the packet. - */ - if (!sctp_chunk_length_valid(chunk, sizeof(struct sctp_chunkhdr))) - return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands); - /* We need to discard the rest of the packet to prevent * potential boomming attacks from additional bundled chunks. * This is documented in SCTP Threats ID. 
@@ -3836,6 +3830,9 @@ enum sctp_disposition sctp_sf_do_8_5_1_E_sa(struct net *net, { struct sctp_chunk *chunk = arg; + if (!sctp_vtag_verify(chunk, asoc)) + asoc = NULL; + /* Make sure that the SHUTDOWN_ACK chunk has a valid length. */ if (!sctp_chunk_length_valid(chunk, sizeof(struct sctp_chunkhdr))) return sctp_sf_violation_chunklen(net, ep, asoc, type, arg, -- cgit v1.2.3 From 9d02831e517aa36ee6bdb453a0eb47bd49923fe3 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Wed, 20 Oct 2021 07:42:47 -0400 Subject: sctp: add vtag check in sctp_sf_ootb sctp_sf_ootb() is called when processing DATA chunk in closed state, and many other places are also using it. The vtag in the chunk's sctphdr should be verified, otherwise, as later in chunk length check, it may send abort with the existent asoc's vtag, which can be exploited by one to cook a malicious chunk to terminate a SCTP asoc. When fails to verify the vtag from the chunk, this patch sets asoc to NULL, so that the abort will be made with the vtag from the received chunk later. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Xin Long Acked-by: Marcelo Ricardo Leitner Signed-off-by: Jakub Kicinski --- net/sctp/sm_statefuns.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c index a3545498a038..fb3da4d8f4a3 100644 --- a/net/sctp/sm_statefuns.c +++ b/net/sctp/sm_statefuns.c @@ -3688,6 +3688,9 @@ enum sctp_disposition sctp_sf_ootb(struct net *net, SCTP_INC_STATS(net, SCTP_MIB_OUTOFBLUES); + if (asoc && !sctp_vtag_verify(chunk, asoc)) + asoc = NULL; + ch = (struct sctp_chunkhdr *)chunk->chunk_hdr; do { /* Report violation if the chunk is less then minimal */ -- cgit v1.2.3 From 1f83b835a3eaa5ae4bd825fb07182698bfc243ba Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Thu, 21 Oct 2021 16:02:47 +0200 Subject: fcnal-test: kill hanging ping/nettest binaries on cleanup On my box I see a bunch of ping/nettest processes hanging around after fcntal-test.sh is done. Clean those up before netns deletion. Signed-off-by: Florian Westphal Acked-by: David Ahern Link: https://lore.kernel.org/r/20211021140247.29691-1-fw@strlen.de Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/fcnal-test.sh | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tools/testing/selftests/net/fcnal-test.sh b/tools/testing/selftests/net/fcnal-test.sh index 8e67a252b672..3313566ce906 100755 --- a/tools/testing/selftests/net/fcnal-test.sh +++ b/tools/testing/selftests/net/fcnal-test.sh @@ -445,10 +445,13 @@ cleanup() ip -netns ${NSA} link set dev ${NSA_DEV} down ip -netns ${NSA} link del dev ${NSA_DEV} + ip netns pids ${NSA} | xargs kill 2>/dev/null ip netns del ${NSA} fi + ip netns pids ${NSB} | xargs kill 2>/dev/null ip netns del ${NSB} + ip netns pids ${NSC} | xargs kill 2>/dev/null ip netns del ${NSC} >/dev/null 2>&1 } -- cgit v1.2.3 From 8f04db78e4e36a5d4858ce841a3e9cc3d69bde36 Mon Sep 17 00:00:00 2001 From: Lorenz Bauer Date: Thu, 14 Oct 2021 15:25:51 +0100 Subject: bpf: Define bpf_jit_alloc_exec_limit for riscv JIT MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Expose the maximum amount of useable memory from the riscv JIT. 
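The hook itself is the usual weak-symbol arrangement: generic code supplies a __weak fallback and a JIT that wants to report its own bound provides a strong definition that the linker prefers. A minimal sketch of that pattern (placeholder constants, the two definitions living in separate files):

    /* Core file: weak fallback, used when no arch override exists. */
    u64 __weak arch_exec_limit(void)
    {
            return FALLBACK_LIMIT;          /* placeholder constant */
    }

    /* Arch JIT file: strong definition overrides the weak one. */
    u64 arch_exec_limit(void)
    {
            return JIT_REGION_SIZE;         /* placeholder constant */
    }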
Signed-off-by: Lorenz Bauer Signed-off-by: Alexei Starovoitov Acked-by: Luke Nelson Acked-by: Björn Töpel Link: https://lore.kernel.org/bpf/20211014142554.53120-2-lmb@cloudflare.com --- arch/riscv/net/bpf_jit_core.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/arch/riscv/net/bpf_jit_core.c b/arch/riscv/net/bpf_jit_core.c index fed86f42dfbe..0fee2cbaaf53 100644 --- a/arch/riscv/net/bpf_jit_core.c +++ b/arch/riscv/net/bpf_jit_core.c @@ -166,6 +166,11 @@ out: return prog; } +u64 bpf_jit_alloc_exec_limit(void) +{ + return BPF_JIT_REGION_SIZE; +} + void *bpf_jit_alloc_exec(unsigned long size) { return __vmalloc_node_range(size, PAGE_SIZE, BPF_JIT_REGION_START, -- cgit v1.2.3 From 5d63ae908242f028bd10860cba98450d11c079b8 Mon Sep 17 00:00:00 2001 From: Lorenz Bauer Date: Thu, 14 Oct 2021 15:25:52 +0100 Subject: bpf: Define bpf_jit_alloc_exec_limit for arm64 JIT Expose the maximum amount of useable memory from the arm64 JIT. Signed-off-by: Lorenz Bauer Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20211014142554.53120-3-lmb@cloudflare.com --- arch/arm64/net/bpf_jit_comp.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/arch/arm64/net/bpf_jit_comp.c b/arch/arm64/net/bpf_jit_comp.c index 41c23f474ea6..803e7773fa86 100644 --- a/arch/arm64/net/bpf_jit_comp.c +++ b/arch/arm64/net/bpf_jit_comp.c @@ -1136,6 +1136,11 @@ out: return prog; } +u64 bpf_jit_alloc_exec_limit(void) +{ + return BPF_JIT_REGION_SIZE; +} + void *bpf_jit_alloc_exec(unsigned long size) { return __vmalloc_node_range(size, PAGE_SIZE, BPF_JIT_REGION_START, -- cgit v1.2.3 From fadb7ff1a6c2c565af56b4aacdd086b067eed440 Mon Sep 17 00:00:00 2001 From: Lorenz Bauer Date: Thu, 14 Oct 2021 15:25:53 +0100 Subject: bpf: Prevent increasing bpf_jit_limit above max Restrict bpf_jit_limit to the maximum supported by the arch's JIT. Signed-off-by: Lorenz Bauer Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20211014142554.53120-4-lmb@cloudflare.com --- include/linux/filter.h | 1 + kernel/bpf/core.c | 4 +++- net/core/sysctl_net_core.c | 2 +- 3 files changed, 5 insertions(+), 2 deletions(-) diff --git a/include/linux/filter.h b/include/linux/filter.h index 4a93c12543ee..ef03ff34234d 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -1051,6 +1051,7 @@ extern int bpf_jit_enable; extern int bpf_jit_harden; extern int bpf_jit_kallsyms; extern long bpf_jit_limit; +extern long bpf_jit_limit_max; typedef void (*bpf_jit_fill_hole_t)(void *area, unsigned int size); diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index d6b7dfdd8066..c1e7eb3f1876 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -524,6 +524,7 @@ int bpf_jit_enable __read_mostly = IS_BUILTIN(CONFIG_BPF_JIT_DEFAULT_ON); int bpf_jit_kallsyms __read_mostly = IS_BUILTIN(CONFIG_BPF_JIT_DEFAULT_ON); int bpf_jit_harden __read_mostly; long bpf_jit_limit __read_mostly; +long bpf_jit_limit_max __read_mostly; static void bpf_prog_ksym_set_addr(struct bpf_prog *prog) @@ -817,7 +818,8 @@ u64 __weak bpf_jit_alloc_exec_limit(void) static int __init bpf_jit_charge_init(void) { /* Only used as heuristic here to derive limit. 
*/ - bpf_jit_limit = min_t(u64, round_up(bpf_jit_alloc_exec_limit() >> 2, + bpf_jit_limit_max = bpf_jit_alloc_exec_limit(); + bpf_jit_limit = min_t(u64, round_up(bpf_jit_limit_max >> 2, PAGE_SIZE), LONG_MAX); return 0; } diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c index c8496c1142c9..5f88526ad61c 100644 --- a/net/core/sysctl_net_core.c +++ b/net/core/sysctl_net_core.c @@ -419,7 +419,7 @@ static struct ctl_table net_core_table[] = { .mode = 0600, .proc_handler = proc_dolongvec_minmax_bpf_restricted, .extra1 = &long_one, - .extra2 = &long_max, + .extra2 = &bpf_jit_limit_max, }, #endif { -- cgit v1.2.3 From fda7a38714f40b635f5502ec4855602c6b33dad2 Mon Sep 17 00:00:00 2001 From: Xu Kuohai Date: Tue, 19 Oct 2021 03:29:34 +0000 Subject: bpf: Fix error usage of map_fd and fdget() in generic_map_update_batch() 1. The ufd in generic_map_update_batch() should be read from batch.map_fd; 2. A call to fdget() should be followed by a symmetric call to fdput(). Fixes: aa2e93b8e58e ("bpf: Add generic support for update and delete batch ops") Signed-off-by: Xu Kuohai Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20211019032934.1210517-1-xukuohai@huawei.com --- kernel/bpf/syscall.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 4e50c0bfdb7d..9dab49d3f394 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -1337,12 +1337,11 @@ int generic_map_update_batch(struct bpf_map *map, void __user *values = u64_to_user_ptr(attr->batch.values); void __user *keys = u64_to_user_ptr(attr->batch.keys); u32 value_size, cp, max_count; - int ufd = attr->map_fd; + int ufd = attr->batch.map_fd; void *key, *value; struct fd f; int err = 0; - f = fdget(ufd); if (attr->batch.elem_flags & ~BPF_F_LOCK) return -EINVAL; @@ -1367,6 +1366,7 @@ int generic_map_update_batch(struct bpf_map *map, return -ENOMEM; } + f = fdget(ufd); /* bpf_map_do_batch() guarantees ufd is valid */ for (cp = 0; cp < max_count; cp++) { err = -EFAULT; if (copy_from_user(key, keys + cp * map->key_size, @@ -1386,6 +1386,7 @@ int generic_map_update_batch(struct bpf_map *map, kvfree(value); kvfree(key); + fdput(f); return err; } -- cgit v1.2.3 From 04f8ef5643bcd8bcde25dfdebef998aea480b2ba Mon Sep 17 00:00:00 2001 From: Quanyang Wang Date: Mon, 18 Oct 2021 15:56:23 +0800 Subject: cgroup: Fix memory leak caused by missing cgroup_bpf_offline When enabling CONFIG_CGROUP_BPF, kmemleak can be observed by running the command as below: $mount -t cgroup -o none,name=foo cgroup cgroup/ $umount cgroup/ unreferenced object 0xc3585c40 (size 64): comm "mount", pid 425, jiffies 4294959825 (age 31.990s) hex dump (first 32 bytes): 01 00 00 80 84 8c 28 c0 00 00 00 00 00 00 00 00 ......(......... 00 00 00 00 00 00 00 00 6c 43 a0 c3 00 00 00 00 ........lC...... backtrace: [] cgroup_bpf_inherit+0x44/0x24c [<1f03679c>] cgroup_setup_root+0x174/0x37c [] cgroup1_get_tree+0x2c0/0x4a0 [] vfs_get_tree+0x24/0x108 [] path_mount+0x384/0x988 [] do_mount+0x64/0x9c [<208c9cfe>] sys_mount+0xfc/0x1f4 [<06dd06e0>] ret_fast_syscall+0x0/0x48 [] 0xbeb4daa8 This is because that since the commit 2b0d3d3e4fcf ("percpu_ref: reduce memory footprint of percpu_ref in fast path") root_cgrp->bpf.refcnt.data is allocated by the function percpu_ref_init in cgroup_bpf_inherit which is called by cgroup_setup_root when mounting, but not freed along with root_cgrp when umounting. 
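For readers unfamiliar with percpu_ref, the leak comes down to an unpaired init/kill: percpu_ref_init() allocates the per-CPU counters and the refcnt data, and only the kill/release path ever frees them. A minimal sketch of the required pairing, using illustrative names rather than the cgroup code itself:

#include <linux/completion.h>
#include <linux/percpu-refcount.h>

static DECLARE_COMPLETION(demo_released);

static void demo_release(struct percpu_ref *ref)
{
        complete(&demo_released);               /* last reference is gone */
}

static int demo_setup(struct percpu_ref *ref)
{
        /* Allocates ref->data and the per-CPU counters -- the object that
         * kmemleak reports above when the kill below is never issued. */
        return percpu_ref_init(ref, demo_release, 0, GFP_KERNEL);
}

static void demo_teardown(struct percpu_ref *ref)
{
        percpu_ref_kill(ref);                   /* the step missing on umount */
        wait_for_completion(&demo_released);
        percpu_ref_exit(ref);                   /* frees what init allocated */
}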
Adding cgroup_bpf_offline which calls percpu_ref_kill to cgroup_kill_sb can free root_cgrp->bpf.refcnt.data in umount path. This patch also fixes the commit 4bfc0bb2c60e ("bpf: decouple the lifetime of cgroup_bpf from cgroup itself"). A cgroup_bpf_offline is needed to do a cleanup that frees the resources which are allocated by cgroup_bpf_inherit in cgroup_setup_root. And inside cgroup_bpf_offline, cgroup_get() is at the beginning and cgroup_put is at the end of cgroup_bpf_release which is called by cgroup_bpf_offline. So cgroup_bpf_offline can keep the balance of cgroup's refcount. Fixes: 2b0d3d3e4fcf ("percpu_ref: reduce memory footprint of percpu_ref in fast path") Fixes: 4bfc0bb2c60e ("bpf: decouple the lifetime of cgroup_bpf from cgroup itself") Signed-off-by: Quanyang Wang Signed-off-by: Alexei Starovoitov Acked-by: Roman Gushchin Acked-by: John Fastabend Link: https://lore.kernel.org/bpf/20211018075623.26884-1-quanyang.wang@windriver.com --- kernel/cgroup/cgroup.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c index 570b0c97392a..ea08f01d0111 100644 --- a/kernel/cgroup/cgroup.c +++ b/kernel/cgroup/cgroup.c @@ -2187,8 +2187,10 @@ static void cgroup_kill_sb(struct super_block *sb) * And don't kill the default root. */ if (list_empty(&root->cgrp.self.children) && root != &cgrp_dfl_root && - !percpu_ref_is_dying(&root->cgrp.self.refcnt)) + !percpu_ref_is_dying(&root->cgrp.self.refcnt)) { + cgroup_bpf_offline(&root->cgrp); percpu_ref_kill(&root->cgrp.self.refcnt); + } cgroup_put(&root->cgrp); kernfs_kill_sb(sb); } -- cgit v1.2.3 From d6423d2ec39cce2bfca418c81ef51792891576bc Mon Sep 17 00:00:00 2001 From: Yuiko Oshino Date: Fri, 22 Oct 2021 11:13:53 -0400 Subject: net: ethernet: microchip: lan743x: Fix driver crash when lan743x_pm_resume fails The driver needs to clean up and return when the initialization fails on resume. Fixes: 23f0703c125b ("lan743x: Add main source files for new lan743x driver") Signed-off-by: Yuiko Oshino Signed-off-by: David S. Miller --- drivers/net/ethernet/microchip/lan743x_main.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/ethernet/microchip/lan743x_main.c b/drivers/net/ethernet/microchip/lan743x_main.c index 9e8561cdc32a..3ac968b1c6ce 100644 --- a/drivers/net/ethernet/microchip/lan743x_main.c +++ b/drivers/net/ethernet/microchip/lan743x_main.c @@ -3019,6 +3019,8 @@ static int lan743x_pm_resume(struct device *dev) if (ret) { netif_err(adapter, probe, adapter->netdev, "lan743x_hardware_init returned %d\n", ret); + lan743x_pci_cleanup(adapter); + return ret; } /* open netdev when netdev is at running state while resume. -- cgit v1.2.3 From 95a359c9553342d36d408d35331ff0bfce75272f Mon Sep 17 00:00:00 2001 From: Yuiko Oshino Date: Fri, 22 Oct 2021 11:53:43 -0400 Subject: net: ethernet: microchip: lan743x: Fix dma allocation failure by using dma_set_mask_and_coherent The dma failure was reported in the raspberry pi github (issue #4117). https://github.com/raspberrypi/linux/issues/4117 The use of dma_set_mask_and_coherent fixes the issue. Tested on 32/64-bit raspberry pi CM4 and 64-bit ubuntu x86 PC with EVB-LAN7430. Fixes: 23f0703c125b ("lan743x: Add main source files for new lan743x driver") Signed-off-by: Yuiko Oshino Signed-off-by: David S. 
Miller --- drivers/net/ethernet/microchip/lan743x_main.c | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/drivers/net/ethernet/microchip/lan743x_main.c b/drivers/net/ethernet/microchip/lan743x_main.c index 3ac968b1c6ce..c4f32c7e0db1 100644 --- a/drivers/net/ethernet/microchip/lan743x_main.c +++ b/drivers/net/ethernet/microchip/lan743x_main.c @@ -1743,6 +1743,16 @@ static int lan743x_tx_ring_init(struct lan743x_tx *tx) ret = -EINVAL; goto cleanup; } + if (dma_set_mask_and_coherent(&tx->adapter->pdev->dev, + DMA_BIT_MASK(64))) { + if (dma_set_mask_and_coherent(&tx->adapter->pdev->dev, + DMA_BIT_MASK(32))) { + dev_warn(&tx->adapter->pdev->dev, + "lan743x_: No suitable DMA available\n"); + ret = -ENOMEM; + goto cleanup; + } + } ring_allocation_size = ALIGN(tx->ring_size * sizeof(struct lan743x_tx_descriptor), PAGE_SIZE); @@ -2276,6 +2286,16 @@ static int lan743x_rx_ring_init(struct lan743x_rx *rx) ret = -EINVAL; goto cleanup; } + if (dma_set_mask_and_coherent(&rx->adapter->pdev->dev, + DMA_BIT_MASK(64))) { + if (dma_set_mask_and_coherent(&rx->adapter->pdev->dev, + DMA_BIT_MASK(32))) { + dev_warn(&rx->adapter->pdev->dev, + "lan743x_: No suitable DMA available\n"); + ret = -ENOMEM; + goto cleanup; + } + } ring_allocation_size = ALIGN(rx->ring_size * sizeof(struct lan743x_rx_descriptor), PAGE_SIZE); -- cgit v1.2.3 From 41ee7232fa5f3c034f22baa52bc287e494e2129a Mon Sep 17 00:00:00 2001 From: Alexey Kardashevskiy Date: Thu, 21 Oct 2021 00:23:13 +1100 Subject: powerpc/pseries/iommu: Use correct vfree for it_map The it_map array is vzalloc'ed so use vfree() for it when creating a huge DMA window failed for whatever reason. While at this, write zero to it_map. Fixes: 381ceda88c4c ("powerpc/pseries/iommu: Make use of DDW for indirect mapping") Signed-off-by: Alexey Kardashevskiy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20211020132315.2287178-3-aik@ozlabs.ru --- arch/powerpc/platforms/pseries/iommu.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/platforms/pseries/iommu.c b/arch/powerpc/platforms/pseries/iommu.c index dab5c56ffd0e..c003f698eedd 100644 --- a/arch/powerpc/platforms/pseries/iommu.c +++ b/arch/powerpc/platforms/pseries/iommu.c @@ -1440,7 +1440,8 @@ static bool enable_ddw(struct pci_dev *dev, struct device_node *pdn) /* Keep default DMA window stuct if removed */ if (default_win_removed) { tbl->it_size = 0; - kfree(tbl->it_map); + vfree(tbl->it_map); + tbl->it_map = NULL; } set_iommu_table_base(&dev->dev, newtbl); -- cgit v1.2.3 From 92fe01b7c655b9767724e7d62bdded0761d232ff Mon Sep 17 00:00:00 2001 From: Alexey Kardashevskiy Date: Thu, 21 Oct 2021 00:23:14 +1100 Subject: powerpc/pseries/iommu: Check if the default window in use before removing it At the moment this check is performed after we remove the default window which is late and disallows to revert whatever changes enable_ddw() has made to DMA windows. This moves the check and error exit before removing the window. This raised the message severity from "debug" to "warning" as this should not happen in practice and cannot be triggered by the userspace. 
Fixes: 381ceda88c4c ("powerpc/pseries/iommu: Make use of DDW for indirect mapping") Signed-off-by: Alexey Kardashevskiy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20211020132315.2287178-4-aik@ozlabs.ru --- arch/powerpc/platforms/pseries/iommu.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/arch/powerpc/platforms/pseries/iommu.c b/arch/powerpc/platforms/pseries/iommu.c index c003f698eedd..8f921e38ce74 100644 --- a/arch/powerpc/platforms/pseries/iommu.c +++ b/arch/powerpc/platforms/pseries/iommu.c @@ -1302,6 +1302,12 @@ static bool enable_ddw(struct pci_dev *dev, struct device_node *pdn) struct property *default_win; int reset_win_ext; + /* DDW + IOMMU on single window may fail if there is any allocation */ + if (iommu_table_in_use(tbl)) { + dev_warn(&dev->dev, "current IOMMU table in use, can't be replaced.\n"); + goto out_failed; + } + default_win = of_find_property(pdn, "ibm,dma-window", NULL); if (!default_win) goto out_failed; @@ -1356,12 +1362,6 @@ static bool enable_ddw(struct pci_dev *dev, struct device_node *pdn) query.largest_available_block, 1ULL << page_shift); - /* DDW + IOMMU on single window may fail if there is any allocation */ - if (default_win_removed && iommu_table_in_use(tbl)) { - dev_dbg(&dev->dev, "current IOMMU table in use, can't be replaced.\n"); - goto out_failed; - } - len = order_base_2(query.largest_available_block << page_shift); win_name = DMA64_PROPNAME; } else { -- cgit v1.2.3 From d853adc7adf601d7d6823afe3ed396065a3e08d1 Mon Sep 17 00:00:00 2001 From: Alexey Kardashevskiy Date: Thu, 21 Oct 2021 00:23:15 +1100 Subject: powerpc/pseries/iommu: Create huge DMA window if no MMIO32 is present The iommu_init_table() helper takes an address range to reserve in the IOMMU table being initialized to exclude MMIO addresses, this is useful if the window stretches far beyond 4GB (although wastes some TCEs). At the moment the code searches for such MMIO32 range and fails if none found which is considered a problem while it really is not: it is actually better as this says there is no MMIO32 to reserve and we can use usually wasted TCEs. Furthermore PHYP never actually allows creating windows starting at busaddress=0 so this MMIO32 range is never useful. This removes error exit and initializes the table with zero range if no MMIO32 is detected. 
Fixes: 381ceda88c4c ("powerpc/pseries/iommu: Make use of DDW for indirect mapping") Signed-off-by: Alexey Kardashevskiy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20211020132315.2287178-5-aik@ozlabs.ru --- arch/powerpc/platforms/pseries/iommu.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/arch/powerpc/platforms/pseries/iommu.c b/arch/powerpc/platforms/pseries/iommu.c index 8f921e38ce74..a52af8fbf571 100644 --- a/arch/powerpc/platforms/pseries/iommu.c +++ b/arch/powerpc/platforms/pseries/iommu.c @@ -1411,18 +1411,19 @@ static bool enable_ddw(struct pci_dev *dev, struct device_node *pdn) } else { struct iommu_table *newtbl; int i; + unsigned long start = 0, end = 0; for (i = 0; i < ARRAY_SIZE(pci->phb->mem_resources); i++) { const unsigned long mask = IORESOURCE_MEM_64 | IORESOURCE_MEM; /* Look for MMIO32 */ - if ((pci->phb->mem_resources[i].flags & mask) == IORESOURCE_MEM) + if ((pci->phb->mem_resources[i].flags & mask) == IORESOURCE_MEM) { + start = pci->phb->mem_resources[i].start; + end = pci->phb->mem_resources[i].end; break; + } } - if (i == ARRAY_SIZE(pci->phb->mem_resources)) - goto out_del_list; - /* New table for using DDW instead of the default DMA window */ newtbl = iommu_pseries_alloc_table(pci->phb->node); if (!newtbl) { @@ -1432,8 +1433,7 @@ static bool enable_ddw(struct pci_dev *dev, struct device_node *pdn) iommu_table_setparms_common(newtbl, pci->phb->bus->number, create.liobn, win_addr, 1UL << len, page_shift, NULL, &iommu_table_lpar_multi_ops); - iommu_init_table(newtbl, pci->phb->node, pci->phb->mem_resources[i].start, - pci->phb->mem_resources[i].end); + iommu_init_table(newtbl, pci->phb->node, start, end); pci->table_group->tables[1] = newtbl; -- cgit v1.2.3 From 85fe6415c146d5d42ce300c12f1ecf4d4af47d40 Mon Sep 17 00:00:00 2001 From: Jonas Gorski Date: Thu, 14 Oct 2021 14:33:42 +0200 Subject: gpio: xgs-iproc: fix parsing of ngpios property of_property_read_u32 returns 0 on success, not true, so we need to invert the check to actually take over the provided ngpio value. Fixes: 6a41b6c5fc20 ("gpio: Add xgs-iproc driver") Signed-off-by: Jonas Gorski Reviewed-by: Chris Packham Reviewed-by: Florian Fainelli Reviewed-by: Linus Walleij Signed-off-by: Bartosz Golaszewski --- drivers/gpio/gpio-xgs-iproc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpio/gpio-xgs-iproc.c b/drivers/gpio/gpio-xgs-iproc.c index fa9b4d8c3ff5..43ca52fa6f9a 100644 --- a/drivers/gpio/gpio-xgs-iproc.c +++ b/drivers/gpio/gpio-xgs-iproc.c @@ -224,7 +224,7 @@ static int iproc_gpio_probe(struct platform_device *pdev) } chip->gc.label = dev_name(dev); - if (of_property_read_u32(dn, "ngpios", &num_gpios)) + if (!of_property_read_u32(dn, "ngpios", &num_gpios)) chip->gc.ngpio = num_gpios; irq = platform_get_irq(pdev, 0); -- cgit v1.2.3 From c0eee6fbfa2b3377f1efed10dad539abeb7312aa Mon Sep 17 00:00:00 2001 From: Asmaa Mnebhi Date: Fri, 22 Oct 2021 09:44:38 -0400 Subject: gpio: mlxbf2.c: Add check for bgpio_init failure Add a check if bgpio_init fails. 
Signed-off-by: Asmaa Mnebhi Signed-off-by: Bartosz Golaszewski --- drivers/gpio/gpio-mlxbf2.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/gpio/gpio-mlxbf2.c b/drivers/gpio/gpio-mlxbf2.c index 177d03ef4529..40a052bc6784 100644 --- a/drivers/gpio/gpio-mlxbf2.c +++ b/drivers/gpio/gpio-mlxbf2.c @@ -256,6 +256,11 @@ mlxbf2_gpio_probe(struct platform_device *pdev) NULL, 0); + if (ret) { + dev_err(dev, "bgpio_init failed\n"); + return ret; + } + gc->direction_input = mlxbf2_gpio_direction_input; gc->direction_output = mlxbf2_gpio_direction_output; gc->ngpio = npins; -- cgit v1.2.3 From 8228c77d8b56e3f735baf71fefb1b548c23691a7 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Sat, 23 Oct 2021 21:29:22 +0100 Subject: KVM: x86: switch pvclock_gtod_sync_lock to a raw spinlock On the preemption path when updating a Xen guest's runstate times, this lock is taken inside the scheduler rq->lock, which is a raw spinlock. This was shown in a lockdep warning: [ 89.138354] ============================= [ 89.138356] [ BUG: Invalid wait context ] [ 89.138358] 5.15.0-rc5+ #834 Tainted: G S I E [ 89.138360] ----------------------------- [ 89.138361] xen_shinfo_test/2575 is trying to lock: [ 89.138363] ffffa34a0364efd8 (&kvm->arch.pvclock_gtod_sync_lock){....}-{3:3}, at: get_kvmclock_ns+0x1f/0x130 [kvm] [ 89.138442] other info that might help us debug this: [ 89.138444] context-{5:5} [ 89.138445] 4 locks held by xen_shinfo_test/2575: [ 89.138447] #0: ffff972bdc3b8108 (&vcpu->mutex){+.+.}-{4:4}, at: kvm_vcpu_ioctl+0x77/0x6f0 [kvm] [ 89.138483] #1: ffffa34a03662e90 (&kvm->srcu){....}-{0:0}, at: kvm_arch_vcpu_ioctl_run+0xdc/0x8b0 [kvm] [ 89.138526] #2: ffff97331fdbac98 (&rq->__lock){-.-.}-{2:2}, at: __schedule+0xff/0xbd0 [ 89.138534] #3: ffffa34a03662e90 (&kvm->srcu){....}-{0:0}, at: kvm_arch_vcpu_put+0x26/0x170 [kvm] ... 
[ 89.138695] get_kvmclock_ns+0x1f/0x130 [kvm] [ 89.138734] kvm_xen_update_runstate+0x14/0x90 [kvm] [ 89.138783] kvm_xen_update_runstate_guest+0x15/0xd0 [kvm] [ 89.138830] kvm_arch_vcpu_put+0xe6/0x170 [kvm] [ 89.138870] kvm_sched_out+0x2f/0x40 [kvm] [ 89.138900] __schedule+0x5de/0xbd0 Cc: stable@vger.kernel.org Reported-by: syzbot+b282b65c2c68492df769@syzkaller.appspotmail.com Fixes: 30b5c851af79 ("KVM: x86/xen: Add support for vCPU runstate information") Signed-off-by: David Woodhouse Message-Id: <1b02a06421c17993df337493a68ba923f3bd5c0f.camel@infradead.org> Signed-off-by: Paolo Bonzini --- arch/x86/include/asm/kvm_host.h | 2 +- arch/x86/kvm/x86.c | 28 ++++++++++++++-------------- 2 files changed, 15 insertions(+), 15 deletions(-) diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index f8f48a7ec577..70771376e246 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -1097,7 +1097,7 @@ struct kvm_arch { u64 cur_tsc_generation; int nr_vcpus_matched_tsc; - spinlock_t pvclock_gtod_sync_lock; + raw_spinlock_t pvclock_gtod_sync_lock; bool use_master_clock; u64 master_kernel_ns; u64 master_cycle_now; diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 0c8b5129effd..5c82eff19e4a 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -2542,7 +2542,7 @@ static void kvm_synchronize_tsc(struct kvm_vcpu *vcpu, u64 data) kvm_vcpu_write_tsc_offset(vcpu, offset); raw_spin_unlock_irqrestore(&kvm->arch.tsc_write_lock, flags); - spin_lock_irqsave(&kvm->arch.pvclock_gtod_sync_lock, flags); + raw_spin_lock_irqsave(&kvm->arch.pvclock_gtod_sync_lock, flags); if (!matched) { kvm->arch.nr_vcpus_matched_tsc = 0; } else if (!already_matched) { @@ -2550,7 +2550,7 @@ static void kvm_synchronize_tsc(struct kvm_vcpu *vcpu, u64 data) } kvm_track_tsc_matching(vcpu); - spin_unlock_irqrestore(&kvm->arch.pvclock_gtod_sync_lock, flags); + raw_spin_unlock_irqrestore(&kvm->arch.pvclock_gtod_sync_lock, flags); } static inline void adjust_tsc_offset_guest(struct kvm_vcpu *vcpu, @@ -2780,9 +2780,9 @@ static void kvm_gen_update_masterclock(struct kvm *kvm) kvm_make_mclock_inprogress_request(kvm); /* no guest entries from this point */ - spin_lock_irqsave(&ka->pvclock_gtod_sync_lock, flags); + raw_spin_lock_irqsave(&ka->pvclock_gtod_sync_lock, flags); pvclock_update_vm_gtod_copy(kvm); - spin_unlock_irqrestore(&ka->pvclock_gtod_sync_lock, flags); + raw_spin_unlock_irqrestore(&ka->pvclock_gtod_sync_lock, flags); kvm_for_each_vcpu(i, vcpu, kvm) kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu); @@ -2800,15 +2800,15 @@ u64 get_kvmclock_ns(struct kvm *kvm) unsigned long flags; u64 ret; - spin_lock_irqsave(&ka->pvclock_gtod_sync_lock, flags); + raw_spin_lock_irqsave(&ka->pvclock_gtod_sync_lock, flags); if (!ka->use_master_clock) { - spin_unlock_irqrestore(&ka->pvclock_gtod_sync_lock, flags); + raw_spin_unlock_irqrestore(&ka->pvclock_gtod_sync_lock, flags); return get_kvmclock_base_ns() + ka->kvmclock_offset; } hv_clock.tsc_timestamp = ka->master_cycle_now; hv_clock.system_time = ka->master_kernel_ns + ka->kvmclock_offset; - spin_unlock_irqrestore(&ka->pvclock_gtod_sync_lock, flags); + raw_spin_unlock_irqrestore(&ka->pvclock_gtod_sync_lock, flags); /* both __this_cpu_read() and rdtsc() should be on the same cpu */ get_cpu(); @@ -2902,13 +2902,13 @@ static int kvm_guest_time_update(struct kvm_vcpu *v) * If the host uses TSC clock, then passthrough TSC as stable * to the guest. 
*/ - spin_lock_irqsave(&ka->pvclock_gtod_sync_lock, flags); + raw_spin_lock_irqsave(&ka->pvclock_gtod_sync_lock, flags); use_master_clock = ka->use_master_clock; if (use_master_clock) { host_tsc = ka->master_cycle_now; kernel_ns = ka->master_kernel_ns; } - spin_unlock_irqrestore(&ka->pvclock_gtod_sync_lock, flags); + raw_spin_unlock_irqrestore(&ka->pvclock_gtod_sync_lock, flags); /* Keep irq disabled to prevent changes to the clock */ local_irq_save(flags); @@ -6100,13 +6100,13 @@ set_pit2_out: * is slightly ahead) here we risk going negative on unsigned * 'system_time' when 'user_ns.clock' is very small. */ - spin_lock_irq(&ka->pvclock_gtod_sync_lock); + raw_spin_lock_irq(&ka->pvclock_gtod_sync_lock); if (kvm->arch.use_master_clock) now_ns = ka->master_kernel_ns; else now_ns = get_kvmclock_base_ns(); ka->kvmclock_offset = user_ns.clock - now_ns; - spin_unlock_irq(&ka->pvclock_gtod_sync_lock); + raw_spin_unlock_irq(&ka->pvclock_gtod_sync_lock); kvm_make_all_cpus_request(kvm, KVM_REQ_CLOCK_UPDATE); break; @@ -8139,9 +8139,9 @@ static void kvm_hyperv_tsc_notifier(void) list_for_each_entry(kvm, &vm_list, vm_list) { struct kvm_arch *ka = &kvm->arch; - spin_lock_irqsave(&ka->pvclock_gtod_sync_lock, flags); + raw_spin_lock_irqsave(&ka->pvclock_gtod_sync_lock, flags); pvclock_update_vm_gtod_copy(kvm); - spin_unlock_irqrestore(&ka->pvclock_gtod_sync_lock, flags); + raw_spin_unlock_irqrestore(&ka->pvclock_gtod_sync_lock, flags); kvm_for_each_vcpu(cpu, vcpu, kvm) kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu); @@ -11182,7 +11182,7 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) raw_spin_lock_init(&kvm->arch.tsc_write_lock); mutex_init(&kvm->arch.apic_map_lock); - spin_lock_init(&kvm->arch.pvclock_gtod_sync_lock); + raw_spin_lock_init(&kvm->arch.pvclock_gtod_sync_lock); kvm->arch.kvmclock_offset = -get_kvmclock_base_ns(); pvclock_update_vm_gtod_copy(kvm); -- cgit v1.2.3 From c10a485c3de5ccbf1fff65a382cebcb2730c6b06 Mon Sep 17 00:00:00 2001 From: Andrew Lunn Date: Sun, 24 Oct 2021 21:48:02 +0200 Subject: phy: phy_ethtool_ksettings_get: Lock the phy for consistency The PHY structure should be locked while copying information out if it, otherwise there is no guarantee of self consistency. Without the lock the PHY state machine could be updating the structure. Fixes: 2d55173e71b0 ("phy: add generic function to support ksetting support") Signed-off-by: Andrew Lunn Signed-off-by: David S. 
Miller --- drivers/net/phy/phy.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/phy/phy.c b/drivers/net/phy/phy.c index f124a8a58bd4..8457b829667e 100644 --- a/drivers/net/phy/phy.c +++ b/drivers/net/phy/phy.c @@ -299,6 +299,7 @@ EXPORT_SYMBOL(phy_ethtool_ksettings_set); void phy_ethtool_ksettings_get(struct phy_device *phydev, struct ethtool_link_ksettings *cmd) { + mutex_lock(&phydev->lock); linkmode_copy(cmd->link_modes.supported, phydev->supported); linkmode_copy(cmd->link_modes.advertising, phydev->advertising); linkmode_copy(cmd->link_modes.lp_advertising, phydev->lp_advertising); @@ -317,6 +318,7 @@ void phy_ethtool_ksettings_get(struct phy_device *phydev, cmd->base.autoneg = phydev->autoneg; cmd->base.eth_tp_mdix_ctrl = phydev->mdix_ctrl; cmd->base.eth_tp_mdix = phydev->mdix; + mutex_unlock(&phydev->lock); } EXPORT_SYMBOL(phy_ethtool_ksettings_get); -- cgit v1.2.3 From 64cd92d5e8180c2ded3fdea76862de6f596ae2c9 Mon Sep 17 00:00:00 2001 From: Andrew Lunn Date: Sun, 24 Oct 2021 21:48:03 +0200 Subject: phy: phy_ethtool_ksettings_set: Move after phy_start_aneg This allows it to make use of a helper which assume the PHY is already locked. Fixes: 2d55173e71b0 ("phy: add generic function to support ksetting support") Signed-off-by: Andrew Lunn Signed-off-by: David S. Miller --- drivers/net/phy/phy.c | 106 +++++++++++++++++++++++++------------------------- 1 file changed, 53 insertions(+), 53 deletions(-) diff --git a/drivers/net/phy/phy.c b/drivers/net/phy/phy.c index 8457b829667e..ee584fa8b76d 100644 --- a/drivers/net/phy/phy.c +++ b/drivers/net/phy/phy.c @@ -243,59 +243,6 @@ static void phy_sanitize_settings(struct phy_device *phydev) } } -int phy_ethtool_ksettings_set(struct phy_device *phydev, - const struct ethtool_link_ksettings *cmd) -{ - __ETHTOOL_DECLARE_LINK_MODE_MASK(advertising); - u8 autoneg = cmd->base.autoneg; - u8 duplex = cmd->base.duplex; - u32 speed = cmd->base.speed; - - if (cmd->base.phy_address != phydev->mdio.addr) - return -EINVAL; - - linkmode_copy(advertising, cmd->link_modes.advertising); - - /* We make sure that we don't pass unsupported values in to the PHY */ - linkmode_and(advertising, advertising, phydev->supported); - - /* Verify the settings we care about. */ - if (autoneg != AUTONEG_ENABLE && autoneg != AUTONEG_DISABLE) - return -EINVAL; - - if (autoneg == AUTONEG_ENABLE && linkmode_empty(advertising)) - return -EINVAL; - - if (autoneg == AUTONEG_DISABLE && - ((speed != SPEED_1000 && - speed != SPEED_100 && - speed != SPEED_10) || - (duplex != DUPLEX_HALF && - duplex != DUPLEX_FULL))) - return -EINVAL; - - phydev->autoneg = autoneg; - - if (autoneg == AUTONEG_DISABLE) { - phydev->speed = speed; - phydev->duplex = duplex; - } - - linkmode_copy(phydev->advertising, advertising); - - linkmode_mod_bit(ETHTOOL_LINK_MODE_Autoneg_BIT, - phydev->advertising, autoneg == AUTONEG_ENABLE); - - phydev->master_slave_set = cmd->base.master_slave_cfg; - phydev->mdix_ctrl = cmd->base.eth_tp_mdix_ctrl; - - /* Restart the PHY */ - phy_start_aneg(phydev); - - return 0; -} -EXPORT_SYMBOL(phy_ethtool_ksettings_set); - void phy_ethtool_ksettings_get(struct phy_device *phydev, struct ethtool_link_ksettings *cmd) { @@ -802,6 +749,59 @@ static int phy_poll_aneg_done(struct phy_device *phydev) return ret < 0 ? 
ret : 0; } +int phy_ethtool_ksettings_set(struct phy_device *phydev, + const struct ethtool_link_ksettings *cmd) +{ + __ETHTOOL_DECLARE_LINK_MODE_MASK(advertising); + u8 autoneg = cmd->base.autoneg; + u8 duplex = cmd->base.duplex; + u32 speed = cmd->base.speed; + + if (cmd->base.phy_address != phydev->mdio.addr) + return -EINVAL; + + linkmode_copy(advertising, cmd->link_modes.advertising); + + /* We make sure that we don't pass unsupported values in to the PHY */ + linkmode_and(advertising, advertising, phydev->supported); + + /* Verify the settings we care about. */ + if (autoneg != AUTONEG_ENABLE && autoneg != AUTONEG_DISABLE) + return -EINVAL; + + if (autoneg == AUTONEG_ENABLE && linkmode_empty(advertising)) + return -EINVAL; + + if (autoneg == AUTONEG_DISABLE && + ((speed != SPEED_1000 && + speed != SPEED_100 && + speed != SPEED_10) || + (duplex != DUPLEX_HALF && + duplex != DUPLEX_FULL))) + return -EINVAL; + + phydev->autoneg = autoneg; + + if (autoneg == AUTONEG_DISABLE) { + phydev->speed = speed; + phydev->duplex = duplex; + } + + linkmode_copy(phydev->advertising, advertising); + + linkmode_mod_bit(ETHTOOL_LINK_MODE_Autoneg_BIT, + phydev->advertising, autoneg == AUTONEG_ENABLE); + + phydev->master_slave_set = cmd->base.master_slave_cfg; + phydev->mdix_ctrl = cmd->base.eth_tp_mdix_ctrl; + + /* Restart the PHY */ + phy_start_aneg(phydev); + + return 0; +} +EXPORT_SYMBOL(phy_ethtool_ksettings_set); + /** * phy_speed_down - set speed to lowest speed supported by both link partners * @phydev: the phy_device struct -- cgit v1.2.3 From 707293a56f95f8e7e0cfae008010c7933fb68973 Mon Sep 17 00:00:00 2001 From: Andrew Lunn Date: Sun, 24 Oct 2021 21:48:04 +0200 Subject: phy: phy_start_aneg: Add an unlocked version Split phy_start_aneg into a wrapper which takes the PHY lock, and a helper doing the real work. This will be needed when phy_ethtook_ksettings_set takes the lock. Fixes: 2d55173e71b0 ("phy: add generic function to support ksetting support") Signed-off-by: Andrew Lunn Signed-off-by: David S. Miller --- drivers/net/phy/phy.c | 30 ++++++++++++++++++++++++------ 1 file changed, 24 insertions(+), 6 deletions(-) diff --git a/drivers/net/phy/phy.c b/drivers/net/phy/phy.c index ee584fa8b76d..d845afab1af7 100644 --- a/drivers/net/phy/phy.c +++ b/drivers/net/phy/phy.c @@ -700,7 +700,7 @@ static int phy_check_link_status(struct phy_device *phydev) } /** - * phy_start_aneg - start auto-negotiation for this PHY device + * _phy_start_aneg - start auto-negotiation for this PHY device * @phydev: the phy_device struct * * Description: Sanitizes the settings (if we're not autonegotiating @@ -708,25 +708,43 @@ static int phy_check_link_status(struct phy_device *phydev) * If the PHYCONTROL Layer is operating, we change the state to * reflect the beginning of Auto-negotiation or forcing. */ -int phy_start_aneg(struct phy_device *phydev) +static int _phy_start_aneg(struct phy_device *phydev) { int err; + lockdep_assert_held(&phydev->lock); + if (!phydev->drv) return -EIO; - mutex_lock(&phydev->lock); - if (AUTONEG_DISABLE == phydev->autoneg) phy_sanitize_settings(phydev); err = phy_config_aneg(phydev); if (err < 0) - goto out_unlock; + return err; if (phy_is_started(phydev)) err = phy_check_link_status(phydev); -out_unlock: + + return err; +} + +/** + * phy_start_aneg - start auto-negotiation for this PHY device + * @phydev: the phy_device struct + * + * Description: Sanitizes the settings (if we're not autonegotiating + * them), and then calls the driver's config_aneg function. 
+ * If the PHYCONTROL Layer is operating, we change the state to + * reflect the beginning of Auto-negotiation or forcing. + */ +int phy_start_aneg(struct phy_device *phydev) +{ + int err; + + mutex_lock(&phydev->lock); + err = _phy_start_aneg(phydev); mutex_unlock(&phydev->lock); return err; -- cgit v1.2.3 From af1a02aa23c37045e6adfcf074cf7dbac167a403 Mon Sep 17 00:00:00 2001 From: Andrew Lunn Date: Sun, 24 Oct 2021 21:48:05 +0200 Subject: phy: phy_ethtool_ksettings_set: Lock the PHY while changing settings There is a race condition where the PHY state machine can change members of the phydev structure at the same time userspace requests a change via ethtool. To prevent this, have phy_ethtool_ksettings_set take the PHY lock. Fixes: 2d55173e71b0 ("phy: add generic function to support ksetting support") Reported-by: Walter Stoll Suggested-by: Walter Stoll Tested-by: Walter Stoll Signed-off-by: Andrew Lunn Signed-off-by: David S. Miller --- drivers/net/phy/phy.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/phy/phy.c b/drivers/net/phy/phy.c index d845afab1af7..a3bfb156c83d 100644 --- a/drivers/net/phy/phy.c +++ b/drivers/net/phy/phy.c @@ -798,6 +798,7 @@ int phy_ethtool_ksettings_set(struct phy_device *phydev, duplex != DUPLEX_FULL))) return -EINVAL; + mutex_lock(&phydev->lock); phydev->autoneg = autoneg; if (autoneg == AUTONEG_DISABLE) { @@ -814,8 +815,9 @@ int phy_ethtool_ksettings_set(struct phy_device *phydev, phydev->mdix_ctrl = cmd->base.eth_tp_mdix_ctrl; /* Restart the PHY */ - phy_start_aneg(phydev); + _phy_start_aneg(phydev); + mutex_unlock(&phydev->lock); return 0; } EXPORT_SYMBOL(phy_ethtool_ksettings_set); -- cgit v1.2.3 From 0985dba842eaa391858972cfe2724c3c174a2827 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Sat, 23 Oct 2021 20:47:19 +0100 Subject: KVM: x86/xen: Fix kvm_xen_has_interrupt() sleeping in kvm_vcpu_block() In kvm_vcpu_block, the current task is set to TASK_INTERRUPTIBLE before making a final check whether the vCPU should be woken from HLT by any incoming interrupt. This is a problem for the get_user() in __kvm_xen_has_interrupt(), which really shouldn't be sleeping when the task state has already been set. I think it's actually harmless as it would just manifest itself as a spurious wakeup, but it's causing a debug warning: [ 230.963649] do not call blocking ops when !TASK_RUNNING; state=1 set at [<00000000b6bcdbc9>] prepare_to_swait_exclusive+0x30/0x80 Fix the warning by turning it into an *explicit* spurious wakeup. When invoked with !task_is_running(current) (and we might as well add in_atomic() there while we're at it), just return 1 to indicate that an IRQ is pending, which will cause a wakeup and then something will call it again in a context that *can* sleep so it can fault the page back in. 
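Condensed, the pattern the fix applies is: attempt the user access with page faults disabled, and if the page is absent in a context that must not sleep, report a (possibly spurious) pending event instead of faulting it in. A sketch with a hypothetical helper; the actual change to __kvm_xen_has_interrupt() is in the diff below:

#include <linux/sched.h>
#include <linux/uaccess.h>

static u8 read_event_flag(u8 __user *p)
{
        u8 val = 0;
        int err;

        pagefault_disable();            /* user access must not sleep here */
        err = __get_user(val, p);
        pagefault_enable();
        if (!err)
                return val;             /* fast path: page was resident */

        /*
         * Page not resident. If we are atomic or the task state is already
         * set for sleeping (as in kvm_vcpu_block()), report "pending" so the
         * caller wakes up and re-checks from a context that may fault.
         */
        if (in_atomic() || !task_is_running(current))
                return 1;

        if (get_user(val, p))           /* sleepable slow path */
                val = 0;
        return val;
}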
Cc: stable@vger.kernel.org Fixes: 40da8ccd724f ("KVM: x86/xen: Add event channel interrupt vector upcall") Signed-off-by: David Woodhouse Message-Id: <168bf8c689561da904e48e2ff5ae4713eaef9e2d.camel@infradead.org> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/xen.c | 27 ++++++++++++++++++++++----- 1 file changed, 22 insertions(+), 5 deletions(-) diff --git a/arch/x86/kvm/xen.c b/arch/x86/kvm/xen.c index 9ea9c3dabe37..8f62baebd028 100644 --- a/arch/x86/kvm/xen.c +++ b/arch/x86/kvm/xen.c @@ -190,6 +190,7 @@ void kvm_xen_update_runstate_guest(struct kvm_vcpu *v, int state) int __kvm_xen_has_interrupt(struct kvm_vcpu *v) { + int err; u8 rc = 0; /* @@ -216,13 +217,29 @@ int __kvm_xen_has_interrupt(struct kvm_vcpu *v) if (likely(slots->generation == ghc->generation && !kvm_is_error_hva(ghc->hva) && ghc->memslot)) { /* Fast path */ - __get_user(rc, (u8 __user *)ghc->hva + offset); - } else { - /* Slow path */ - kvm_read_guest_offset_cached(v->kvm, ghc, &rc, offset, - sizeof(rc)); + pagefault_disable(); + err = __get_user(rc, (u8 __user *)ghc->hva + offset); + pagefault_enable(); + if (!err) + return rc; } + /* Slow path */ + + /* + * This function gets called from kvm_vcpu_block() after setting the + * task to TASK_INTERRUPTIBLE, to see if it needs to wake immediately + * from a HLT. So we really mustn't sleep. If the page ended up absent + * at that point, just return 1 in order to trigger an immediate wake, + * and we'll end up getting called again from a context where we *can* + * fault in the page and wait for it. + */ + if (in_atomic() || !task_is_running(current)) + return 1; + + kvm_read_guest_offset_cached(v->kvm, ghc, &rc, offset, + sizeof(rc)); + return rc; } -- cgit v1.2.3 From 09b1d5dc6ce1c9151777f6c4e128a59457704c97 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 25 Oct 2021 13:31:12 +0200 Subject: cfg80211: fix management registrations locking The management registrations locking was broken, the list was locked for each wdev, but cfg80211_mgmt_registrations_update() iterated it without holding all the correct spinlocks, causing list corruption. Rather than trying to fix it with fine-grained locking, just move the lock to the wiphy/rdev (still need the list on each wdev), we already need to hold the wdev lock to change it, so there's no contention on the lock in any case. This trivially fixes the bug since we hold one wdev's lock already, and now will hold the lock that protects all lists. 
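Schematically, the reason the lock has to move up a level is that the update walks the registration lists of every wdev under the rdev, so no single wdev's spinlock can cover the whole traversal. A simplified sketch with made-up types, not the real cfg80211 structures:

#include <linux/list.h>
#include <linux/spinlock.h>

struct demo_wdev {
        struct list_head list;                  /* linked on rdev->wdev_list */
        struct list_head mgmt_registrations;
};

struct demo_rdev {
        spinlock_t mgmt_registrations_lock;     /* one lock for all wdevs */
        struct list_head wdev_list;
};

static void demo_registrations_update(struct demo_rdev *rdev)
{
        struct demo_wdev *wdev;

        spin_lock_bh(&rdev->mgmt_registrations_lock);
        list_for_each_entry(wdev, &rdev->wdev_list, list) {
                /* walking wdev->mgmt_registrations is safe here: every
                 * writer takes the same rdev-level lock */
        }
        spin_unlock_bh(&rdev->mgmt_registrations_lock);
}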
Cc: stable@vger.kernel.org Reported-by: Jouni Malinen Fixes: 6cd536fe62ef ("cfg80211: change internal management frame registration API") Link: https://lore.kernel.org/r/20211025133111.5cf733eab0f4.I7b0abb0494ab712f74e2efcd24bb31ac33f7eee9@changeid Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 2 -- net/wireless/core.c | 2 +- net/wireless/core.h | 2 ++ net/wireless/mlme.c | 26 ++++++++++++++------------ 4 files changed, 17 insertions(+), 15 deletions(-) diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 62dd8422e0dc..27336fc70467 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -5376,7 +5376,6 @@ static inline void wiphy_unlock(struct wiphy *wiphy) * netdev and may otherwise be used by driver read-only, will be update * by cfg80211 on change_interface * @mgmt_registrations: list of registrations for management frames - * @mgmt_registrations_lock: lock for the list * @mgmt_registrations_need_update: mgmt registrations were updated, * need to propagate the update to the driver * @mtx: mutex used to lock data in this struct, may be used by drivers @@ -5423,7 +5422,6 @@ struct wireless_dev { u32 identifier; struct list_head mgmt_registrations; - spinlock_t mgmt_registrations_lock; u8 mgmt_registrations_need_update:1; struct mutex mtx; diff --git a/net/wireless/core.c b/net/wireless/core.c index 03323121ca50..aaba847d79eb 100644 --- a/net/wireless/core.c +++ b/net/wireless/core.c @@ -524,6 +524,7 @@ use_default_name: INIT_WORK(&rdev->propagate_cac_done_wk, cfg80211_propagate_cac_done_wk); INIT_WORK(&rdev->mgmt_registrations_update_wk, cfg80211_mgmt_registrations_update_wk); + spin_lock_init(&rdev->mgmt_registrations_lock); #ifdef CONFIG_CFG80211_DEFAULT_PS rdev->wiphy.flags |= WIPHY_FLAG_PS_ON_BY_DEFAULT; @@ -1279,7 +1280,6 @@ void cfg80211_init_wdev(struct wireless_dev *wdev) INIT_LIST_HEAD(&wdev->event_list); spin_lock_init(&wdev->event_lock); INIT_LIST_HEAD(&wdev->mgmt_registrations); - spin_lock_init(&wdev->mgmt_registrations_lock); INIT_LIST_HEAD(&wdev->pmsr_list); spin_lock_init(&wdev->pmsr_lock); INIT_WORK(&wdev->pmsr_free_wk, cfg80211_pmsr_free_wk); diff --git a/net/wireless/core.h b/net/wireless/core.h index b35d0db12f1d..1720abf36f92 100644 --- a/net/wireless/core.h +++ b/net/wireless/core.h @@ -100,6 +100,8 @@ struct cfg80211_registered_device { struct work_struct propagate_cac_done_wk; struct work_struct mgmt_registrations_update_wk; + /* lock for all wdev lists */ + spinlock_t mgmt_registrations_lock; /* must be last because of the way we do wiphy_priv(), * and it should at least be aligned to NETDEV_ALIGN */ diff --git a/net/wireless/mlme.c b/net/wireless/mlme.c index 3aa69b375a10..783acd2c4211 100644 --- a/net/wireless/mlme.c +++ b/net/wireless/mlme.c @@ -452,9 +452,9 @@ static void cfg80211_mgmt_registrations_update(struct wireless_dev *wdev) lockdep_assert_held(&rdev->wiphy.mtx); - spin_lock_bh(&wdev->mgmt_registrations_lock); + spin_lock_bh(&rdev->mgmt_registrations_lock); if (!wdev->mgmt_registrations_need_update) { - spin_unlock_bh(&wdev->mgmt_registrations_lock); + spin_unlock_bh(&rdev->mgmt_registrations_lock); return; } @@ -479,7 +479,7 @@ static void cfg80211_mgmt_registrations_update(struct wireless_dev *wdev) rcu_read_unlock(); wdev->mgmt_registrations_need_update = 0; - spin_unlock_bh(&wdev->mgmt_registrations_lock); + spin_unlock_bh(&rdev->mgmt_registrations_lock); rdev_update_mgmt_frame_registrations(rdev, wdev, &upd); } @@ -503,6 +503,7 @@ int cfg80211_mlme_register_mgmt(struct wireless_dev *wdev, u32 snd_portid, int 
match_len, bool multicast_rx, struct netlink_ext_ack *extack) { + struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy); struct cfg80211_mgmt_registration *reg, *nreg; int err = 0; u16 mgmt_type; @@ -548,7 +549,7 @@ int cfg80211_mlme_register_mgmt(struct wireless_dev *wdev, u32 snd_portid, if (!nreg) return -ENOMEM; - spin_lock_bh(&wdev->mgmt_registrations_lock); + spin_lock_bh(&rdev->mgmt_registrations_lock); list_for_each_entry(reg, &wdev->mgmt_registrations, list) { int mlen = min(match_len, reg->match_len); @@ -583,7 +584,7 @@ int cfg80211_mlme_register_mgmt(struct wireless_dev *wdev, u32 snd_portid, list_add(&nreg->list, &wdev->mgmt_registrations); } wdev->mgmt_registrations_need_update = 1; - spin_unlock_bh(&wdev->mgmt_registrations_lock); + spin_unlock_bh(&rdev->mgmt_registrations_lock); cfg80211_mgmt_registrations_update(wdev); @@ -591,7 +592,7 @@ int cfg80211_mlme_register_mgmt(struct wireless_dev *wdev, u32 snd_portid, out: kfree(nreg); - spin_unlock_bh(&wdev->mgmt_registrations_lock); + spin_unlock_bh(&rdev->mgmt_registrations_lock); return err; } @@ -602,7 +603,7 @@ void cfg80211_mlme_unregister_socket(struct wireless_dev *wdev, u32 nlportid) struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy); struct cfg80211_mgmt_registration *reg, *tmp; - spin_lock_bh(&wdev->mgmt_registrations_lock); + spin_lock_bh(&rdev->mgmt_registrations_lock); list_for_each_entry_safe(reg, tmp, &wdev->mgmt_registrations, list) { if (reg->nlportid != nlportid) @@ -615,7 +616,7 @@ void cfg80211_mlme_unregister_socket(struct wireless_dev *wdev, u32 nlportid) schedule_work(&rdev->mgmt_registrations_update_wk); } - spin_unlock_bh(&wdev->mgmt_registrations_lock); + spin_unlock_bh(&rdev->mgmt_registrations_lock); if (nlportid && rdev->crit_proto_nlportid == nlportid) { rdev->crit_proto_nlportid = 0; @@ -628,15 +629,16 @@ void cfg80211_mlme_unregister_socket(struct wireless_dev *wdev, u32 nlportid) void cfg80211_mlme_purge_registrations(struct wireless_dev *wdev) { + struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy); struct cfg80211_mgmt_registration *reg, *tmp; - spin_lock_bh(&wdev->mgmt_registrations_lock); + spin_lock_bh(&rdev->mgmt_registrations_lock); list_for_each_entry_safe(reg, tmp, &wdev->mgmt_registrations, list) { list_del(®->list); kfree(reg); } wdev->mgmt_registrations_need_update = 1; - spin_unlock_bh(&wdev->mgmt_registrations_lock); + spin_unlock_bh(&rdev->mgmt_registrations_lock); cfg80211_mgmt_registrations_update(wdev); } @@ -784,7 +786,7 @@ bool cfg80211_rx_mgmt_khz(struct wireless_dev *wdev, int freq, int sig_dbm, data = buf + ieee80211_hdrlen(mgmt->frame_control); data_len = len - ieee80211_hdrlen(mgmt->frame_control); - spin_lock_bh(&wdev->mgmt_registrations_lock); + spin_lock_bh(&rdev->mgmt_registrations_lock); list_for_each_entry(reg, &wdev->mgmt_registrations, list) { if (reg->frame_type != ftype) @@ -808,7 +810,7 @@ bool cfg80211_rx_mgmt_khz(struct wireless_dev *wdev, int freq, int sig_dbm, break; } - spin_unlock_bh(&wdev->mgmt_registrations_lock); + spin_unlock_bh(&rdev->mgmt_registrations_lock); trace_cfg80211_return_bool(result); return result; -- cgit v1.2.3 From 689a0a9f505f7bffdefe6f17fddb41c8ab6344f6 Mon Sep 17 00:00:00 2001 From: Janusz Dziedzic Date: Sun, 24 Oct 2021 22:15:46 +0200 Subject: cfg80211: correct bridge/4addr mode check Without the patch we fail: $ sudo brctl addbr br0 $ sudo brctl addif br0 wlp1s0 $ sudo iw wlp1s0 set 4addr on command failed: Device or resource busy (-16) Last command failed but iface was already in 
4addr mode. Fixes: ad4bb6f8883a ("cfg80211: disallow bridging managed/adhoc interfaces") Signed-off-by: Janusz Dziedzic Link: https://lore.kernel.org/r/20211024201546.614379-1-janusz.dziedzic@gmail.com [add fixes tag, fix indentation, edit commit log] Signed-off-by: Johannes Berg --- net/wireless/util.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/net/wireless/util.c b/net/wireless/util.c index 18dba3d7c638..a1a99a574984 100644 --- a/net/wireless/util.c +++ b/net/wireless/util.c @@ -1028,14 +1028,14 @@ int cfg80211_change_iface(struct cfg80211_registered_device *rdev, !(rdev->wiphy.interface_modes & (1 << ntype))) return -EOPNOTSUPP; - /* if it's part of a bridge, reject changing type to station/ibss */ - if (netif_is_bridge_port(dev) && - (ntype == NL80211_IFTYPE_ADHOC || - ntype == NL80211_IFTYPE_STATION || - ntype == NL80211_IFTYPE_P2P_CLIENT)) - return -EBUSY; - if (ntype != otype) { + /* if it's part of a bridge, reject changing type to station/ibss */ + if (netif_is_bridge_port(dev) && + (ntype == NL80211_IFTYPE_ADHOC || + ntype == NL80211_IFTYPE_STATION || + ntype == NL80211_IFTYPE_P2P_CLIENT)) + return -EBUSY; + dev->ieee80211_ptr->use_4addr = false; dev->ieee80211_ptr->mesh_id_up_len = 0; wdev_lock(dev->ieee80211_ptr); -- cgit v1.2.3 From ace19b992436a257d9a793672e57abc28fe83e2e Mon Sep 17 00:00:00 2001 From: Trevor Woerner Date: Sun, 24 Oct 2021 13:50:02 -0400 Subject: net: nxp: lpc_eth.c: avoid hang when bringing interface down A hard hang is observed whenever the ethernet interface is brought down. If the PHY is stopped before the LPC core block is reset, the SoC will hang. Comparing lpc_eth_close() and lpc_eth_open() I re-arranged the ordering of the functions calls in lpc_eth_close() to reset the hardware before stopping the PHY. Fixes: b7370112f519 ("lpc32xx: Added ethernet driver") Signed-off-by: Trevor Woerner Acked-by: Vladimir Zapolskiy Signed-off-by: David S. Miller --- drivers/net/ethernet/nxp/lpc_eth.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/nxp/lpc_eth.c b/drivers/net/ethernet/nxp/lpc_eth.c index d29fe562b3de..c910fa2f40a4 100644 --- a/drivers/net/ethernet/nxp/lpc_eth.c +++ b/drivers/net/ethernet/nxp/lpc_eth.c @@ -1015,9 +1015,6 @@ static int lpc_eth_close(struct net_device *ndev) napi_disable(&pldat->napi); netif_stop_queue(ndev); - if (ndev->phydev) - phy_stop(ndev->phydev); - spin_lock_irqsave(&pldat->lock, flags); __lpc_eth_reset(pldat); netif_carrier_off(ndev); @@ -1025,6 +1022,8 @@ static int lpc_eth_close(struct net_device *ndev) writel(0, LPC_ENET_MAC2(pldat->net_base)); spin_unlock_irqrestore(&pldat->lock, flags); + if (ndev->phydev) + phy_stop(ndev->phydev); clk_disable_unprepare(pldat->clk); return 0; -- cgit v1.2.3 From 64733956ebba7cc629856f4a6ee35a52bc9c023f Mon Sep 17 00:00:00 2001 From: Mark Zhang Date: Sun, 24 Oct 2021 09:08:20 +0300 Subject: RDMA/sa_query: Use strscpy_pad instead of memcpy to copy a string When copying the device name, the length of the data memcpy copied exceeds the length of the source buffer, which cause the KASAN issue below. Use strscpy_pad() instead. 
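The difference between the two copies, shown as a stand-alone sketch (the KASAN report this avoids follows below; LS_DEVICE_NAME_MAX is assumed to be 64 here for illustration):

#include <linux/string.h>

#define LS_DEVICE_NAME_MAX 64   /* value assumed for this sketch */

static void fill_device_name(char dst[LS_DEVICE_NAME_MAX], const char *src)
{
        /*
         * memcpy(dst, src, LS_DEVICE_NAME_MAX) always reads the full 64
         * bytes from 'src', even when the device name (e.g. "mlx5_0") is
         * only a few bytes long -- the out-of-bounds read KASAN flags.
         *
         * strscpy_pad() stops at the source NUL, guarantees termination,
         * and zero-fills the remainder of 'dst' so no stale kernel bytes
         * end up in the netlink message.
         */
        strscpy_pad(dst, src, LS_DEVICE_NAME_MAX);
}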
BUG: KASAN: slab-out-of-bounds in ib_nl_set_path_rec_attrs+0x136/0x320 [ib_core] Read of size 64 at addr ffff88811a10f5e0 by task rping/140263 CPU: 3 PID: 140263 Comm: rping Not tainted 5.15.0-rc1+ #1 Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.13.0-0-gf21b5a4aeb02-prebuilt.qemu.org 04/01/2014 Call Trace: dump_stack_lvl+0x57/0x7d print_address_description.constprop.0+0x1d/0xa0 kasan_report+0xcb/0x110 kasan_check_range+0x13d/0x180 memcpy+0x20/0x60 ib_nl_set_path_rec_attrs+0x136/0x320 [ib_core] ib_nl_make_request+0x1c6/0x380 [ib_core] send_mad+0x20a/0x220 [ib_core] ib_sa_path_rec_get+0x3e3/0x800 [ib_core] cma_query_ib_route+0x29b/0x390 [rdma_cm] rdma_resolve_route+0x308/0x3e0 [rdma_cm] ucma_resolve_route+0xe1/0x150 [rdma_ucm] ucma_write+0x17b/0x1f0 [rdma_ucm] vfs_write+0x142/0x4d0 ksys_write+0x133/0x160 do_syscall_64+0x43/0x90 entry_SYSCALL_64_after_hwframe+0x44/0xae RIP: 0033:0x7f26499aa90f Code: 89 54 24 18 48 89 74 24 10 89 7c 24 08 e8 29 fd ff ff 48 8b 54 24 18 48 8b 74 24 10 41 89 c0 8b 7c 24 08 b8 01 00 00 00 0f 05 <48> 3d 00 f0 ff ff 77 31 44 89 c7 48 89 44 24 08 e8 5c fd ff ff 48 RSP: 002b:00007f26495f2dc0 EFLAGS: 00000293 ORIG_RAX: 0000000000000001 RAX: ffffffffffffffda RBX: 00000000000007d0 RCX: 00007f26499aa90f RDX: 0000000000000010 RSI: 00007f26495f2e00 RDI: 0000000000000003 RBP: 00005632a8315440 R08: 0000000000000000 R09: 0000000000000001 R10: 0000000000000000 R11: 0000000000000293 R12: 00007f26495f2e00 R13: 00005632a83154e0 R14: 00005632a8315440 R15: 00005632a830a810 Allocated by task 131419: kasan_save_stack+0x1b/0x40 __kasan_kmalloc+0x7c/0x90 proc_self_get_link+0x8b/0x100 pick_link+0x4f1/0x5c0 step_into+0x2eb/0x3d0 walk_component+0xc8/0x2c0 link_path_walk+0x3b8/0x580 path_openat+0x101/0x230 do_filp_open+0x12e/0x240 do_sys_openat2+0x115/0x280 __x64_sys_openat+0xce/0x140 do_syscall_64+0x43/0x90 entry_SYSCALL_64_after_hwframe+0x44/0xae Fixes: 2ca546b92a02 ("IB/sa: Route SA pathrecord query through netlink") Link: https://lore.kernel.org/r/72ede0f6dab61f7f23df9ac7a70666e07ef314b0.1635055496.git.leonro@nvidia.com Signed-off-by: Mark Zhang Reviewed-by: Mark Bloch Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/sa_query.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/core/sa_query.c b/drivers/infiniband/core/sa_query.c index a20b8108e160..c00f8e28aab7 100644 --- a/drivers/infiniband/core/sa_query.c +++ b/drivers/infiniband/core/sa_query.c @@ -706,8 +706,9 @@ static void ib_nl_set_path_rec_attrs(struct sk_buff *skb, /* Construct the family header first */ header = skb_put(skb, NLMSG_ALIGN(sizeof(*header))); - memcpy(header->device_name, dev_name(&query->port->agent->device->dev), - LS_DEVICE_NAME_MAX); + strscpy_pad(header->device_name, + dev_name(&query->port->agent->device->dev), + LS_DEVICE_NAME_MAX); header->port_num = query->port->port_num; if ((comp_mask & IB_SA_PATH_REC_REVERSIBLE) && -- cgit v1.2.3 From 0c57eeecc559ca6bc18b8c4e2808bc78dbe769b0 Mon Sep 17 00:00:00 2001 From: Michael Chan Date: Mon, 25 Oct 2021 05:05:28 -0400 Subject: net: Prevent infinite while loop in skb_tx_hash() Drivers call netdev_set_num_tc() and then netdev_set_tc_queue() to set the queue count and offset for each TC. So the queue count and offset for the TCs may be zero for a short period after dev->num_tc has been set. If a TX packet is being transmitted at this time in the code path netdev_pick_tx() -> skb_tx_hash(), skb_tx_hash() may see nonzero dev->num_tc but zero qcount for the TC. 
The while loop that keeps looping while hash >= qcount will not end. Fix it by checking the TC's qcount to be nonzero before using it. Fixes: eadec877ce9c ("net: Add support for subordinate traffic classes to netdev_pick_tx") Reviewed-by: Andy Gospodarek Signed-off-by: Michael Chan Signed-off-by: David S. Miller --- net/core/dev.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/net/core/dev.c b/net/core/dev.c index 7ee9fecd3aff..ea2366497697 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -3163,6 +3163,12 @@ static u16 skb_tx_hash(const struct net_device *dev, qoffset = sb_dev->tc_to_txq[tc].offset; qcount = sb_dev->tc_to_txq[tc].count; + if (unlikely(!qcount)) { + net_warn_ratelimited("%s: invalid qcount, qoffset %u for tc %u\n", + sb_dev->name, qoffset, tc); + qoffset = 0; + qcount = dev->real_num_tx_queues; + } } if (skb_rx_queue_recorded(skb)) { -- cgit v1.2.3 From 042b2046d0f05cf8124c26ff65dbb6148a4404fb Mon Sep 17 00:00:00 2001 From: Dongli Zhang Date: Fri, 22 Oct 2021 16:31:39 -0700 Subject: xen/netfront: stop tx queues during live migration The tx queues are not stopped during the live migration. As a result, the ndo_start_xmit() may access netfront_info->queues which is freed by talk_to_netback()->xennet_destroy_queues(). This patch is to netif_device_detach() at the beginning of xen-netfront resuming, and netif_device_attach() at the end of resuming. CPU A CPU B talk_to_netback() -> if (info->queues) xennet_destroy_queues(info); to free netfront_info->queues xennet_start_xmit() to access netfront_info->queues -> err = xennet_create_queues(info, &num_queues); The idea is borrowed from virtio-net. Cc: Joe Jin Signed-off-by: Dongli Zhang Signed-off-by: David S. Miller --- drivers/net/xen-netfront.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/net/xen-netfront.c b/drivers/net/xen-netfront.c index e31b98403f31..fc41ba95f81d 100644 --- a/drivers/net/xen-netfront.c +++ b/drivers/net/xen-netfront.c @@ -1730,6 +1730,10 @@ static int netfront_resume(struct xenbus_device *dev) dev_dbg(&dev->dev, "%s\n", dev->nodename); + netif_tx_lock_bh(info->netdev); + netif_device_detach(info->netdev); + netif_tx_unlock_bh(info->netdev); + xennet_disconnect_backend(info); return 0; } @@ -2349,6 +2353,10 @@ static int xennet_connect(struct net_device *dev) * domain a kick because we've probably just requeued some * packets. */ + netif_tx_lock_bh(np->netdev); + netif_device_attach(np->netdev); + netif_tx_unlock_bh(np->netdev); + netif_carrier_on(np->netdev); for (j = 0; j < num_queues; ++j) { queue = &np->queues[j]; -- cgit v1.2.3 From f7a1e76d0f608961cc2fc681f867a834f2746bce Mon Sep 17 00:00:00 2001 From: Xin Long Date: Mon, 25 Oct 2021 02:31:48 -0400 Subject: net-sysfs: initialize uid and gid before calling net_ns_get_ownership Currently in net_ns_get_ownership() it may not be able to set uid or gid if make_kuid or make_kgid returns an invalid value, and an uninit-value issue can be triggered by this. This patch is to fix it by initializing the uid and gid before calling net_ns_get_ownership(), as it does in kobject_get_ownership() Fixes: e6dee9f3893c ("net-sysfs: add netdev_change_owner()") Reported-by: Paolo Abeni Signed-off-by: Xin Long Acked-by: Christian Brauner Signed-off-by: David S. 
Miller --- net/core/net-sysfs.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c index f6197774048b..b2e49eb7001d 100644 --- a/net/core/net-sysfs.c +++ b/net/core/net-sysfs.c @@ -1973,9 +1973,9 @@ int netdev_register_kobject(struct net_device *ndev) int netdev_change_owner(struct net_device *ndev, const struct net *net_old, const struct net *net_new) { + kuid_t old_uid = GLOBAL_ROOT_UID, new_uid = GLOBAL_ROOT_UID; + kgid_t old_gid = GLOBAL_ROOT_GID, new_gid = GLOBAL_ROOT_GID; struct device *dev = &ndev->dev; - kuid_t old_uid, new_uid; - kgid_t old_gid, new_gid; int error; net_ns_get_ownership(net_old, &old_uid, &old_gid); -- cgit v1.2.3 From e091b836a3baee4c8b1423a969589196b88a9e06 Mon Sep 17 00:00:00 2001 From: Amit Pundir Date: Thu, 14 Oct 2021 16:54:10 +0300 Subject: Revert "arm64: dts: qcom: sm8250: remove bus clock from the mdss node for sm8250 target" This reverts commit 001ce9785c0674d913531345e86222c965fc8bf4. This upstream commit broke AOSP (post Android 12 merge) build on RB5. The device either silently crashes into USB crash mode after android boot animation or we see a blank blue screen with following dpu errors in dmesg: [ T444] hw recovery is not complete for ctl:3 [ T444] [drm:dpu_encoder_phys_vid_prepare_for_kickoff:539] [dpu error]enc31 intf1 ctl 3 reset failure: -22 [ T444] [drm:dpu_encoder_phys_vid_wait_for_commit_done:513] [dpu error]vblank timeout [ T444] [drm:dpu_kms_wait_for_commit_done:454] [dpu error]wait for commit done returned -110 [ C7] [drm:dpu_encoder_frame_done_timeout:2127] [dpu error]enc31 frame done timeout [ T444] [drm:dpu_encoder_phys_vid_wait_for_commit_done:513] [dpu error]vblank timeout [ T444] [drm:dpu_kms_wait_for_commit_done:454] [dpu error]wait for commit done returned -110 Fixes: 001ce9785c06 ("arm64: dts: qcom: sm8250: remove bus clock from the mdss node for sm8250 target") Signed-off-by: Amit Pundir Signed-off-by: Dmitry Baryshkov Signed-off-by: Bjorn Andersson Link: https://lore.kernel.org/r/20211014135410.4136412-1-dmitry.baryshkov@linaro.org --- arch/arm64/boot/dts/qcom/sm8250.dtsi | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/arm64/boot/dts/qcom/sm8250.dtsi b/arch/arm64/boot/dts/qcom/sm8250.dtsi index 8c15d9fed08f..d12e4cbfc852 100644 --- a/arch/arm64/boot/dts/qcom/sm8250.dtsi +++ b/arch/arm64/boot/dts/qcom/sm8250.dtsi @@ -2590,9 +2590,10 @@ power-domains = <&dispcc MDSS_GDSC>; clocks = <&dispcc DISP_CC_MDSS_AHB_CLK>, + <&gcc GCC_DISP_HF_AXI_CLK>, <&gcc GCC_DISP_SF_AXI_CLK>, <&dispcc DISP_CC_MDSS_MDP_CLK>; - clock-names = "iface", "nrt_bus", "core"; + clock-names = "iface", "bus", "nrt_bus", "core"; assigned-clocks = <&dispcc DISP_CC_MDSS_MDP_CLK>; assigned-clock-rates = <460000000>; -- cgit v1.2.3 From 6a8b357278f5f8b9817147277ab8f12879dce8a8 Mon Sep 17 00:00:00 2001 From: Dave Ertman Date: Thu, 7 Oct 2021 08:40:31 -0700 Subject: ice: Respond to a NETDEV_UNREGISTER event for LAG When the PF is a member of a link aggregate, and the driver is removed, the process will hang unless we respond to the NETDEV_UNREGISTER event that is sent to the event_handler for LAG. Add a case statement for the ice_lag_event_handler to unlink the PF from the link aggregate. Also remove code that was incorrectly applying a dev_hold to peer_netdevs that were associated with the ice driver. 
Fixes: df006dd4b1dc ("ice: Add initial support framework for LAG") Signed-off-by: Dave Ertman Tested-by: Tony Brelinski Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/ice_lag.c | 18 ++++-------------- 1 file changed, 4 insertions(+), 14 deletions(-) diff --git a/drivers/net/ethernet/intel/ice/ice_lag.c b/drivers/net/ethernet/intel/ice/ice_lag.c index 37c18c66b5c7..e375ac849aec 100644 --- a/drivers/net/ethernet/intel/ice/ice_lag.c +++ b/drivers/net/ethernet/intel/ice/ice_lag.c @@ -100,9 +100,9 @@ static void ice_display_lag_info(struct ice_lag *lag) */ static void ice_lag_info_event(struct ice_lag *lag, void *ptr) { - struct net_device *event_netdev, *netdev_tmp; struct netdev_notifier_bonding_info *info; struct netdev_bonding_info *bonding_info; + struct net_device *event_netdev; const char *lag_netdev_name; event_netdev = netdev_notifier_info_to_dev(ptr); @@ -123,19 +123,6 @@ static void ice_lag_info_event(struct ice_lag *lag, void *ptr) goto lag_out; } - rcu_read_lock(); - for_each_netdev_in_bond_rcu(lag->upper_netdev, netdev_tmp) { - if (!netif_is_ice(netdev_tmp)) - continue; - - if (netdev_tmp && netdev_tmp != lag->netdev && - lag->peer_netdev != netdev_tmp) { - dev_hold(netdev_tmp); - lag->peer_netdev = netdev_tmp; - } - } - rcu_read_unlock(); - if (bonding_info->slave.state) ice_lag_set_backup(lag); else @@ -319,6 +306,9 @@ ice_lag_event_handler(struct notifier_block *notif_blk, unsigned long event, case NETDEV_BONDING_INFO: ice_lag_info_event(lag, ptr); break; + case NETDEV_UNREGISTER: + ice_lag_unlink(lag, ptr); + break; default: break; } -- cgit v1.2.3 From fd1b5beb177a8372cea2a0d014835491e4886f77 Mon Sep 17 00:00:00 2001 From: Yongxin Liu Date: Mon, 11 Oct 2021 15:02:16 +0800 Subject: ice: check whether PTP is initialized in ice_ptp_release() PTP is currently only supported on E810 devices, it is checked in ice_ptp_init(). However, there is no check in ice_ptp_release(). For other E800 series devices, ice_ptp_release() will be wrongly executed. Fix the following calltrace. INFO: trying to register non-static key. The code is fine but needs lockdep annotation, or maybe you didn't initialize this object before use? turning off the locking correctness validator. Workqueue: ice ice_service_task [ice] Call Trace: dump_stack_lvl+0x5b/0x82 dump_stack+0x10/0x12 register_lock_class+0x495/0x4a0 ? find_held_lock+0x3c/0xb0 __lock_acquire+0x71/0x1830 lock_acquire+0x1e6/0x330 ? ice_ptp_release+0x3c/0x1e0 [ice] ? _raw_spin_lock+0x19/0x70 ? ice_ptp_release+0x3c/0x1e0 [ice] _raw_spin_lock+0x38/0x70 ? ice_ptp_release+0x3c/0x1e0 [ice] ice_ptp_release+0x3c/0x1e0 [ice] ice_prepare_for_reset+0xcb/0xe0 [ice] ice_do_reset+0x38/0x110 [ice] ice_service_task+0x138/0xf10 [ice] ? __this_cpu_preempt_check+0x13/0x20 process_one_work+0x26a/0x650 worker_thread+0x3f/0x3b0 ? __kthread_parkme+0x51/0xb0 ? process_one_work+0x650/0x650 kthread+0x161/0x190 ? 
set_kthread_struct+0x40/0x40 ret_from_fork+0x1f/0x30 Fixes: 4dd0d5c33c3e ("ice: add lock around Tx timestamp tracker flush") Signed-off-by: Yongxin Liu Reviewed-by: Jacob Keller Tested-by: Gurucharan G Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/ice_ptp.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/ethernet/intel/ice/ice_ptp.c b/drivers/net/ethernet/intel/ice/ice_ptp.c index 80380aed8882..d1ef3d48a4b0 100644 --- a/drivers/net/ethernet/intel/ice/ice_ptp.c +++ b/drivers/net/ethernet/intel/ice/ice_ptp.c @@ -1571,6 +1571,9 @@ err_kworker: */ void ice_ptp_release(struct ice_pf *pf) { + if (!test_bit(ICE_FLAG_PTP, pf->flags)) + return; + /* Disable timestamping for both Tx and Rx */ ice_ptp_cfg_timestamp(pf, false); -- cgit v1.2.3 From 3dd60fb9d95db9c78fec86ba4df20852a7b974ba Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 19 Oct 2021 09:36:40 +0200 Subject: nvdimm/pmem: stop using q_usage_count as external pgmap refcount Originally all DAX access when through block_device operations and thus needed a queue reference. But since commit cccbce671582 ("filesystem-dax: convert to dax_direct_access()") all this happens at the DAX device level which uses its own refcounting. Having the external refcount thus wasn't needed but has otherwise been harmless for long time. But now that "block: drain file system I/O on del_gendisk" waits for q_usage_count to reach 0 in del_gendisk this whole scheme can't work anymore (and pmem is the only driver abusing q_usage_count like that). So switch to the internal reference and remove the unbalanced blk_freeze_queue_start that is taken care of by del_gendisk. Fixes: 8e141f9eb803 ("block: drain file system I/O on del_gendisk") Reported-by: Yi Zhang Signed-off-by: Christoph Hellwig Link: https://lore.kernel.org/r/20211019073641.2323410-2-hch@lst.de Signed-off-by: Dan Williams --- drivers/nvdimm/pmem.c | 33 ++------------------------------- 1 file changed, 2 insertions(+), 31 deletions(-) diff --git a/drivers/nvdimm/pmem.c b/drivers/nvdimm/pmem.c index ef4950f80832..054154c22899 100644 --- a/drivers/nvdimm/pmem.c +++ b/drivers/nvdimm/pmem.c @@ -333,26 +333,6 @@ static const struct attribute_group *pmem_attribute_groups[] = { NULL, }; -static void pmem_pagemap_cleanup(struct dev_pagemap *pgmap) -{ - struct pmem_device *pmem = pgmap->owner; - - blk_cleanup_disk(pmem->disk); -} - -static void pmem_release_queue(void *pgmap) -{ - pmem_pagemap_cleanup(pgmap); -} - -static void pmem_pagemap_kill(struct dev_pagemap *pgmap) -{ - struct request_queue *q = - container_of(pgmap->ref, struct request_queue, q_usage_counter); - - blk_freeze_queue_start(q); -} - static void pmem_release_disk(void *__pmem) { struct pmem_device *pmem = __pmem; @@ -360,12 +340,9 @@ static void pmem_release_disk(void *__pmem) kill_dax(pmem->dax_dev); put_dax(pmem->dax_dev); del_gendisk(pmem->disk); -} -static const struct dev_pagemap_ops fsdax_pagemap_ops = { - .kill = pmem_pagemap_kill, - .cleanup = pmem_pagemap_cleanup, -}; + blk_cleanup_disk(pmem->disk); +} static int pmem_attach_disk(struct device *dev, struct nd_namespace_common *ndns) @@ -427,10 +404,8 @@ static int pmem_attach_disk(struct device *dev, pmem->disk = disk; pmem->pgmap.owner = pmem; pmem->pfn_flags = PFN_DEV; - pmem->pgmap.ref = &q->q_usage_counter; if (is_nd_pfn(dev)) { pmem->pgmap.type = MEMORY_DEVICE_FS_DAX; - pmem->pgmap.ops = &fsdax_pagemap_ops; addr = devm_memremap_pages(dev, &pmem->pgmap); pfn_sb = nd_pfn->pfn_sb; pmem->data_offset = le64_to_cpu(pfn_sb->dataoff); @@ -444,16 
+419,12 @@ static int pmem_attach_disk(struct device *dev, pmem->pgmap.range.end = res->end; pmem->pgmap.nr_range = 1; pmem->pgmap.type = MEMORY_DEVICE_FS_DAX; - pmem->pgmap.ops = &fsdax_pagemap_ops; addr = devm_memremap_pages(dev, &pmem->pgmap); pmem->pfn_flags |= PFN_MAP; bb_range = pmem->pgmap.range; } else { addr = devm_memremap(dev, pmem->phys_addr, pmem->size, ARCH_MEMREMAP_PMEM); - if (devm_add_action_or_reset(dev, pmem_release_queue, - &pmem->pgmap)) - return -ENOMEM; bb_range.start = res->start; bb_range.end = res->end; } -- cgit v1.2.3 From 759635760a804b0d8ad0cc677b650f1544cae22f Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Sun, 24 Oct 2021 09:40:14 +0300 Subject: mlxsw: pci: Recycle received packet upon allocation failure When the driver fails to allocate a new Rx buffer, it passes an empty Rx descriptor (contains zero address and size) to the device and marks it as invalid by setting the skb pointer in the descriptor's metadata to NULL. After processing enough Rx descriptors, the driver will try to process the invalid descriptor, but will return immediately seeing that the skb pointer is NULL. Since the driver no longer passes new Rx descriptors to the device, the Rx queue will eventually become full and the device will start to drop packets. Fix this by recycling the received packet if allocation of the new packet failed. This means that allocation is no longer performed at the end of the Rx routine, but at the start, before tearing down the DMA mapping of the received packet. Remove the comment about the descriptor being zeroed as it is no longer correct. This is OK because we either use the descriptor as-is (when recycling) or overwrite its address and size fields with that of the newly allocated Rx buffer. The issue was discovered when a process ("perf") consumed too much memory and put the system under memory pressure. It can be reproduced by injecting slab allocation failures [1]. After the fix, the Rx queue no longer comes to a halt. [1] # echo 10 > /sys/kernel/debug/failslab/times # echo 1000 > /sys/kernel/debug/failslab/interval # echo 100 > /sys/kernel/debug/failslab/probability FAULT_INJECTION: forcing a failure. name failslab, interval 1000, probability 100, space 0, times 8 [...] Call Trace: dump_stack_lvl+0x34/0x44 should_fail.cold+0x32/0x37 should_failslab+0x5/0x10 kmem_cache_alloc_node+0x23/0x190 __alloc_skb+0x1f9/0x280 __netdev_alloc_skb+0x3a/0x150 mlxsw_pci_rdq_skb_alloc+0x24/0x90 mlxsw_pci_cq_tasklet+0x3dc/0x1200 tasklet_action_common.constprop.0+0x9f/0x100 __do_softirq+0xb5/0x252 irq_exit_rcu+0x7a/0xa0 common_interrupt+0x83/0xa0 asm_common_interrupt+0x1e/0x40 RIP: 0010:cpuidle_enter_state+0xc8/0x340 [...] 
mlxsw_spectrum2 0000:06:00.0: Failed to alloc skb for RDQ Fixes: eda6500a987a ("mlxsw: Add PCI bus implementation") Signed-off-by: Ido Schimmel Reviewed-by: Petr Machata Link: https://lore.kernel.org/r/20211024064014.1060919-1-idosch@idosch.org Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mellanox/mlxsw/pci.c | 25 ++++++++++++------------- 1 file changed, 12 insertions(+), 13 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/pci.c b/drivers/net/ethernet/mellanox/mlxsw/pci.c index 13b0259f7ea6..fcace73eae40 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/pci.c +++ b/drivers/net/ethernet/mellanox/mlxsw/pci.c @@ -353,13 +353,10 @@ static int mlxsw_pci_rdq_skb_alloc(struct mlxsw_pci *mlxsw_pci, struct sk_buff *skb; int err; - elem_info->u.rdq.skb = NULL; skb = netdev_alloc_skb_ip_align(NULL, buf_len); if (!skb) return -ENOMEM; - /* Assume that wqe was previously zeroed. */ - err = mlxsw_pci_wqe_frag_map(mlxsw_pci, wqe, 0, skb->data, buf_len, DMA_FROM_DEVICE); if (err) @@ -597,21 +594,26 @@ static void mlxsw_pci_cqe_rdq_handle(struct mlxsw_pci *mlxsw_pci, struct pci_dev *pdev = mlxsw_pci->pdev; struct mlxsw_pci_queue_elem_info *elem_info; struct mlxsw_rx_info rx_info = {}; - char *wqe; + char wqe[MLXSW_PCI_WQE_SIZE]; struct sk_buff *skb; u16 byte_count; int err; elem_info = mlxsw_pci_queue_elem_info_consumer_get(q); - skb = elem_info->u.sdq.skb; - if (!skb) - return; - wqe = elem_info->elem; - mlxsw_pci_wqe_frag_unmap(mlxsw_pci, wqe, 0, DMA_FROM_DEVICE); + skb = elem_info->u.rdq.skb; + memcpy(wqe, elem_info->elem, MLXSW_PCI_WQE_SIZE); if (q->consumer_counter++ != consumer_counter_limit) dev_dbg_ratelimited(&pdev->dev, "Consumer counter does not match limit in RDQ\n"); + err = mlxsw_pci_rdq_skb_alloc(mlxsw_pci, elem_info); + if (err) { + dev_err_ratelimited(&pdev->dev, "Failed to alloc skb for RDQ\n"); + goto out; + } + + mlxsw_pci_wqe_frag_unmap(mlxsw_pci, wqe, 0, DMA_FROM_DEVICE); + if (mlxsw_pci_cqe_lag_get(cqe_v, cqe)) { rx_info.is_lag = true; rx_info.u.lag_id = mlxsw_pci_cqe_lag_id_get(cqe_v, cqe); @@ -647,10 +649,7 @@ static void mlxsw_pci_cqe_rdq_handle(struct mlxsw_pci *mlxsw_pci, skb_put(skb, byte_count); mlxsw_core_skb_receive(mlxsw_pci->core, skb, &rx_info); - memset(wqe, 0, q->elem_size); - err = mlxsw_pci_rdq_skb_alloc(mlxsw_pci, elem_info); - if (err) - dev_dbg_ratelimited(&pdev->dev, "Failed to alloc skb for RDQ\n"); +out: /* Everything is set up, ring doorbell to pass elem to HW */ q->producer_counter++; mlxsw_pci_queue_doorbell_producer_ring(mlxsw_pci, q); -- cgit v1.2.3 From fcf918ffd3b35e288097036c04af7446b2c6f2f1 Mon Sep 17 00:00:00 2001 From: Ville Syrjälä Date: Thu, 14 Oct 2021 12:09:39 +0300 Subject: drm/i915: Convert unconditional clflush to drm_clflush_virt_range() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This one is apparently a "clflush for good measure", so bit more justification (if you can call it that) than some of the others. Convert to drm_clflush_virt_range() again so that machines without clflush will survive the ordeal. 
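For orientation, the reason drm_clflush_virt_range() is safe on such machines is that it only issues cache-line flushes when the CPU actually advertises CLFLUSH, and otherwise falls back to a full write-back-and-invalidate. A rough sketch of that idea (assumed shape, not the exact DRM implementation):

	if (static_cpu_has(X86_FEATURE_CLFLUSH)) {
		const int size = boot_cpu_data.x86_clflush_size;
		void *end = addr + length;

		mb();
		for (; addr < end; addr += size)
			clflushopt(addr);	/* flush one cache line at a time */
		mb();
	} else {
		wbinvd_on_all_cpus();		/* heavy fallback, but never faults */
	}

A bare clflush(addr), by contrast, is emitted unconditionally and can fault on CPUs that lack the instruction.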
Cc: stable@vger.kernel.org Cc: Maarten Lankhorst Cc: Thomas Hellström #v1 Fixes: 12ca695d2c1e ("drm/i915: Do not share hwsp across contexts any more, v8.") Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20211014090941.12159-3-ville.syrjala@linux.intel.com Reviewed-by: Dave Airlie (cherry picked from commit af7b6d234eefa30c461cc16912bafb32b9e6141c) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/gt/intel_timeline.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/gt/intel_timeline.c b/drivers/gpu/drm/i915/gt/intel_timeline.c index 1257f4f11e66..23d7328892ed 100644 --- a/drivers/gpu/drm/i915/gt/intel_timeline.c +++ b/drivers/gpu/drm/i915/gt/intel_timeline.c @@ -225,7 +225,7 @@ void intel_timeline_reset_seqno(const struct intel_timeline *tl) memset(hwsp_seqno + 1, 0, TIMELINE_SEQNO_BYTES - sizeof(*hwsp_seqno)); WRITE_ONCE(*hwsp_seqno, tl->seqno); - clflush(hwsp_seqno); + drm_clflush_virt_range(hwsp_seqno, TIMELINE_SEQNO_BYTES); } void intel_timeline_enter(struct intel_timeline *tl) -- cgit v1.2.3 From 9761ffb8f1090289b908590039e2c363cc35cf45 Mon Sep 17 00:00:00 2001 From: Ville Syrjälä Date: Thu, 14 Oct 2021 12:09:40 +0300 Subject: drm/i915: Catch yet another unconditioal clflush MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Replace the unconditional clflush() with drm_clflush_virt_range() which does the wbinvd() fallback when clflush is not available. This time no justification is given for the clflush in the offending commit. Cc: stable@vger.kernel.org Cc: Maarten Lankhorst Cc: Thomas Hellström Fixes: 2c8ab3339e39 ("drm/i915: Pin timeline map after first timeline pin, v4.") Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20211014090941.12159-4-ville.syrjala@linux.intel.com Reviewed-by: Dave Airlie (cherry picked from commit 9ced12182d0d8401d821e9602e56e276459900fc) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/gt/intel_timeline.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/gt/intel_timeline.c b/drivers/gpu/drm/i915/gt/intel_timeline.c index 23d7328892ed..438bbc7b8147 100644 --- a/drivers/gpu/drm/i915/gt/intel_timeline.c +++ b/drivers/gpu/drm/i915/gt/intel_timeline.c @@ -64,7 +64,7 @@ intel_timeline_pin_map(struct intel_timeline *timeline) timeline->hwsp_map = vaddr; timeline->hwsp_seqno = memset(vaddr + ofs, 0, TIMELINE_SEQNO_BYTES); - clflush(vaddr + ofs); + drm_clflush_virt_range(vaddr + ofs, TIMELINE_SEQNO_BYTES); return 0; } -- cgit v1.2.3 From 6e6f96630805874fa80b0067e1a57aafc06225f6 Mon Sep 17 00:00:00 2001 From: Imre Deak Date: Mon, 18 Oct 2021 12:41:49 +0300 Subject: drm/i915/dp: Skip the HW readout of DPCD on disabled encoders MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reading out the DP encoders' DPCD during booting or resume is only required for enabled encoders: such encoders may be modesetted during the initial commit and the link training this involves depends on an initialized DPCD. For DDI encoders reading out the DPCD is skipped, do the same on pre-DDI platforms. Atm, the first DPCD readout without a sink connected - which is a likely scneario if the encoder is disabled - leaves intel_dp->num_common_rates at 0, which resulted in intel_dp_sync_state()->intel_dp_max_common_rate() in a intel_dp->common_rates[-1] access. 
This by definition results in an undefined behaviour, though to my best knowledge in all HW/compiler configurations it actually results in accessing the array item type value preceding the array. In this case the preceding value happens to be intel_dp->num_common_rates, which is 0, so this issue - by luck - didn't cause a user visible problem. Nevertheless it's still an undefined behaviour and in CONFIG_UBSAN builds leads to a kernel BUG() (which revealed this problem for us), hence CC:stable. A related problem in case the encoder is enabled but the sink is not connected or the DPCD readout fails is fixed by the next patch. v2: Amend the commit message describing the root cause of the CONFIG_UBSAN BUG(). Fixes: a532cde31de3 ("drm/i915/tc: Fix TypeC port init/resume time sanitization") References: https://gitlab.freedesktop.org/drm/intel/-/issues/4297 Reported-and-tested-by: Mat Jonczyk Cc: Mat Jonczyk Cc: José Roberto de Souza Cc: Jani Nikula Cc: Ville Syrjälä Cc: Signed-off-by: Imre Deak Reviewed-by: Ville Syrjälä Acked-by: Jani Nikula Link: https://patchwork.freedesktop.org/patch/msgid/20211018094154.1407705-2-imre.deak@intel.com (cherry picked from commit 4ec5ffc341cecbea060739aea1d53398ac2ec3f8) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/display/intel_dp.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/i915/display/intel_dp.c b/drivers/gpu/drm/i915/display/intel_dp.c index abe3d61b6243..5cf152be4487 100644 --- a/drivers/gpu/drm/i915/display/intel_dp.c +++ b/drivers/gpu/drm/i915/display/intel_dp.c @@ -1916,6 +1916,9 @@ void intel_dp_sync_state(struct intel_encoder *encoder, { struct intel_dp *intel_dp = enc_to_intel_dp(encoder); + if (!crtc_state) + return; + /* * Don't clobber DPCD if it's been already read out during output * setup (eDP) or detect. -- cgit v1.2.3 From 926245c7d22271307606c88b1fbb2539a8550e94 Mon Sep 17 00:00:00 2001 From: Maurizio Lombardi Date: Fri, 15 Oct 2021 10:26:34 +0200 Subject: nvmet-tcp: fix a memory leak when releasing a queue page_frag_free() won't completely release the memory allocated for the commands, the cache page must be explicitly freed by calling __page_frag_cache_drain(). 
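The leak pattern is generic to page_frag_cache users: page_frag_alloc() carves buffers out of a cached page and hands out per-fragment references, while the cache itself still holds its remaining bias references on the head page; page_frag_free() only drops the per-fragment reference. A minimal sketch of the lifecycle (generic example, not the nvmet-tcp code itself):

	struct page_frag_cache cache = {};
	void *buf = page_frag_alloc(&cache, 1024, GFP_KERNEL);

	page_frag_free(buf);	/* drops only this fragment's reference */

	/* on teardown the cache's remaining bias must be drained explicitly,
	 * otherwise the head page is never returned to the allocator */
	if (cache.va)
		__page_frag_cache_drain(virt_to_head_page(cache.va),
					cache.pagecnt_bias);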
This bug can be easily reproduced by repeatedly executing the following command on the initiator: $echo 1 > /sys/devices/virtual/nvme-fabrics/ctl/nvme0/reset_controller Signed-off-by: Maurizio Lombardi Reviewed-by: Sagi Grimberg Reviewed-by: John Meneghini Signed-off-by: Christoph Hellwig --- drivers/nvme/target/tcp.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/nvme/target/tcp.c b/drivers/nvme/target/tcp.c index 07ee347ea3f3..c33a0464346f 100644 --- a/drivers/nvme/target/tcp.c +++ b/drivers/nvme/target/tcp.c @@ -1428,6 +1428,7 @@ static void nvmet_tcp_uninit_data_in_cmds(struct nvmet_tcp_queue *queue) static void nvmet_tcp_release_queue_work(struct work_struct *w) { + struct page *page; struct nvmet_tcp_queue *queue = container_of(w, struct nvmet_tcp_queue, release_work); @@ -1447,6 +1448,8 @@ static void nvmet_tcp_release_queue_work(struct work_struct *w) nvmet_tcp_free_crypto(queue); ida_simple_remove(&nvmet_tcp_queue_ida, queue->idx); + page = virt_to_head_page(queue->pf_cache.va); + __page_frag_cache_drain(page, queue->pf_cache.pagecnt_bias); kfree(queue); } -- cgit v1.2.3 From 25e1f67eda4a19c91dc05c84d6d413c53efb447b Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Sun, 24 Oct 2021 10:43:31 +0300 Subject: nvme-tcp: fix H2CData PDU send accounting (again) We should not access request members after the last send, even to determine if indeed it was the last data payload send. The reason is that a completion could have arrived and trigger a new execution of the request which overridden these members. This was fixed by commit 825619b09ad3 ("nvme-tcp: fix possible use-after-completion"). Commit e371af033c56 broke that assumption again to address cases where multiple r2t pdus are sent per request. To fix it, we need to record the request data_sent and data_len and after the payload network send we reference these counters to determine weather we should advance the request iterator. Fixes: e371af033c56 ("nvme-tcp: fix incorrect h2cdata pdu offset accounting") Reported-by: Keith Busch Cc: stable@vger.kernel.org # 5.10+ Signed-off-by: Sagi Grimberg Reviewed-by: Keith Busch Signed-off-by: Christoph Hellwig --- drivers/nvme/host/tcp.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c index 3c1c29dd3020..0626d14e6d4c 100644 --- a/drivers/nvme/host/tcp.c +++ b/drivers/nvme/host/tcp.c @@ -926,12 +926,14 @@ static void nvme_tcp_fail_request(struct nvme_tcp_request *req) static int nvme_tcp_try_send_data(struct nvme_tcp_request *req) { struct nvme_tcp_queue *queue = req->queue; + int req_data_len = req->data_len; while (true) { struct page *page = nvme_tcp_req_cur_page(req); size_t offset = nvme_tcp_req_cur_offset(req); size_t len = nvme_tcp_req_cur_length(req); bool last = nvme_tcp_pdu_last_send(req, len); + int req_data_sent = req->data_sent; int ret, flags = MSG_DONTWAIT; if (last && !queue->data_digest && !nvme_tcp_queue_more(queue)) @@ -958,7 +960,7 @@ static int nvme_tcp_try_send_data(struct nvme_tcp_request *req) * in the request where we don't want to modify it as we may * compete with the RX path completing the request. 
*/ - if (req->data_sent + ret < req->data_len) + if (req_data_sent + ret < req_data_len) nvme_tcp_advance_req(req, ret); /* fully successful last send in current PDU */ -- cgit v1.2.3 From d81d0e41ed5fe7229a2c9a29d13bad288c7cf2d2 Mon Sep 17 00:00:00 2001 From: Thomas Perrot Date: Fri, 22 Oct 2021 16:21:04 +0200 Subject: spi: spl022: fix Microwire full duplex mode There are missing braces in the function that verify controller parameters, then an error is always returned when the parameter to select Microwire frames operation is used on devices allowing it. Signed-off-by: Thomas Perrot Link: https://lore.kernel.org/r/20211022142104.1386379-1-thomas.perrot@bootlin.com Signed-off-by: Mark Brown --- drivers/spi/spi-pl022.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/spi/spi-pl022.c b/drivers/spi/spi-pl022.c index feebda66f56e..e4484ace584e 100644 --- a/drivers/spi/spi-pl022.c +++ b/drivers/spi/spi-pl022.c @@ -1716,12 +1716,13 @@ static int verify_controller_parameters(struct pl022 *pl022, return -EINVAL; } } else { - if (chip_info->duplex != SSP_MICROWIRE_CHANNEL_FULL_DUPLEX) + if (chip_info->duplex != SSP_MICROWIRE_CHANNEL_FULL_DUPLEX) { dev_err(&pl022->adev->dev, "Microwire half duplex mode requested," " but this is only available in the" " ST version of PL022\n"); - return -EINVAL; + return -EINVAL; + } } } return 0; -- cgit v1.2.3 From 9122a70a6333705c0c35614ddc51c274ed1d3637 Mon Sep 17 00:00:00 2001 From: Cyril Strejc Date: Sun, 24 Oct 2021 22:14:25 +0200 Subject: net: multicast: calculate csum of looped-back and forwarded packets During a testing of an user-space application which transmits UDP multicast datagrams and utilizes multicast routing to send the UDP datagrams out of defined network interfaces, I've found a multicast router does not fill-in UDP checksum into locally produced, looped-back and forwarded UDP datagrams, if an original output NIC the datagrams are sent to has UDP TX checksum offload enabled. The datagrams are sent malformed out of the NIC the datagrams have been forwarded to. It is because: 1. If TX checksum offload is enabled on the output NIC, UDP checksum is not calculated by kernel and is not filled into skb data. 2. dev_loopback_xmit(), which is called solely by ip_mc_finish_output(), sets skb->ip_summed = CHECKSUM_UNNECESSARY unconditionally. 3. Since 35fc92a9 ("[NET]: Allow forwarding of ip_summed except CHECKSUM_COMPLETE"), the ip_summed value is preserved during forwarding. 4. If ip_summed != CHECKSUM_PARTIAL, checksum is not calculated during a packet egress. The minimum fix in dev_loopback_xmit(): 1. Preserves skb->ip_summed CHECKSUM_PARTIAL. This is the case when the original output NIC has TX checksum offload enabled. The effects are: a) If the forwarding destination interface supports TX checksum offloading, the NIC driver is responsible to fill-in the checksum. b) If the forwarding destination interface does NOT support TX checksum offloading, checksums are filled-in by kernel before skb is submitted to the NIC driver. c) For local delivery, checksum validation is skipped as in the case of CHECKSUM_UNNECESSARY, thanks to skb_csum_unnecessary(). 2. Translates ip_summed CHECKSUM_NONE to CHECKSUM_UNNECESSARY. It means, for CHECKSUM_NONE, the behavior is unmodified and is there to skip a looped-back packet local delivery checksum validation. Signed-off-by: Cyril Strejc Reviewed-by: Willem de Bruijn Signed-off-by: David S. 
Miller --- include/net/udp.h | 5 +++-- net/core/dev.c | 3 ++- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/include/net/udp.h b/include/net/udp.h index 360df454356c..909ecf447e0f 100644 --- a/include/net/udp.h +++ b/include/net/udp.h @@ -494,8 +494,9 @@ static inline struct sk_buff *udp_rcv_segment(struct sock *sk, * CHECKSUM_NONE in __udp_gso_segment. UDP GRO indeed builds partial * packets in udp_gro_complete_segment. As does UDP GSO, verified by * udp_send_skb. But when those packets are looped in dev_loopback_xmit - * their ip_summed is set to CHECKSUM_UNNECESSARY. Reset in this - * specific case, where PARTIAL is both correct and required. + * their ip_summed CHECKSUM_NONE is changed to CHECKSUM_UNNECESSARY. + * Reset in this specific case, where PARTIAL is both correct and + * required. */ if (skb->pkt_type == PACKET_LOOPBACK) skb->ip_summed = CHECKSUM_PARTIAL; diff --git a/net/core/dev.c b/net/core/dev.c index ea2366497697..eb3a366bf212 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -3912,7 +3912,8 @@ int dev_loopback_xmit(struct net *net, struct sock *sk, struct sk_buff *skb) skb_reset_mac_header(skb); __skb_pull(skb, skb_network_offset(skb)); skb->pkt_type = PACKET_LOOPBACK; - skb->ip_summed = CHECKSUM_UNNECESSARY; + if (skb->ip_summed == CHECKSUM_NONE) + skb->ip_summed = CHECKSUM_UNNECESSARY; WARN_ON(!skb_dst(skb)); skb_dst_force(skb); netif_rx_ni(skb); -- cgit v1.2.3 From 2195f2062e4cc93870da8e71c318ef98a1c51cef Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Mon, 25 Oct 2021 16:49:36 +0200 Subject: nfc: port100: fix using -ERRNO as command type mask During probing, the driver tries to get a list (mask) of supported command types in port100_get_command_type_mask() function. The value is u64 and 0 is treated as invalid mask (no commands supported). The function however returns also -ERRNO as u64 which will be interpret as valid command mask. Return 0 on every error case of port100_get_command_type_mask(), so the probing will stop. Cc: Fixes: 0347a6ab300a ("NFC: port100: Commands mechanism implementation") Signed-off-by: Krzysztof Kozlowski Signed-off-by: David S. Miller --- drivers/nfc/port100.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/nfc/port100.c b/drivers/nfc/port100.c index 517376c43b86..16ceb763594f 100644 --- a/drivers/nfc/port100.c +++ b/drivers/nfc/port100.c @@ -1006,11 +1006,11 @@ static u64 port100_get_command_type_mask(struct port100 *dev) skb = port100_alloc_skb(dev, 0); if (!skb) - return -ENOMEM; + return 0; resp = port100_send_cmd_sync(dev, PORT100_CMD_GET_COMMAND_TYPE, skb); if (IS_ERR(resp)) - return PTR_ERR(resp); + return 0; if (resp->len < 8) mask = 0; -- cgit v1.2.3 From fa40d9734a57bcbfa79a280189799f76c88f7bb0 Mon Sep 17 00:00:00 2001 From: Max VA Date: Mon, 25 Oct 2021 17:31:53 +0200 Subject: tipc: fix size validations for the MSG_CRYPTO type The function tipc_crypto_key_rcv is used to parse MSG_CRYPTO messages to receive keys from other nodes in the cluster in order to decrypt any further messages from them. This patch verifies that any supplied sizes in the message body are valid for the received message. Fixes: 1ef6f7c9390f ("tipc: add automatic session key exchange") Signed-off-by: Max VA Acked-by: Ying Xue Signed-off-by: Greg Kroah-Hartman Acked-by: Jon Maloy Signed-off-by: David S. 
Miller --- net/tipc/crypto.c | 32 +++++++++++++++++++++----------- 1 file changed, 21 insertions(+), 11 deletions(-) diff --git a/net/tipc/crypto.c b/net/tipc/crypto.c index c9391d38de85..dc60c32bb70d 100644 --- a/net/tipc/crypto.c +++ b/net/tipc/crypto.c @@ -2285,43 +2285,53 @@ static bool tipc_crypto_key_rcv(struct tipc_crypto *rx, struct tipc_msg *hdr) u16 key_gen = msg_key_gen(hdr); u16 size = msg_data_sz(hdr); u8 *data = msg_data(hdr); + unsigned int keylen; + + /* Verify whether the size can exist in the packet */ + if (unlikely(size < sizeof(struct tipc_aead_key) + TIPC_AEAD_KEYLEN_MIN)) { + pr_debug("%s: message data size is too small\n", rx->name); + goto exit; + } + + keylen = ntohl(*((__be32 *)(data + TIPC_AEAD_ALG_NAME))); + + /* Verify the supplied size values */ + if (unlikely(size != keylen + sizeof(struct tipc_aead_key) || + keylen > TIPC_AEAD_KEY_SIZE_MAX)) { + pr_debug("%s: invalid MSG_CRYPTO key size\n", rx->name); + goto exit; + } spin_lock(&rx->lock); if (unlikely(rx->skey || (key_gen == rx->key_gen && rx->key.keys))) { pr_err("%s: key existed <%p>, gen %d vs %d\n", rx->name, rx->skey, key_gen, rx->key_gen); - goto exit; + goto exit_unlock; } /* Allocate memory for the key */ skey = kmalloc(size, GFP_ATOMIC); if (unlikely(!skey)) { pr_err("%s: unable to allocate memory for skey\n", rx->name); - goto exit; + goto exit_unlock; } /* Copy key from msg data */ - skey->keylen = ntohl(*((__be32 *)(data + TIPC_AEAD_ALG_NAME))); + skey->keylen = keylen; memcpy(skey->alg_name, data, TIPC_AEAD_ALG_NAME); memcpy(skey->key, data + TIPC_AEAD_ALG_NAME + sizeof(__be32), skey->keylen); - /* Sanity check */ - if (unlikely(size != tipc_aead_key_size(skey))) { - kfree(skey); - skey = NULL; - goto exit; - } - rx->key_gen = key_gen; rx->skey_mode = msg_key_mode(hdr); rx->skey = skey; rx->nokey = 0; mb(); /* for nokey flag */ -exit: +exit_unlock: spin_unlock(&rx->lock); +exit: /* Schedule the key attaching on this crypto */ if (likely(skey && queue_delayed_work(tx->wq, &rx->work, 0))) return true; -- cgit v1.2.3 From 6f68cd634856f8ca93bafd623ba5357e0f648c68 Mon Sep 17 00:00:00 2001 From: Pavel Skripkin Date: Sun, 24 Oct 2021 16:13:56 +0300 Subject: net: batman-adv: fix error handling Syzbot reported ODEBUG warning in batadv_nc_mesh_free(). The problem was in wrong error handling in batadv_mesh_init(). Before this patch batadv_mesh_init() was calling batadv_mesh_free() in case of any batadv_*_init() calls failure. This approach may work well, when there is some kind of indicator, which can tell which parts of batadv are initialized; but there isn't any. All written above lead to cleaning up uninitialized fields. Even if we hide ODEBUG warning by initializing bat_priv->nc.work, syzbot was able to hit GPF in batadv_nc_purge_paths(), because hash pointer in still NULL. [1] To fix these bugs we can unwind batadv_*_init() calls one by one. It is good approach for 2 reasons: 1) It fixes bugs on error handling path 2) It improves the performance, since we won't call unneeded batadv_*_free() functions. So, this patch makes all batadv_*_init() clean up all allocated memory before returning with an error to no call correspoing batadv_*_free() and open-codes batadv_mesh_free() with proper order to avoid touching uninitialized fields. 
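The shape of the fix is the usual unwind-on-error ladder, where a failing step rolls back only the steps that already succeeded. A minimal generic sketch (the a/b/c init and free functions are hypothetical, not the batman-adv ones):

	static int setup(void)
	{
		int ret;

		ret = a_init();
		if (ret < 0)
			return ret;

		ret = b_init();
		if (ret < 0)
			goto err_b;

		ret = c_init();
		if (ret < 0)
			goto err_c;

		return 0;

	err_c:
		b_free();	/* undo only what was actually initialized */
	err_b:
		a_free();
		return ret;
	}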
Link: https://lore.kernel.org/netdev/000000000000c87fbd05cef6bcb0@google.com/ [1] Reported-and-tested-by: syzbot+28b0702ada0bf7381f58@syzkaller.appspotmail.com Fixes: c6c8fea29769 ("net: Add batman-adv meshing protocol") Signed-off-by: Pavel Skripkin Acked-by: Sven Eckelmann Signed-off-by: David S. Miller --- net/batman-adv/bridge_loop_avoidance.c | 8 +++-- net/batman-adv/main.c | 56 ++++++++++++++++++++++++---------- net/batman-adv/network-coding.c | 4 ++- net/batman-adv/translation-table.c | 4 ++- 4 files changed, 52 insertions(+), 20 deletions(-) diff --git a/net/batman-adv/bridge_loop_avoidance.c b/net/batman-adv/bridge_loop_avoidance.c index 1669744304c5..17687848daec 100644 --- a/net/batman-adv/bridge_loop_avoidance.c +++ b/net/batman-adv/bridge_loop_avoidance.c @@ -1560,10 +1560,14 @@ int batadv_bla_init(struct batadv_priv *bat_priv) return 0; bat_priv->bla.claim_hash = batadv_hash_new(128); - bat_priv->bla.backbone_hash = batadv_hash_new(32); + if (!bat_priv->bla.claim_hash) + return -ENOMEM; - if (!bat_priv->bla.claim_hash || !bat_priv->bla.backbone_hash) + bat_priv->bla.backbone_hash = batadv_hash_new(32); + if (!bat_priv->bla.backbone_hash) { + batadv_hash_destroy(bat_priv->bla.claim_hash); return -ENOMEM; + } batadv_hash_set_lock_class(bat_priv->bla.claim_hash, &batadv_claim_hash_lock_class_key); diff --git a/net/batman-adv/main.c b/net/batman-adv/main.c index 3ddd66e4c29e..5207cd8d6ad8 100644 --- a/net/batman-adv/main.c +++ b/net/batman-adv/main.c @@ -190,29 +190,41 @@ int batadv_mesh_init(struct net_device *soft_iface) bat_priv->gw.generation = 0; - ret = batadv_v_mesh_init(bat_priv); - if (ret < 0) - goto err; - ret = batadv_originator_init(bat_priv); - if (ret < 0) - goto err; + if (ret < 0) { + atomic_set(&bat_priv->mesh_state, BATADV_MESH_DEACTIVATING); + goto err_orig; + } ret = batadv_tt_init(bat_priv); - if (ret < 0) - goto err; + if (ret < 0) { + atomic_set(&bat_priv->mesh_state, BATADV_MESH_DEACTIVATING); + goto err_tt; + } + + ret = batadv_v_mesh_init(bat_priv); + if (ret < 0) { + atomic_set(&bat_priv->mesh_state, BATADV_MESH_DEACTIVATING); + goto err_v; + } ret = batadv_bla_init(bat_priv); - if (ret < 0) - goto err; + if (ret < 0) { + atomic_set(&bat_priv->mesh_state, BATADV_MESH_DEACTIVATING); + goto err_bla; + } ret = batadv_dat_init(bat_priv); - if (ret < 0) - goto err; + if (ret < 0) { + atomic_set(&bat_priv->mesh_state, BATADV_MESH_DEACTIVATING); + goto err_dat; + } ret = batadv_nc_mesh_init(bat_priv); - if (ret < 0) - goto err; + if (ret < 0) { + atomic_set(&bat_priv->mesh_state, BATADV_MESH_DEACTIVATING); + goto err_nc; + } batadv_gw_init(bat_priv); batadv_mcast_init(bat_priv); @@ -222,8 +234,20 @@ int batadv_mesh_init(struct net_device *soft_iface) return 0; -err: - batadv_mesh_free(soft_iface); +err_nc: + batadv_dat_free(bat_priv); +err_dat: + batadv_bla_free(bat_priv); +err_bla: + batadv_v_mesh_free(bat_priv); +err_v: + batadv_tt_free(bat_priv); +err_tt: + batadv_originator_free(bat_priv); +err_orig: + batadv_purge_outstanding_packets(bat_priv, NULL); + atomic_set(&bat_priv->mesh_state, BATADV_MESH_INACTIVE); + return ret; } diff --git a/net/batman-adv/network-coding.c b/net/batman-adv/network-coding.c index 9f06132e007d..0a7f1d36a6a8 100644 --- a/net/batman-adv/network-coding.c +++ b/net/batman-adv/network-coding.c @@ -152,8 +152,10 @@ int batadv_nc_mesh_init(struct batadv_priv *bat_priv) &batadv_nc_coding_hash_lock_class_key); bat_priv->nc.decoding_hash = batadv_hash_new(128); - if (!bat_priv->nc.decoding_hash) + if (!bat_priv->nc.decoding_hash) { + 
batadv_hash_destroy(bat_priv->nc.coding_hash); goto err; + } batadv_hash_set_lock_class(bat_priv->nc.decoding_hash, &batadv_nc_decoding_hash_lock_class_key); diff --git a/net/batman-adv/translation-table.c b/net/batman-adv/translation-table.c index e0b3dace2020..4b7ad6684bc4 100644 --- a/net/batman-adv/translation-table.c +++ b/net/batman-adv/translation-table.c @@ -4162,8 +4162,10 @@ int batadv_tt_init(struct batadv_priv *bat_priv) return ret; ret = batadv_tt_global_init(bat_priv); - if (ret < 0) + if (ret < 0) { + batadv_tt_local_table_free(bat_priv); return ret; + } batadv_tvlv_handler_register(bat_priv, batadv_tt_tvlv_ogm_handler_v1, batadv_tt_tvlv_unicast_handler_v1, -- cgit v1.2.3 From db6c3c064f5d55fa9969f33eafca3cdbefbb3541 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Tue, 26 Oct 2021 12:36:17 +0200 Subject: net: lan78xx: fix division by zero in send path Add the missing endpoint max-packet sanity check to probe() to avoid division by zero in lan78xx_tx_bh() in case a malicious device has broken descriptors (or when doing descriptor fuzz testing). Note that USB core will reject URBs submitted for endpoints with zero wMaxPacketSize but that drivers doing packet-size calculations still need to handle this (cf. commit 2548288b4fb0 ("USB: Fix: Don't skip endpoint descriptors with maxpacket=0")). Fixes: 55d7de9de6c3 ("Microchip's LAN7800 family USB 2/3 to 10/100/1000 Ethernet device driver") Cc: stable@vger.kernel.org # 4.3 Cc: Woojung.Huh@microchip.com Signed-off-by: Johan Hovold Signed-off-by: David S. Miller --- drivers/net/usb/lan78xx.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/net/usb/lan78xx.c b/drivers/net/usb/lan78xx.c index 793f8fbe0069..63cd72c5f580 100644 --- a/drivers/net/usb/lan78xx.c +++ b/drivers/net/usb/lan78xx.c @@ -4122,6 +4122,12 @@ static int lan78xx_probe(struct usb_interface *intf, dev->maxpacket = usb_maxpacket(dev->udev, dev->pipe_out, 1); + /* Reject broken descriptors. */ + if (dev->maxpacket == 0) { + ret = -ENODEV; + goto out4; + } + /* driver requires remote-wakeup capability during autosuspend. */ intf->needs_remote_wakeup = 1; -- cgit v1.2.3 From 19fa0887c57d35b57bfb895e6caf8e72d9601ec0 Mon Sep 17 00:00:00 2001 From: Vadym Kochan Date: Tue, 26 Oct 2021 15:19:07 +0300 Subject: MAINTAINERS: please remove myself from the Prestera driver Signed-off-by: Vadym Kochan Signed-off-by: David S. Miller --- MAINTAINERS | 1 - 1 file changed, 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index 8d118d7957d2..086f5c89216b 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -11278,7 +11278,6 @@ F: Documentation/networking/device_drivers/ethernet/marvell/octeontx2.rst F: drivers/net/ethernet/marvell/octeontx2/af/ MARVELL PRESTERA ETHERNET SWITCH DRIVER -M: Vadym Kochan M: Taras Chornyi S: Supported W: https://github.com/Marvell-switching/switchdev-prestera -- cgit v1.2.3 From d308ae0d299a6bb15be4efb91849582d19c23213 Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Tue, 26 Oct 2021 18:12:04 +0800 Subject: block: drain queue after disk is removed from sysfs Before removing disk from sysfs, userspace still may change queue via sysfs, such as switching elevator or setting wbt latency, both may reinitialize wbt, then the warning in blk_free_queue_stats() will be triggered since rq_qos_exit() is moved to del_gendisk(). Fixes the issue by moving draining queue & tearing down after disk is removed from sysfs, at that time no one can come into queue's store()/show(). 
Reported-by: Yi Zhang Tested-by: Yi Zhang Fixes: 8e141f9eb803 ("block: drain file system I/O on del_gendisk") Signed-off-by: Ming Lei Link: https://lore.kernel.org/r/20211026101204.2897166-1-ming.lei@redhat.com Signed-off-by: Jens Axboe --- block/genhd.c | 22 ++++++++++++---------- 1 file changed, 12 insertions(+), 10 deletions(-) diff --git a/block/genhd.c b/block/genhd.c index b49858550fa6..ab12ae6e636e 100644 --- a/block/genhd.c +++ b/block/genhd.c @@ -588,16 +588,6 @@ void del_gendisk(struct gendisk *disk) * Prevent new I/O from crossing bio_queue_enter(). */ blk_queue_start_drain(q); - blk_mq_freeze_queue_wait(q); - - rq_qos_exit(q); - blk_sync_queue(q); - blk_flush_integrity(); - /* - * Allow using passthrough request again after the queue is torn down. - */ - blk_queue_flag_clear(QUEUE_FLAG_INIT_DONE, q); - __blk_mq_unfreeze_queue(q, true); if (!(disk->flags & GENHD_FL_HIDDEN)) { sysfs_remove_link(&disk_to_dev(disk)->kobj, "bdi"); @@ -620,6 +610,18 @@ void del_gendisk(struct gendisk *disk) sysfs_remove_link(block_depr, dev_name(disk_to_dev(disk))); pm_runtime_set_memalloc_noio(disk_to_dev(disk), false); device_del(disk_to_dev(disk)); + + blk_mq_freeze_queue_wait(q); + + rq_qos_exit(q); + blk_sync_queue(q); + blk_flush_integrity(); + /* + * Allow using passthrough request again after the queue is torn down. + */ + blk_queue_flag_clear(QUEUE_FLAG_INIT_DONE, q); + __blk_mq_unfreeze_queue(q, true); + } EXPORT_SYMBOL(del_gendisk); -- cgit v1.2.3 From 05d5da3cb11c91c39e607066d3313a6ce621796a Mon Sep 17 00:00:00 2001 From: Christoph Niedermaier Date: Mon, 25 Oct 2021 09:37:06 +0200 Subject: MAINTAINERS: Add maintainers for DHCOM i.MX6 and DHCOM/DHCOR STM32MP1 Add maintainers for DH electronics DHCOM i.MX6 and DHCOM/DHCOR STM32MP1 boards. Signed-off-by: Christoph Niedermaier Cc: linux-arm-kernel@lists.infradead.org Cc: kernel@dh-electronics.com Cc: arnd@arndb.de Link: https://lore.kernel.org/r/20211025073706.2794-1-cniedermaier@dh-electronics.com' To: soc@kernel.org To: linux-kernel@vger.kernel.org Signed-off-by: Arnd Bergmann --- MAINTAINERS | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/MAINTAINERS b/MAINTAINERS index 8d118d7957d2..6fbcb632649f 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -5458,6 +5458,19 @@ F: include/net/devlink.h F: include/uapi/linux/devlink.h F: net/core/devlink.c +DH ELECTRONICS IMX6 DHCOM BOARD SUPPORT +M: Christoph Niedermaier +L: kernel@dh-electronics.com +S: Maintained +F: arch/arm/boot/dts/imx6*-dhcom-* + +DH ELECTRONICS STM32MP1 DHCOM/DHCOR BOARD SUPPORT +M: Marek Vasut +L: kernel@dh-electronics.com +S: Maintained +F: arch/arm/boot/dts/stm32mp1*-dhcom-* +F: arch/arm/boot/dts/stm32mp1*-dhcor-* + DIALOG SEMICONDUCTOR DRIVERS M: Support Opensource S: Supported -- cgit v1.2.3 From 697542bceae51f7620af333b065dd09d213629fb Mon Sep 17 00:00:00 2001 From: Jaehoon Chung Date: Fri, 22 Oct 2021 17:21:06 +0900 Subject: mmc: dw_mmc: exynos: fix the finding clock sample value Even though there are candiates value if can't find best value, it's returned -EIO. It's not proper behavior. If there is not best value, use a first candiate value to work eMMC. 
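For context, the candidate mask (spelled "candiates" in the driver) is assumed here to be a bitmask of clock-sample points that passed tuning; the fallback simply picks the first working sample instead of failing outright. A minimal sketch of that fallback (illustrative only — the driver itself scans the mask with ror8()):

	/* return the lowest set bit of the candidate mask, or -EIO if empty */
	static s8 first_candidate(u8 candidates)
	{
		int i;

		for (i = 0; i < 8; i++)
			if (candidates & BIT(i))
				return i;

		return -EIO;
	}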
Signed-off-by: Jaehoon Chung Tested-by: Marek Szyprowski Tested-by: Christian Hewitt Cc: stable@vger.kernel.org Fixes: c537a1c5ff63 ("mmc: dw_mmc: exynos: add variable delay tuning sequence") Link: https://lore.kernel.org/r/20211022082106.1557-1-jh80.chung@samsung.com Signed-off-by: Ulf Hansson --- drivers/mmc/host/dw_mmc-exynos.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/drivers/mmc/host/dw_mmc-exynos.c b/drivers/mmc/host/dw_mmc-exynos.c index 0c75810812a0..1f8a3c0ddfe1 100644 --- a/drivers/mmc/host/dw_mmc-exynos.c +++ b/drivers/mmc/host/dw_mmc-exynos.c @@ -464,6 +464,18 @@ static s8 dw_mci_exynos_get_best_clksmpl(u8 candiates) } } + /* + * If there is no cadiates value, then it needs to return -EIO. + * If there are candiates values and don't find bset clk sample value, + * then use a first candiates clock sample value. + */ + for (i = 0; i < iter; i++) { + __c = ror8(candiates, i); + if ((__c & 0x1) == 0x1) { + loc = i; + goto out; + } + } out: return loc; } @@ -494,6 +506,8 @@ static int dw_mci_exynos_execute_tuning(struct dw_mci_slot *slot, u32 opcode) priv->tuned_sample = found; } else { ret = -EIO; + dev_warn(&mmc->class_dev, + "There is no candiates value about clksmpl!\n"); } return ret; -- cgit v1.2.3 From 8c8171929116cc23f74743d99251eedadf62341a Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Mon, 25 Oct 2021 13:56:08 +0200 Subject: mmc: vub300: fix control-message timeouts USB control-message timeouts are specified in milliseconds and should specifically not vary with CONFIG_HZ. Fixes: 88095e7b473a ("mmc: Add new VUB300 USB-to-SD/SDIO/MMC driver") Cc: stable@vger.kernel.org # 3.0 Signed-off-by: Johan Hovold Link: https://lore.kernel.org/r/20211025115608.5287-1-johan@kernel.org Signed-off-by: Ulf Hansson --- drivers/mmc/host/vub300.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/drivers/mmc/host/vub300.c b/drivers/mmc/host/vub300.c index 4950d10d3a19..97beece62fec 100644 --- a/drivers/mmc/host/vub300.c +++ b/drivers/mmc/host/vub300.c @@ -576,7 +576,7 @@ static void check_vub300_port_status(struct vub300_mmc_host *vub300) GET_SYSTEM_PORT_STATUS, USB_DIR_IN | USB_TYPE_VENDOR | USB_RECIP_DEVICE, 0x0000, 0x0000, &vub300->system_port_status, - sizeof(vub300->system_port_status), HZ); + sizeof(vub300->system_port_status), 1000); if (sizeof(vub300->system_port_status) == retval) new_system_port_status(vub300); } @@ -1241,7 +1241,7 @@ static void __download_offload_pseudocode(struct vub300_mmc_host *vub300, SET_INTERRUPT_PSEUDOCODE, USB_DIR_OUT | USB_TYPE_VENDOR | USB_RECIP_DEVICE, 0x0000, 0x0000, - xfer_buffer, xfer_length, HZ); + xfer_buffer, xfer_length, 1000); kfree(xfer_buffer); if (retval < 0) goto copy_error_message; @@ -1284,7 +1284,7 @@ static void __download_offload_pseudocode(struct vub300_mmc_host *vub300, SET_TRANSFER_PSEUDOCODE, USB_DIR_OUT | USB_TYPE_VENDOR | USB_RECIP_DEVICE, 0x0000, 0x0000, - xfer_buffer, xfer_length, HZ); + xfer_buffer, xfer_length, 1000); kfree(xfer_buffer); if (retval < 0) goto copy_error_message; @@ -1991,7 +1991,7 @@ static void __set_clock_speed(struct vub300_mmc_host *vub300, u8 buf[8], usb_control_msg(vub300->udev, usb_sndctrlpipe(vub300->udev, 0), SET_CLOCK_SPEED, USB_DIR_OUT | USB_TYPE_VENDOR | USB_RECIP_DEVICE, - 0x00, 0x00, buf, buf_array_size, HZ); + 0x00, 0x00, buf, buf_array_size, 1000); if (retval != 8) { dev_err(&vub300->udev->dev, "SET_CLOCK_SPEED" " %dkHz failed with retval=%d\n", kHzClock, retval); @@ -2013,14 +2013,14 @@ static void vub300_mmc_set_ios(struct mmc_host 
*mmc, struct mmc_ios *ios) usb_control_msg(vub300->udev, usb_sndctrlpipe(vub300->udev, 0), SET_SD_POWER, USB_DIR_OUT | USB_TYPE_VENDOR | USB_RECIP_DEVICE, - 0x0000, 0x0000, NULL, 0, HZ); + 0x0000, 0x0000, NULL, 0, 1000); /* must wait for the VUB300 u-proc to boot up */ msleep(600); } else if ((ios->power_mode == MMC_POWER_UP) && !vub300->card_powered) { usb_control_msg(vub300->udev, usb_sndctrlpipe(vub300->udev, 0), SET_SD_POWER, USB_DIR_OUT | USB_TYPE_VENDOR | USB_RECIP_DEVICE, - 0x0001, 0x0000, NULL, 0, HZ); + 0x0001, 0x0000, NULL, 0, 1000); msleep(600); vub300->card_powered = 1; } else if (ios->power_mode == MMC_POWER_ON) { @@ -2275,14 +2275,14 @@ static int vub300_probe(struct usb_interface *interface, GET_HC_INF0, USB_DIR_IN | USB_TYPE_VENDOR | USB_RECIP_DEVICE, 0x0000, 0x0000, &vub300->hc_info, - sizeof(vub300->hc_info), HZ); + sizeof(vub300->hc_info), 1000); if (retval < 0) goto error5; retval = usb_control_msg(vub300->udev, usb_sndctrlpipe(vub300->udev, 0), SET_ROM_WAIT_STATES, USB_DIR_OUT | USB_TYPE_VENDOR | USB_RECIP_DEVICE, - firmware_rom_wait_states, 0x0000, NULL, 0, HZ); + firmware_rom_wait_states, 0x0000, NULL, 0, 1000); if (retval < 0) goto error5; dev_info(&vub300->udev->dev, @@ -2297,7 +2297,7 @@ static int vub300_probe(struct usb_interface *interface, GET_SYSTEM_PORT_STATUS, USB_DIR_IN | USB_TYPE_VENDOR | USB_RECIP_DEVICE, 0x0000, 0x0000, &vub300->system_port_status, - sizeof(vub300->system_port_status), HZ); + sizeof(vub300->system_port_status), 1000); if (retval < 0) { goto error4; } else if (sizeof(vub300->system_port_status) == retval) { -- cgit v1.2.3 From 92b18252b91de567cd875f2e84722b10ab34ee28 Mon Sep 17 00:00:00 2001 From: Wenbin Mei Date: Tue, 26 Oct 2021 15:08:12 +0800 Subject: mmc: cqhci: clear HALT state after CQE enable While mmc0 enter suspend state, we need halt CQE to send legacy cmd(flush cache) and disable cqe, for resume back, we enable CQE and not clear HALT state. In this case MediaTek mmc host controller will keep the value for HALT state after CQE disable/enable flow, so the next CQE transfer after resume will be timeout due to CQE is in HALT state, the log as below: <4>.(4)[318:kworker/4:1H]mmc0: cqhci: timeout for tag 2 <4>.(4)[318:kworker/4:1H]mmc0: cqhci: ============ CQHCI REGISTER DUMP =========== <4>.(4)[318:kworker/4:1H]mmc0: cqhci: Caps: 0x100020b6 | Version: 0x00000510 <4>.(4)[318:kworker/4:1H]mmc0: cqhci: Config: 0x00001103 | Control: 0x00000001 <4>.(4)[318:kworker/4:1H]mmc0: cqhci: Int stat: 0x00000000 | Int enab: 0x00000006 <4>.(4)[318:kworker/4:1H]mmc0: cqhci: Int sig: 0x00000006 | Int Coal: 0x00000000 <4>.(4)[318:kworker/4:1H]mmc0: cqhci: TDL base: 0xfd05f000 | TDL up32: 0x00000000 <4>.(4)[318:kworker/4:1H]mmc0: cqhci: Doorbell: 0x8000203c | TCN: 0x00000000 <4>.(4)[318:kworker/4:1H]mmc0: cqhci: Dev queue: 0x00000000 | Dev Pend: 0x00000000 <4>.(4)[318:kworker/4:1H]mmc0: cqhci: Task clr: 0x00000000 | SSC1: 0x00001000 <4>.(4)[318:kworker/4:1H]mmc0: cqhci: SSC2: 0x00000001 | DCMD rsp: 0x00000000 <4>.(4)[318:kworker/4:1H]mmc0: cqhci: RED mask: 0xfdf9a080 | TERRI: 0x00000000 <4>.(4)[318:kworker/4:1H]mmc0: cqhci: Resp idx: 0x00000000 | Resp arg: 0x00000000 <4>.(4)[318:kworker/4:1H]mmc0: cqhci: CRNQP: 0x00000000 | CRNQDUN: 0x00000000 <4>.(4)[318:kworker/4:1H]mmc0: cqhci: CRNQIS: 0x00000000 | CRNQIE: 0x00000000 This change check HALT state after CQE enable, if CQE is in HALT state, we will clear it. 
Signed-off-by: Wenbin Mei Cc: stable@vger.kernel.org Acked-by: Adrian Hunter Fixes: a4080225f51d ("mmc: cqhci: support for command queue enabled host") Link: https://lore.kernel.org/r/20211026070812.9359-1-wenbin.mei@mediatek.com Signed-off-by: Ulf Hansson --- drivers/mmc/host/cqhci-core.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/mmc/host/cqhci-core.c b/drivers/mmc/host/cqhci-core.c index 38559a956330..31f841231609 100644 --- a/drivers/mmc/host/cqhci-core.c +++ b/drivers/mmc/host/cqhci-core.c @@ -282,6 +282,9 @@ static void __cqhci_enable(struct cqhci_host *cq_host) cqhci_writel(cq_host, cqcfg, CQHCI_CFG); + if (cqhci_readl(cq_host, CQHCI_CTL) & CQHCI_HALT) + cqhci_writel(cq_host, 0, CQHCI_CTL); + mmc->cqe_on = true; if (cq_host->ops->enable) -- cgit v1.2.3 From 6e7733ef0bb9372d5491168635f6ecba8ac3cb8a Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Thu, 7 Oct 2021 17:33:02 -0700 Subject: Revert "watchdog: iTCO_wdt: Account for rebooting on second timeout" MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This reverts commit cb011044e34c ("watchdog: iTCO_wdt: Account for rebooting on second timeout") and commit aec42642d91f ("watchdog: iTCO_wdt: Fix detection of SMI-off case") since those patches cause a regression on certain boards (https://bugzilla.kernel.org/show_bug.cgi?id=213809). While this revert may result in some boards to only reset after twice the configured timeout value, that is still better than a watchdog reset after half the configured value. Fixes: cb011044e34c ("watchdog: iTCO_wdt: Account for rebooting on second timeout") Fixes: aec42642d91f ("watchdog: iTCO_wdt: Fix detection of SMI-off case") Cc: Jan Kiszka Cc: Mantas Mikulėnas Reported-by: Javier S. Pedro Signed-off-by: Guenter Roeck Link: https://lore.kernel.org/r/20211008003302.1461733-1-linux@roeck-us.net Signed-off-by: Wim Van Sebroeck --- drivers/watchdog/iTCO_wdt.c | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) diff --git a/drivers/watchdog/iTCO_wdt.c b/drivers/watchdog/iTCO_wdt.c index 643c6c2d0b72..ced2fc0deb8c 100644 --- a/drivers/watchdog/iTCO_wdt.c +++ b/drivers/watchdog/iTCO_wdt.c @@ -71,8 +71,6 @@ #define TCOBASE(p) ((p)->tco_res->start) /* SMI Control and Enable Register */ #define SMI_EN(p) ((p)->smi_res->start) -#define TCO_EN (1 << 13) -#define GBL_SMI_EN (1 << 0) #define TCO_RLD(p) (TCOBASE(p) + 0x00) /* TCO Timer Reload/Curr. Value */ #define TCOv1_TMR(p) (TCOBASE(p) + 0x01) /* TCOv1 Timer Initial Value*/ @@ -357,12 +355,8 @@ static int iTCO_wdt_set_timeout(struct watchdog_device *wd_dev, unsigned int t) tmrval = seconds_to_ticks(p, t); - /* - * If TCO SMIs are off, the timer counts down twice before rebooting. - * Otherwise, the BIOS generally reboots when the SMI triggers. 
- */ - if (p->smi_res && - (inl(SMI_EN(p)) & (TCO_EN | GBL_SMI_EN)) != (TCO_EN | GBL_SMI_EN)) + /* For TCO v1 the timer counts down twice before rebooting */ + if (p->iTCO_version == 1) tmrval /= 2; /* from the specs: */ @@ -527,7 +521,7 @@ static int iTCO_wdt_probe(struct platform_device *pdev) * Disables TCO logic generating an SMI# */ val32 = inl(SMI_EN(p)); - val32 &= ~TCO_EN; /* Turn off SMI clearing watchdog */ + val32 &= 0xffffdfff; /* Turn off SMI clearing watchdog */ outl(val32, SMI_EN(p)); } -- cgit v1.2.3 From f31afb502c3151855df3ed40f5974c7884c10d14 Mon Sep 17 00:00:00 2001 From: Jamie Iles Date: Fri, 3 Sep 2021 12:21:01 +0100 Subject: watchdog: sbsa: only use 32-bit accessors SBSA says of the generic watchdog: All registers are 32 bits in size and should be accessed using 32-bit reads and writes. If an access size other than 32 bits is used then the results are IMPLEMENTATION DEFINED. and for qemu, the implementation will only allow 32-bit accesses resulting in a synchronous external abort when configuring the watchdog. Use lo_hi_* accessors rather than a readq/writeq. Fixes: abd3ac7902fb ("watchdog: sbsa: Support architecture version 1") Signed-off-by: Jamie Iles Reviewed-by: Guenter Roeck Reviewed-by: Shaokun Zhang Link: https://lore.kernel.org/r/20210903112101.493552-1-quic_jiles@quicinc.com Signed-off-by: Guenter Roeck Signed-off-by: Wim Van Sebroeck --- drivers/watchdog/sbsa_gwdt.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/watchdog/sbsa_gwdt.c b/drivers/watchdog/sbsa_gwdt.c index ee9ff38929eb..6f4319bdbc50 100644 --- a/drivers/watchdog/sbsa_gwdt.c +++ b/drivers/watchdog/sbsa_gwdt.c @@ -130,7 +130,7 @@ static u64 sbsa_gwdt_reg_read(struct sbsa_gwdt *gwdt) if (gwdt->version == 0) return readl(gwdt->control_base + SBSA_GWDT_WOR); else - return readq(gwdt->control_base + SBSA_GWDT_WOR); + return lo_hi_readq(gwdt->control_base + SBSA_GWDT_WOR); } static void sbsa_gwdt_reg_write(u64 val, struct sbsa_gwdt *gwdt) @@ -138,7 +138,7 @@ static void sbsa_gwdt_reg_write(u64 val, struct sbsa_gwdt *gwdt) if (gwdt->version == 0) writel((u32)val, gwdt->control_base + SBSA_GWDT_WOR); else - writeq(val, gwdt->control_base + SBSA_GWDT_WOR); + lo_hi_writeq(val, gwdt->control_base + SBSA_GWDT_WOR); } /* -- cgit v1.2.3 From bcc3e704f1b73f7f8674364f0bda6593bbc9fd2b Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Fri, 17 Sep 2021 11:20:23 +0200 Subject: watchdog: sbsa: drop unneeded MODULE_ALIAS The MODULE_DEVICE_TABLE already creates proper alias for platform driver. Having another MODULE_ALIAS causes the alias to be duplicated. Signed-off-by: Krzysztof Kozlowski Reviewed-by: Guenter Roeck Link: https://lore.kernel.org/r/20210917092024.19323-1-krzysztof.kozlowski@canonical.com Signed-off-by: Guenter Roeck Signed-off-by: Wim Van Sebroeck --- drivers/watchdog/sbsa_gwdt.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/watchdog/sbsa_gwdt.c b/drivers/watchdog/sbsa_gwdt.c index 6f4319bdbc50..9791c74aebd4 100644 --- a/drivers/watchdog/sbsa_gwdt.c +++ b/drivers/watchdog/sbsa_gwdt.c @@ -411,4 +411,3 @@ MODULE_AUTHOR("Suravee Suthikulpanit "); MODULE_AUTHOR("Al Stone "); MODULE_AUTHOR("Timur Tabi "); MODULE_LICENSE("GPL v2"); -MODULE_ALIAS("platform:" DRV_NAME); -- cgit v1.2.3 From abd1c6adc16d1b82109a29b570fc545b775049d5 Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Fri, 10 Sep 2021 21:29:25 -0700 Subject: watchdog: ixp4xx_wdt: Fix address space warning sparse reports the following address space warning. 
drivers/watchdog/ixp4xx_wdt.c:122:20: sparse: incorrect type in assignment (different address spaces) drivers/watchdog/ixp4xx_wdt.c:122:20: sparse: expected void [noderef] __iomem *base drivers/watchdog/ixp4xx_wdt.c:122:20: sparse: got void *platform_data Add a typecast to solve the problem. Fixes: 21a0a29d16c6 ("watchdog: ixp4xx: Rewrite driver to use core") Cc: Linus Walleij Reviewed-by: Linus Walleij Signed-off-by: Guenter Roeck Link: https://lore.kernel.org/r/20210911042925.556889-1-linux@roeck-us.net Signed-off-by: Wim Van Sebroeck --- drivers/watchdog/ixp4xx_wdt.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/watchdog/ixp4xx_wdt.c b/drivers/watchdog/ixp4xx_wdt.c index 2693ffb24ac7..31b03fa71341 100644 --- a/drivers/watchdog/ixp4xx_wdt.c +++ b/drivers/watchdog/ixp4xx_wdt.c @@ -119,7 +119,7 @@ static int ixp4xx_wdt_probe(struct platform_device *pdev) iwdt = devm_kzalloc(dev, sizeof(*iwdt), GFP_KERNEL); if (!iwdt) return -ENOMEM; - iwdt->base = dev->platform_data; + iwdt->base = (void __iomem *)dev->platform_data; /* * Retrieve rate from a fixed clock from the device tree if -- cgit v1.2.3 From cd004d8299f1dc6cfa6a4eea8f94cb45eaedf070 Mon Sep 17 00:00:00 2001 From: Walter Stoll Date: Thu, 14 Oct 2021 12:22:29 +0200 Subject: watchdog: Fix OMAP watchdog early handling TI's implementation does not service the watchdog even if the kernel command line parameter omap_wdt.early_enable is set to 1. This patch fixes the issue. Signed-off-by: Walter Stoll Reviewed-by: Guenter Roeck Link: https://lore.kernel.org/r/88a8fe5229cd68fa0f1fd22f5d66666c1b7057a0.camel@duagon.com Signed-off-by: Guenter Roeck Signed-off-by: Wim Van Sebroeck --- drivers/watchdog/omap_wdt.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/watchdog/omap_wdt.c b/drivers/watchdog/omap_wdt.c index 1616f93dfad7..74d785b2b478 100644 --- a/drivers/watchdog/omap_wdt.c +++ b/drivers/watchdog/omap_wdt.c @@ -268,8 +268,12 @@ static int omap_wdt_probe(struct platform_device *pdev) wdev->wdog.bootstatus = WDIOF_CARDRESET; } - if (!early_enable) + if (early_enable) { + omap_wdt_start(&wdev->wdog); + set_bit(WDOG_HW_RUNNING, &wdev->wdog.status); + } else { omap_wdt_disable(wdev); + } ret = watchdog_register_device(&wdev->wdog); if (ret) { -- cgit v1.2.3 From 61b1d445f3bfe4c3ba4335ceeb7e8ba688fd31e2 Mon Sep 17 00:00:00 2001 From: Mario Date: Tue, 26 Oct 2021 13:27:37 +0200 Subject: drm: panel-orientation-quirks: Add quirk for GPD Win3 Fixes screen orientation for GPD Win 3 handheld gaming console. 
Signed-off-by: Mario Risoldi Signed-off-by: Sam Ravnborg Link: https://patchwork.freedesktop.org/patch/msgid/20211026112737.9181-1-awxkrnl@gmail.com --- drivers/gpu/drm/drm_panel_orientation_quirks.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/gpu/drm/drm_panel_orientation_quirks.c b/drivers/gpu/drm/drm_panel_orientation_quirks.c index 30c17a76f49a..e1b2ce4921ae 100644 --- a/drivers/gpu/drm/drm_panel_orientation_quirks.c +++ b/drivers/gpu/drm/drm_panel_orientation_quirks.c @@ -191,6 +191,12 @@ static const struct dmi_system_id orientation_data[] = { DMI_EXACT_MATCH(DMI_BOARD_NAME, "Default string"), }, .driver_data = (void *)&gpd_win2, + }, { /* GPD Win 3 */ + .matches = { + DMI_EXACT_MATCH(DMI_SYS_VENDOR, "GPD"), + DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "G1618-03") + }, + .driver_data = (void *)&lcd720x1280_rightside_up, }, { /* I.T.Works TW891 */ .matches = { DMI_EXACT_MATCH(DMI_SYS_VENDOR, "To be filled by O.E.M."), -- cgit v1.2.3 From cd9733f5d75c94a32544d6ce5be47e14194cf137 Mon Sep 17 00:00:00 2001 From: Liu Jian Date: Tue, 12 Oct 2021 13:20:19 +0800 Subject: tcp_bpf: Fix one concurrency problem in the tcp_bpf_send_verdict function With two Msgs, msgA and msgB and a user doing nonblocking sendmsg calls (or multiple cores) on a single socket 'sk' we could get the following flow. msgA, sk msgB, sk ----------- --------------- tcp_bpf_sendmsg() lock(sk) psock = sk->psock tcp_bpf_sendmsg() lock(sk) ... blocking tcp_bpf_send_verdict if (psock->eval == NONE) psock->eval = sk_psock_msg_verdict .. < handle SK_REDIRECT case > release_sock(sk) < lock dropped so grab here > ret = tcp_bpf_sendmsg_redir psock = sk->psock tcp_bpf_send_verdict lock_sock(sk) ... blocking on B if (psock->eval == NONE) <- boom. psock->eval will have msgA state The problem here is we dropped the lock on msgA and grabbed it with msgB. Now we have old state in psock and importantly psock->eval has not been cleared. So msgB will run whatever action was done on A and the verdict program may never see it. Fixes: 604326b41a6fb ("bpf, sockmap: convert to generic sk_msg interface") Signed-off-by: Liu Jian Signed-off-by: Alexei Starovoitov Acked-by: John Fastabend Link: https://lore.kernel.org/bpf/20211012052019.184398-1-liujian56@huawei.com --- net/ipv4/tcp_bpf.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/net/ipv4/tcp_bpf.c b/net/ipv4/tcp_bpf.c index d3e9386b493e..9d068153c316 100644 --- a/net/ipv4/tcp_bpf.c +++ b/net/ipv4/tcp_bpf.c @@ -232,6 +232,7 @@ static int tcp_bpf_send_verdict(struct sock *sk, struct sk_psock *psock, bool cork = false, enospc = sk_msg_full(msg); struct sock *sk_redir; u32 tosend, delta = 0; + u32 eval = __SK_NONE; int ret; more_data: @@ -275,13 +276,24 @@ more_data: case __SK_REDIRECT: sk_redir = psock->sk_redir; sk_msg_apply_bytes(psock, tosend); + if (!psock->apply_bytes) { + /* Clean up before releasing the sock lock. 
*/ + eval = psock->eval; + psock->eval = __SK_NONE; + psock->sk_redir = NULL; + } if (psock->cork) { cork = true; psock->cork = NULL; } sk_msg_return(sk, msg, tosend); release_sock(sk); + ret = tcp_bpf_sendmsg_redir(sk_redir, msg, tosend, flags); + + if (eval == __SK_REDIRECT) + sock_put(sk_redir); + lock_sock(sk); if (unlikely(ret < 0)) { int free = sk_msg_free_nocharge(sk, msg); -- cgit v1.2.3 From 7b50ecfcc6cdfe87488576bc3ed443dc8d083b90 Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Fri, 8 Oct 2021 13:33:03 -0700 Subject: net: Rename ->stream_memory_read to ->sock_is_readable The proto ops ->stream_memory_read() is currently only used by TCP to check whether psock queue is empty or not. We need to rename it before reusing it for non-TCP protocols, and adjust the existing users accordingly. Signed-off-by: Cong Wang Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20211008203306.37525-2-xiyou.wangcong@gmail.com --- include/net/sock.h | 8 +++++++- include/net/tls.h | 2 +- net/ipv4/tcp.c | 5 +---- net/ipv4/tcp_bpf.c | 4 ++-- net/tls/tls_main.c | 4 ++-- net/tls/tls_sw.c | 2 +- 6 files changed, 14 insertions(+), 11 deletions(-) diff --git a/include/net/sock.h b/include/net/sock.h index ea6fbc88c8f9..463f390d90b3 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -1208,7 +1208,7 @@ struct proto { #endif bool (*stream_memory_free)(const struct sock *sk, int wake); - bool (*stream_memory_read)(const struct sock *sk); + bool (*sock_is_readable)(struct sock *sk); /* Memory pressure */ void (*enter_memory_pressure)(struct sock *sk); void (*leave_memory_pressure)(struct sock *sk); @@ -2820,4 +2820,10 @@ void sock_set_sndtimeo(struct sock *sk, s64 secs); int sock_bind_add(struct sock *sk, struct sockaddr *addr, int addr_len); +static inline bool sk_is_readable(struct sock *sk) +{ + if (sk->sk_prot->sock_is_readable) + return sk->sk_prot->sock_is_readable(sk); + return false; +} #endif /* _SOCK_H */ diff --git a/include/net/tls.h b/include/net/tls.h index be4b3e1cac46..01d2e3744393 100644 --- a/include/net/tls.h +++ b/include/net/tls.h @@ -375,7 +375,7 @@ void tls_sw_release_resources_rx(struct sock *sk); void tls_sw_free_ctx_rx(struct tls_context *tls_ctx); int tls_sw_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int nonblock, int flags, int *addr_len); -bool tls_sw_stream_read(const struct sock *sk); +bool tls_sw_sock_is_readable(struct sock *sk); ssize_t tls_sw_splice_read(struct socket *sock, loff_t *ppos, struct pipe_inode_info *pipe, size_t len, unsigned int flags); diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index e8b48df73c85..f5c336f8b0c8 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -486,10 +486,7 @@ static bool tcp_stream_is_readable(struct sock *sk, int target) { if (tcp_epollin_ready(sk, target)) return true; - - if (sk->sk_prot->stream_memory_read) - return sk->sk_prot->stream_memory_read(sk); - return false; + return sk_is_readable(sk); } /* diff --git a/net/ipv4/tcp_bpf.c b/net/ipv4/tcp_bpf.c index 9d068153c316..7e71e9e278cb 100644 --- a/net/ipv4/tcp_bpf.c +++ b/net/ipv4/tcp_bpf.c @@ -150,7 +150,7 @@ int tcp_bpf_sendmsg_redir(struct sock *sk, struct sk_msg *msg, EXPORT_SYMBOL_GPL(tcp_bpf_sendmsg_redir); #ifdef CONFIG_BPF_SYSCALL -static bool tcp_bpf_stream_read(const struct sock *sk) +static bool tcp_bpf_sock_is_readable(struct sock *sk) { struct sk_psock *psock; bool empty = true; @@ -491,7 +491,7 @@ static void tcp_bpf_rebuild_protos(struct proto prot[TCP_BPF_NUM_CFGS], prot[TCP_BPF_BASE].unhash = sock_map_unhash;
prot[TCP_BPF_BASE].close = sock_map_close; prot[TCP_BPF_BASE].recvmsg = tcp_bpf_recvmsg; - prot[TCP_BPF_BASE].stream_memory_read = tcp_bpf_stream_read; + prot[TCP_BPF_BASE].sock_is_readable = tcp_bpf_sock_is_readable; prot[TCP_BPF_TX] = prot[TCP_BPF_BASE]; prot[TCP_BPF_TX].sendmsg = tcp_bpf_sendmsg; diff --git a/net/tls/tls_main.c b/net/tls/tls_main.c index fde56ff49163..9ab81db8a654 100644 --- a/net/tls/tls_main.c +++ b/net/tls/tls_main.c @@ -681,12 +681,12 @@ static void build_protos(struct proto prot[TLS_NUM_CONFIG][TLS_NUM_CONFIG], prot[TLS_BASE][TLS_SW] = prot[TLS_BASE][TLS_BASE]; prot[TLS_BASE][TLS_SW].recvmsg = tls_sw_recvmsg; - prot[TLS_BASE][TLS_SW].stream_memory_read = tls_sw_stream_read; + prot[TLS_BASE][TLS_SW].sock_is_readable = tls_sw_sock_is_readable; prot[TLS_BASE][TLS_SW].close = tls_sk_proto_close; prot[TLS_SW][TLS_SW] = prot[TLS_SW][TLS_BASE]; prot[TLS_SW][TLS_SW].recvmsg = tls_sw_recvmsg; - prot[TLS_SW][TLS_SW].stream_memory_read = tls_sw_stream_read; + prot[TLS_SW][TLS_SW].sock_is_readable = tls_sw_sock_is_readable; prot[TLS_SW][TLS_SW].close = tls_sk_proto_close; #ifdef CONFIG_TLS_DEVICE diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c index 4feb95e34b64..d5d09bd817b7 100644 --- a/net/tls/tls_sw.c +++ b/net/tls/tls_sw.c @@ -2026,7 +2026,7 @@ splice_read_end: return copied ? : err; } -bool tls_sw_stream_read(const struct sock *sk) +bool tls_sw_sock_is_readable(struct sock *sk) { struct tls_context *tls_ctx = tls_get_ctx(sk); struct tls_sw_context_rx *ctx = tls_sw_ctx_rx(tls_ctx); -- cgit v1.2.3 From fb4e0a5e73d4bb5ab69b7905abd2ec3b580e9b59 Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Fri, 8 Oct 2021 13:33:04 -0700 Subject: skmsg: Extract and reuse sk_msg_is_readable() tcp_bpf_sock_is_readable() is pretty much generic, we can extract it and reuse it for non-TCP sockets. 
Signed-off-by: Cong Wang Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20211008203306.37525-3-xiyou.wangcong@gmail.com --- include/linux/skmsg.h | 1 + net/core/skmsg.c | 14 ++++++++++++++ net/ipv4/tcp_bpf.c | 15 +-------------- 3 files changed, 16 insertions(+), 14 deletions(-) diff --git a/include/linux/skmsg.h b/include/linux/skmsg.h index 14ab0c0bc924..1ce9a9eb223b 100644 --- a/include/linux/skmsg.h +++ b/include/linux/skmsg.h @@ -128,6 +128,7 @@ int sk_msg_memcopy_from_iter(struct sock *sk, struct iov_iter *from, struct sk_msg *msg, u32 bytes); int sk_msg_recvmsg(struct sock *sk, struct sk_psock *psock, struct msghdr *msg, int len, int flags); +bool sk_msg_is_readable(struct sock *sk); static inline void sk_msg_check_to_free(struct sk_msg *msg, u32 i, u32 bytes) { diff --git a/net/core/skmsg.c b/net/core/skmsg.c index 2d6249b28928..a86ef7e844f8 100644 --- a/net/core/skmsg.c +++ b/net/core/skmsg.c @@ -474,6 +474,20 @@ int sk_msg_recvmsg(struct sock *sk, struct sk_psock *psock, struct msghdr *msg, } EXPORT_SYMBOL_GPL(sk_msg_recvmsg); +bool sk_msg_is_readable(struct sock *sk) +{ + struct sk_psock *psock; + bool empty = true; + + rcu_read_lock(); + psock = sk_psock(sk); + if (likely(psock)) + empty = list_empty(&psock->ingress_msg); + rcu_read_unlock(); + return !empty; +} +EXPORT_SYMBOL_GPL(sk_msg_is_readable); + static struct sk_msg *sk_psock_create_ingress_msg(struct sock *sk, struct sk_buff *skb) { diff --git a/net/ipv4/tcp_bpf.c b/net/ipv4/tcp_bpf.c index 7e71e9e278cb..5f4d6f45d87f 100644 --- a/net/ipv4/tcp_bpf.c +++ b/net/ipv4/tcp_bpf.c @@ -150,19 +150,6 @@ int tcp_bpf_sendmsg_redir(struct sock *sk, struct sk_msg *msg, EXPORT_SYMBOL_GPL(tcp_bpf_sendmsg_redir); #ifdef CONFIG_BPF_SYSCALL -static bool tcp_bpf_sock_is_readable(struct sock *sk) -{ - struct sk_psock *psock; - bool empty = true; - - rcu_read_lock(); - psock = sk_psock(sk); - if (likely(psock)) - empty = list_empty(&psock->ingress_msg); - rcu_read_unlock(); - return !empty; -} - static int tcp_msg_wait_data(struct sock *sk, struct sk_psock *psock, long timeo) { @@ -491,7 +478,7 @@ static void tcp_bpf_rebuild_protos(struct proto prot[TCP_BPF_NUM_CFGS], prot[TCP_BPF_BASE].unhash = sock_map_unhash; prot[TCP_BPF_BASE].close = sock_map_close; prot[TCP_BPF_BASE].recvmsg = tcp_bpf_recvmsg; - prot[TCP_BPF_BASE].sock_is_readable = tcp_bpf_sock_is_readable; + prot[TCP_BPF_BASE].sock_is_readable = sk_msg_is_readable; prot[TCP_BPF_TX] = prot[TCP_BPF_BASE]; prot[TCP_BPF_TX].sendmsg = tcp_bpf_sendmsg; -- cgit v1.2.3 From af493388950b6ea3a86f860cfaffab137e024fc8 Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Fri, 8 Oct 2021 13:33:05 -0700 Subject: net: Implement ->sock_is_readable() for UDP and AF_UNIX Yucong noticed we can't poll() sockets in sockmap even when they are the destination sockets of redirections. This is because we never poll any psock queues in ->poll(), except for TCP. With ->sock_is_readable() now we can overwrite >sock_is_readable(), invoke and implement it for both UDP and AF_UNIX sockets. 
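[Editor's illustration, not part of the patch] With ->sock_is_readable() wired up as above, a plain poll()/select() on a socket that is the destination of a sockmap redirect wakes up as soon as the psock ingress queue holds data. A minimal userspace sketch of that behaviour, assuming the sockmap and verdict program were already set up elsewhere (the helper name here is made up):

    #include <poll.h>

    /* Wait until 'fd' (a sockmap redirect target) becomes readable. */
    static int wait_readable(int fd, int timeout_ms)
    {
            struct pollfd pfd = { .fd = fd, .events = POLLIN };
            int ret = poll(&pfd, 1, timeout_ms);

            if (ret <= 0)
                    return ret;             /* 0 = timeout, <0 = error */
            return (pfd.revents & POLLIN) ? 1 : 0;
    }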
Reported-by: Yucong Sun Signed-off-by: Cong Wang Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20211008203306.37525-4-xiyou.wangcong@gmail.com --- net/ipv4/udp.c | 3 +++ net/ipv4/udp_bpf.c | 1 + net/unix/af_unix.c | 4 ++++ net/unix/unix_bpf.c | 2 ++ 4 files changed, 10 insertions(+) diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 8536b2a7210b..2fffcf2b54f3 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -2867,6 +2867,9 @@ __poll_t udp_poll(struct file *file, struct socket *sock, poll_table *wait) !(sk->sk_shutdown & RCV_SHUTDOWN) && first_packet_length(sk) == -1) mask &= ~(EPOLLIN | EPOLLRDNORM); + /* psock ingress_msg queue should not contain any bad checksum frames */ + if (sk_is_readable(sk)) + mask |= EPOLLIN | EPOLLRDNORM; return mask; } diff --git a/net/ipv4/udp_bpf.c b/net/ipv4/udp_bpf.c index 7a1d5f473878..bbe6569c9ad3 100644 --- a/net/ipv4/udp_bpf.c +++ b/net/ipv4/udp_bpf.c @@ -114,6 +114,7 @@ static void udp_bpf_rebuild_protos(struct proto *prot, const struct proto *base) *prot = *base; prot->close = sock_map_close; prot->recvmsg = udp_bpf_recvmsg; + prot->sock_is_readable = sk_msg_is_readable; } static void udp_bpf_check_v6_needs_rebuild(struct proto *ops) diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 89f9e85ae970..78e08e82c08c 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -3052,6 +3052,8 @@ static __poll_t unix_poll(struct file *file, struct socket *sock, poll_table *wa /* readable? */ if (!skb_queue_empty_lockless(&sk->sk_receive_queue)) mask |= EPOLLIN | EPOLLRDNORM; + if (sk_is_readable(sk)) + mask |= EPOLLIN | EPOLLRDNORM; /* Connection-based need to check for termination and startup */ if ((sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) && @@ -3091,6 +3093,8 @@ static __poll_t unix_dgram_poll(struct file *file, struct socket *sock, /* readable? */ if (!skb_queue_empty_lockless(&sk->sk_receive_queue)) mask |= EPOLLIN | EPOLLRDNORM; + if (sk_is_readable(sk)) + mask |= EPOLLIN | EPOLLRDNORM; /* Connection-based need to check for termination and startup */ if (sk->sk_type == SOCK_SEQPACKET) { diff --git a/net/unix/unix_bpf.c b/net/unix/unix_bpf.c index b927e2baae50..452376c6f419 100644 --- a/net/unix/unix_bpf.c +++ b/net/unix/unix_bpf.c @@ -102,6 +102,7 @@ static void unix_dgram_bpf_rebuild_protos(struct proto *prot, const struct proto *prot = *base; prot->close = sock_map_close; prot->recvmsg = unix_bpf_recvmsg; + prot->sock_is_readable = sk_msg_is_readable; } static void unix_stream_bpf_rebuild_protos(struct proto *prot, @@ -110,6 +111,7 @@ static void unix_stream_bpf_rebuild_protos(struct proto *prot, *prot = *base; prot->close = sock_map_close; prot->recvmsg = unix_bpf_recvmsg; + prot->sock_is_readable = sk_msg_is_readable; prot->unhash = sock_map_unhash; } -- cgit v1.2.3 From 67b821502dbd6c9b23715da79cb9b37fa7d969dc Mon Sep 17 00:00:00 2001 From: Yucong Sun Date: Fri, 8 Oct 2021 13:33:06 -0700 Subject: selftests/bpf: Use recv_timeout() instead of retries We use non-blocking sockets in those tests, retrying for EAGAIN is ugly because there is no upper bound for the packet arrival time, at least in theory. After we fix poll() on sockmap sockets, now we can switch to select()+recv(). 
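[Editor's illustration, not part of the patch] For reference, a sketch of what a select()+recv() helper of this kind looks like; the real recv_timeout() lives in the selftest sources, so the exact name and signature below are assumptions:

    #include <sys/select.h>
    #include <sys/socket.h>

    static ssize_t recv_timeout_sketch(int fd, void *buf, size_t len,
                                       int flags, unsigned int timeout_sec)
    {
            struct timeval tv = { .tv_sec = timeout_sec };
            fd_set rfds;

            FD_ZERO(&rfds);
            FD_SET(fd, &rfds);

            /* Block for up to timeout_sec instead of spinning on EAGAIN. */
            if (select(fd + 1, &rfds, NULL, NULL, &tv) <= 0)
                    return -1;

            return recv(fd, buf, len, flags);
    }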
Signed-off-by: Yucong Sun Signed-off-by: Cong Wang Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20211008203306.37525-5-xiyou.wangcong@gmail.com --- .../selftests/bpf/prog_tests/sockmap_listen.c | 75 ++++++---------------- 1 file changed, 20 insertions(+), 55 deletions(-) diff --git a/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c b/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c index 5c5979046523..d88bb65b74cc 100644 --- a/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c +++ b/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c @@ -949,7 +949,6 @@ static void redir_to_connected(int family, int sotype, int sock_mapfd, int err, n; u32 key; char b; - int retries = 100; zero_verdict_count(verd_mapfd); @@ -1002,17 +1001,11 @@ static void redir_to_connected(int family, int sotype, int sock_mapfd, goto close_peer1; if (pass != 1) FAIL("%s: want pass count 1, have %d", log_prefix, pass); -again: - n = read(c0, &b, 1); - if (n < 0) { - if (errno == EAGAIN && retries--) { - usleep(1000); - goto again; - } - FAIL_ERRNO("%s: read", log_prefix); - } + n = recv_timeout(c0, &b, 1, 0, IO_TIMEOUT_SEC); + if (n < 0) + FAIL_ERRNO("%s: recv_timeout", log_prefix); if (n == 0) - FAIL("%s: incomplete read", log_prefix); + FAIL("%s: incomplete recv", log_prefix); close_peer1: xclose(p1); @@ -1571,7 +1564,6 @@ static void unix_redir_to_connected(int sotype, int sock_mapfd, const char *log_prefix = redir_mode_str(mode); int c0, c1, p0, p1; unsigned int pass; - int retries = 100; int err, n; int sfd[2]; u32 key; @@ -1606,17 +1598,11 @@ static void unix_redir_to_connected(int sotype, int sock_mapfd, if (pass != 1) FAIL("%s: want pass count 1, have %d", log_prefix, pass); -again: - n = read(mode == REDIR_INGRESS ? p0 : c0, &b, 1); - if (n < 0) { - if (errno == EAGAIN && retries--) { - usleep(1000); - goto again; - } - FAIL_ERRNO("%s: read", log_prefix); - } + n = recv_timeout(mode == REDIR_INGRESS ? p0 : c0, &b, 1, 0, IO_TIMEOUT_SEC); + if (n < 0) + FAIL_ERRNO("%s: recv_timeout", log_prefix); if (n == 0) - FAIL("%s: incomplete read", log_prefix); + FAIL("%s: incomplete recv", log_prefix); close: xclose(c1); @@ -1748,7 +1734,6 @@ static void udp_redir_to_connected(int family, int sock_mapfd, int verd_mapfd, const char *log_prefix = redir_mode_str(mode); int c0, c1, p0, p1; unsigned int pass; - int retries = 100; int err, n; u32 key; char b; @@ -1781,17 +1766,11 @@ static void udp_redir_to_connected(int family, int sock_mapfd, int verd_mapfd, if (pass != 1) FAIL("%s: want pass count 1, have %d", log_prefix, pass); -again: - n = read(mode == REDIR_INGRESS ? p0 : c0, &b, 1); - if (n < 0) { - if (errno == EAGAIN && retries--) { - usleep(1000); - goto again; - } - FAIL_ERRNO("%s: read", log_prefix); - } + n = recv_timeout(mode == REDIR_INGRESS ? p0 : c0, &b, 1, 0, IO_TIMEOUT_SEC); + if (n < 0) + FAIL_ERRNO("%s: recv_timeout", log_prefix); if (n == 0) - FAIL("%s: incomplete read", log_prefix); + FAIL("%s: incomplete recv", log_prefix); close_cli1: xclose(c1); @@ -1841,7 +1820,6 @@ static void inet_unix_redir_to_connected(int family, int type, int sock_mapfd, const char *log_prefix = redir_mode_str(mode); int c0, c1, p0, p1; unsigned int pass; - int retries = 100; int err, n; int sfd[2]; u32 key; @@ -1876,17 +1854,11 @@ static void inet_unix_redir_to_connected(int family, int type, int sock_mapfd, if (pass != 1) FAIL("%s: want pass count 1, have %d", log_prefix, pass); -again: - n = read(mode == REDIR_INGRESS ? 
p0 : c0, &b, 1); - if (n < 0) { - if (errno == EAGAIN && retries--) { - usleep(1000); - goto again; - } - FAIL_ERRNO("%s: read", log_prefix); - } + n = recv_timeout(mode == REDIR_INGRESS ? p0 : c0, &b, 1, 0, IO_TIMEOUT_SEC); + if (n < 0) + FAIL_ERRNO("%s: recv_timeout", log_prefix); if (n == 0) - FAIL("%s: incomplete read", log_prefix); + FAIL("%s: incomplete recv", log_prefix); close_cli1: xclose(c1); @@ -1932,7 +1904,6 @@ static void unix_inet_redir_to_connected(int family, int type, int sock_mapfd, int sfd[2]; u32 key; char b; - int retries = 100; zero_verdict_count(verd_mapfd); @@ -1963,17 +1934,11 @@ static void unix_inet_redir_to_connected(int family, int type, int sock_mapfd, if (pass != 1) FAIL("%s: want pass count 1, have %d", log_prefix, pass); -again: - n = read(mode == REDIR_INGRESS ? p0 : c0, &b, 1); - if (n < 0) { - if (errno == EAGAIN && retries--) { - usleep(1000); - goto again; - } - FAIL_ERRNO("%s: read", log_prefix); - } + n = recv_timeout(mode == REDIR_INGRESS ? p0 : c0, &b, 1, 0, IO_TIMEOUT_SEC); + if (n < 0) + FAIL_ERRNO("%s: recv_timeout", log_prefix); if (n == 0) - FAIL("%s: incomplete read", log_prefix); + FAIL("%s: incomplete recv", log_prefix); close: xclose(c1); -- cgit v1.2.3 From 99d0a3831e3500d945162cdb2310e3a5fce90b60 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Thu, 21 Oct 2021 08:46:10 -1000 Subject: bpf: Move BPF_MAP_TYPE for INODE_STORAGE and TASK_STORAGE outside of CONFIG_NET bpf_types.h has BPF_MAP_TYPE_INODE_STORAGE and BPF_MAP_TYPE_TASK_STORAGE declared inside #ifdef CONFIG_NET although they are built regardless of CONFIG_NET. So, when CONFIG_BPF_SYSCALL && !CONFIG_NET, they are built without the declarations leading to spurious build failures and not registered to bpf_map_types making them unavailable. Fix it by moving the BPF_MAP_TYPE for the two map types outside of CONFIG_NET. 
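[Editor's note, simplified illustration only] bpf_types.h is consumed as an x-macro: each consumer defines BPF_MAP_TYPE() and re-includes the header, so an entry hidden behind the wrong #ifdef silently disappears from every table built from it, roughly like this (kernel-context sketch, not taken from the patch):

    /* An entry compiled out of bpf_types.h is never registered anywhere. */
    #define BPF_MAP_TYPE(_id, _ops) [_id] = &_ops,
    static const struct bpf_map_ops * const bpf_map_types[] = {
    #include <linux/bpf_types.h>
    };
    #undef BPF_MAP_TYPE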
Reported-by: kernel test robot Fixes: a10787e6d58c ("bpf: Enable task local storage for tracing programs") Signed-off-by: Tejun Heo Signed-off-by: Alexei Starovoitov Acked-by: Martin KaFai Lau Link: https://lore.kernel.org/bpf/YXG1cuuSJDqHQfRY@slm.duckdns.org --- include/linux/bpf_types.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/include/linux/bpf_types.h b/include/linux/bpf_types.h index 9c81724e4b98..bbe1eefa4c8a 100644 --- a/include/linux/bpf_types.h +++ b/include/linux/bpf_types.h @@ -101,14 +101,14 @@ BPF_MAP_TYPE(BPF_MAP_TYPE_STACK_TRACE, stack_trace_map_ops) #endif BPF_MAP_TYPE(BPF_MAP_TYPE_ARRAY_OF_MAPS, array_of_maps_map_ops) BPF_MAP_TYPE(BPF_MAP_TYPE_HASH_OF_MAPS, htab_of_maps_map_ops) -#ifdef CONFIG_NET -BPF_MAP_TYPE(BPF_MAP_TYPE_DEVMAP, dev_map_ops) -BPF_MAP_TYPE(BPF_MAP_TYPE_DEVMAP_HASH, dev_map_hash_ops) -BPF_MAP_TYPE(BPF_MAP_TYPE_SK_STORAGE, sk_storage_map_ops) #ifdef CONFIG_BPF_LSM BPF_MAP_TYPE(BPF_MAP_TYPE_INODE_STORAGE, inode_storage_map_ops) #endif BPF_MAP_TYPE(BPF_MAP_TYPE_TASK_STORAGE, task_storage_map_ops) +#ifdef CONFIG_NET +BPF_MAP_TYPE(BPF_MAP_TYPE_DEVMAP, dev_map_ops) +BPF_MAP_TYPE(BPF_MAP_TYPE_DEVMAP_HASH, dev_map_hash_ops) +BPF_MAP_TYPE(BPF_MAP_TYPE_SK_STORAGE, sk_storage_map_ops) BPF_MAP_TYPE(BPF_MAP_TYPE_CPUMAP, cpu_map_ops) #if defined(CONFIG_XDP_SOCKETS) BPF_MAP_TYPE(BPF_MAP_TYPE_XSKMAP, xsk_map_ops) -- cgit v1.2.3 From 54713c85f536048e685258f880bf298a74c3620d Mon Sep 17 00:00:00 2001 From: Toke Høiland-Jørgensen Date: Tue, 26 Oct 2021 13:00:19 +0200 Subject: bpf: Fix potential race in tail call compatibility check MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Lorenzo noticed that the code testing for program type compatibility of tail call maps is potentially racy in that two threads could encounter a map with an unset type simultaneously and both return true even though they are inserting incompatible programs. The race window is quite small, but artificially enlarging it by adding a usleep_range() inside the check in bpf_prog_array_compatible() makes it trivial to trigger from userspace with a program that does, essentially: map_fd = bpf_create_map(BPF_MAP_TYPE_PROG_ARRAY, 4, 4, 2, 0); pid = fork(); if (pid) { key = 0; value = xdp_fd; } else { key = 1; value = tc_fd; } err = bpf_map_update_elem(map_fd, &key, &value, 0); While the race window is small, it has potentially serious ramifications in that triggering it would allow a BPF program to tail call to a program of a different type. So let's get rid of it by protecting the update with a spinlock. The commit in the Fixes tag is the last commit that touches the code in question. v2: - Use a spinlock instead of an atomic variable and cmpxchg() (Alexei) v3: - Put lock and the members it protects into an embedded 'owner' struct (Daniel) Fixes: 3324b584b6f6 ("ebpf: misc core cleanup") Reported-by: Lorenzo Bianconi Signed-off-by: Toke Høiland-Jørgensen Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20211026110019.363464-1-toke@redhat.com --- include/linux/bpf.h | 7 +++++-- kernel/bpf/arraymap.c | 1 + kernel/bpf/core.c | 20 +++++++++++++------- kernel/bpf/syscall.c | 6 ++++-- 4 files changed, 23 insertions(+), 11 deletions(-) diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 020a7d5bf470..3db6f6c95489 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -929,8 +929,11 @@ struct bpf_array_aux { * stored in the map to make sure that all callers and callees have * the same prog type and JITed flag. 
*/ - enum bpf_prog_type type; - bool jited; + struct { + spinlock_t lock; + enum bpf_prog_type type; + bool jited; + } owner; /* Programs with direct jumps into programs part of this array. */ struct list_head poke_progs; struct bpf_map *map; diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c index cebd4fb06d19..447def540544 100644 --- a/kernel/bpf/arraymap.c +++ b/kernel/bpf/arraymap.c @@ -1072,6 +1072,7 @@ static struct bpf_map *prog_array_map_alloc(union bpf_attr *attr) INIT_WORK(&aux->work, prog_array_map_clear_deferred); INIT_LIST_HEAD(&aux->poke_progs); mutex_init(&aux->poke_mutex); + spin_lock_init(&aux->owner.lock); map = array_map_alloc(attr); if (IS_ERR(map)) { diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index c1e7eb3f1876..6e3ae90ad107 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -1823,20 +1823,26 @@ static unsigned int __bpf_prog_ret0_warn(const void *ctx, bool bpf_prog_array_compatible(struct bpf_array *array, const struct bpf_prog *fp) { + bool ret; + if (fp->kprobe_override) return false; - if (!array->aux->type) { + spin_lock(&array->aux->owner.lock); + + if (!array->aux->owner.type) { /* There's no owner yet where we could check for * compatibility. */ - array->aux->type = fp->type; - array->aux->jited = fp->jited; - return true; + array->aux->owner.type = fp->type; + array->aux->owner.jited = fp->jited; + ret = true; + } else { + ret = array->aux->owner.type == fp->type && + array->aux->owner.jited == fp->jited; } - - return array->aux->type == fp->type && - array->aux->jited == fp->jited; + spin_unlock(&array->aux->owner.lock); + return ret; } static int bpf_check_tail_call(const struct bpf_prog *fp) diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 9dab49d3f394..1cad6979a0d0 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -543,8 +543,10 @@ static void bpf_map_show_fdinfo(struct seq_file *m, struct file *filp) if (map->map_type == BPF_MAP_TYPE_PROG_ARRAY) { array = container_of(map, struct bpf_array, map); - type = array->aux->type; - jited = array->aux->jited; + spin_lock(&array->aux->owner.lock); + type = array->aux->owner.type; + jited = array->aux->owner.jited; + spin_unlock(&array->aux->owner.lock); } seq_printf(m, -- cgit v1.2.3 From 9586e67b911c95ba158fcc247b230e9c2d718623 Mon Sep 17 00:00:00 2001 From: Naohiro Aota Date: Wed, 27 Oct 2021 01:51:27 +0900 Subject: block: schedule queue restart after BLK_STS_ZONE_RESOURCE When dispatching a zone append write request to a SCSI zoned block device, if the target zone of the request is already locked, the device driver will return BLK_STS_ZONE_RESOURCE and the request will be pushed back to the hctx dispatch queue. The queue will be marked as RESTART in dd_finish_request() and restarted in __blk_mq_free_request(). However, this restart applies to the hctx of the completed request. If the requeued request is on a different hctx, dispatch will not be retried until another request is submitted or the next periodic queue run triggers, leading to up to 30 seconds latency for the requeued request. Fix this problem by scheduling a queue restart similarly to the BLK_STS_RESOURCE case or when we cannot get the budget. Also, consolidate the checks into the "need_resource" variable to simplify the condition.
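[Editor's illustration, not code from this patch] The dispatch path above reacts to a driver's ->queue_rq() bailing out because the target zone's write lock is already held; a rough sketch with hypothetical helper names:

    static blk_status_t example_queue_rq(struct blk_mq_hw_ctx *hctx,
                                         const struct blk_mq_queue_data *bd)
    {
            struct request *rq = bd->rq;

            /* Hypothetical helper: try to take the zone write lock. */
            if (!example_zone_trylock(rq))
                    return BLK_STS_ZONE_RESOURCE;   /* requeued; hctx needs a restart */

            return example_dispatch(rq);            /* hypothetical dispatch */
    }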
Signed-off-by: Naohiro Aota Reviewed-by: Christoph Hellwig Cc: Niklas Cassel Link: https://lore.kernel.org/r/20211026165127.4151055-1-naohiro.aota@wdc.com Signed-off-by: Jens Axboe --- block/blk-mq.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/block/blk-mq.c b/block/blk-mq.c index bc026372de43..652a31fc3bb3 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -1325,6 +1325,7 @@ bool blk_mq_dispatch_rq_list(struct blk_mq_hw_ctx *hctx, struct list_head *list, int errors, queued; blk_status_t ret = BLK_STS_OK; LIST_HEAD(zone_list); + bool needs_resource = false; if (list_empty(list)) return false; @@ -1370,6 +1371,8 @@ bool blk_mq_dispatch_rq_list(struct blk_mq_hw_ctx *hctx, struct list_head *list, queued++; break; case BLK_STS_RESOURCE: + needs_resource = true; + fallthrough; case BLK_STS_DEV_RESOURCE: blk_mq_handle_dev_resource(rq, list); goto out; @@ -1380,6 +1383,7 @@ bool blk_mq_dispatch_rq_list(struct blk_mq_hw_ctx *hctx, struct list_head *list, * accept. */ blk_mq_handle_zone_resource(rq, &zone_list); + needs_resource = true; break; default: errors++; @@ -1406,7 +1410,6 @@ out: /* For non-shared tags, the RESTART check will suffice */ bool no_tag = prep == PREP_DISPATCH_NO_TAG && (hctx->flags & BLK_MQ_F_TAG_QUEUE_SHARED); - bool no_budget_avail = prep == PREP_DISPATCH_NO_BUDGET; if (nr_budgets) blk_mq_release_budgets(q, list); @@ -1447,14 +1450,16 @@ out: * If driver returns BLK_STS_RESOURCE and SCHED_RESTART * bit is set, run queue after a delay to avoid IO stalls * that could otherwise occur if the queue is idle. We'll do - * similar if we couldn't get budget and SCHED_RESTART is set. + * similar if we couldn't get budget or couldn't lock a zone + * and SCHED_RESTART is set. */ needs_restart = blk_mq_sched_needs_restart(hctx); + if (prep == PREP_DISPATCH_NO_BUDGET) + needs_resource = true; if (!needs_restart || (no_tag && list_empty_careful(&hctx->dispatch_wait.entry))) blk_mq_run_hw_queue(hctx, true); - else if (needs_restart && (ret == BLK_STS_RESOURCE || - no_budget_avail)) + else if (needs_restart && needs_resource) blk_mq_delay_run_hw_queue(hctx, BLK_MQ_RESOURCE_DELAY); blk_mq_update_dispatch_busy(hctx, true); -- cgit v1.2.3 From ce7723e9cdae4eb3030da082876580f4b2dc0861 Mon Sep 17 00:00:00 2001 From: Varun Prakash Date: Tue, 26 Oct 2021 19:01:55 +0530 Subject: nvme-tcp: fix possible req->offset corruption With commit db5ad6b7f8cd ("nvme-tcp: try to send request in queue_rq context") r2t and response PDU can get processed while send function is executing. Current data digest send code uses req->offset after kernel_sendmsg(), this creates a race condition where req->offset gets reset before it is used in send function. This can happen in two cases - 1. Target sends r2t PDU which resets req->offset. 2. Target send response PDU which completes the req and then req is used for a new command, nvme_tcp_setup_cmd_pdu() resets req->offset. Fix this by storing req->offset in a local variable and using this local variable after kernel_sendmsg(). 
Fixes: db5ad6b7f8cd ("nvme-tcp: try to send request in queue_rq context") Signed-off-by: Varun Prakash Reviewed-by: Keith Busch Reviewed-by: Sagi Grimberg Signed-off-by: Christoph Hellwig --- drivers/nvme/host/tcp.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c index 0626d14e6d4c..1a209f0d7181 100644 --- a/drivers/nvme/host/tcp.c +++ b/drivers/nvme/host/tcp.c @@ -1050,6 +1050,7 @@ static int nvme_tcp_try_send_data_pdu(struct nvme_tcp_request *req) static int nvme_tcp_try_send_ddgst(struct nvme_tcp_request *req) { struct nvme_tcp_queue *queue = req->queue; + size_t offset = req->offset; int ret; struct msghdr msg = { .msg_flags = MSG_DONTWAIT }; struct kvec iov = { @@ -1066,7 +1067,7 @@ static int nvme_tcp_try_send_ddgst(struct nvme_tcp_request *req) if (unlikely(ret <= 0)) return ret; - if (req->offset + ret == NVME_TCP_DIGEST_LENGTH) { + if (offset + ret == NVME_TCP_DIGEST_LENGTH) { nvme_tcp_done_send_req(queue); return 1; } -- cgit v1.2.3 From d89b9f3bbb58e9e378881209756b0723694f22ff Mon Sep 17 00:00:00 2001 From: Varun Prakash Date: Mon, 25 Oct 2021 22:47:30 +0530 Subject: nvme-tcp: fix data digest pointer calculation ddgst is of type __le32, &req->ddgst + req->offset increases &req->ddgst by 4 * req->offset, fix this by type casting &req->ddgst to u8 *. Fixes: 3f2304f8c6d6 ("nvme-tcp: add NVMe over TCP host driver") Signed-off-by: Varun Prakash Reviewed-by: Sagi Grimberg Signed-off-by: Christoph Hellwig --- drivers/nvme/host/tcp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c index 1a209f0d7181..4ae562d30d2b 100644 --- a/drivers/nvme/host/tcp.c +++ b/drivers/nvme/host/tcp.c @@ -1054,7 +1054,7 @@ static int nvme_tcp_try_send_ddgst(struct nvme_tcp_request *req) int ret; struct msghdr msg = { .msg_flags = MSG_DONTWAIT }; struct kvec iov = { - .iov_base = &req->ddgst + req->offset, + .iov_base = (u8 *)&req->ddgst + req->offset, .iov_len = NVME_TCP_DIGEST_LENGTH - req->offset }; -- cgit v1.2.3 From e790de54e94a7a15fb725b34724d41d41cbaa60c Mon Sep 17 00:00:00 2001 From: Varun Prakash Date: Mon, 25 Oct 2021 22:46:54 +0530 Subject: nvmet-tcp: fix data digest pointer calculation exp_ddgst is of type __le32, &cmd->exp_ddgst + cmd->offset increases &cmd->exp_ddgst by 4 * cmd->offset, fix this by type casting &cmd->exp_ddgst to u8 *. Fixes: 872d26a391da ("nvmet-tcp: add NVMe over TCP target driver") Signed-off-by: Varun Prakash Reviewed-by: Sagi Grimberg Signed-off-by: Christoph Hellwig --- drivers/nvme/target/tcp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/nvme/target/tcp.c b/drivers/nvme/target/tcp.c index c33a0464346f..586ca20837e7 100644 --- a/drivers/nvme/target/tcp.c +++ b/drivers/nvme/target/tcp.c @@ -702,7 +702,7 @@ static int nvmet_try_send_ddgst(struct nvmet_tcp_cmd *cmd, bool last_in_batch) struct nvmet_tcp_queue *queue = cmd->queue; struct msghdr msg = { .msg_flags = MSG_DONTWAIT }; struct kvec iov = { - .iov_base = &cmd->exp_ddgst + cmd->offset, + .iov_base = (u8 *)&cmd->exp_ddgst + cmd->offset, .iov_len = NVME_TCP_DIGEST_LENGTH - cmd->offset }; int ret; -- cgit v1.2.3 From 86aeda32b887cdaeb0f4b7bfc9971e36377181c7 Mon Sep 17 00:00:00 2001 From: Amit Engel Date: Wed, 27 Oct 2021 09:49:27 +0300 Subject: nvmet-tcp: fix header digest verification Pass the correct length to nvmet_tcp_verify_hdgst, which is the pdu header length. 
This fixes a wrong behaviour where header digest verification passes although the digest is wrong. Signed-off-by: Amit Engel Reviewed-by: Sagi Grimberg Signed-off-by: Christoph Hellwig --- drivers/nvme/target/tcp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/nvme/target/tcp.c b/drivers/nvme/target/tcp.c index 586ca20837e7..46c3b3be7e03 100644 --- a/drivers/nvme/target/tcp.c +++ b/drivers/nvme/target/tcp.c @@ -1096,7 +1096,7 @@ recv: } if (queue->hdr_digest && - nvmet_tcp_verify_hdgst(queue, &queue->pdu, queue->offset)) { + nvmet_tcp_verify_hdgst(queue, &queue->pdu, hdr->hlen)) { nvmet_tcp_fatal_error(queue); /* fatal */ return -EPROTO; } -- cgit v1.2.3 From 3a60f6537c9adf0828bf1dd868c59f659395257e Mon Sep 17 00:00:00 2001 From: David Sterba Date: Wed, 27 Oct 2021 10:39:03 +0200 Subject: Revert "btrfs: compression: drop kmap/kunmap from generic helpers" This reverts commit 4c2bf276b56d8d27ddbafcdf056ef3fc60ae50b0. The kmaps in compression code are still needed and cause crashes on 32bit machines (ARM, x86). Reproducible eg. by running fstest btrfs/004 with enabled LZO or ZSTD compression. Link: https://lore.kernel.org/all/CAJCQCtT+OuemovPO7GZk8Y8=qtOObr0XTDp8jh4OHD6y84AFxw@mail.gmail.com/ Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=214839 Signed-off-by: David Sterba --- fs/btrfs/compression.c | 3 ++- fs/btrfs/inode.c | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c index 7869ad12bc6e..0913ee50e6c3 100644 --- a/fs/btrfs/compression.c +++ b/fs/btrfs/compression.c @@ -172,9 +172,10 @@ static int check_compressed_csum(struct btrfs_inode *inode, struct bio *bio, /* Hash through the page sector by sector */ for (pg_offset = 0; pg_offset < bytes_left; pg_offset += sectorsize) { - kaddr = page_address(page); + kaddr = kmap_atomic(page); crypto_shash_digest(shash, kaddr + pg_offset, sectorsize, csum); + kunmap_atomic(kaddr); if (memcmp(&csum, cb_sum, csum_size) != 0) { btrfs_print_data_csum_error(inode, disk_start, diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 2aa9646bce56..e6772f55943e 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -287,8 +287,9 @@ static int insert_inline_extent(struct btrfs_trans_handle *trans, cur_size = min_t(unsigned long, compressed_size, PAGE_SIZE); - kaddr = page_address(cpage); + kaddr = kmap_atomic(cpage); write_extent_buffer(leaf, kaddr, ptr, cur_size); + kunmap_atomic(kaddr); i++; ptr += cur_size; -- cgit v1.2.3 From e0c60d0102a5ad3475401e1a2faa3d3623eefce4 Mon Sep 17 00:00:00 2001 From: Shin'ichiro Kawasaki Date: Tue, 26 Oct 2021 15:01:15 +0900 Subject: block: Fix partition check for host-aware zoned block devices Commit a33df75c6328 ("block: use an xarray for disk->part_tbl") modified the method to check partition existence in host-aware zoned block devices from disk_has_partitions() helper function call to empty check of xarray disk->part_tbl. However, disk->part_tbl always has single entry for disk->part0 and never becomes empty. This resulted in the host-aware zoned devices always judged to have partitions, and it made the sysfs queue/zoned attribute to be "none" instead of "host-aware" regardless of partition existence in the devices. This also caused DEBUG_LOCKS_WARN_ON(lock->magic != lock) for sdkp->rev_mutex in scsi layer when the kernel detects host-aware zoned device. Since block layer handled the host-aware zoned devices as non- zoned devices, scsi layer did not have chance to initialize the mutex for zone revalidation. 
Therefore, the warning was triggered. To fix the issues, call the helper function disk_has_partitions() in place of disk->part_tbl empty check. Since the function was removed with the commit a33df75c6328, reimplement it to walk through entries in the xarray disk->part_tbl. Fixes: a33df75c6328 ("block: use an xarray for disk->part_tbl") Signed-off-by: Shin'ichiro Kawasaki Cc: stable@vger.kernel.org # v5.14+ Reviewed-by: Johannes Thumshirn Reviewed-by: Christoph Hellwig Link: https://lore.kernel.org/r/20211026060115.753746-1-shinichiro.kawasaki@wdc.com Signed-off-by: Jens Axboe --- block/blk-settings.c | 20 +++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) diff --git a/block/blk-settings.c b/block/blk-settings.c index a7c857ad7d10..b880c70e22e4 100644 --- a/block/blk-settings.c +++ b/block/blk-settings.c @@ -842,6 +842,24 @@ bool blk_queue_can_use_dma_map_merging(struct request_queue *q, } EXPORT_SYMBOL_GPL(blk_queue_can_use_dma_map_merging); +static bool disk_has_partitions(struct gendisk *disk) +{ + unsigned long idx; + struct block_device *part; + bool ret = false; + + rcu_read_lock(); + xa_for_each(&disk->part_tbl, idx, part) { + if (bdev_is_partition(part)) { + ret = true; + break; + } + } + rcu_read_unlock(); + + return ret; +} + /** * blk_queue_set_zoned - configure a disk queue zoned model. * @disk: the gendisk of the queue to configure @@ -876,7 +894,7 @@ void blk_queue_set_zoned(struct gendisk *disk, enum blk_zoned_model model) * we do nothing special as far as the block layer is concerned. */ if (!IS_ENABLED(CONFIG_BLK_DEV_ZONED) || - !xa_empty(&disk->part_tbl)) + disk_has_partitions(disk)) model = BLK_ZONED_NONE; break; case BLK_ZONED_NONE: -- cgit v1.2.3 From 3bda2e5df476417b6d08967e2d84234a59d57b1c Mon Sep 17 00:00:00 2001 From: Guangbin Huang Date: Wed, 27 Oct 2021 20:11:43 +0800 Subject: net: hns3: fix pause config problem after autoneg disabled If a TP port is configured by follow steps: 1.ethtool -s ethx autoneg off speed 100 duplex full 2.ethtool -A ethx rx on tx on 3.ethtool -s ethx autoneg on(rx&tx negotiated pause results are off) 4.ethtool -s ethx autoneg off speed 100 duplex full In step 3, driver will set rx&tx pause parameters of hardware to off as pause parameters negotiated with link partner are off. After step 4, the "ethtool -a ethx" command shows both rx and tx pause parameters are on. However, pause parameters of hardware are still off and port has no flow control function actually. To fix this problem, if autoneg is disabled, driver uses its saved parameters to restore pause of hardware. If the speed is not changed in this case, there is no link state changed for phy, it will cause the pause parameter is not taken effect, so we need to force phy to go down and up. Fixes: aacbe27e82f0 ("net: hns3: modify how pause options is displayed") Signed-off-by: Guangbin Huang Signed-off-by: David S. 
Miller --- drivers/net/ethernet/hisilicon/hns3/hnae3.h | 1 + drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c | 33 ++++++++++++++++------ .../ethernet/hisilicon/hns3/hns3pf/hclge_main.c | 30 ++++++++++++++++++++ .../net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c | 2 +- .../net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.h | 1 + 5 files changed, 57 insertions(+), 10 deletions(-) diff --git a/drivers/net/ethernet/hisilicon/hns3/hnae3.h b/drivers/net/ethernet/hisilicon/hns3/hnae3.h index d701451596c8..da3a593f6a56 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hnae3.h +++ b/drivers/net/ethernet/hisilicon/hns3/hnae3.h @@ -568,6 +568,7 @@ struct hnae3_ae_ops { u32 *auto_neg, u32 *rx_en, u32 *tx_en); int (*set_pauseparam)(struct hnae3_handle *handle, u32 auto_neg, u32 rx_en, u32 tx_en); + int (*restore_pauseparam)(struct hnae3_handle *handle); int (*set_autoneg)(struct hnae3_handle *handle, bool enable); int (*get_autoneg)(struct hnae3_handle *handle); diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c b/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c index 5ebd96f6833d..7d92dd273ed7 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c @@ -824,6 +824,26 @@ static int hns3_check_ksettings_param(const struct net_device *netdev, return 0; } +static int hns3_set_phy_link_ksettings(struct net_device *netdev, + const struct ethtool_link_ksettings *cmd) +{ + struct hnae3_handle *handle = hns3_get_handle(netdev); + const struct hnae3_ae_ops *ops = handle->ae_algo->ops; + int ret; + + if (cmd->base.speed == SPEED_1000 && + cmd->base.autoneg == AUTONEG_DISABLE) + return -EINVAL; + + if (cmd->base.autoneg == AUTONEG_DISABLE && ops->restore_pauseparam) { + ret = ops->restore_pauseparam(handle); + if (ret) + return ret; + } + + return phy_ethtool_ksettings_set(netdev->phydev, cmd); +} + static int hns3_set_link_ksettings(struct net_device *netdev, const struct ethtool_link_ksettings *cmd) { @@ -842,16 +862,11 @@ static int hns3_set_link_ksettings(struct net_device *netdev, cmd->base.autoneg, cmd->base.speed, cmd->base.duplex); /* Only support ksettings_set for netdev with phy attached for now */ - if (netdev->phydev) { - if (cmd->base.speed == SPEED_1000 && - cmd->base.autoneg == AUTONEG_DISABLE) - return -EINVAL; - - return phy_ethtool_ksettings_set(netdev->phydev, cmd); - } else if (test_bit(HNAE3_DEV_SUPPORT_PHY_IMP_B, ae_dev->caps) && - ops->set_phy_link_ksettings) { + if (netdev->phydev) + return hns3_set_phy_link_ksettings(netdev, cmd); + else if (test_bit(HNAE3_DEV_SUPPORT_PHY_IMP_B, ae_dev->caps) && + ops->set_phy_link_ksettings) return ops->set_phy_link_ksettings(handle, cmd); - } if (ae_dev->dev_version < HNAE3_DEVICE_VERSION_V2) return -EOPNOTSUPP; diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c index dcd40cc73082..c6b9806c75a5 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c @@ -11021,6 +11021,35 @@ static int hclge_set_pauseparam(struct hnae3_handle *handle, u32 auto_neg, return -EOPNOTSUPP; } +static int hclge_restore_pauseparam(struct hnae3_handle *handle) +{ + struct hclge_vport *vport = hclge_get_vport(handle); + struct hclge_dev *hdev = vport->back; + u32 auto_neg, rx_pause, tx_pause; + int ret; + + hclge_get_pauseparam(handle, &auto_neg, &rx_pause, &tx_pause); + /* when autoneg is disabled, the pause setting of phy has no effect + * unless the link 
goes down. + */ + ret = phy_suspend(hdev->hw.mac.phydev); + if (ret) + return ret; + + phy_set_asym_pause(hdev->hw.mac.phydev, rx_pause, tx_pause); + + ret = phy_resume(hdev->hw.mac.phydev); + if (ret) + return ret; + + ret = hclge_mac_pause_setup_hw(hdev); + if (ret) + dev_err(&hdev->pdev->dev, + "restore pauseparam error, ret = %d.\n", ret); + + return ret; +} + static void hclge_get_ksettings_an_result(struct hnae3_handle *handle, u8 *auto_neg, u32 *speed, u8 *duplex) { @@ -12984,6 +13013,7 @@ static const struct hnae3_ae_ops hclge_ops = { .halt_autoneg = hclge_halt_autoneg, .get_pauseparam = hclge_get_pauseparam, .set_pauseparam = hclge_set_pauseparam, + .restore_pauseparam = hclge_restore_pauseparam, .set_mtu = hclge_set_mtu, .reset_queue = hclge_reset_tqp, .get_stats = hclge_get_stats, diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c index 95074e91a846..124791e4bfee 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c @@ -1435,7 +1435,7 @@ static int hclge_bp_setup_hw(struct hclge_dev *hdev, u8 tc) return 0; } -static int hclge_mac_pause_setup_hw(struct hclge_dev *hdev) +int hclge_mac_pause_setup_hw(struct hclge_dev *hdev) { bool tx_en, rx_en; diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.h index 2ee9b795f71d..4b2c3a788980 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.h +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.h @@ -244,6 +244,7 @@ int hclge_tm_get_pri_weight(struct hclge_dev *hdev, u8 pri_id, u8 *weight); int hclge_tm_get_pri_shaper(struct hclge_dev *hdev, u8 pri_id, enum hclge_opcode_type cmd, struct hclge_tm_shaper_para *para); +int hclge_mac_pause_setup_hw(struct hclge_dev *hdev); int hclge_tm_get_q_to_qs_map(struct hclge_dev *hdev, u16 q_id, u16 *qset_id); int hclge_tm_get_q_to_tc(struct hclge_dev *hdev, u16 q_id, u8 *tc_id); int hclge_tm_get_pg_to_pri_map(struct hclge_dev *hdev, u8 pg_id, -- cgit v1.2.3 From f29da4088fb4eeba457219a931327d1d5f45196a Mon Sep 17 00:00:00 2001 From: Yufeng Mo Date: Wed, 27 Oct 2021 20:11:44 +0800 Subject: net: hns3: change hclge/hclgevf workqueue to WQ_UNBOUND mode Currently, the workqueue of hclge/hclgevf is executed on the CPU that initiates scheduling requests by default. In stress scenarios, the CPU may be busy and workqueue scheduling is completed after a long period of time. To avoid this situation and implement proper scheduling, use the WQ_UNBOUND mode instead. In this way, the workqueue can be performed on a relatively idle CPU. Signed-off-by: Yufeng Mo Signed-off-by: Guangbin Huang Signed-off-by: David S. 
Miller --- .../ethernet/hisilicon/hns3/hns3pf/hclge_main.c | 34 ++++------------------ .../ethernet/hisilicon/hns3/hns3pf/hclge_main.h | 1 - .../ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c | 2 +- 3 files changed, 6 insertions(+), 31 deletions(-) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c index c6b9806c75a5..3dbde0496545 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c @@ -2847,33 +2847,28 @@ static void hclge_mbx_task_schedule(struct hclge_dev *hdev) { if (!test_bit(HCLGE_STATE_REMOVING, &hdev->state) && !test_and_set_bit(HCLGE_STATE_MBX_SERVICE_SCHED, &hdev->state)) - mod_delayed_work_on(cpumask_first(&hdev->affinity_mask), - hclge_wq, &hdev->service_task, 0); + mod_delayed_work(hclge_wq, &hdev->service_task, 0); } static void hclge_reset_task_schedule(struct hclge_dev *hdev) { if (!test_bit(HCLGE_STATE_REMOVING, &hdev->state) && !test_and_set_bit(HCLGE_STATE_RST_SERVICE_SCHED, &hdev->state)) - mod_delayed_work_on(cpumask_first(&hdev->affinity_mask), - hclge_wq, &hdev->service_task, 0); + mod_delayed_work(hclge_wq, &hdev->service_task, 0); } static void hclge_errhand_task_schedule(struct hclge_dev *hdev) { if (!test_bit(HCLGE_STATE_REMOVING, &hdev->state) && !test_and_set_bit(HCLGE_STATE_ERR_SERVICE_SCHED, &hdev->state)) - mod_delayed_work_on(cpumask_first(&hdev->affinity_mask), - hclge_wq, &hdev->service_task, 0); + mod_delayed_work(hclge_wq, &hdev->service_task, 0); } void hclge_task_schedule(struct hclge_dev *hdev, unsigned long delay_time) { if (!test_bit(HCLGE_STATE_REMOVING, &hdev->state) && !test_bit(HCLGE_STATE_RST_FAIL, &hdev->state)) - mod_delayed_work_on(cpumask_first(&hdev->affinity_mask), - hclge_wq, &hdev->service_task, - delay_time); + mod_delayed_work(hclge_wq, &hdev->service_task, delay_time); } static int hclge_get_mac_link_status(struct hclge_dev *hdev, int *link_status) @@ -3491,33 +3486,14 @@ static void hclge_get_misc_vector(struct hclge_dev *hdev) hdev->num_msi_used += 1; } -static void hclge_irq_affinity_notify(struct irq_affinity_notify *notify, - const cpumask_t *mask) -{ - struct hclge_dev *hdev = container_of(notify, struct hclge_dev, - affinity_notify); - - cpumask_copy(&hdev->affinity_mask, mask); -} - -static void hclge_irq_affinity_release(struct kref *ref) -{ -} - static void hclge_misc_affinity_setup(struct hclge_dev *hdev) { irq_set_affinity_hint(hdev->misc_vector.vector_irq, &hdev->affinity_mask); - - hdev->affinity_notify.notify = hclge_irq_affinity_notify; - hdev->affinity_notify.release = hclge_irq_affinity_release; - irq_set_affinity_notifier(hdev->misc_vector.vector_irq, - &hdev->affinity_notify); } static void hclge_misc_affinity_teardown(struct hclge_dev *hdev) { - irq_set_affinity_notifier(hdev->misc_vector.vector_irq, NULL); irq_set_affinity_hint(hdev->misc_vector.vector_irq, NULL); } @@ -13082,7 +13058,7 @@ static int hclge_init(void) { pr_info("%s is initializing\n", HCLGE_NAME); - hclge_wq = alloc_workqueue("%s", 0, 0, HCLGE_NAME); + hclge_wq = alloc_workqueue("%s", WQ_UNBOUND, 0, HCLGE_NAME); if (!hclge_wq) { pr_err("%s: failed to create workqueue\n", HCLGE_NAME); return -ENOMEM; diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h index de6afbcbfbac..69cd8f87b4c8 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h @@ -944,7 
+944,6 @@ struct hclge_dev { /* affinity mask and notify for misc interrupt */ cpumask_t affinity_mask; - struct irq_affinity_notify affinity_notify; struct hclge_ptp *ptp; struct devlink *devlink; }; diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c index bef6b98e2f50..5efa5420297d 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c @@ -3899,7 +3899,7 @@ static int hclgevf_init(void) { pr_info("%s is initializing\n", HCLGEVF_NAME); - hclgevf_wq = alloc_workqueue("%s", 0, 0, HCLGEVF_NAME); + hclgevf_wq = alloc_workqueue("%s", WQ_UNBOUND, 0, HCLGEVF_NAME); if (!hclgevf_wq) { pr_err("%s: failed to create workqueue\n", HCLGEVF_NAME); return -ENOMEM; -- cgit v1.2.3 From 0251d196b0e1a19c870be882e5d4f496de8ab758 Mon Sep 17 00:00:00 2001 From: Guangbin Huang Date: Wed, 27 Oct 2021 20:11:45 +0800 Subject: net: hns3: ignore reset event before initialization process is done Currently, if there is a reset event triggered by RAS during device in initialization process, driver may run reset process concurrently with initialization process. In this case, it may cause problem. For example, the RSS indirection table may has not been alloc memory in initialization process yet, but it is used in reset process, it will cause a call trace like this: [61228.744836] Unable to handle kernel NULL pointer dereference at virtual address 0000000000000000 ... [61228.897677] Workqueue: hclgevf hclgevf_service_task [hclgevf] [61228.911390] pstate: 40400009 (nZcv daif +PAN -UAO -TCO BTYPE=--) [61228.918670] pc : hclgevf_set_rss_indir_table+0xb4/0x190 [hclgevf] [61228.927812] lr : hclgevf_set_rss_indir_table+0x90/0x190 [hclgevf] [61228.937248] sp : ffff8000162ebb50 [61228.941087] x29: ffff8000162ebb50 x28: ffffb77add72dbc0 x27: ffff0820c7dc8080 [61228.949516] x26: 0000000000000000 x25: ffff0820ad4fc880 x24: ffff0820c7dc8080 [61228.958220] x23: ffff0820c7dc8090 x22: 00000000ffffffff x21: 0000000000000040 [61228.966360] x20: ffffb77add72b9c0 x19: 0000000000000000 x18: 0000000000000030 [61228.974646] x17: 0000000000000000 x16: ffffb77ae713feb0 x15: ffff0820ad4fcce8 [61228.982808] x14: ffffffffffffffff x13: ffff8000962eb7f7 x12: 00003834ec70c960 [61228.991990] x11: 00e0fafa8c206982 x10: 9670facc78a8f9a8 x9 : ffffb77add717530 [61229.001123] x8 : ffff0820ad4fd6b8 x7 : 0000000000000000 x6 : 0000000000000011 [61229.010249] x5 : 00000000000cb1b0 x4 : 0000000000002adb x3 : 0000000000000049 [61229.018662] x2 : ffff8000162ebbb8 x1 : 0000000000000000 x0 : 0000000000000480 [61229.027002] Call trace: [61229.030177] hclgevf_set_rss_indir_table+0xb4/0x190 [hclgevf] [61229.039009] hclgevf_rss_init_hw+0x128/0x1b4 [hclgevf] [61229.046809] hclgevf_reset_rebuild+0x17c/0x69c [hclgevf] [61229.053862] hclgevf_reset_service_task+0x4cc/0xa80 [hclgevf] [61229.061306] hclgevf_service_task+0x6c/0x630 [hclgevf] [61229.068491] process_one_work+0x1dc/0x48c [61229.074121] worker_thread+0x15c/0x464 [61229.078562] kthread+0x168/0x16c [61229.082873] ret_from_fork+0x10/0x18 [61229.088221] Code: 7900e7f6 f904a683 d503201f 9101a3e2 (38616b43) [61229.095357] ---[ end trace 153661a538f6768c ]--- To fix this problem, don't schedule reset task before initialization process is done. Signed-off-by: Guangbin Huang Signed-off-by: David S. 
Miller --- drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c | 1 + drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c | 3 +++ drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.h | 1 + 3 files changed, 5 insertions(+) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c index 3dbde0496545..269e579762b2 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c @@ -2853,6 +2853,7 @@ static void hclge_mbx_task_schedule(struct hclge_dev *hdev) static void hclge_reset_task_schedule(struct hclge_dev *hdev) { if (!test_bit(HCLGE_STATE_REMOVING, &hdev->state) && + test_bit(HCLGE_STATE_SERVICE_INITED, &hdev->state) && !test_and_set_bit(HCLGE_STATE_RST_SERVICE_SCHED, &hdev->state)) mod_delayed_work(hclge_wq, &hdev->service_task, 0); } diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c index 5efa5420297d..cf00ad7bb881 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c @@ -2232,6 +2232,7 @@ static void hclgevf_get_misc_vector(struct hclgevf_dev *hdev) void hclgevf_reset_task_schedule(struct hclgevf_dev *hdev) { if (!test_bit(HCLGEVF_STATE_REMOVING, &hdev->state) && + test_bit(HCLGEVF_STATE_SERVICE_INITED, &hdev->state) && !test_and_set_bit(HCLGEVF_STATE_RST_SERVICE_SCHED, &hdev->state)) mod_delayed_work(hclgevf_wq, &hdev->service_task, 0); @@ -3449,6 +3450,8 @@ static int hclgevf_init_hdev(struct hclgevf_dev *hdev) hclgevf_init_rxd_adv_layout(hdev); + set_bit(HCLGEVF_STATE_SERVICE_INITED, &hdev->state); + hdev->last_reset_time = jiffies; dev_info(&hdev->pdev->dev, "finished initializing %s driver\n", HCLGEVF_DRIVER_NAME); diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.h b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.h index 883130a9b48f..28288d7e3303 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.h +++ b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.h @@ -146,6 +146,7 @@ enum hclgevf_states { HCLGEVF_STATE_REMOVING, HCLGEVF_STATE_NIC_REGISTERED, HCLGEVF_STATE_ROCE_REGISTERED, + HCLGEVF_STATE_SERVICE_INITED, /* task states */ HCLGEVF_STATE_RST_SERVICE_SCHED, HCLGEVF_STATE_RST_HANDLING, -- cgit v1.2.3 From 2a21dab594a98c338c4bfbc31864cbca15888549 Mon Sep 17 00:00:00 2001 From: Jie Wang Date: Wed, 27 Oct 2021 20:11:46 +0800 Subject: net: hns3: fix data endian problem of some functions of debugfs The member data in struct hclge_desc is type of __le32, it needs endian conversion before using it, and some functions of debugfs didn't do that, so this patch fixes it. Fixes: c0ebebb9ccc1 ("net: hns3: Add "dcb register" status information query function") Signed-off-by: Jie Wang Signed-off-by: Guangbin Huang Signed-off-by: David S. 
Miller --- .../ethernet/hisilicon/hns3/hns3pf/hclge_debugfs.c | 30 ++++++++++------------ 1 file changed, 14 insertions(+), 16 deletions(-) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_debugfs.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_debugfs.c index 32f62cd2dd99..9cda8b3562b8 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_debugfs.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_debugfs.c @@ -391,7 +391,7 @@ static int hclge_dbg_dump_mac(struct hclge_dev *hdev, char *buf, int len) static int hclge_dbg_dump_dcb_qset(struct hclge_dev *hdev, char *buf, int len, int *pos) { - struct hclge_dbg_bitmap_cmd *bitmap; + struct hclge_dbg_bitmap_cmd req; struct hclge_desc desc; u16 qset_id, qset_num; int ret; @@ -408,12 +408,12 @@ static int hclge_dbg_dump_dcb_qset(struct hclge_dev *hdev, char *buf, int len, if (ret) return ret; - bitmap = (struct hclge_dbg_bitmap_cmd *)&desc.data[1]; + req.bitmap = (u8)le32_to_cpu(desc.data[1]); *pos += scnprintf(buf + *pos, len - *pos, "%04u %#x %#x %#x %#x\n", - qset_id, bitmap->bit0, bitmap->bit1, - bitmap->bit2, bitmap->bit3); + qset_id, req.bit0, req.bit1, req.bit2, + req.bit3); } return 0; @@ -422,7 +422,7 @@ static int hclge_dbg_dump_dcb_qset(struct hclge_dev *hdev, char *buf, int len, static int hclge_dbg_dump_dcb_pri(struct hclge_dev *hdev, char *buf, int len, int *pos) { - struct hclge_dbg_bitmap_cmd *bitmap; + struct hclge_dbg_bitmap_cmd req; struct hclge_desc desc; u8 pri_id, pri_num; int ret; @@ -439,12 +439,11 @@ static int hclge_dbg_dump_dcb_pri(struct hclge_dev *hdev, char *buf, int len, if (ret) return ret; - bitmap = (struct hclge_dbg_bitmap_cmd *)&desc.data[1]; + req.bitmap = (u8)le32_to_cpu(desc.data[1]); *pos += scnprintf(buf + *pos, len - *pos, "%03u %#x %#x %#x\n", - pri_id, bitmap->bit0, bitmap->bit1, - bitmap->bit2); + pri_id, req.bit0, req.bit1, req.bit2); } return 0; @@ -453,7 +452,7 @@ static int hclge_dbg_dump_dcb_pri(struct hclge_dev *hdev, char *buf, int len, static int hclge_dbg_dump_dcb_pg(struct hclge_dev *hdev, char *buf, int len, int *pos) { - struct hclge_dbg_bitmap_cmd *bitmap; + struct hclge_dbg_bitmap_cmd req; struct hclge_desc desc; u8 pg_id; int ret; @@ -466,12 +465,11 @@ static int hclge_dbg_dump_dcb_pg(struct hclge_dev *hdev, char *buf, int len, if (ret) return ret; - bitmap = (struct hclge_dbg_bitmap_cmd *)&desc.data[1]; + req.bitmap = (u8)le32_to_cpu(desc.data[1]); *pos += scnprintf(buf + *pos, len - *pos, "%03u %#x %#x %#x\n", - pg_id, bitmap->bit0, bitmap->bit1, - bitmap->bit2); + pg_id, req.bit0, req.bit1, req.bit2); } return 0; @@ -511,7 +509,7 @@ static int hclge_dbg_dump_dcb_queue(struct hclge_dev *hdev, char *buf, int len, static int hclge_dbg_dump_dcb_port(struct hclge_dev *hdev, char *buf, int len, int *pos) { - struct hclge_dbg_bitmap_cmd *bitmap; + struct hclge_dbg_bitmap_cmd req; struct hclge_desc desc; u8 port_id = 0; int ret; @@ -521,12 +519,12 @@ static int hclge_dbg_dump_dcb_port(struct hclge_dev *hdev, char *buf, int len, if (ret) return ret; - bitmap = (struct hclge_dbg_bitmap_cmd *)&desc.data[1]; + req.bitmap = (u8)le32_to_cpu(desc.data[1]); *pos += scnprintf(buf + *pos, len - *pos, "port_mask: %#x\n", - bitmap->bit0); + req.bit0); *pos += scnprintf(buf + *pos, len - *pos, "port_shaping_pass: %#x\n", - bitmap->bit1); + req.bit1); return 0; } -- cgit v1.2.3 From 6754614a787cbcbf87bae8a75619c24a33ea6791 Mon Sep 17 00:00:00 2001 From: Jie Wang Date: Wed, 27 Oct 2021 20:11:47 +0800 Subject: net: hns3: add more string spaces for dumping packets number of 
queue info in debugfs As the width of packets number registers is 32 bits, they needs at most 10 characters for decimal data printing, but now the string spaces is not enough, so this patch fixes it. Fixes: e44c495d95e ("net: hns3: refactor queue info of debugfs") Signed-off-by: Jie Wang Signed-off-by: Guangbin Huang Signed-off-by: David S. Miller --- drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c b/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c index 2b66c59f5eaf..3aaad96d0176 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c @@ -462,7 +462,7 @@ static const struct hns3_dbg_item rx_queue_info_items[] = { { "TAIL", 2 }, { "HEAD", 2 }, { "FBDNUM", 2 }, - { "PKTNUM", 2 }, + { "PKTNUM", 5 }, { "COPYBREAK", 2 }, { "RING_EN", 2 }, { "RX_RING_EN", 2 }, @@ -565,7 +565,7 @@ static const struct hns3_dbg_item tx_queue_info_items[] = { { "HEAD", 2 }, { "FBDNUM", 2 }, { "OFFSET", 2 }, - { "PKTNUM", 2 }, + { "PKTNUM", 5 }, { "RING_EN", 2 }, { "TX_RING_EN", 2 }, { "BASE_ADDR", 10 }, -- cgit v1.2.3 From c7a6e3978ea952efb107ecf511c095c3bbb2945f Mon Sep 17 00:00:00 2001 From: Guangbin Huang Date: Wed, 27 Oct 2021 20:11:48 +0800 Subject: net: hns3: expand buffer len for some debugfs command The specified buffer length for three debugfs files fd_tcam, uc and tqp is not enough for their maximum needs, so this patch fixes them. Fixes: b5a0b70d77b9 ("net: hns3: refactor dump fd tcam of debugfs") Fixes: 1556ea9120ff ("net: hns3: refactor dump mac list of debugfs") Fixes: d96b0e59468d ("net: hns3: refactor dump reg of debugfs") Signed-off-by: Guangbin Huang Signed-off-by: David S. Miller --- drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c b/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c index 3aaad96d0176..f2ade0446208 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c @@ -137,7 +137,7 @@ static struct hns3_dbg_cmd_info hns3_dbg_cmd[] = { .name = "uc", .cmd = HNAE3_DBG_CMD_MAC_UC, .dentry = HNS3_DBG_DENTRY_MAC, - .buf_len = HNS3_DBG_READ_LEN, + .buf_len = HNS3_DBG_READ_LEN_128KB, .init = hns3_dbg_common_file_init, }, { @@ -256,7 +256,7 @@ static struct hns3_dbg_cmd_info hns3_dbg_cmd[] = { .name = "tqp", .cmd = HNAE3_DBG_CMD_REG_TQP, .dentry = HNS3_DBG_DENTRY_REG, - .buf_len = HNS3_DBG_READ_LEN, + .buf_len = HNS3_DBG_READ_LEN_128KB, .init = hns3_dbg_common_file_init, }, { @@ -298,7 +298,7 @@ static struct hns3_dbg_cmd_info hns3_dbg_cmd[] = { .name = "fd_tcam", .cmd = HNAE3_DBG_CMD_FD_TCAM, .dentry = HNS3_DBG_DENTRY_FD, - .buf_len = HNS3_DBG_READ_LEN, + .buf_len = HNS3_DBG_READ_LEN_1MB, .init = hns3_dbg_common_file_init, }, { -- cgit v1.2.3 From 630a6738da82e2c29e1c38eafd6f8328e0a4963c Mon Sep 17 00:00:00 2001 From: Guangbin Huang Date: Wed, 27 Oct 2021 20:11:49 +0800 Subject: net: hns3: adjust string spaces of some parameters of tx bd info in debugfs This patch adjusts the string spaces of some parameters of tx bd info in debugfs according to their maximum needs. Signed-off-by: Guangbin Huang Signed-off-by: David S. 
Miller --- drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c b/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c index f2ade0446208..e54f96251fea 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c @@ -790,13 +790,13 @@ static int hns3_dbg_rx_bd_info(struct hns3_dbg_data *d, char *buf, int len) } static const struct hns3_dbg_item tx_bd_info_items[] = { - { "BD_IDX", 5 }, - { "ADDRESS", 2 }, + { "BD_IDX", 2 }, + { "ADDRESS", 13 }, { "VLAN_TAG", 2 }, { "SIZE", 2 }, { "T_CS_VLAN_TSO", 2 }, { "OT_VLAN_TAG", 3 }, - { "TV", 2 }, + { "TV", 5 }, { "OLT_VLAN_LEN", 2 }, { "PAYLEN_OL4CS", 2 }, { "BD_FE_SC_VLD", 2 }, -- cgit v1.2.3 From 4a089e95b4d6bb625044d47aed0c442a8f7bd093 Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Wed, 20 Oct 2021 12:11:16 -0700 Subject: nios2: Make NIOS2_DTB_SOURCE_BOOL depend on !COMPILE_TEST nios2:allmodconfig builds fail with make[1]: *** No rule to make target 'arch/nios2/boot/dts/""', needed by 'arch/nios2/boot/dts/built-in.a'. Stop. make: [Makefile:1868: arch/nios2/boot/dts] Error 2 (ignored) This is seen with compile tests since those enable NIOS2_DTB_SOURCE_BOOL, which in turn enables NIOS2_DTB_SOURCE. This causes the build error because the default value for NIOS2_DTB_SOURCE is an empty string. Disable NIOS2_DTB_SOURCE_BOOL for compile tests to avoid the error. Fixes: 2fc8483fdcde ("nios2: Build infrastructure") Signed-off-by: Guenter Roeck Reviewed-by: Randy Dunlap Signed-off-by: Dinh Nguyen --- arch/nios2/platform/Kconfig.platform | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/nios2/platform/Kconfig.platform b/arch/nios2/platform/Kconfig.platform index 9e32fb7f3d4c..e849daff6fd1 100644 --- a/arch/nios2/platform/Kconfig.platform +++ b/arch/nios2/platform/Kconfig.platform @@ -37,6 +37,7 @@ config NIOS2_DTB_PHYS_ADDR config NIOS2_DTB_SOURCE_BOOL bool "Compile and link device tree into kernel image" + depends on !COMPILE_TEST help This allows you to specify a dts (device tree source) file which will be compiled and linked into the kernel image. -- cgit v1.2.3 From 9b0971ca7fc75daca80c0bb6c02e96059daea90a Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Mon, 25 Oct 2021 12:14:31 -0400 Subject: KVM: SEV-ES: fix another issue with string I/O VMGEXITs If the guest requests string I/O from the hypervisor via VMGEXIT, SW_EXITINFO2 will contain the REP count. However, sev_es_string_io was incorrectly treating it as the size of the GHCB buffer in bytes. This fixes the "outsw" test in the experimental SEV tests of kvm-unit-tests. 
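The conversion the fix performs — treat SW_EXITINFO2 as a REP count, bound it, and turn it into a byte length with an explicit overflow check — can be sketched as a small standalone helper. The code below is an illustration only: the function name and values are invented, and __builtin_mul_overflow() is the GCC/Clang builtin that the kernel's check_mul_overflow() typically maps to.

#include <limits.h>
#include <stdio.h>

/* Minimal stand-in for the scratch-area sizing: the guest supplies a
 * REP count, not a byte count, so bound it and multiply carefully. */
static int rep_count_to_bytes(unsigned long long exit_info_2, int size, int *bytes)
{
    int count;

    if (exit_info_2 > INT_MAX)          /* guest-controlled, so bound it first */
        return -1;

    count = (int)exit_info_2;
    if (__builtin_mul_overflow(count, size, bytes))
        return -1;                      /* count * size does not fit in an int */

    return 0;
}

int main(void)
{
    int bytes;

    if (rep_count_to_bytes(512, 2, &bytes) == 0)    /* e.g. 512 reps of "outsw" */
        printf("scratch area needs %d bytes\n", bytes);

    return 0;
}

With that split, only the scratch-area setup needs the byte total, while the string-I/O emulation itself consumes the REP count, as the diff below shows.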
Cc: stable@vger.kernel.org Fixes: 7ed9abfe8e9f ("KVM: SVM: Support string IO operations for an SEV-ES guest") Reported-by: Marc Orr Tested-by: Marc Orr Reviewed-by: Marc Orr Signed-off-by: Paolo Bonzini --- arch/x86/kvm/svm/sev.c | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c index e672493b5d8d..efd207fd335e 100644 --- a/arch/x86/kvm/svm/sev.c +++ b/arch/x86/kvm/svm/sev.c @@ -2579,11 +2579,20 @@ int sev_handle_vmgexit(struct kvm_vcpu *vcpu) int sev_es_string_io(struct vcpu_svm *svm, int size, unsigned int port, int in) { - if (!setup_vmgexit_scratch(svm, in, svm->vmcb->control.exit_info_2)) + int count; + int bytes; + + if (svm->vmcb->control.exit_info_2 > INT_MAX) + return -EINVAL; + + count = svm->vmcb->control.exit_info_2; + if (unlikely(check_mul_overflow(count, size, &bytes))) + return -EINVAL; + + if (!setup_vmgexit_scratch(svm, in, bytes)) return -EINVAL; - return kvm_sev_es_string_io(&svm->vcpu, size, port, - svm->ghcb_sa, svm->ghcb_sa_len / size, in); + return kvm_sev_es_string_io(&svm->vcpu, size, port, svm->ghcb_sa, count, in); } void sev_es_init_vmcb(struct vcpu_svm *svm) -- cgit v1.2.3 From 4e84dc47bb48accbbeeba4e6bb3f31aa7895323c Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Wed, 27 Oct 2021 12:51:01 -0400 Subject: ftrace/nds32: Update the proto for ftrace_trace_function to match ftrace_stub The ftrace callback prototype was changed to pass a special ftrace_regs instead of pt_regs as the last parameter, but the static ftrace for nds32 missed updating ftrace_trace_function and this caused a warning when compared to ftrace_stub: ../arch/nds32/kernel/ftrace.c: In function '_mcount': ../arch/nds32/kernel/ftrace.c:24:35: error: comparison of distinct pointer types lacks a cast [-Werror] 24 | if (ftrace_trace_function != ftrace_stub) | ^~ Link: https://lore.kernel.org/all/20211027055554.19372-1-rdunlap@infradead.org/ Link: https://lkml.kernel.org/r/20211027125101.33449969@gandalf.local.home Cc: Ingo Molnar Cc: Andrew Morton Cc: Nick Hu Cc: Greentime Hu Cc: Vincent Chen Cc: Masami Hiramatsu Cc: Peter Zijlstra Cc: stable@vger.kernel.org Fixes: d19ad0775dcd6 ("ftrace: Have the callbacks receive a struct ftrace_regs instead of pt_regs") Reported-by: Randy Dunlap Signed-off-by: Steven Rostedt (VMware) --- arch/nds32/kernel/ftrace.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/nds32/kernel/ftrace.c b/arch/nds32/kernel/ftrace.c index 0e23e3a8df6b..d55b73b18149 100644 --- a/arch/nds32/kernel/ftrace.c +++ b/arch/nds32/kernel/ftrace.c @@ -6,7 +6,7 @@ #ifndef CONFIG_DYNAMIC_FTRACE extern void (*ftrace_trace_function)(unsigned long, unsigned long, - struct ftrace_ops*, struct pt_regs*); + struct ftrace_ops*, struct ftrace_regs*); extern void ftrace_graph_caller(void); noinline void __naked ftrace_stub(unsigned long ip, unsigned long parent_ip, -- cgit v1.2.3 From 6f7c88691191e6c52ef2543d6f1da8d360b27a24 Mon Sep 17 00:00:00 2001 From: Wang Hai Date: Tue, 26 Oct 2021 20:40:15 +0800 Subject: usbnet: fix error return code in usbnet_probe() Return error code if usb_maxpacket() returns 0 in usbnet_probe() Fixes: 397430b50a36 ("usbnet: sanity check for maxpacket") Reported-by: Hulk Robot Signed-off-by: Wang Hai Reviewed-by: Johan Hovold Link: https://lore.kernel.org/r/20211026124015.3025136-1-wanghai38@huawei.com Signed-off-by: Jakub Kicinski --- drivers/net/usb/usbnet.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/usb/usbnet.c b/drivers/net/usb/usbnet.c 
index 80432ee0ce69..a33d7fb82a00 100644 --- a/drivers/net/usb/usbnet.c +++ b/drivers/net/usb/usbnet.c @@ -1790,6 +1790,7 @@ usbnet_probe (struct usb_interface *udev, const struct usb_device_id *prod) dev->maxpacket = usb_maxpacket (dev->udev, dev->out, 1); if (dev->maxpacket == 0) { /* that is a broken device */ + status = -ENODEV; goto out4; } -- cgit v1.2.3 From 890d33561337ffeba0d8ba42517e71288cfee2b6 Mon Sep 17 00:00:00 2001 From: Vincent Whitchurch Date: Tue, 26 Oct 2021 15:31:00 +0200 Subject: virtio-ring: fix DMA metadata flags The flags are currently overwritten, leading to the wrong direction being passed to the DMA unmap functions. Fixes: 72b5e8958738aaa4 ("virtio-ring: store DMA metadata in desc_extra for split virtqueue") Signed-off-by: Vincent Whitchurch Link: https://lore.kernel.org/r/20211026133100.17541-1-vincent.whitchurch@axis.com Signed-off-by: Michael S. Tsirkin Acked-by: Jason Wang --- drivers/virtio/virtio_ring.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c index dd95dfd85e98..3035bb6f5458 100644 --- a/drivers/virtio/virtio_ring.c +++ b/drivers/virtio/virtio_ring.c @@ -576,7 +576,7 @@ static inline int virtqueue_add_split(struct virtqueue *_vq, /* Last one doesn't continue. */ desc[prev].flags &= cpu_to_virtio16(_vq->vdev, ~VRING_DESC_F_NEXT); if (!indirect && vq->use_dma_api) - vq->split.desc_extra[prev & (vq->split.vring.num - 1)].flags = + vq->split.desc_extra[prev & (vq->split.vring.num - 1)].flags &= ~VRING_DESC_F_NEXT; if (indirect) { -- cgit v1.2.3 From 64a19591a2938b170aa736443d5d3bf4c51e1388 Mon Sep 17 00:00:00 2001 From: Chen Lu <181250012@smail.nju.edu.cn> Date: Mon, 18 Oct 2021 13:22:38 +0800 Subject: riscv: fix misalgned trap vector base address The trap vector marked by label .Lsecondary_park must align on a 4-byte boundary, as the {m,s}tvec is defined to require 4-byte alignment. Signed-off-by: Chen Lu <181250012@smail.nju.edu.cn> Reviewed-by: Anup Patel Fixes: e011995e826f ("RISC-V: Move relocate and few other functions out of __init") Cc: stable@vger.kernel.org Signed-off-by: Palmer Dabbelt --- arch/riscv/kernel/head.S | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/riscv/kernel/head.S b/arch/riscv/kernel/head.S index fce5184b22c3..52c5ff9804c5 100644 --- a/arch/riscv/kernel/head.S +++ b/arch/riscv/kernel/head.S @@ -193,6 +193,7 @@ setup_trap_vector: csrw CSR_SCRATCH, zero ret +.align 2 .Lsecondary_park: /* We lack SMP support or have too many harts, so park this hart */ wfi -- cgit v1.2.3 From f82161516756be2827609ec4845e1e4316df0709 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Sun, 24 Oct 2021 09:38:31 -0700 Subject: ptp: Document the PTP_CLK_MAGIC ioctl number Add PTP_CLK_MAGIC to the userspace-api/ioctl/ioctl-number.rst documentation file. 
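What the table entry protects is the uniqueness of the magic character that every ioctl command number embeds. A minimal userspace sketch with a made-up device and magic — only the _IOR()/_IOW() encoding macros are real API, the FOO_* names are invented for illustration:

#include <stdio.h>
#include <linux/ioctl.h>    /* _IOR()/_IOW() command encoding */

/* Hypothetical device: one magic character, two commands.  The table in
 * ioctl-number.rst is what keeps such magics unique across the tree. */
#define FOO_MAGIC   'F'
struct foo_caps { int n_channels; };
#define FOO_GETCAPS _IOR(FOO_MAGIC, 0x01, struct foo_caps)
#define FOO_SETCHAN _IOW(FOO_MAGIC, 0x02, int)

int main(void)
{
    printf("FOO_GETCAPS = %#lx\n", (unsigned long)FOO_GETCAPS);
    printf("FOO_SETCHAN = %#lx\n", (unsigned long)FOO_SETCHAN);
    return 0;
}

Two drivers reusing the same magic and sequence numbers would produce identical command values, which is exactly the collision the registry exists to prevent.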
Fixes: d94ba80ebbea ("ptp: Added a brand new class driver for ptp clocks.") Signed-off-by: Randy Dunlap Cc: Richard Cochran Cc: John Stultz Cc: Jonathan Corbet Link: https://lore.kernel.org/r/20211024163831.10200-1-rdunlap@infradead.org Signed-off-by: Jakub Kicinski --- Documentation/userspace-api/ioctl/ioctl-number.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/Documentation/userspace-api/ioctl/ioctl-number.rst b/Documentation/userspace-api/ioctl/ioctl-number.rst index 2e8134059c87..6655d929a351 100644 --- a/Documentation/userspace-api/ioctl/ioctl-number.rst +++ b/Documentation/userspace-api/ioctl/ioctl-number.rst @@ -104,6 +104,7 @@ Code Seq# Include File Comments '8' all SNP8023 advanced NIC card ';' 64-7F linux/vfio.h +'=' 00-3f uapi/linux/ptp_clock.h '@' 00-0F linux/radeonfb.h conflict! '@' 00-0F drivers/video/aty/aty128fb.c conflict! 'A' 00-1F linux/apm_bios.h conflict! -- cgit v1.2.3 From 72f898ca0ab85fde6facf78b14d9f67a4a7b32d1 Mon Sep 17 00:00:00 2001 From: Janghyub Seo Date: Tue, 26 Oct 2021 07:12:42 +0000 Subject: r8169: Add device 10ec:8162 to driver r8169 This patch makes the driver r8169 pick up device Realtek Semiconductor Co. , Ltd. Device [10ec:8162]. Signed-off-by: Janghyub Seo Suggested-by: Rushab Shah Link: https://lore.kernel.org/r/1635231849296.1489250046.441294000@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/realtek/r8169_main.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/realtek/r8169_main.c b/drivers/net/ethernet/realtek/r8169_main.c index 46a6ff9a782d..2918947dd57c 100644 --- a/drivers/net/ethernet/realtek/r8169_main.c +++ b/drivers/net/ethernet/realtek/r8169_main.c @@ -157,6 +157,7 @@ static const struct pci_device_id rtl8169_pci_tbl[] = { { PCI_VDEVICE(REALTEK, 0x8129) }, { PCI_VDEVICE(REALTEK, 0x8136), RTL_CFG_NO_GBIT }, { PCI_VDEVICE(REALTEK, 0x8161) }, + { PCI_VDEVICE(REALTEK, 0x8162) }, { PCI_VDEVICE(REALTEK, 0x8167) }, { PCI_VDEVICE(REALTEK, 0x8168) }, { PCI_VDEVICE(NCUBE, 0x8168) }, -- cgit v1.2.3 From 7fa598f9706d40bd16f2ab286bdf5808e1393d35 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Wed, 27 Oct 2021 12:08:54 -0400 Subject: tracing: Do not warn when connecting eprobe to non existing event When the syscall trace points are not configured in, the kselftests for ftrace will try to attach an event probe (eprobe) to one of the system call trace points. This triggered a WARNING, because the failure only expects to see memory issues. But this is not the only failure. The user may attempt to attach to a non existent event, and the kernel must not warn about it. 
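The idiom being corrected — warn only on error codes the call site cannot legitimately return, stay silent on expected ones — can be shown with a tiny userspace stand-in for WARN_ON_ONCE(); every name below is invented for the sketch:

#include <errno.h>
#include <stdio.h>

/* Crude stand-in for WARN_ON_ONCE(): report the first hit, keep going. */
#define WARN_ONCE_IF(cond) do {                                        \
        static int warned;                                             \
        if ((cond) && !warned) {                                       \
            warned = 1;                                                \
            fprintf(stderr, "unexpected condition: %s\n", #cond);      \
        }                                                              \
    } while (0)

/* -ENODEV (the event does not exist) is a normal user error here; anything
 * other than that or -ENOMEM would indicate a bug. */
static void attach_probe(int ret)
{
    if (ret < 0)
        WARN_ONCE_IF(ret != -ENOMEM && ret != -ENODEV);
}

int main(void)
{
    attach_probe(-ENODEV);   /* silent: the requested event is simply missing */
    attach_probe(-EINVAL);   /* reported: not an expected failure mode */
    return 0;
}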
Link: https://lkml.kernel.org/r/20211027120854.0680aa0f@gandalf.local.home Fixes: 7491e2c442781 ("tracing: Add a probe that attaches to trace events") Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/trace_eprobe.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/kernel/trace/trace_eprobe.c b/kernel/trace/trace_eprobe.c index c4a15aef36af..5c5f208c15d3 100644 --- a/kernel/trace/trace_eprobe.c +++ b/kernel/trace/trace_eprobe.c @@ -904,8 +904,8 @@ static int __trace_eprobe_create(int argc, const char *argv[]) if (IS_ERR(ep)) { ret = PTR_ERR(ep); - /* This must return -ENOMEM, else there is a bug */ - WARN_ON_ONCE(ret != -ENOMEM); + /* This must return -ENOMEM or misssing event, else there is a bug */ + WARN_ON_ONCE(ret != -ENOMEM && ret != -ENODEV); ep = NULL; goto error; } -- cgit v1.2.3 From 3f4e54bd312d3dafb59daf2b97ffa08abebe60f5 Mon Sep 17 00:00:00 2001 From: Patrik Jakobsson Date: Wed, 27 Oct 2021 16:27:30 +0200 Subject: drm/amdgpu: Fix even more out of bound writes from debugfs CVE-2021-42327 was fixed by: commit f23750b5b3d98653b31d4469592935ef6364ad67 Author: Thelford Williams Date: Wed Oct 13 16:04:13 2021 -0400 drm/amdgpu: fix out of bounds write but amdgpu_dm_debugfs.c contains more of the same issue so fix the remaining ones. v2: * Add missing fix in dp_max_bpc_write (Harry Wentland) Fixes: 918698d5c2b5 ("drm/amd/display: Return the number of bytes parsed than allocated") Signed-off-by: Patrik Jakobsson Reviewed-by: Harry Wentland Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c index 17f2756a64dc..8080bba5b7a7 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c @@ -487,7 +487,7 @@ static ssize_t dp_phy_settings_write(struct file *f, const char __user *buf, if (!wr_buf) return -ENOSPC; - if (parse_write_buffer_into_params(wr_buf, size, + if (parse_write_buffer_into_params(wr_buf, wr_buf_size, (long *)param, buf, max_param_num, ¶m_nums)) { @@ -639,7 +639,7 @@ static ssize_t dp_phy_test_pattern_debugfs_write(struct file *f, const char __us if (!wr_buf) return -ENOSPC; - if (parse_write_buffer_into_params(wr_buf, size, + if (parse_write_buffer_into_params(wr_buf, wr_buf_size, (long *)param, buf, max_param_num, ¶m_nums)) { @@ -914,7 +914,7 @@ static ssize_t dp_dsc_passthrough_set(struct file *f, const char __user *buf, return -ENOSPC; } - if (parse_write_buffer_into_params(wr_buf, size, + if (parse_write_buffer_into_params(wr_buf, wr_buf_size, ¶m, buf, max_param_num, ¶m_nums)) { @@ -1211,7 +1211,7 @@ static ssize_t trigger_hotplug(struct file *f, const char __user *buf, return -ENOSPC; } - if (parse_write_buffer_into_params(wr_buf, size, + if (parse_write_buffer_into_params(wr_buf, wr_buf_size, (long *)param, buf, max_param_num, ¶m_nums)) { @@ -1396,7 +1396,7 @@ static ssize_t dp_dsc_clock_en_write(struct file *f, const char __user *buf, return -ENOSPC; } - if (parse_write_buffer_into_params(wr_buf, size, + if (parse_write_buffer_into_params(wr_buf, wr_buf_size, (long *)param, buf, max_param_num, ¶m_nums)) { @@ -1581,7 +1581,7 @@ static ssize_t dp_dsc_slice_width_write(struct file *f, const char __user *buf, return -ENOSPC; } - if (parse_write_buffer_into_params(wr_buf, size, + if 
(parse_write_buffer_into_params(wr_buf, wr_buf_size, (long *)param, buf, max_param_num, ¶m_nums)) { @@ -1766,7 +1766,7 @@ static ssize_t dp_dsc_slice_height_write(struct file *f, const char __user *buf, return -ENOSPC; } - if (parse_write_buffer_into_params(wr_buf, size, + if (parse_write_buffer_into_params(wr_buf, wr_buf_size, (long *)param, buf, max_param_num, ¶m_nums)) { @@ -1944,7 +1944,7 @@ static ssize_t dp_dsc_bits_per_pixel_write(struct file *f, const char __user *bu return -ENOSPC; } - if (parse_write_buffer_into_params(wr_buf, size, + if (parse_write_buffer_into_params(wr_buf, wr_buf_size, (long *)param, buf, max_param_num, ¶m_nums)) { @@ -2382,7 +2382,7 @@ static ssize_t dp_max_bpc_write(struct file *f, const char __user *buf, return -ENOSPC; } - if (parse_write_buffer_into_params(wr_buf, size, + if (parse_write_buffer_into_params(wr_buf, wr_buf_size, (long *)param, buf, max_param_num, ¶m_nums)) { -- cgit v1.2.3 From 54149d13f369e1ab02f36b91feee02069184c1d8 Mon Sep 17 00:00:00 2001 From: Michael Strauss Date: Thu, 21 Oct 2021 13:27:16 -0400 Subject: drm/amd/display: Fallback to clocks which meet requested voltage on DCN31 [WHY] On certain configs, SMU clock table voltages don't match which cause parser to behave incorrectly by leaving dcfclk and socclk table entries unpopulated. [HOW] Currently the function that finds the corresponding clock for a given voltage only checks for exact voltage level matches. In the case that no match gets found, parser now falls back to searching for the max clock which meets the requested voltage (i.e. its corresponding voltage is below requested). Signed-off-by: Michael Strauss Reviewed-by: Nicholas Kazlauskas Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- .../gpu/drm/amd/display/dc/clk_mgr/dcn31/dcn31_clk_mgr.c | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn31/dcn31_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn31/dcn31_clk_mgr.c index 035ba0ef6369..377c4e53a2b3 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn31/dcn31_clk_mgr.c +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn31/dcn31_clk_mgr.c @@ -518,14 +518,21 @@ static unsigned int find_clk_for_voltage( unsigned int voltage) { int i; + int max_voltage = 0; + int clock = 0; for (i = 0; i < NUM_SOC_VOLTAGE_LEVELS; i++) { - if (clock_table->SocVoltage[i] == voltage) + if (clock_table->SocVoltage[i] == voltage) { return clocks[i]; + } else if (clock_table->SocVoltage[i] >= max_voltage && + clock_table->SocVoltage[i] < voltage) { + max_voltage = clock_table->SocVoltage[i]; + clock = clocks[i]; + } } - ASSERT(0); - return 0; + ASSERT(clock); + return clock; } void dcn31_clk_mgr_helper_populate_bw_params( -- cgit v1.2.3 From ad76744b041d8c87ef1c9adbb04fb7eaa20a179e Mon Sep 17 00:00:00 2001 From: Nicholas Kazlauskas Date: Fri, 22 Oct 2021 16:14:24 -0400 Subject: drm/amd/display: Fix deadlock when falling back to v2 from v3 [Why] A deadlock in the kernel occurs when we fallback from the V3 to V2 add_topology_to_display or remove_topology_to_display because they both try to acquire the dtm_mutex but recursive locking isn't supported on mutex_lock(). [How] Make the mutex_lock/unlock more fine grained and move them up such that they're only required for the psp invocation itself. 
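Stripped of the PSP details, the deadlock is the classic one: a non-recursive mutex is still held when a fallback path that acquires the same mutex runs. A compact pthreads sketch of the fixed structure (all names invented; build with -pthread):

#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t dtm_mutex = PTHREAD_MUTEX_INITIALIZER;

/* The v2 fallback takes the lock itself, so the caller must not hold it. */
static int add_topology_v2(void)
{
    pthread_mutex_lock(&dtm_mutex);
    /* ... program the topology the old way ... */
    pthread_mutex_unlock(&dtm_mutex);
    return 0;
}

/* Fixed v3 path: the critical section covers only the firmware call,
 * and the lock is dropped before deciding whether to fall back. */
static int add_topology_v3(int fw_status)
{
    pthread_mutex_lock(&dtm_mutex);
    /* ... invoke the v3 firmware interface here ... */
    pthread_mutex_unlock(&dtm_mutex);

    if (fw_status != 0)             /* firmware rejected v3 */
        return add_topology_v2();   /* safe: the mutex is free again */
    return 0;
}

int main(void)
{
    printf("v3 with fallback -> %d\n", add_topology_v3(1));
    return 0;
}

Holding the mutex across the fallback call is what the original code did, and with a non-recursive mutex that second lock never returns.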
Fixes: bf62221e9d0e ("drm/amd/display: Add DCN3.1 HDCP support") Signed-off-by: Nicholas Kazlauskas Reviewed-by: Aric Cyr Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/display/modules/hdcp/hdcp_psp.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/amd/display/modules/hdcp/hdcp_psp.c b/drivers/gpu/drm/amd/display/modules/hdcp/hdcp_psp.c index e9bd84ec027d..be61975f1470 100644 --- a/drivers/gpu/drm/amd/display/modules/hdcp/hdcp_psp.c +++ b/drivers/gpu/drm/amd/display/modules/hdcp/hdcp_psp.c @@ -105,6 +105,7 @@ static enum mod_hdcp_status remove_display_from_topology_v3( dtm_cmd->dtm_status = TA_DTM_STATUS__GENERIC_FAILURE; psp_dtm_invoke(psp, dtm_cmd->cmd_id); + mutex_unlock(&psp->dtm_context.mutex); if (dtm_cmd->dtm_status != TA_DTM_STATUS__SUCCESS) { status = remove_display_from_topology_v2(hdcp, index); @@ -115,8 +116,6 @@ static enum mod_hdcp_status remove_display_from_topology_v3( HDCP_TOP_REMOVE_DISPLAY_TRACE(hdcp, display->index); } - mutex_unlock(&psp->dtm_context.mutex); - return status; } @@ -205,6 +204,7 @@ static enum mod_hdcp_status add_display_to_topology_v3( dtm_cmd->dtm_in_message.topology_update_v3.link_hdcp_cap = link->hdcp_supported_informational; psp_dtm_invoke(psp, dtm_cmd->cmd_id); + mutex_unlock(&psp->dtm_context.mutex); if (dtm_cmd->dtm_status != TA_DTM_STATUS__SUCCESS) { status = add_display_to_topology_v2(hdcp, display); @@ -214,8 +214,6 @@ static enum mod_hdcp_status add_display_to_topology_v3( HDCP_TOP_ADD_DISPLAY_TRACE(hdcp, display->index); } - mutex_unlock(&psp->dtm_context.mutex); - return status; } -- cgit v1.2.3 From 9a4aa3a2f1606a03c220b21049baa4a2b6169626 Mon Sep 17 00:00:00 2001 From: Joonas Lahtinen Date: Wed, 27 Oct 2021 12:32:55 +0300 Subject: drm/i915: Revert 'guc_id' from i915_request tracepoint Avoid adding backend specific data to the tracepoints outside of the LOW_LEVEL_TRACEPOINTS kernel config protection. These bits of information are bound to change depending on the selected submission method per platform and are not necessarily possible to maintain in the future. 
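A schematic way to picture the rule — backend-specific detail is opt-in, the default tracepoint layout stays backend-neutral — using an invented struct and an invented compile-time switch standing in for the real config option:

#include <stdio.h>

struct request_info { unsigned int ctx, seqno, guc_id; };

/* Default build: emit only fields every submission backend can provide.
 * Backend details are opted into with a dedicated low-level debug switch. */
static void trace_request(const struct request_info *rq)
{
#ifdef LOW_LEVEL_TRACE
    printf("ctx=%u seqno=%u guc_id=%u\n", rq->ctx, rq->seqno, rq->guc_id);
#else
    printf("ctx=%u seqno=%u\n", rq->ctx, rq->seqno);
#endif
}

int main(void)
{
    struct request_info rq = { .ctx = 1, .seqno = 42, .guc_id = 7 };

    trace_request(&rq);
    return 0;
}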
Fixes: dbf9da8d55ef ("drm/i915/guc: Add trace point for GuC submit") Signed-off-by: Joonas Lahtinen Cc: John Harrison Cc: Matthew Brost Cc: Daniele Ceraolo Spurio Cc: Chris Wilson Cc: Matt Roper Reviewed-by: Matthew Auld Link: https://patchwork.freedesktop.org/patch/msgid/20211027093255.66489-1-joonas.lahtinen@linux.intel.com (cherry picked from commit 64512a66b67e6546e2db15192b3603cd6d58b75c) Signed-off-by: Joonas Lahtinen --- drivers/gpu/drm/i915/i915_trace.h | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_trace.h b/drivers/gpu/drm/i915/i915_trace.h index 806ad688274b..63fec1c3c132 100644 --- a/drivers/gpu/drm/i915/i915_trace.h +++ b/drivers/gpu/drm/i915/i915_trace.h @@ -794,7 +794,6 @@ DECLARE_EVENT_CLASS(i915_request, TP_STRUCT__entry( __field(u32, dev) __field(u64, ctx) - __field(u32, guc_id) __field(u16, class) __field(u16, instance) __field(u32, seqno) @@ -805,16 +804,14 @@ DECLARE_EVENT_CLASS(i915_request, __entry->dev = rq->engine->i915->drm.primary->index; __entry->class = rq->engine->uabi_class; __entry->instance = rq->engine->uabi_instance; - __entry->guc_id = rq->context->guc_id; __entry->ctx = rq->fence.context; __entry->seqno = rq->fence.seqno; __entry->tail = rq->tail; ), - TP_printk("dev=%u, engine=%u:%u, guc_id=%u, ctx=%llu, seqno=%u, tail=%u", + TP_printk("dev=%u, engine=%u:%u, ctx=%llu, seqno=%u, tail=%u", __entry->dev, __entry->class, __entry->instance, - __entry->guc_id, __entry->ctx, __entry->seqno, - __entry->tail) + __entry->ctx, __entry->seqno, __entry->tail) ); DEFINE_EVENT(i915_request, i915_request_add, -- cgit v1.2.3 From e8a1ff65927080278e6826f797b7c197fb2611a6 Mon Sep 17 00:00:00 2001 From: Wenbin Mei Date: Thu, 28 Oct 2021 10:20:49 +0800 Subject: mmc: mediatek: Move cqhci init behind ungate clock We must enable clock before cqhci init, because crypto needs read information from CQHCI registers, otherwise, it will hang in MediaTek mmc host controller. 
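The constraint generalises beyond this controller: any init step that reads registers — the CQHCI/crypto capability probe included — must run after the clocks are ungated. A toy model of that ordering; every type and helper here is invented for the sketch:

#include <stdio.h>

struct toy_host { int clk_on; int caps_reg; };

static void ungate_clock(struct toy_host *h) { h->clk_on = 1; }

/* Reading a register behind a gated clock is modelled as a hard failure;
 * on the real hardware the access simply hangs. */
static int read_caps(struct toy_host *h)
{
    if (!h->clk_on) {
        fprintf(stderr, "register read with the clock gated\n");
        return -1;
    }
    return h->caps_reg;
}

static int cqhci_init(struct toy_host *h)
{
    return read_caps(h) < 0 ? -1 : 0;
}

int main(void)
{
    struct toy_host h = { .clk_on = 0, .caps_reg = 0x42 };

    ungate_clock(&h);       /* must come before any register access */
    return cqhci_init(&h);
}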
Signed-off-by: Wenbin Mei Fixes: 88bd652b3c74 ("mmc: mediatek: command queue support") Cc: stable@vger.kernel.org Acked-by: Chaotian Jing Link: https://lore.kernel.org/r/20211028022049.22129-1-wenbin.mei@mediatek.com Signed-off-by: Ulf Hansson --- drivers/mmc/host/mtk-sd.c | 38 +++++++++++++++++++------------------- 1 file changed, 19 insertions(+), 19 deletions(-) diff --git a/drivers/mmc/host/mtk-sd.c b/drivers/mmc/host/mtk-sd.c index 4dfc246c5f95..b06b4dcb7c78 100644 --- a/drivers/mmc/host/mtk-sd.c +++ b/drivers/mmc/host/mtk-sd.c @@ -2577,6 +2577,25 @@ static int msdc_drv_probe(struct platform_device *pdev) host->dma_mask = DMA_BIT_MASK(32); mmc_dev(mmc)->dma_mask = &host->dma_mask; + host->timeout_clks = 3 * 1048576; + host->dma.gpd = dma_alloc_coherent(&pdev->dev, + 2 * sizeof(struct mt_gpdma_desc), + &host->dma.gpd_addr, GFP_KERNEL); + host->dma.bd = dma_alloc_coherent(&pdev->dev, + MAX_BD_NUM * sizeof(struct mt_bdma_desc), + &host->dma.bd_addr, GFP_KERNEL); + if (!host->dma.gpd || !host->dma.bd) { + ret = -ENOMEM; + goto release_mem; + } + msdc_init_gpd_bd(host, &host->dma); + INIT_DELAYED_WORK(&host->req_timeout, msdc_request_timeout); + spin_lock_init(&host->lock); + + platform_set_drvdata(pdev, mmc); + msdc_ungate_clock(host); + msdc_init_hw(host); + if (mmc->caps2 & MMC_CAP2_CQE) { host->cq_host = devm_kzalloc(mmc->parent, sizeof(*host->cq_host), @@ -2597,25 +2616,6 @@ static int msdc_drv_probe(struct platform_device *pdev) mmc->max_seg_size = 64 * 1024; } - host->timeout_clks = 3 * 1048576; - host->dma.gpd = dma_alloc_coherent(&pdev->dev, - 2 * sizeof(struct mt_gpdma_desc), - &host->dma.gpd_addr, GFP_KERNEL); - host->dma.bd = dma_alloc_coherent(&pdev->dev, - MAX_BD_NUM * sizeof(struct mt_bdma_desc), - &host->dma.bd_addr, GFP_KERNEL); - if (!host->dma.gpd || !host->dma.bd) { - ret = -ENOMEM; - goto release_mem; - } - msdc_init_gpd_bd(host, &host->dma); - INIT_DELAYED_WORK(&host->req_timeout, msdc_request_timeout); - spin_lock_init(&host->lock); - - platform_set_drvdata(pdev, mmc); - msdc_ungate_clock(host); - msdc_init_hw(host); - ret = devm_request_irq(&pdev->dev, host->irq, msdc_irq, IRQF_TRIGGER_NONE, pdev->name, host); if (ret) -- cgit v1.2.3 From 9159f102402a64ac85e676b75cc1f9c62c5b4b73 Mon Sep 17 00:00:00 2001 From: Dongli Zhang Date: Tue, 26 Oct 2021 14:50:31 -0700 Subject: vmxnet3: do not stop tx queues after netif_device_detach() The netif_device_detach() conditionally stops all tx queues if the queues are running. There is no need to call netif_tx_stop_all_queues() again. Signed-off-by: Dongli Zhang Signed-off-by: David S. Miller --- drivers/net/vmxnet3/vmxnet3_drv.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/net/vmxnet3/vmxnet3_drv.c b/drivers/net/vmxnet3/vmxnet3_drv.c index 142f70670f5c..8799854bacb2 100644 --- a/drivers/net/vmxnet3/vmxnet3_drv.c +++ b/drivers/net/vmxnet3/vmxnet3_drv.c @@ -3833,7 +3833,6 @@ vmxnet3_suspend(struct device *device) vmxnet3_free_intr_resources(adapter); netif_device_detach(netdev); - netif_tx_stop_all_queues(netdev); /* Create wake-up filters. */ pmConf = adapter->pm_conf; -- cgit v1.2.3 From 90a881fc352a953f1c8beb61977a2db0818157d4 Mon Sep 17 00:00:00 2001 From: Yu Xiao Date: Thu, 28 Oct 2021 12:00:36 +0200 Subject: nfp: bpf: relax prog rejection for mtu check through max_pkt_offset MTU change is refused whenever the value of new MTU is bigger than the max packet bytes that fits in NFP Cluster Target Memory (CTM). However, an eBPF program doesn't always need to access the whole packet data. 
The maximum direct packet access (DPA) offset has always been caculated by verifier and stored in the max_pkt_offset field of prog aux data. Signed-off-by: Yu Xiao Reviewed-by: Yinjun Zhang Reviewed-by: Niklas Soderlund Signed-off-by: Simon Horman Signed-off-by: David S. Miller --- drivers/net/ethernet/netronome/nfp/bpf/main.c | 16 +++++++++++----- drivers/net/ethernet/netronome/nfp/bpf/main.h | 2 ++ drivers/net/ethernet/netronome/nfp/bpf/offload.c | 17 +++++++++++++---- 3 files changed, 26 insertions(+), 9 deletions(-) diff --git a/drivers/net/ethernet/netronome/nfp/bpf/main.c b/drivers/net/ethernet/netronome/nfp/bpf/main.c index 11c83a99b014..f469950c7265 100644 --- a/drivers/net/ethernet/netronome/nfp/bpf/main.c +++ b/drivers/net/ethernet/netronome/nfp/bpf/main.c @@ -182,15 +182,21 @@ static int nfp_bpf_check_mtu(struct nfp_app *app, struct net_device *netdev, int new_mtu) { struct nfp_net *nn = netdev_priv(netdev); - unsigned int max_mtu; + struct nfp_bpf_vnic *bv; + struct bpf_prog *prog; if (~nn->dp.ctrl & NFP_NET_CFG_CTRL_BPF) return 0; - max_mtu = nn_readb(nn, NFP_NET_CFG_BPF_INL_MTU) * 64 - 32; - if (new_mtu > max_mtu) { - nn_info(nn, "BPF offload active, MTU over %u not supported\n", - max_mtu); + if (nn->xdp_hw.prog) { + prog = nn->xdp_hw.prog; + } else { + bv = nn->app_priv; + prog = bv->tc_prog; + } + + if (nfp_bpf_offload_check_mtu(nn, prog, new_mtu)) { + nn_info(nn, "BPF offload active, potential packet access beyond hardware packet boundary"); return -EBUSY; } return 0; diff --git a/drivers/net/ethernet/netronome/nfp/bpf/main.h b/drivers/net/ethernet/netronome/nfp/bpf/main.h index d0e17eebddd9..16841bb750b7 100644 --- a/drivers/net/ethernet/netronome/nfp/bpf/main.h +++ b/drivers/net/ethernet/netronome/nfp/bpf/main.h @@ -560,6 +560,8 @@ bool nfp_is_subprog_start(struct nfp_insn_meta *meta); void nfp_bpf_jit_prepare(struct nfp_prog *nfp_prog); int nfp_bpf_jit(struct nfp_prog *prog); bool nfp_bpf_supported_opcode(u8 code); +bool nfp_bpf_offload_check_mtu(struct nfp_net *nn, struct bpf_prog *prog, + unsigned int mtu); int nfp_verify_insn(struct bpf_verifier_env *env, int insn_idx, int prev_insn_idx); diff --git a/drivers/net/ethernet/netronome/nfp/bpf/offload.c b/drivers/net/ethernet/netronome/nfp/bpf/offload.c index 53851853562c..9d97cd281f18 100644 --- a/drivers/net/ethernet/netronome/nfp/bpf/offload.c +++ b/drivers/net/ethernet/netronome/nfp/bpf/offload.c @@ -481,19 +481,28 @@ int nfp_bpf_event_output(struct nfp_app_bpf *bpf, const void *data, return 0; } +bool nfp_bpf_offload_check_mtu(struct nfp_net *nn, struct bpf_prog *prog, + unsigned int mtu) +{ + unsigned int fw_mtu, pkt_off; + + fw_mtu = nn_readb(nn, NFP_NET_CFG_BPF_INL_MTU) * 64 - 32; + pkt_off = min(prog->aux->max_pkt_offset, mtu); + + return fw_mtu < pkt_off; +} + static int nfp_net_bpf_load(struct nfp_net *nn, struct bpf_prog *prog, struct netlink_ext_ack *extack) { struct nfp_prog *nfp_prog = prog->aux->offload->dev_priv; - unsigned int fw_mtu, pkt_off, max_stack, max_prog_len; + unsigned int max_stack, max_prog_len; dma_addr_t dma_addr; void *img; int err; - fw_mtu = nn_readb(nn, NFP_NET_CFG_BPF_INL_MTU) * 64 - 32; - pkt_off = min(prog->aux->max_pkt_offset, nn->dp.netdev->mtu); - if (fw_mtu < pkt_off) { + if (nfp_bpf_offload_check_mtu(nn, prog, nn->dp.netdev->mtu)) { NL_SET_ERR_MSG_MOD(extack, "BPF offload not supported with potential packet access beyond HW packet split boundary"); return -EOPNOTSUPP; } -- cgit v1.2.3 From c4a146c7cf5e8ad76231523b174d161bf152c6e7 Mon Sep 17 00:00:00 2001 From: Tony Lu Date: 
Thu, 28 Oct 2021 15:13:45 +0800 Subject: net/smc: Fix smc_link->llc_testlink_time overflow The value of llc_testlink_time is set to the value stored in net->ipv4.sysctl_tcp_keepalive_time when linkgroup init. The value of sysctl_tcp_keepalive_time is already jiffies, so we don't need to multiply by HZ, which would cause smc_link->llc_testlink_time overflow, and test_link send flood. Signed-off-by: Tony Lu Reviewed-by: Xuan Zhuo Reviewed-by: Wen Gu Signed-off-by: David S. Miller --- net/smc/smc_llc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/smc/smc_llc.c b/net/smc/smc_llc.c index 72f4b72eb175..f1d323439a2a 100644 --- a/net/smc/smc_llc.c +++ b/net/smc/smc_llc.c @@ -1822,7 +1822,7 @@ void smc_llc_link_active(struct smc_link *link) link->smcibdev->ibdev->name, link->ibport); link->state = SMC_LNK_ACTIVE; if (link->lgr->llc_testlink_time) { - link->llc_testlink_time = link->lgr->llc_testlink_time * HZ; + link->llc_testlink_time = link->lgr->llc_testlink_time; schedule_delayed_work(&link->llc_testlink_wrk, link->llc_testlink_time); } -- cgit v1.2.3 From f3a3a0fe0b644582fa5d83dd94b398f99fc57914 Mon Sep 17 00:00:00 2001 From: Wen Gu Date: Thu, 28 Oct 2021 15:13:47 +0800 Subject: net/smc: Correct spelling mistake to TCPF_SYN_RECV There should use TCPF_SYN_RECV instead of TCP_SYN_RECV. Signed-off-by: Wen Gu Reviewed-by: Tony Lu Signed-off-by: David S. Miller --- net/smc/af_smc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c index c038efc23ce3..78b663dbfa1f 100644 --- a/net/smc/af_smc.c +++ b/net/smc/af_smc.c @@ -1057,7 +1057,7 @@ static void smc_connect_work(struct work_struct *work) if (smc->clcsock->sk->sk_err) { smc->sk.sk_err = smc->clcsock->sk->sk_err; } else if ((1 << smc->clcsock->sk->sk_state) & - (TCPF_SYN_SENT | TCP_SYN_RECV)) { + (TCPF_SYN_SENT | TCPF_SYN_RECV)) { rc = sk_stream_wait_connect(smc->clcsock->sk, &timeo); if ((rc == -EPIPE) && ((1 << smc->clcsock->sk->sk_state) & -- cgit v1.2.3 From da353fac65fede6b8b4cfe207f0d9408e3121105 Mon Sep 17 00:00:00 2001 From: Daniel Jordan Date: Wed, 27 Oct 2021 17:59:20 -0400 Subject: net/tls: Fix flipped sign in tls_err_abort() calls sk->sk_err appears to expect a positive value, a convention that ktls doesn't always follow and that leads to memory corruption in other code. For instance, [kworker] tls_encrypt_done(..., err=) tls_err_abort(.., err) sk->sk_err = err; [task] splice_from_pipe_feed ... tls_sw_do_sendpage if (sk->sk_err) { ret = -sk->sk_err; // ret is positive splice_from_pipe_feed (continued) ret = actor(...) // ret is still positive and interpreted as bytes // written, resulting in underflow of buf->len and // sd->len, leading to huge buf->offset and bogus // addresses computed in later calls to actor() Fix all tls_err_abort() callers to pass a negative error code consistently and centralize the error-prone sign flip there, throwing in a warning to catch future misuse and uninlining the function so it really does only warn once. Cc: stable@vger.kernel.org Fixes: c46234ebb4d1e ("tls: RX path for ktls") Reported-by: syzbot+b187b77c8474f9648fae@syzkaller.appspotmail.com Signed-off-by: Daniel Jordan Signed-off-by: David S. 
Miller --- include/net/tls.h | 9 ++------- net/tls/tls_sw.c | 17 +++++++++++++---- 2 files changed, 15 insertions(+), 11 deletions(-) diff --git a/include/net/tls.h b/include/net/tls.h index 01d2e3744393..1fffb206f09f 100644 --- a/include/net/tls.h +++ b/include/net/tls.h @@ -358,6 +358,7 @@ int tls_sk_query(struct sock *sk, int optname, char __user *optval, int __user *optlen); int tls_sk_attach(struct sock *sk, int optname, char __user *optval, unsigned int optlen); +void tls_err_abort(struct sock *sk, int err); int tls_set_sw_offload(struct sock *sk, struct tls_context *ctx, int tx); void tls_sw_strparser_arm(struct sock *sk, struct tls_context *ctx); @@ -466,12 +467,6 @@ static inline bool tls_is_sk_tx_device_offloaded(struct sock *sk) #endif } -static inline void tls_err_abort(struct sock *sk, int err) -{ - sk->sk_err = err; - sk_error_report(sk); -} - static inline bool tls_bigint_increment(unsigned char *seq, int len) { int i; @@ -512,7 +507,7 @@ static inline void tls_advance_record_sn(struct sock *sk, struct cipher_context *ctx) { if (tls_bigint_increment(ctx->rec_seq, prot->rec_seq_size)) - tls_err_abort(sk, EBADMSG); + tls_err_abort(sk, -EBADMSG); if (prot->version != TLS_1_3_VERSION && prot->cipher_type != TLS_CIPHER_CHACHA20_POLY1305) diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c index d5d09bd817b7..1644f8baea19 100644 --- a/net/tls/tls_sw.c +++ b/net/tls/tls_sw.c @@ -35,6 +35,7 @@ * SOFTWARE. */ +#include #include #include #include @@ -43,6 +44,14 @@ #include #include +noinline void tls_err_abort(struct sock *sk, int err) +{ + WARN_ON_ONCE(err >= 0); + /* sk->sk_err should contain a positive error code. */ + sk->sk_err = -err; + sk_error_report(sk); +} + static int __skb_nsg(struct sk_buff *skb, int offset, int len, unsigned int recursion_level) { @@ -419,7 +428,7 @@ int tls_tx_records(struct sock *sk, int flags) tx_err: if (rc < 0 && rc != -EAGAIN) - tls_err_abort(sk, EBADMSG); + tls_err_abort(sk, -EBADMSG); return rc; } @@ -763,7 +772,7 @@ static int tls_push_record(struct sock *sk, int flags, msg_pl->sg.size + prot->tail_size, i); if (rc < 0) { if (rc != -EINPROGRESS) { - tls_err_abort(sk, EBADMSG); + tls_err_abort(sk, -EBADMSG); if (split) { tls_ctx->pending_open_record_frags = true; tls_merge_open_record(sk, rec, tmp, orig_end); @@ -1827,7 +1836,7 @@ int tls_sw_recvmsg(struct sock *sk, err = decrypt_skb_update(sk, skb, &msg->msg_iter, &chunk, &zc, async_capable); if (err < 0 && err != -EINPROGRESS) { - tls_err_abort(sk, EBADMSG); + tls_err_abort(sk, -EBADMSG); goto recv_end; } @@ -2007,7 +2016,7 @@ ssize_t tls_sw_splice_read(struct socket *sock, loff_t *ppos, } if (err < 0) { - tls_err_abort(sk, EBADMSG); + tls_err_abort(sk, -EBADMSG); goto splice_read_end; } ctx->decrypted = 1; -- cgit v1.2.3 From 1d9d6fd21ad4a28b16ed9ee5432ae738b9dc58aa Mon Sep 17 00:00:00 2001 From: Daniel Jordan Date: Wed, 27 Oct 2021 17:59:21 -0400 Subject: net/tls: Fix flipped sign in async_wait.err assignment sk->sk_err contains a positive number, yet async_wait.err wants the opposite. Fix the missed sign flip, which Jakub caught by inspection. Fixes: a42055e8d2c3 ("net/tls: Add support for async encryption of records for performance") Suggested-by: Jakub Kicinski Signed-off-by: Daniel Jordan Signed-off-by: David S. 
Miller --- net/tls/tls_sw.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c index 1644f8baea19..1b08b877a890 100644 --- a/net/tls/tls_sw.c +++ b/net/tls/tls_sw.c @@ -459,7 +459,7 @@ static void tls_encrypt_done(struct crypto_async_request *req, int err) /* If err is already set on socket, return the same code */ if (sk->sk_err) { - ctx->async_wait.err = sk->sk_err; + ctx->async_wait.err = -sk->sk_err; } else { ctx->async_wait.err = err; tls_err_abort(sk, err); -- cgit v1.2.3 From e8684db191e4164f3f5f3ad7dec04a6734c25f1c Mon Sep 17 00:00:00 2001 From: Yuiko Oshino Date: Wed, 27 Oct 2021 14:23:02 -0400 Subject: net: ethernet: microchip: lan743x: Fix skb allocation failure The driver allocates skb during ndo_open with GFP_ATOMIC which has high chance of failure when there are multiple instances. GFP_KERNEL is enough while open and use GFP_ATOMIC only from interrupt context. Fixes: 23f0703c125b ("lan743x: Add main source files for new lan743x driver") Signed-off-by: Yuiko Oshino Signed-off-by: David S. Miller --- drivers/net/ethernet/microchip/lan743x_main.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/microchip/lan743x_main.c b/drivers/net/ethernet/microchip/lan743x_main.c index c4f32c7e0db1..4d5a5d6595b3 100644 --- a/drivers/net/ethernet/microchip/lan743x_main.c +++ b/drivers/net/ethernet/microchip/lan743x_main.c @@ -1944,7 +1944,8 @@ static void lan743x_rx_update_tail(struct lan743x_rx *rx, int index) index); } -static int lan743x_rx_init_ring_element(struct lan743x_rx *rx, int index) +static int lan743x_rx_init_ring_element(struct lan743x_rx *rx, int index, + gfp_t gfp) { struct net_device *netdev = rx->adapter->netdev; struct device *dev = &rx->adapter->pdev->dev; @@ -1958,7 +1959,7 @@ static int lan743x_rx_init_ring_element(struct lan743x_rx *rx, int index) descriptor = &rx->ring_cpu_ptr[index]; buffer_info = &rx->buffer_info[index]; - skb = __netdev_alloc_skb(netdev, buffer_length, GFP_ATOMIC | GFP_DMA); + skb = __netdev_alloc_skb(netdev, buffer_length, gfp); if (!skb) return -ENOMEM; dma_ptr = dma_map_single(dev, skb->data, buffer_length, DMA_FROM_DEVICE); @@ -2120,7 +2121,8 @@ static int lan743x_rx_process_buffer(struct lan743x_rx *rx) /* save existing skb, allocate new skb and map to dma */ skb = buffer_info->skb; - if (lan743x_rx_init_ring_element(rx, rx->last_head)) { + if (lan743x_rx_init_ring_element(rx, rx->last_head, + GFP_ATOMIC | GFP_DMA)) { /* failed to allocate next skb. * Memory is very low. * Drop this packet and reuse buffer. @@ -2335,13 +2337,16 @@ static int lan743x_rx_ring_init(struct lan743x_rx *rx) rx->last_head = 0; for (index = 0; index < rx->ring_size; index++) { - ret = lan743x_rx_init_ring_element(rx, index); + ret = lan743x_rx_init_ring_element(rx, index, GFP_KERNEL); if (ret) goto cleanup; } return 0; cleanup: + netif_warn(rx->adapter, ifup, rx->adapter->netdev, + "Error allocating memory for LAN743x\n"); + lan743x_rx_ring_cleanup(rx); return ret; } -- cgit v1.2.3 From cc45b96e2de7ada26520f101dada0abafa4ba997 Mon Sep 17 00:00:00 2001 From: Subbaraya Sundeep Date: Wed, 27 Oct 2021 23:02:32 +0530 Subject: octeontx2-af: Check whether ipolicers exists While displaying ingress policers information in debugfs check whether ingress policers exist in the hardware or not because some platforms(CN9XXX) do not have this feature. 
Fixes: e7d8971763f3 ("octeontx2-af: cn10k: Debugfs support for bandwidth") Signed-off-by: Subbaraya Sundeep Signed-off-by: Rakesh Babu Signed-off-by: Sunil Kovvuri Goutham Signed-off-by: David S. Miller --- drivers/net/ethernet/marvell/octeontx2/af/rvu_debugfs.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_debugfs.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_debugfs.c index 9338765da048..6c589ca9b577 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_debugfs.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_debugfs.c @@ -1719,6 +1719,10 @@ static int rvu_dbg_nix_band_prof_ctx_display(struct seq_file *m, void *unused) u16 pcifunc; char *str; + /* Ingress policers do not exist on all platforms */ + if (!nix_hw->ipolicer) + return 0; + for (layer = 0; layer < BAND_PROF_NUM_LAYERS; layer++) { if (layer == BAND_PROF_INVAL_LAYER) continue; @@ -1768,6 +1772,10 @@ static int rvu_dbg_nix_band_prof_rsrc_display(struct seq_file *m, void *unused) int layer; char *str; + /* Ingress policers do not exist on all platforms */ + if (!nix_hw->ipolicer) + return 0; + seq_puts(m, "\nBandwidth profile resource free count\n"); seq_puts(m, "=====================================\n"); for (layer = 0; layer < BAND_PROF_NUM_LAYERS; layer++) { -- cgit v1.2.3 From e77bcdd1f639809950c45234b08647ac6d3ffe7b Mon Sep 17 00:00:00 2001 From: Rakesh Babu Date: Wed, 27 Oct 2021 23:02:33 +0530 Subject: octeontx2-af: Display all enabled PF VF rsrc_alloc entries. Currently, we are using a fixed buffer size of length 2048 to display rsrc_alloc output. As a result a maximum of 2048 characters of rsrc_alloc output is displayed, which may lead sometimes to display only partial output. This patch fixes this dependency on max limit of buffer size and displays all PF VF entries. Each column of the debugfs entry "rsrc_alloc" uses a fixed width of 12 characters to print the list of LFs of each block for a PF/VF. If the length of list of LFs of a block exceeds this fixed width then the list gets truncated and displays only a part of the list. This patch fixes this by using the maximum possible length of list of LFs among all blocks of all PFs and VFs entries as the width size. Fixes: f7884097141b ("octeontx2-af: Formatting debugfs entry rsrc_alloc.") Fixes: 23205e6d06d4 ("octeontx2-af: Dump current resource provisioning status") Signed-off-by: Rakesh Babu Signed-off-by: Nithin Dabilpuram Signed-off-by: Sunil Kovvuri Goutham Signed-off-by: David S. Miller --- .../ethernet/marvell/octeontx2/af/rvu_debugfs.c | 138 ++++++++++++++++----- 1 file changed, 106 insertions(+), 32 deletions(-) diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_debugfs.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_debugfs.c index 6c589ca9b577..c7e12464c243 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_debugfs.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_debugfs.c @@ -226,18 +226,85 @@ static const struct file_operations rvu_dbg_##name##_fops = { \ static void print_nix_qsize(struct seq_file *filp, struct rvu_pfvf *pfvf); +static void get_lf_str_list(struct rvu_block block, int pcifunc, + char *lfs) +{ + int lf = 0, seq = 0, len = 0, prev_lf = block.lf.max; + + for_each_set_bit(lf, block.lf.bmap, block.lf.max) { + if (lf >= block.lf.max) + break; + + if (block.fn_map[lf] != pcifunc) + continue; + + if (lf == prev_lf + 1) { + prev_lf = lf; + seq = 1; + continue; + } + + if (seq) + len += sprintf(lfs + len, "-%d,%d", prev_lf, lf); + else + len += (len ? 
sprintf(lfs + len, ",%d", lf) : + sprintf(lfs + len, "%d", lf)); + + prev_lf = lf; + seq = 0; + } + + if (seq) + len += sprintf(lfs + len, "-%d", prev_lf); + + lfs[len] = '\0'; +} + +static int get_max_column_width(struct rvu *rvu) +{ + int index, pf, vf, lf_str_size = 12, buf_size = 256; + struct rvu_block block; + u16 pcifunc; + char *buf; + + buf = kzalloc(buf_size, GFP_KERNEL); + if (!buf) + return -ENOMEM; + + for (pf = 0; pf < rvu->hw->total_pfs; pf++) { + for (vf = 0; vf <= rvu->hw->total_vfs; vf++) { + pcifunc = pf << 10 | vf; + if (!pcifunc) + continue; + + for (index = 0; index < BLK_COUNT; index++) { + block = rvu->hw->block[index]; + if (!strlen(block.name)) + continue; + + get_lf_str_list(block, pcifunc, buf); + if (lf_str_size <= strlen(buf)) + lf_str_size = strlen(buf) + 1; + } + } + } + + kfree(buf); + return lf_str_size; +} + /* Dumps current provisioning status of all RVU block LFs */ static ssize_t rvu_dbg_rsrc_attach_status(struct file *filp, char __user *buffer, size_t count, loff_t *ppos) { - int index, off = 0, flag = 0, go_back = 0, len = 0; + int index, off = 0, flag = 0, len = 0, i = 0; struct rvu *rvu = filp->private_data; - int lf, pf, vf, pcifunc; + int bytes_not_copied = 0; struct rvu_block block; - int bytes_not_copied; - int lf_str_size = 12; + int pf, vf, pcifunc; int buf_size = 2048; + int lf_str_size; char *lfs; char *buf; @@ -249,6 +316,9 @@ static ssize_t rvu_dbg_rsrc_attach_status(struct file *filp, if (!buf) return -ENOSPC; + /* Get the maximum width of a column */ + lf_str_size = get_max_column_width(rvu); + lfs = kzalloc(lf_str_size, GFP_KERNEL); if (!lfs) { kfree(buf); @@ -262,65 +332,69 @@ static ssize_t rvu_dbg_rsrc_attach_status(struct file *filp, "%-*s", lf_str_size, rvu->hw->block[index].name); } + off += scnprintf(&buf[off], buf_size - 1 - off, "\n"); + bytes_not_copied = copy_to_user(buffer + (i * off), buf, off); + if (bytes_not_copied) + goto out; + + i++; + *ppos += off; for (pf = 0; pf < rvu->hw->total_pfs; pf++) { for (vf = 0; vf <= rvu->hw->total_vfs; vf++) { + off = 0; + flag = 0; pcifunc = pf << 10 | vf; if (!pcifunc) continue; if (vf) { sprintf(lfs, "PF%d:VF%d", pf, vf - 1); - go_back = scnprintf(&buf[off], - buf_size - 1 - off, - "%-*s", lf_str_size, lfs); + off = scnprintf(&buf[off], + buf_size - 1 - off, + "%-*s", lf_str_size, lfs); } else { sprintf(lfs, "PF%d", pf); - go_back = scnprintf(&buf[off], - buf_size - 1 - off, - "%-*s", lf_str_size, lfs); + off = scnprintf(&buf[off], + buf_size - 1 - off, + "%-*s", lf_str_size, lfs); } - off += go_back; - for (index = 0; index < BLKTYPE_MAX; index++) { + for (index = 0; index < BLK_COUNT; index++) { block = rvu->hw->block[index]; if (!strlen(block.name)) continue; len = 0; lfs[len] = '\0'; - for (lf = 0; lf < block.lf.max; lf++) { - if (block.fn_map[lf] != pcifunc) - continue; + get_lf_str_list(block, pcifunc, lfs); + if (strlen(lfs)) flag = 1; - len += sprintf(&lfs[len], "%d,", lf); - } - if (flag) - len--; - lfs[len] = '\0'; off += scnprintf(&buf[off], buf_size - 1 - off, "%-*s", lf_str_size, lfs); - if (!strlen(lfs)) - go_back += lf_str_size; } - if (!flag) - off -= go_back; - else - flag = 0; - off--; - off += scnprintf(&buf[off], buf_size - 1 - off, "\n"); + if (flag) { + off += scnprintf(&buf[off], + buf_size - 1 - off, "\n"); + bytes_not_copied = copy_to_user(buffer + + (i * off), + buf, off); + if (bytes_not_copied) + goto out; + + i++; + *ppos += off; + } } } - bytes_not_copied = copy_to_user(buffer, buf, off); +out: kfree(lfs); kfree(buf); - if (bytes_not_copied) return 
-EFAULT; - *ppos = off; - return off; + return *ppos; } RVU_DEBUG_FOPS(rsrc_status, rsrc_attach_status, NULL); -- cgit v1.2.3 From c2d4c543f74c90f883e8ec62a31973ae8807d354 Mon Sep 17 00:00:00 2001 From: Rakesh Babu Saladi Date: Wed, 27 Oct 2021 23:02:34 +0530 Subject: octeontx2-af: Fix possible null pointer dereference. This patch fixes possible null pointer dereference in files "rvu_debugfs.c" and "rvu_nix.c" Fixes: 8756828a8148 ("octeontx2-af: Add NPA aura and pool contexts to debugfs") Fixes: 9a946def264d ("octeontx2-af: Modify nix_vtag_cfg mailbox to support TX VTAG entries") Signed-off-by: Rakesh Babu Saladi Signed-off-by: Subbaraya Sundeep Signed-off-by: Sunil Goutham Signed-off-by: David S. Miller --- drivers/net/ethernet/marvell/octeontx2/af/rvu_debugfs.c | 2 +- drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c | 3 +++ 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_debugfs.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_debugfs.c index c7e12464c243..49d822a98ada 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_debugfs.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_debugfs.c @@ -578,7 +578,7 @@ static ssize_t rvu_dbg_qsize_write(struct file *filp, if (cmd_buf) ret = -EINVAL; - if (!strncmp(subtoken, "help", 4) || ret < 0) { + if (ret < 0 || !strncmp(subtoken, "help", 4)) { dev_info(rvu->dev, "Use echo <%s-lf > qsize\n", blk_string); goto qsize_write_done; } diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c index 9ef4e942e31e..6970540dc470 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c @@ -2507,6 +2507,9 @@ static void nix_free_tx_vtag_entries(struct rvu *rvu, u16 pcifunc) return; nix_hw = get_nix_hw(rvu->hw, blkaddr); + if (!nix_hw) + return; + vlan = &nix_hw->txvlan; mutex_lock(&vlan->rsrc_lock); -- cgit v1.2.3 From f3d1436d4bf8ced1c9a62a045d193a65567e1fcc Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Tue, 26 Oct 2021 04:12:38 +0100 Subject: KVM: x86: Take srcu lock in post_kvm_run_save() The Xen interrupt injection for event channels relies on accessing the guest's vcpu_info structure in __kvm_xen_has_interrupt(), through a gfn_to_hva_cache. This requires the srcu lock to be held, which is mostly the case except for this code path: [ 11.822877] WARNING: suspicious RCU usage [ 11.822965] ----------------------------- [ 11.823013] include/linux/kvm_host.h:664 suspicious rcu_dereference_check() usage! 
[ 11.823131] [ 11.823131] other info that might help us debug this: [ 11.823131] [ 11.823196] [ 11.823196] rcu_scheduler_active = 2, debug_locks = 1 [ 11.823253] 1 lock held by dom:0/90: [ 11.823292] #0: ffff998956ec8118 (&vcpu->mutex){+.+.}, at: kvm_vcpu_ioctl+0x85/0x680 [ 11.823379] [ 11.823379] stack backtrace: [ 11.823428] CPU: 2 PID: 90 Comm: dom:0 Kdump: loaded Not tainted 5.4.34+ #5 [ 11.823496] Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.12.1-0-ga5cab58e9a3f-prebuilt.qemu.org 04/01/2014 [ 11.823612] Call Trace: [ 11.823645] dump_stack+0x7a/0xa5 [ 11.823681] lockdep_rcu_suspicious+0xc5/0x100 [ 11.823726] __kvm_xen_has_interrupt+0x179/0x190 [ 11.823773] kvm_cpu_has_extint+0x6d/0x90 [ 11.823813] kvm_cpu_accept_dm_intr+0xd/0x40 [ 11.823853] kvm_vcpu_ready_for_interrupt_injection+0x20/0x30 < post_kvm_run_save() inlined here > [ 11.823906] kvm_arch_vcpu_ioctl_run+0x135/0x6a0 [ 11.823947] kvm_vcpu_ioctl+0x263/0x680 Fixes: 40da8ccd724f ("KVM: x86/xen: Add event channel interrupt vector upcall") Signed-off-by: David Woodhouse Cc: stable@vger.kernel.org Message-Id: <606aaaf29fca3850a63aa4499826104e77a72346.camel@infradead.org> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/x86.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 5c82eff19e4a..ffd9dd7d5d14 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -8783,9 +8783,17 @@ static void post_kvm_run_save(struct kvm_vcpu *vcpu) kvm_run->cr8 = kvm_get_cr8(vcpu); kvm_run->apic_base = kvm_get_apic_base(vcpu); + + /* + * The call to kvm_ready_for_interrupt_injection() may end up in + * kvm_xen_has_interrupt() which may require the srcu lock to be + * held, to protect against changes in the vcpu_info address. + */ + vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu); kvm_run->ready_for_interrupt_injection = pic_in_kernel(vcpu->kvm) || kvm_vcpu_ready_for_interrupt_injection(vcpu); + srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx); if (is_smm(vcpu)) kvm_run->flags |= KVM_RUN_X86_SMM; -- cgit v1.2.3 From 27de809a3d83a6199664479ebb19712533d6fd9b Mon Sep 17 00:00:00 2001 From: Björn Töpel Date: Thu, 28 Oct 2021 14:51:15 +0200 Subject: riscv, bpf: Fix potential NULL dereference MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The bpf_jit_binary_free() function requires a non-NULL argument. When the RISC-V BPF JIT fails to converge in NR_JIT_ITERATIONS steps, jit_data->header will be NULL, which triggers a NULL dereference. Avoid this by checking the argument, prior calling the function. 
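As the changelog notes, the destructor requires a non-NULL argument, so the error path may only call it for something that was actually allocated. A standalone sketch of that guard, with invented structure and helper names:

#include <stdio.h>
#include <stdlib.h>

struct jit_header { size_t size; unsigned char *image; };

/* Stand-in destructor that, like bpf_jit_binary_free(), assumes a
 * non-NULL argument and dereferences it immediately. */
static void jit_header_free(struct jit_header *h)
{
    free(h->image);
    free(h);
}

/* Error path: the header may never have been allocated (e.g. the JIT
 * failed to converge), so guard the call. */
static void jit_bail_out(struct jit_header *h)
{
    if (h)
        jit_header_free(h);
}

int main(void)
{
    jit_bail_out(NULL);     /* safe: nothing was allocated */
    printf("error path handled without touching a NULL header\n");
    return 0;
}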
Fixes: ca6cb5447cec ("riscv, bpf: Factor common RISC-V JIT code") Signed-off-by: Björn Töpel Acked-by: Daniel Borkmann Link: https://lore.kernel.org/r/20211028125115.514587-1-bjorn@kernel.org Signed-off-by: Jakub Kicinski --- arch/riscv/net/bpf_jit_core.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/riscv/net/bpf_jit_core.c b/arch/riscv/net/bpf_jit_core.c index 0fee2cbaaf53..753d85bdfad0 100644 --- a/arch/riscv/net/bpf_jit_core.c +++ b/arch/riscv/net/bpf_jit_core.c @@ -125,7 +125,8 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog) if (i == NR_JIT_ITERATIONS) { pr_err("bpf-jit: image did not converge in <%d passes!\n", i); - bpf_jit_binary_free(jit_data->header); + if (jit_data->header) + bpf_jit_binary_free(jit_data->header); prog = orig_prog; goto out_offset; } -- cgit v1.2.3 From f7cc8890f30d3ddc785e2b2ddc647da5b4b3c3ec Mon Sep 17 00:00:00 2001 From: Davide Caratti Date: Wed, 27 Oct 2021 13:38:55 -0700 Subject: mptcp: fix corrupt receiver key in MPC + data + checksum MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit using packetdrill it's possible to observe that the receiver key contains random values when clients transmit MP_CAPABLE with data and checksum (as specified in RFC8684 §3.1). Fix the layout of mptcp_out_options, to avoid using the skb extension copy when writing the MP_CAPABLE sub-option. Fixes: d7b269083786 ("mptcp: shrink mptcp_out_options struct") Closes: https://github.com/multipath-tcp/mptcp_net-next/issues/233 Reported-by: Poorva Sonparote Signed-off-by: Davide Caratti Signed-off-by: Mat Martineau Link: https://lore.kernel.org/r/20211027203855.264600-1-mathew.j.martineau@linux.intel.com Signed-off-by: Jakub Kicinski --- include/net/mptcp.h | 4 ++++ net/mptcp/options.c | 39 ++++++++++++++++++++++++--------------- 2 files changed, 28 insertions(+), 15 deletions(-) diff --git a/include/net/mptcp.h b/include/net/mptcp.h index 6026bbefbffd..3214848402ec 100644 --- a/include/net/mptcp.h +++ b/include/net/mptcp.h @@ -69,6 +69,10 @@ struct mptcp_out_options { struct { u64 sndr_key; u64 rcvr_key; + u64 data_seq; + u32 subflow_seq; + u16 data_len; + __sum16 csum; }; struct { struct mptcp_addr_info addr; diff --git a/net/mptcp/options.c b/net/mptcp/options.c index c41273cefc51..f0f22eb4fd5f 100644 --- a/net/mptcp/options.c +++ b/net/mptcp/options.c @@ -485,11 +485,11 @@ static bool mptcp_established_options_mp(struct sock *sk, struct sk_buff *skb, mpext = mptcp_get_ext(skb); data_len = mpext ? 
mpext->data_len : 0; - /* we will check ext_copy.data_len in mptcp_write_options() to + /* we will check ops->data_len in mptcp_write_options() to * discriminate between TCPOLEN_MPTCP_MPC_ACK_DATA and * TCPOLEN_MPTCP_MPC_ACK */ - opts->ext_copy.data_len = data_len; + opts->data_len = data_len; opts->suboptions = OPTION_MPTCP_MPC_ACK; opts->sndr_key = subflow->local_key; opts->rcvr_key = subflow->remote_key; @@ -505,9 +505,9 @@ static bool mptcp_established_options_mp(struct sock *sk, struct sk_buff *skb, len = TCPOLEN_MPTCP_MPC_ACK_DATA; if (opts->csum_reqd) { /* we need to propagate more info to csum the pseudo hdr */ - opts->ext_copy.data_seq = mpext->data_seq; - opts->ext_copy.subflow_seq = mpext->subflow_seq; - opts->ext_copy.csum = mpext->csum; + opts->data_seq = mpext->data_seq; + opts->subflow_seq = mpext->subflow_seq; + opts->csum = mpext->csum; len += TCPOLEN_MPTCP_DSS_CHECKSUM; } *size = ALIGN(len, 4); @@ -1227,7 +1227,7 @@ static void mptcp_set_rwin(const struct tcp_sock *tp) WRITE_ONCE(msk->rcv_wnd_sent, ack_seq); } -static u16 mptcp_make_csum(const struct mptcp_ext *mpext) +static u16 __mptcp_make_csum(u64 data_seq, u32 subflow_seq, u16 data_len, __sum16 sum) { struct csum_pseudo_header header; __wsum csum; @@ -1237,15 +1237,21 @@ static u16 mptcp_make_csum(const struct mptcp_ext *mpext) * always the 64-bit value, irrespective of what length is used in the * DSS option itself. */ - header.data_seq = cpu_to_be64(mpext->data_seq); - header.subflow_seq = htonl(mpext->subflow_seq); - header.data_len = htons(mpext->data_len); + header.data_seq = cpu_to_be64(data_seq); + header.subflow_seq = htonl(subflow_seq); + header.data_len = htons(data_len); header.csum = 0; - csum = csum_partial(&header, sizeof(header), ~csum_unfold(mpext->csum)); + csum = csum_partial(&header, sizeof(header), ~csum_unfold(sum)); return (__force u16)csum_fold(csum); } +static u16 mptcp_make_csum(const struct mptcp_ext *mpext) +{ + return __mptcp_make_csum(mpext->data_seq, mpext->subflow_seq, mpext->data_len, + mpext->csum); +} + void mptcp_write_options(__be32 *ptr, const struct tcp_sock *tp, struct mptcp_out_options *opts) { @@ -1337,7 +1343,7 @@ void mptcp_write_options(__be32 *ptr, const struct tcp_sock *tp, len = TCPOLEN_MPTCP_MPC_SYN; } else if (OPTION_MPTCP_MPC_SYNACK & opts->suboptions) { len = TCPOLEN_MPTCP_MPC_SYNACK; - } else if (opts->ext_copy.data_len) { + } else if (opts->data_len) { len = TCPOLEN_MPTCP_MPC_ACK_DATA; if (opts->csum_reqd) len += TCPOLEN_MPTCP_DSS_CHECKSUM; @@ -1366,14 +1372,17 @@ void mptcp_write_options(__be32 *ptr, const struct tcp_sock *tp, put_unaligned_be64(opts->rcvr_key, ptr); ptr += 2; - if (!opts->ext_copy.data_len) + if (!opts->data_len) goto mp_capable_done; if (opts->csum_reqd) { - put_unaligned_be32(opts->ext_copy.data_len << 16 | - mptcp_make_csum(&opts->ext_copy), ptr); + put_unaligned_be32(opts->data_len << 16 | + __mptcp_make_csum(opts->data_seq, + opts->subflow_seq, + opts->data_len, + opts->csum), ptr); } else { - put_unaligned_be32(opts->ext_copy.data_len << 16 | + put_unaligned_be32(opts->data_len << 16 | TCPOPT_NOP << 8 | TCPOPT_NOP, ptr); } ptr += 1; -- cgit v1.2.3 From 35392da51b1ab7ba0c63de0a553e2a93c2314266 Mon Sep 17 00:00:00 2001 From: Guangbin Huang Date: Thu, 28 Oct 2021 22:06:24 +0800 Subject: Revert "net: hns3: fix pause config problem after autoneg disabled" This reverts commit 3bda2e5df476417b6d08967e2d84234a59d57b1c. 
According to discussion with Andrew as follow: https://lore.kernel.org/netdev/09eda9fe-196b-006b-6f01-f54e75715961@huawei.com/ HNS3 driver needs to separate pause autoneg from general autoneg, so revert this incorrect patch. Signed-off-by: Guangbin Huang Link: https://lore.kernel.org/r/20211028140624.53149-1-huangguangbin2@huawei.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/hisilicon/hns3/hnae3.h | 1 - drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c | 33 ++++++---------------- .../ethernet/hisilicon/hns3/hns3pf/hclge_main.c | 30 -------------------- .../net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c | 2 +- .../net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.h | 1 - 5 files changed, 10 insertions(+), 57 deletions(-) diff --git a/drivers/net/ethernet/hisilicon/hns3/hnae3.h b/drivers/net/ethernet/hisilicon/hns3/hnae3.h index da3a593f6a56..d701451596c8 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hnae3.h +++ b/drivers/net/ethernet/hisilicon/hns3/hnae3.h @@ -568,7 +568,6 @@ struct hnae3_ae_ops { u32 *auto_neg, u32 *rx_en, u32 *tx_en); int (*set_pauseparam)(struct hnae3_handle *handle, u32 auto_neg, u32 rx_en, u32 tx_en); - int (*restore_pauseparam)(struct hnae3_handle *handle); int (*set_autoneg)(struct hnae3_handle *handle, bool enable); int (*get_autoneg)(struct hnae3_handle *handle); diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c b/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c index 7d92dd273ed7..5ebd96f6833d 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c @@ -824,26 +824,6 @@ static int hns3_check_ksettings_param(const struct net_device *netdev, return 0; } -static int hns3_set_phy_link_ksettings(struct net_device *netdev, - const struct ethtool_link_ksettings *cmd) -{ - struct hnae3_handle *handle = hns3_get_handle(netdev); - const struct hnae3_ae_ops *ops = handle->ae_algo->ops; - int ret; - - if (cmd->base.speed == SPEED_1000 && - cmd->base.autoneg == AUTONEG_DISABLE) - return -EINVAL; - - if (cmd->base.autoneg == AUTONEG_DISABLE && ops->restore_pauseparam) { - ret = ops->restore_pauseparam(handle); - if (ret) - return ret; - } - - return phy_ethtool_ksettings_set(netdev->phydev, cmd); -} - static int hns3_set_link_ksettings(struct net_device *netdev, const struct ethtool_link_ksettings *cmd) { @@ -862,11 +842,16 @@ static int hns3_set_link_ksettings(struct net_device *netdev, cmd->base.autoneg, cmd->base.speed, cmd->base.duplex); /* Only support ksettings_set for netdev with phy attached for now */ - if (netdev->phydev) - return hns3_set_phy_link_ksettings(netdev, cmd); - else if (test_bit(HNAE3_DEV_SUPPORT_PHY_IMP_B, ae_dev->caps) && - ops->set_phy_link_ksettings) + if (netdev->phydev) { + if (cmd->base.speed == SPEED_1000 && + cmd->base.autoneg == AUTONEG_DISABLE) + return -EINVAL; + + return phy_ethtool_ksettings_set(netdev->phydev, cmd); + } else if (test_bit(HNAE3_DEV_SUPPORT_PHY_IMP_B, ae_dev->caps) && + ops->set_phy_link_ksettings) { return ops->set_phy_link_ksettings(handle, cmd); + } if (ae_dev->dev_version < HNAE3_DEVICE_VERSION_V2) return -EOPNOTSUPP; diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c index 269e579762b2..d891390d492f 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c @@ -10998,35 +10998,6 @@ static int hclge_set_pauseparam(struct hnae3_handle *handle, u32 auto_neg, return -EOPNOTSUPP; } -static 
int hclge_restore_pauseparam(struct hnae3_handle *handle) -{ - struct hclge_vport *vport = hclge_get_vport(handle); - struct hclge_dev *hdev = vport->back; - u32 auto_neg, rx_pause, tx_pause; - int ret; - - hclge_get_pauseparam(handle, &auto_neg, &rx_pause, &tx_pause); - /* when autoneg is disabled, the pause setting of phy has no effect - * unless the link goes down. - */ - ret = phy_suspend(hdev->hw.mac.phydev); - if (ret) - return ret; - - phy_set_asym_pause(hdev->hw.mac.phydev, rx_pause, tx_pause); - - ret = phy_resume(hdev->hw.mac.phydev); - if (ret) - return ret; - - ret = hclge_mac_pause_setup_hw(hdev); - if (ret) - dev_err(&hdev->pdev->dev, - "restore pauseparam error, ret = %d.\n", ret); - - return ret; -} - static void hclge_get_ksettings_an_result(struct hnae3_handle *handle, u8 *auto_neg, u32 *speed, u8 *duplex) { @@ -12990,7 +12961,6 @@ static const struct hnae3_ae_ops hclge_ops = { .halt_autoneg = hclge_halt_autoneg, .get_pauseparam = hclge_get_pauseparam, .set_pauseparam = hclge_set_pauseparam, - .restore_pauseparam = hclge_restore_pauseparam, .set_mtu = hclge_set_mtu, .reset_queue = hclge_reset_tqp, .get_stats = hclge_get_stats, diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c index 124791e4bfee..95074e91a846 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c @@ -1435,7 +1435,7 @@ static int hclge_bp_setup_hw(struct hclge_dev *hdev, u8 tc) return 0; } -int hclge_mac_pause_setup_hw(struct hclge_dev *hdev) +static int hclge_mac_pause_setup_hw(struct hclge_dev *hdev) { bool tx_en, rx_en; diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.h index 4b2c3a788980..2ee9b795f71d 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.h +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.h @@ -244,7 +244,6 @@ int hclge_tm_get_pri_weight(struct hclge_dev *hdev, u8 pri_id, u8 *weight); int hclge_tm_get_pri_shaper(struct hclge_dev *hdev, u8 pri_id, enum hclge_opcode_type cmd, struct hclge_tm_shaper_para *para); -int hclge_mac_pause_setup_hw(struct hclge_dev *hdev); int hclge_tm_get_q_to_qs_map(struct hclge_dev *hdev, u16 q_id, u16 *qset_id); int hclge_tm_get_q_to_tc(struct hclge_dev *hdev, u16 q_id, u8 *tc_id); int hclge_tm_get_pg_to_pri_map(struct hclge_dev *hdev, u8 pg_id, -- cgit v1.2.3 From b112166a894db446f47a8c31781b037f28ac1721 Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Thu, 28 Oct 2021 19:08:57 +0200 Subject: MAINTAINERS: dri-devel is for all of drivers/gpu Somehow we only have a list of subdirectories, which apparently made it harder for folks to find the gpu maintainers. Fix that. 
References: https://lore.kernel.org/dri-devel/YXrAAZlxxStNFG%2FK@phenom.ffwll.local/ Signed-off-by: Daniel Vetter Cc: David Airlie Cc: Daniel Vetter Cc: Steven Rostedt Reviewed-by: Alex Deucher Signed-off-by: Dave Airlie Link: https://patchwork.freedesktop.org/patch/msgid/20211028170857.4029606-1-daniel.vetter@ffwll.ch --- MAINTAINERS | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/MAINTAINERS b/MAINTAINERS index f26920f0fa65..c2fbfc53c749 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -6147,8 +6147,7 @@ T: git git://anongit.freedesktop.org/drm/drm F: Documentation/devicetree/bindings/display/ F: Documentation/devicetree/bindings/gpu/ F: Documentation/gpu/ -F: drivers/gpu/drm/ -F: drivers/gpu/vga/ +F: drivers/gpu/ F: include/drm/ F: include/linux/vga* F: include/uapi/drm/ -- cgit v1.2.3 From 90935eb303e0d12f3d3d0383262e65290321f5f6 Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Thu, 28 Oct 2021 21:51:49 +0200 Subject: mmc: tmio: reenable card irqs after the reset callback The reset callback may clear the internal card detect interrupts, so make sure to reenable them if needed. Fixes: b4d86f37eacb ("mmc: renesas_sdhi: do hard reset if possible") Reported-by: Biju Das Signed-off-by: Wolfram Sang Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20211028195149.8003-1-wsa+renesas@sang-engineering.com Signed-off-by: Ulf Hansson --- drivers/mmc/host/tmio_mmc_core.c | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/drivers/mmc/host/tmio_mmc_core.c b/drivers/mmc/host/tmio_mmc_core.c index 7dfc26f48c18..e2affa52ef46 100644 --- a/drivers/mmc/host/tmio_mmc_core.c +++ b/drivers/mmc/host/tmio_mmc_core.c @@ -195,6 +195,10 @@ static void tmio_mmc_reset(struct tmio_mmc_host *host) sd_ctrl_write32_as_16_and_16(host, CTL_IRQ_MASK, host->sdcard_irq_mask_all); host->sdcard_irq_mask = host->sdcard_irq_mask_all; + if (host->native_hotplug) + tmio_mmc_enable_mmc_irqs(host, + TMIO_STAT_CARD_REMOVE | TMIO_STAT_CARD_INSERT); + tmio_mmc_set_bus_width(host, host->mmc->ios.bus_width); if (host->pdata->flags & TMIO_MMC_SDIO_IRQ) { @@ -956,8 +960,15 @@ static void tmio_mmc_set_ios(struct mmc_host *mmc, struct mmc_ios *ios) case MMC_POWER_OFF: tmio_mmc_power_off(host); /* For R-Car Gen2+, we need to reset SDHI specific SCC */ - if (host->pdata->flags & TMIO_MMC_MIN_RCAR2) + if (host->pdata->flags & TMIO_MMC_MIN_RCAR2) { host->reset(host); + + if (host->native_hotplug) + tmio_mmc_enable_mmc_irqs(host, + TMIO_STAT_CARD_REMOVE | + TMIO_STAT_CARD_INSERT); + } + host->set_clock(host, 0); break; case MMC_POWER_UP: @@ -1185,10 +1196,6 @@ int tmio_mmc_host_probe(struct tmio_mmc_host *_host) _host->set_clock(_host, 0); tmio_mmc_reset(_host); - if (_host->native_hotplug) - tmio_mmc_enable_mmc_irqs(_host, - TMIO_STAT_CARD_REMOVE | TMIO_STAT_CARD_INSERT); - spin_lock_init(&_host->lock); mutex_init(&_host->ios_lock); -- cgit v1.2.3 From 8dcb3060d81dbfa8d954a2ec64ef7ca330f5bb4d Mon Sep 17 00:00:00 2001 From: Shakeel Butt Date: Thu, 28 Oct 2021 14:36:04 -0700 Subject: memcg: page_alloc: skip bulk allocator for __GFP_ACCOUNT Commit 5c1f4e690eec ("mm/vmalloc: switch to bulk allocator in __vmalloc_area_node()") switched to bulk page allocator for order 0 allocation backing vmalloc. However bulk page allocator does not support __GFP_ACCOUNT allocations and there are several users of kvmalloc(__GFP_ACCOUNT). For now make __GFP_ACCOUNT allocations bypass bulk page allocator. 
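As a rough illustration of the kind of caller affected (a hypothetical example, not taken from any particular subsystem): an accounted allocation that is too large for kmalloc falls back to vmalloc and is backed by order-0 pages in __vmalloc_area_node(), which is where the bulk allocator would otherwise be used without charging the memcg.

	/* hypothetical accounted allocation that ends up backed by vmalloc */
	struct entry *table;

	table = kvmalloc_array(nr_entries, sizeof(*table),
			       GFP_KERNEL_ACCOUNT | __GFP_ZERO);
	if (!table)
		return -ENOMEM;
	/* ... use the table ... */
	kvfree(table);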
In future if there is workload that can be significantly improved with the bulk page allocator with __GFP_ACCCOUNT support, we can revisit the decision. Link: https://lkml.kernel.org/r/20211014151607.2171970-1-shakeelb@google.com Fixes: 5c1f4e690eec ("mm/vmalloc: switch to bulk allocator in __vmalloc_area_node()") Signed-off-by: Shakeel Butt Reported-by: Vasily Averin Tested-by: Vasily Averin Acked-by: David Hildenbrand Acked-by: Michal Hocko Acked-by: Roman Gushchin Acked-by: Johannes Weiner Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/page_alloc.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/mm/page_alloc.c b/mm/page_alloc.c index b37435c274cf..3ec39552d00f 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -5223,6 +5223,10 @@ unsigned long __alloc_pages_bulk(gfp_t gfp, int preferred_nid, if (unlikely(page_array && nr_pages - nr_populated == 0)) goto out; + /* Bulk allocator does not support memcg accounting. */ + if (memcg_kmem_enabled() && (gfp & __GFP_ACCOUNT)) + goto failed; + /* Use the single page allocator for one page. */ if (nr_pages - nr_populated == 1) goto failed; -- cgit v1.2.3 From c7cb42e94473aafe553c0f2a3d8ca904599399ed Mon Sep 17 00:00:00 2001 From: Yang Shi Date: Thu, 28 Oct 2021 14:36:07 -0700 Subject: mm: hwpoison: remove the unnecessary THP check When handling THP hwpoison checked if the THP is in allocation or free stage since hwpoison may mistreat it as hugetlb page. After commit 415c64c1453a ("mm/memory-failure: split thp earlier in memory error handling") the problem has been fixed, so this check is no longer needed. Remove it. The side effect of the removal is hwpoison may report unsplit THP instead of unknown error for shmem THP. It seems not like a big deal. The following patch "mm: filemap: check if THP has hwpoisoned subpage for PMD page fault" depends on this, which fixes shmem THP with hwpoisoned subpage(s) are mapped PMD wrongly. So this patch needs to be backported to -stable as well. Link: https://lkml.kernel.org/r/20211020210755.23964-2-shy828301@gmail.com Signed-off-by: Yang Shi Suggested-by: Naoya Horiguchi Acked-by: Naoya Horiguchi Cc: Hugh Dickins Cc: Kirill A. Shutemov Cc: Matthew Wilcox Cc: Oscar Salvador Cc: Peter Xu Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/memory-failure.c | 14 -------------- 1 file changed, 14 deletions(-) diff --git a/mm/memory-failure.c b/mm/memory-failure.c index 3e6449f2102a..73f68699e7ab 100644 --- a/mm/memory-failure.c +++ b/mm/memory-failure.c @@ -1147,20 +1147,6 @@ static int __get_hwpoison_page(struct page *page) if (!HWPoisonHandlable(head)) return -EBUSY; - if (PageTransHuge(head)) { - /* - * Non anonymous thp exists only in allocation/free time. We - * can't handle such a case correctly, so let's give it up. - * This should be better than triggering BUG_ON when kernel - * tries to touch the "partially handled" page. - */ - if (!PageAnon(head)) { - pr_err("Memory failure: %#lx: non anonymous thp\n", - page_to_pfn(page)); - return 0; - } - } - if (get_page_unless_zero(head)) { if (head == compound_head(page)) return 1; -- cgit v1.2.3 From eac96c3efdb593df1a57bb5b95dbe037bfa9a522 Mon Sep 17 00:00:00 2001 From: Yang Shi Date: Thu, 28 Oct 2021 14:36:11 -0700 Subject: mm: filemap: check if THP has hwpoisoned subpage for PMD page fault When handling shmem page fault the THP with corrupted subpage could be PMD mapped if certain conditions are satisfied. But kernel is supposed to send SIGBUS when trying to map hwpoisoned page. 
There are two paths which may do PMD map: fault around and regular fault. Before commit f9ce0be71d1f ("mm: Cleanup faultaround and finish_fault() codepaths") the thing was even worse in fault around path. The THP could be PMD mapped as long as the VMA fits regardless what subpage is accessed and corrupted. After this commit as long as head page is not corrupted the THP could be PMD mapped. In the regular fault path the THP could be PMD mapped as long as the corrupted page is not accessed and the VMA fits. This loophole could be fixed by iterating every subpage to check if any of them is hwpoisoned or not, but it is somewhat costly in page fault path. So introduce a new page flag called HasHWPoisoned on the first tail page. It indicates the THP has hwpoisoned subpage(s). It is set if any subpage of THP is found hwpoisoned by memory failure and after the refcount is bumped successfully, then cleared when the THP is freed or split. The soft offline path doesn't need this since soft offline handler just marks a subpage hwpoisoned when the subpage is migrated successfully. But shmem THP didn't get split then migrated at all. Link: https://lkml.kernel.org/r/20211020210755.23964-3-shy828301@gmail.com Fixes: 800d8c63b2e9 ("shmem: add huge pages support") Signed-off-by: Yang Shi Reviewed-by: Naoya Horiguchi Suggested-by: Kirill A. Shutemov Cc: Hugh Dickins Cc: Matthew Wilcox Cc: Oscar Salvador Cc: Peter Xu Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/page-flags.h | 23 +++++++++++++++++++++++ mm/huge_memory.c | 2 ++ mm/memory-failure.c | 14 ++++++++++++++ mm/memory.c | 9 +++++++++ mm/page_alloc.c | 4 +++- 5 files changed, 51 insertions(+), 1 deletion(-) diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h index a558d67ee86f..fbfd3fad48f2 100644 --- a/include/linux/page-flags.h +++ b/include/linux/page-flags.h @@ -171,6 +171,15 @@ enum pageflags { /* Compound pages. Stored in first tail page's flags */ PG_double_map = PG_workingset, +#ifdef CONFIG_MEMORY_FAILURE + /* + * Compound pages. Stored in first tail page's flags. + * Indicates that at least one subpage is hwpoisoned in the + * THP. + */ + PG_has_hwpoisoned = PG_mappedtodisk, +#endif + /* non-lru isolated movable page */ PG_isolated = PG_reclaim, @@ -668,6 +677,20 @@ PAGEFLAG_FALSE(DoubleMap) TESTSCFLAG_FALSE(DoubleMap) #endif +#if defined(CONFIG_MEMORY_FAILURE) && defined(CONFIG_TRANSPARENT_HUGEPAGE) +/* + * PageHasHWPoisoned indicates that at least one subpage is hwpoisoned in the + * compound page. + * + * This flag is set by hwpoison handler. Cleared by THP split or free page. + */ +PAGEFLAG(HasHWPoisoned, has_hwpoisoned, PF_SECOND) + TESTSCFLAG(HasHWPoisoned, has_hwpoisoned, PF_SECOND) +#else +PAGEFLAG_FALSE(HasHWPoisoned) + TESTSCFLAG_FALSE(HasHWPoisoned) +#endif + /* * Check if a page is currently marked HWPoisoned. 
Note that this check is * best effort only and inherently racy: there is no way to synchronize with diff --git a/mm/huge_memory.c b/mm/huge_memory.c index 92192cb086c7..c5142d237e48 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -2426,6 +2426,8 @@ static void __split_huge_page(struct page *page, struct list_head *list, /* lock lru list/PageCompound, ref frozen by page_ref_freeze */ lruvec = lock_page_lruvec(head); + ClearPageHasHWPoisoned(head); + for (i = nr - 1; i >= 1; i--) { __split_huge_page_tail(head, i, lruvec, list); /* Some pages can be beyond EOF: drop them from page cache */ diff --git a/mm/memory-failure.c b/mm/memory-failure.c index 73f68699e7ab..bdbbb32211a5 100644 --- a/mm/memory-failure.c +++ b/mm/memory-failure.c @@ -1694,6 +1694,20 @@ try_again: } if (PageTransHuge(hpage)) { + /* + * The flag must be set after the refcount is bumped + * otherwise it may race with THP split. + * And the flag can't be set in get_hwpoison_page() since + * it is called by soft offline too and it is just called + * for !MF_COUNT_INCREASE. So here seems to be the best + * place. + * + * Don't need care about the above error handling paths for + * get_hwpoison_page() since they handle either free page + * or unhandlable page. The refcount is bumped iff the + * page is a valid handlable page. + */ + SetPageHasHWPoisoned(hpage); if (try_to_split_thp_page(p, "Memory Failure") < 0) { action_result(pfn, MF_MSG_UNSPLIT_THP, MF_IGNORED); res = -EBUSY; diff --git a/mm/memory.c b/mm/memory.c index adf9b9ef8277..c52be6d6b605 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -3906,6 +3906,15 @@ vm_fault_t do_set_pmd(struct vm_fault *vmf, struct page *page) if (compound_order(page) != HPAGE_PMD_ORDER) return ret; + /* + * Just backoff if any subpage of a THP is corrupted otherwise + * the corrupted page may mapped by PMD silently to escape the + * check. This kind of THP just can be PTE mapped. Access to + * the corrupted subpage should trigger SIGBUS as expected. + */ + if (unlikely(PageHasHWPoisoned(page))) + return ret; + /* * Archs like ppc64 need additional space to store information * related to pte entry. Use the preallocated table for that. diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 3ec39552d00f..23d3339ac4e8 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -1312,8 +1312,10 @@ static __always_inline bool free_pages_prepare(struct page *page, VM_BUG_ON_PAGE(compound && compound_order(page) != order, page); - if (compound) + if (compound) { ClearPageDoubleMap(page); + ClearPageHasHWPoisoned(page); + } for (i = 1; i < (1 << order); i++) { if (compound) bad += free_tail_pages_check(page, page + i); -- cgit v1.2.3 From 337546e83fc7e50917f44846beee936abb9c9f1f Mon Sep 17 00:00:00 2001 From: Suren Baghdasaryan Date: Thu, 28 Oct 2021 14:36:14 -0700 Subject: mm/oom_kill.c: prevent a race between process_mrelease and exit_mmap Race between process_mrelease and exit_mmap, where free_pgtables is called while __oom_reap_task_mm is in progress, leads to kernel crash during pte_offset_map_lock call. oom-reaper avoids this race by setting MMF_OOM_VICTIM flag and causing exit_mmap to take and release mmap_write_lock, blocking it until oom-reaper releases mmap_read_lock. Reusing MMF_OOM_VICTIM for process_mrelease would be the simplest way to fix this race, however that would be considered a hack. Fix this race by elevating mm->mm_users and preventing exit_mmap from executing until process_mrelease is finished. Patch slightly refactors the code to adapt for a possible mmget_not_zero failure. 
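In outline, the pinning pattern the fix relies on looks roughly like this (simplified, reaping details omitted): mmget_not_zero() takes an mm_users reference, and exit_mmap() only runs once the last such reference is dropped, so free_pgtables() cannot race with the reaper while the reference is held.

	struct mm_struct *mm = NULL;

	task_lock(p);
	if (mmget_not_zero(p->mm)) {	/* pin mm_users, keeps exit_mmap() away */
		mm = p->mm;
		/* decide whether the task's memory can be reaped */
	}
	task_unlock(p);

	if (mm) {
		/* ... reap under mmap_read_lock(mm) ... */
		mmput(mm);	/* may run exit_mmap() if we were the last user */
	}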
This fix has considerable negative impact on process_mrelease performance and will likely need later optimization. Link: https://lkml.kernel.org/r/20211022014658.263508-1-surenb@google.com Fixes: 884a7e5964e0 ("mm: introduce process_mrelease system call") Signed-off-by: Suren Baghdasaryan Acked-by: Michal Hocko Cc: David Rientjes Cc: Matthew Wilcox (Oracle) Cc: Johannes Weiner Cc: Roman Gushchin Cc: Rik van Riel Cc: Minchan Kim Cc: Christian Brauner Cc: Christoph Hellwig Cc: Oleg Nesterov Cc: David Hildenbrand Cc: Jann Horn Cc: Shakeel Butt Cc: Andy Lutomirski Cc: Christian Brauner Cc: Florian Weimer Cc: Jan Engelhardt Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/oom_kill.c | 23 ++++++++++++----------- 1 file changed, 12 insertions(+), 11 deletions(-) diff --git a/mm/oom_kill.c b/mm/oom_kill.c index 831340e7ad8b..989f35a2bbb1 100644 --- a/mm/oom_kill.c +++ b/mm/oom_kill.c @@ -1150,7 +1150,7 @@ SYSCALL_DEFINE2(process_mrelease, int, pidfd, unsigned int, flags) struct task_struct *task; struct task_struct *p; unsigned int f_flags; - bool reap = true; + bool reap = false; struct pid *pid; long ret = 0; @@ -1177,15 +1177,15 @@ SYSCALL_DEFINE2(process_mrelease, int, pidfd, unsigned int, flags) goto put_task; } - mm = p->mm; - mmgrab(mm); - - /* If the work has been done already, just exit with success */ - if (test_bit(MMF_OOM_SKIP, &mm->flags)) - reap = false; - else if (!task_will_free_mem(p)) { - reap = false; - ret = -EINVAL; + if (mmget_not_zero(p->mm)) { + mm = p->mm; + if (task_will_free_mem(p)) + reap = true; + else { + /* Error only if the work has not been done already */ + if (!test_bit(MMF_OOM_SKIP, &mm->flags)) + ret = -EINVAL; + } } task_unlock(p); @@ -1201,7 +1201,8 @@ SYSCALL_DEFINE2(process_mrelease, int, pidfd, unsigned int, flags) mmap_read_unlock(mm); drop_mm: - mmdrop(mm); + if (mm) + mmput(mm); put_task: put_task_struct(task); put_pid: -- cgit v1.2.3 From 6f1b228529ae49b0f85ab89bcdb6c365df401558 Mon Sep 17 00:00:00 2001 From: Gautham Ananthakrishna Date: Thu, 28 Oct 2021 14:36:17 -0700 Subject: ocfs2: fix race between searching chunks and release journal_head from buffer_head Encountered a race between ocfs2_test_bg_bit_allocatable() and jbd2_journal_put_journal_head() resulting in the below vmcore. PID: 106879 TASK: ffff880244ba9c00 CPU: 2 COMMAND: "loop3" Call trace: panic oops_end no_context __bad_area_nosemaphore bad_area_nosemaphore __do_page_fault do_page_fault page_fault [exception RIP: ocfs2_block_group_find_clear_bits+316] ocfs2_block_group_find_clear_bits [ocfs2] ocfs2_cluster_group_search [ocfs2] ocfs2_search_chain [ocfs2] ocfs2_claim_suballoc_bits [ocfs2] __ocfs2_claim_clusters [ocfs2] ocfs2_claim_clusters [ocfs2] ocfs2_local_alloc_slide_window [ocfs2] ocfs2_reserve_local_alloc_bits [ocfs2] ocfs2_reserve_clusters_with_limit [ocfs2] ocfs2_reserve_clusters [ocfs2] ocfs2_lock_refcount_allocators [ocfs2] ocfs2_make_clusters_writable [ocfs2] ocfs2_replace_cow [ocfs2] ocfs2_refcount_cow [ocfs2] ocfs2_file_write_iter [ocfs2] lo_rw_aio loop_queue_work kthread_worker_fn kthread ret_from_fork When ocfs2_test_bg_bit_allocatable() called bh2jh(bg_bh), the bg_bh->b_private NULL as jbd2_journal_put_journal_head() raced and released the jounal head from the buffer head. Needed to take bit lock for the bit 'BH_JournalHead' to fix this race. 
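In condensed form, the safe access pattern applied by the fix (simplified from the hunk below) is to take the BH_JournalHead bit lock and re-check buffer_jbd() before dereferencing the journal head, so jbd2_journal_put_journal_head() cannot free it underneath us:

	jbd_lock_bh_journal_head(bg_bh);
	if (buffer_jbd(bg_bh)) {
		struct journal_head *jh = bh2jh(bg_bh);

		/* jh and jh->b_committed_data are stable under the bit lock */
	}
	jbd_unlock_bh_journal_head(bg_bh);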
Link: https://lkml.kernel.org/r/1634820718-6043-1-git-send-email-gautham.ananthakrishna@oracle.com Signed-off-by: Gautham Ananthakrishna Reviewed-by: Joseph Qi Cc: Cc: Mark Fasheh Cc: Joel Becker Cc: Junxiao Bi Cc: Changwei Ge Cc: Gang He Cc: Jun Piao Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/ocfs2/suballoc.c | 22 +++++++++++++--------- 1 file changed, 13 insertions(+), 9 deletions(-) diff --git a/fs/ocfs2/suballoc.c b/fs/ocfs2/suballoc.c index 8521942f5af2..481017e1dac5 100644 --- a/fs/ocfs2/suballoc.c +++ b/fs/ocfs2/suballoc.c @@ -1251,7 +1251,7 @@ static int ocfs2_test_bg_bit_allocatable(struct buffer_head *bg_bh, { struct ocfs2_group_desc *bg = (struct ocfs2_group_desc *) bg_bh->b_data; struct journal_head *jh; - int ret; + int ret = 1; if (ocfs2_test_bit(nr, (unsigned long *)bg->bg_bitmap)) return 0; @@ -1259,14 +1259,18 @@ static int ocfs2_test_bg_bit_allocatable(struct buffer_head *bg_bh, if (!buffer_jbd(bg_bh)) return 1; - jh = bh2jh(bg_bh); - spin_lock(&jh->b_state_lock); - bg = (struct ocfs2_group_desc *) jh->b_committed_data; - if (bg) - ret = !ocfs2_test_bit(nr, (unsigned long *)bg->bg_bitmap); - else - ret = 1; - spin_unlock(&jh->b_state_lock); + jbd_lock_bh_journal_head(bg_bh); + if (buffer_jbd(bg_bh)) { + jh = bh2jh(bg_bh); + spin_lock(&jh->b_state_lock); + bg = (struct ocfs2_group_desc *) jh->b_committed_data; + if (bg) + ret = !ocfs2_test_bit(nr, (unsigned long *)bg->bg_bitmap); + else + ret = 1; + spin_unlock(&jh->b_state_lock); + } + jbd_unlock_bh_journal_head(bg_bh); return ret; } -- cgit v1.2.3 From 855d44434fa24d5344c1cb0edb38723f891cd415 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Thu, 28 Oct 2021 14:36:21 -0700 Subject: mm/secretmem: avoid letting secretmem_users drop to zero Quoting Dmitry: "refcount_inc() needs to be done before fd_install(). After fd_install() finishes, the fd can be used by userspace and we can have secret data in memory before the refcount_inc(). A straightforward misuse where a user will predict the returned fd in another thread before the syscall returns and will use it to store secret data is somewhat dubious because such a user just shoots themself in the foot. But a more interesting misuse would be to close the predicted fd and decrement the refcount before the corresponding refcount_inc, this way one can briefly drop the refcount to zero while there are other users of secretmem." Move fd_install() after refcount_inc(). 
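A minimal sketch of the resulting ordering (error paths omitted): the refcount must be elevated before fd_install() makes the file reachable, because from that point userspace can close the fd and drop a reference at any time.

	fd = get_unused_fd_flags(flags);
	/* ... create the secretmem file ... */

	atomic_inc(&secretmem_users);	/* elevated before the fd becomes reachable */
	fd_install(fd, file);		/* after this, userspace owns the fd */

	return fd;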
Link: https://lkml.kernel.org/r/20211021154046.880251-1-keescook@chromium.org Link: https://lore.kernel.org/lkml/CACT4Y+b1sW6-Hkn8HQYw_SsT7X3tp-CJNh2ci0wG3ZnQz9jjig@mail.gmail.com Fixes: 9a436f8ff631 ("PM: hibernate: disable when there are active secretmem users") Signed-off-by: Kees Cook Reported-by: Dmitry Vyukov Reviewed-by: Dmitry Vyukov Reviewed-by: David Hildenbrand Reviewed-by: Jordy Zomer Cc: Mike Rapoport Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/secretmem.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/secretmem.c b/mm/secretmem.c index c2dda408bb36..22b310adb53d 100644 --- a/mm/secretmem.c +++ b/mm/secretmem.c @@ -218,8 +218,8 @@ SYSCALL_DEFINE1(memfd_secret, unsigned int, flags) file->f_flags |= O_LARGEFILE; - fd_install(fd, file); atomic_inc(&secretmem_users); + fd_install(fd, file); return fd; err_put_fd: -- cgit v1.2.3 From ffb29b1c255ab48cb0062a3d11c101501e3e9b3f Mon Sep 17 00:00:00 2001 From: Chen Wandun Date: Thu, 28 Oct 2021 14:36:24 -0700 Subject: mm/vmalloc: fix numa spreading for large hash tables Eric Dumazet reported a strange numa spreading info in [1], and found commit 121e6f3258fe ("mm/vmalloc: hugepage vmalloc mappings") introduced this issue [2]. Dig into the difference before and after this patch, page allocation has some difference: before: alloc_large_system_hash __vmalloc __vmalloc_node(..., NUMA_NO_NODE, ...) __vmalloc_node_range __vmalloc_area_node alloc_page /* because NUMA_NO_NODE, so choose alloc_page branch */ alloc_pages_current alloc_page_interleave /* can be proved by print policy mode */ after: alloc_large_system_hash __vmalloc __vmalloc_node(..., NUMA_NO_NODE, ...) __vmalloc_node_range __vmalloc_area_node alloc_pages_node /* choose nid by nuam_mem_id() */ __alloc_pages_node(nid, ....) So after commit 121e6f3258fe ("mm/vmalloc: hugepage vmalloc mappings"), it will allocate memory in current node instead of interleaving allocate memory. Link: https://lore.kernel.org/linux-mm/CANn89iL6AAyWhfxdHO+jaT075iOa3XcYn9k6JJc7JR2XYn6k_Q@mail.gmail.com/ [1] Link: https://lore.kernel.org/linux-mm/CANn89iLofTR=AK-QOZY87RdUZENCZUT4O6a0hvhu3_EwRMerOg@mail.gmail.com/ [2] Link: https://lkml.kernel.org/r/20211021080744.874701-2-chenwandun@huawei.com Fixes: 121e6f3258fe ("mm/vmalloc: hugepage vmalloc mappings") Signed-off-by: Chen Wandun Reported-by: Eric Dumazet Cc: Shakeel Butt Cc: Nicholas Piggin Cc: Kefeng Wang Cc: Hanjun Guo Cc: Uladzislau Rezki Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/vmalloc.c | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/mm/vmalloc.c b/mm/vmalloc.c index d77830ff604c..e8a807c78110 100644 --- a/mm/vmalloc.c +++ b/mm/vmalloc.c @@ -2816,6 +2816,8 @@ vm_area_alloc_pages(gfp_t gfp, int nid, unsigned int order, unsigned int nr_pages, struct page **pages) { unsigned int nr_allocated = 0; + struct page *page; + int i; /* * For order-0 pages we make use of bulk allocator, if @@ -2823,7 +2825,7 @@ vm_area_alloc_pages(gfp_t gfp, int nid, * to fails, fallback to a single page allocator that is * more permissive. */ - if (!order) { + if (!order && nid != NUMA_NO_NODE) { while (nr_allocated < nr_pages) { unsigned int nr, nr_pages_request; @@ -2848,7 +2850,7 @@ vm_area_alloc_pages(gfp_t gfp, int nid, if (nr != nr_pages_request) break; } - } else + } else if (order) /* * Compound pages required for remap_vmalloc_page if * high-order pages. 
@@ -2856,11 +2858,12 @@ vm_area_alloc_pages(gfp_t gfp, int nid, gfp |= __GFP_COMP; /* High-order pages or fallback path if "bulk" fails. */ - while (nr_allocated < nr_pages) { - struct page *page; - int i; - page = alloc_pages_node(nid, gfp, order); + while (nr_allocated < nr_pages) { + if (nid == NUMA_NO_NODE) + page = alloc_pages(gfp, order); + else + page = alloc_pages_node(nid, gfp, order); if (unlikely(!page)) break; -- cgit v1.2.3 From 74c42e1baacf206338b1dd6b6199ac964512b5bb Mon Sep 17 00:00:00 2001 From: Rongwei Wang Date: Thu, 28 Oct 2021 14:36:27 -0700 Subject: mm, thp: bail out early in collapse_file for writeback page Currently collapse_file does not explicitly check PG_writeback, instead, page_has_private and try_to_release_page are used to filter writeback pages. This does not work for xfs with blocksize equal to or larger than pagesize, because in such case xfs has no page->private. This makes collapse_file bail out early for writeback page. Otherwise, xfs end_page_writeback will panic as follows. page:fffffe00201bcc80 refcount:0 mapcount:0 mapping:ffff0003f88c86a8 index:0x0 pfn:0x84ef32 aops:xfs_address_space_operations [xfs] ino:30000b7 dentry name:"libtest.so" flags: 0x57fffe0000008027(locked|referenced|uptodate|active|writeback) raw: 57fffe0000008027 ffff80001b48bc28 ffff80001b48bc28 ffff0003f88c86a8 raw: 0000000000000000 0000000000000000 00000000ffffffff ffff0000c3e9a000 page dumped because: VM_BUG_ON_PAGE(((unsigned int) page_ref_count(page) + 127u <= 127u)) page->mem_cgroup:ffff0000c3e9a000 ------------[ cut here ]------------ kernel BUG at include/linux/mm.h:1212! Internal error: Oops - BUG: 0 [#1] SMP Modules linked in: BUG: Bad page state in process khugepaged pfn:84ef32 xfs(E) page:fffffe00201bcc80 refcount:0 mapcount:0 mapping:0 index:0x0 pfn:0x84ef32 libcrc32c(E) rfkill(E) aes_ce_blk(E) crypto_simd(E) ... CPU: 25 PID: 0 Comm: swapper/25 Kdump: loaded Tainted: ... pstate: 60400005 (nZCv daif +PAN -UAO -TCO BTYPE=--) Call trace: end_page_writeback+0x1c0/0x214 iomap_finish_page_writeback+0x13c/0x204 iomap_finish_ioend+0xe8/0x19c iomap_writepage_end_bio+0x38/0x50 bio_endio+0x168/0x1ec blk_update_request+0x278/0x3f0 blk_mq_end_request+0x34/0x15c virtblk_request_done+0x38/0x74 [virtio_blk] blk_done_softirq+0xc4/0x110 __do_softirq+0x128/0x38c __irq_exit_rcu+0x118/0x150 irq_exit+0x1c/0x30 __handle_domain_irq+0x8c/0xf0 gic_handle_irq+0x84/0x108 el1_irq+0xcc/0x180 arch_cpu_idle+0x18/0x40 default_idle_call+0x4c/0x1a0 cpuidle_idle_call+0x168/0x1e0 do_idle+0xb4/0x104 cpu_startup_entry+0x30/0x9c secondary_start_kernel+0x104/0x180 Code: d4210000 b0006161 910c8021 94013f4d (d4210000) ---[ end trace 4a88c6a074082f8c ]--- Kernel panic - not syncing: Oops - BUG: Fatal exception in interrupt Link: https://lkml.kernel.org/r/20211022023052.33114-1-rongwei.wang@linux.alibaba.com Fixes: 99cb0dbd47a1 ("mm,thp: add read-only THP support for (non-shmem) FS") Signed-off-by: Rongwei Wang Signed-off-by: Xu Yu Suggested-by: Yang Shi Reviewed-by: Matthew Wilcox (Oracle) Reviewed-by: Yang Shi Acked-by: Kirill A. 
Shutemov Cc: Song Liu Cc: William Kucharski Cc: Hugh Dickins Cc: Mike Kravetz Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/khugepaged.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/mm/khugepaged.c b/mm/khugepaged.c index 045cc579f724..48de4e1b0783 100644 --- a/mm/khugepaged.c +++ b/mm/khugepaged.c @@ -1763,6 +1763,10 @@ static void collapse_file(struct mm_struct *mm, filemap_flush(mapping); result = SCAN_FAIL; goto xa_unlocked; + } else if (PageWriteback(page)) { + xas_unlock_irq(&xas); + result = SCAN_FAIL; + goto xa_unlocked; } else if (trylock_page(page)) { get_page(page); xas_unlock_irq(&xas); @@ -1798,7 +1802,8 @@ static void collapse_file(struct mm_struct *mm, goto out_unlock; } - if (!is_shmem && PageDirty(page)) { + if (!is_shmem && (PageDirty(page) || + PageWriteback(page))) { /* * khugepaged only works on read-only fd, so this * page is dirty because it hasn't been flushed -- cgit v1.2.3 From a4aeaa06d45e90f9b279f0b09de84bd00006e733 Mon Sep 17 00:00:00 2001 From: Yang Shi Date: Thu, 28 Oct 2021 14:36:30 -0700 Subject: mm: khugepaged: skip huge page collapse for special files The read-only THP for filesystems will collapse THP for files opened readonly and mapped with VM_EXEC. The intended usecase is to avoid TLB misses for large text segments. But it doesn't restrict the file types so a THP could be collapsed for a non-regular file, for example, block device, if it is opened readonly and mapped with EXEC permission. This may cause bugs, like [1] and [2]. This is definitely not the intended usecase, so just collapse THP for regular files in order to close the attack surface. [shy828301@gmail.com: fix vm_file check [3]] Link: https://lore.kernel.org/lkml/CACkBjsYwLYLRmX8GpsDpMthagWOjWWrNxqY6ZLNQVr6yx+f5vA@mail.gmail.com/ [1] Link: https://lore.kernel.org/linux-mm/000000000000c6a82505ce284e4c@google.com/ [2] Link: https://lkml.kernel.org/r/CAHbLzkqTW9U3VvTu1Ki5v_cLRC9gHW+znBukg_ycergE0JWj-A@mail.gmail.com [3] Link: https://lkml.kernel.org/r/20211027195221.3825-1-shy828301@gmail.com Fixes: 99cb0dbd47a1 ("mm,thp: add read-only THP support for (non-shmem) FS") Signed-off-by: Hugh Dickins Signed-off-by: Yang Shi Reported-by: Hao Sun Reported-by: syzbot+aae069be1de40fb11825@syzkaller.appspotmail.com Cc: Matthew Wilcox Cc: Kirill A. Shutemov Cc: Song Liu Cc: Andrea Righi Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/khugepaged.c | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/mm/khugepaged.c b/mm/khugepaged.c index 48de4e1b0783..8a8b3aa92937 100644 --- a/mm/khugepaged.c +++ b/mm/khugepaged.c @@ -445,22 +445,25 @@ static bool hugepage_vma_check(struct vm_area_struct *vma, if (!transhuge_vma_enabled(vma, vm_flags)) return false; + if (vma->vm_file && !IS_ALIGNED((vma->vm_start >> PAGE_SHIFT) - + vma->vm_pgoff, HPAGE_PMD_NR)) + return false; + /* Enabled via shmem mount options or sysfs settings. */ - if (shmem_file(vma->vm_file) && shmem_huge_enabled(vma)) { - return IS_ALIGNED((vma->vm_start >> PAGE_SHIFT) - vma->vm_pgoff, - HPAGE_PMD_NR); - } + if (shmem_file(vma->vm_file)) + return shmem_huge_enabled(vma); /* THP settings require madvise. */ if (!(vm_flags & VM_HUGEPAGE) && !khugepaged_always()) return false; - /* Read-only file mappings need to be aligned for THP to work. 
*/ + /* Only regular file is valid */ if (IS_ENABLED(CONFIG_READ_ONLY_THP_FOR_FS) && vma->vm_file && - !inode_is_open_for_write(vma->vm_file->f_inode) && (vm_flags & VM_EXEC)) { - return IS_ALIGNED((vma->vm_start >> PAGE_SHIFT) - vma->vm_pgoff, - HPAGE_PMD_NR); + struct inode *inode = vma->vm_file->f_inode; + + return !inode_is_open_for_write(inode) && + S_ISREG(inode->i_mode); } if (!vma->anon_vma || vma->vm_ops) -- cgit v1.2.3 From 2e014660b3e4b7bd0e75f845cdcf745c0f632889 Mon Sep 17 00:00:00 2001 From: SeongJae Park Date: Thu, 28 Oct 2021 14:36:33 -0700 Subject: mm/damon/core-test: fix wrong expectations for 'damon_split_regions_of()' Kunit test cases for 'damon_split_regions_of()' expects the number of regions after calling the function will be same to their request ('nr_sub'). However, the requested number is just an upper-limit, because the function randomly decides the size of each sub-region. This fixes the wrong expectation. Link: https://lkml.kernel.org/r/20211028090628.14948-1-sj@kernel.org Fixes: 17ccae8bb5c9 ("mm/damon: add kunit tests") Signed-off-by: SeongJae Park Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/damon/core-test.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/mm/damon/core-test.h b/mm/damon/core-test.h index c938a9c34e6c..7008c3735e99 100644 --- a/mm/damon/core-test.h +++ b/mm/damon/core-test.h @@ -219,14 +219,14 @@ static void damon_test_split_regions_of(struct kunit *test) r = damon_new_region(0, 22); damon_add_region(r, t); damon_split_regions_of(c, t, 2); - KUNIT_EXPECT_EQ(test, damon_nr_regions(t), 2u); + KUNIT_EXPECT_LE(test, damon_nr_regions(t), 2u); damon_free_target(t); t = damon_new_target(42); r = damon_new_region(0, 220); damon_add_region(r, t); damon_split_regions_of(c, t, 4); - KUNIT_EXPECT_EQ(test, damon_nr_regions(t), 4u); + KUNIT_EXPECT_LE(test, damon_nr_regions(t), 4u); damon_free_target(t); damon_destroy_ctx(c); } -- cgit v1.2.3 From 9c7516d669e68e94e17203469a873ff5d1d3a41a Mon Sep 17 00:00:00 2001 From: David Yang Date: Thu, 28 Oct 2021 14:36:36 -0700 Subject: tools/testing/selftests/vm/split_huge_page_test.c: fix application of sizeof to pointer The coccinelle check report: ./tools/testing/selftests/vm/split_huge_page_test.c:344:36-42: ERROR: application of sizeof to pointer Use "strlen" to fix it. Link: https://lkml.kernel.org/r/20211012030116.184027-1-davidcomponentone@gmail.com Signed-off-by: David Yang Reported-by: Zeal Robot Cc: Zi Yan Cc: Shuah Khan Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- tools/testing/selftests/vm/split_huge_page_test.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/vm/split_huge_page_test.c b/tools/testing/selftests/vm/split_huge_page_test.c index 1af16d2c2a0a..52497b7b9f1d 100644 --- a/tools/testing/selftests/vm/split_huge_page_test.c +++ b/tools/testing/selftests/vm/split_huge_page_test.c @@ -341,7 +341,7 @@ void split_file_backed_thp(void) } /* write something to the file, so a file-backed THP can be allocated */ - num_written = write(fd, tmpfs_loc, sizeof(tmpfs_loc)); + num_written = write(fd, tmpfs_loc, strlen(tmpfs_loc) + 1); close(fd); if (num_written < 1) { -- cgit v1.2.3 From 56ee254d23c59fd48fb6f192d79858f60cfe2bf6 Mon Sep 17 00:00:00 2001 From: David Sterba Date: Wed, 27 Oct 2021 10:42:27 +0200 Subject: Revert "btrfs: compression: drop kmap/kunmap from zstd" This reverts commit bbaf9715f3f5b5ff0de71da91fcc34ee9c198ed8. 
The kmaps in compression code are still needed and cause crashes on 32bit machines (ARM, x86). Reproducible eg. by running fstest btrfs/004 with enabled LZO or ZSTD compression. Example stacktrace with ZSTD on a 32bit ARM machine: Unable to handle kernel NULL pointer dereference at virtual address 00000000 pgd = c4159ed3 [00000000] *pgd=00000000 Internal error: Oops: 5 [#1] PREEMPT SMP ARM Modules linked in: CPU: 0 PID: 210 Comm: kworker/u2:3 Not tainted 5.14.0-rc79+ #12 Hardware name: Allwinner sun4i/sun5i Families Workqueue: btrfs-delalloc btrfs_work_helper PC is at mmiocpy+0x48/0x330 LR is at ZSTD_compressStream_generic+0x15c/0x28c (mmiocpy) from [] (ZSTD_compressStream_generic+0x15c/0x28c) (ZSTD_compressStream_generic) from [] (ZSTD_compressStream+0x64/0xa0) (ZSTD_compressStream) from [] (zstd_compress_pages+0x170/0x488) (zstd_compress_pages) from [] (btrfs_compress_pages+0x124/0x12c) (btrfs_compress_pages) from [] (compress_file_range+0x3c0/0x834) (compress_file_range) from [] (async_cow_start+0x10/0x28) (async_cow_start) from [] (btrfs_work_helper+0x100/0x230) (btrfs_work_helper) from [] (process_one_work+0x1b4/0x418) (process_one_work) from [] (worker_thread+0x44/0x524) (worker_thread) from [] (kthread+0x180/0x1b0) (kthread) from [] Link: https://lore.kernel.org/all/CAJCQCtT+OuemovPO7GZk8Y8=qtOObr0XTDp8jh4OHD6y84AFxw@mail.gmail.com/ Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=214839 Signed-off-by: David Sterba --- fs/btrfs/zstd.c | 27 ++++++++++++++++++--------- 1 file changed, 18 insertions(+), 9 deletions(-) diff --git a/fs/btrfs/zstd.c b/fs/btrfs/zstd.c index 56dce9f00988..f06b68040352 100644 --- a/fs/btrfs/zstd.c +++ b/fs/btrfs/zstd.c @@ -399,7 +399,7 @@ int zstd_compress_pages(struct list_head *ws, struct address_space *mapping, /* map in the first page of input data */ in_page = find_get_page(mapping, start >> PAGE_SHIFT); - workspace->in_buf.src = page_address(in_page); + workspace->in_buf.src = kmap(in_page); workspace->in_buf.pos = 0; workspace->in_buf.size = min_t(size_t, len, PAGE_SIZE); @@ -411,7 +411,7 @@ int zstd_compress_pages(struct list_head *ws, struct address_space *mapping, goto out; } pages[nr_pages++] = out_page; - workspace->out_buf.dst = page_address(out_page); + workspace->out_buf.dst = kmap(out_page); workspace->out_buf.pos = 0; workspace->out_buf.size = min_t(size_t, max_out, PAGE_SIZE); @@ -446,6 +446,7 @@ int zstd_compress_pages(struct list_head *ws, struct address_space *mapping, if (workspace->out_buf.pos == workspace->out_buf.size) { tot_out += PAGE_SIZE; max_out -= PAGE_SIZE; + kunmap(out_page); if (nr_pages == nr_dest_pages) { out_page = NULL; ret = -E2BIG; @@ -457,7 +458,7 @@ int zstd_compress_pages(struct list_head *ws, struct address_space *mapping, goto out; } pages[nr_pages++] = out_page; - workspace->out_buf.dst = page_address(out_page); + workspace->out_buf.dst = kmap(out_page); workspace->out_buf.pos = 0; workspace->out_buf.size = min_t(size_t, max_out, PAGE_SIZE); @@ -472,12 +473,13 @@ int zstd_compress_pages(struct list_head *ws, struct address_space *mapping, /* Check if we need more input */ if (workspace->in_buf.pos == workspace->in_buf.size) { tot_in += PAGE_SIZE; + kunmap(in_page); put_page(in_page); start += PAGE_SIZE; len -= PAGE_SIZE; in_page = find_get_page(mapping, start >> PAGE_SHIFT); - workspace->in_buf.src = page_address(in_page); + workspace->in_buf.src = kmap(in_page); workspace->in_buf.pos = 0; workspace->in_buf.size = min_t(size_t, len, PAGE_SIZE); } @@ -504,6 +506,7 @@ int zstd_compress_pages(struct list_head 
*ws, struct address_space *mapping, tot_out += PAGE_SIZE; max_out -= PAGE_SIZE; + kunmap(out_page); if (nr_pages == nr_dest_pages) { out_page = NULL; ret = -E2BIG; @@ -515,7 +518,7 @@ int zstd_compress_pages(struct list_head *ws, struct address_space *mapping, goto out; } pages[nr_pages++] = out_page; - workspace->out_buf.dst = page_address(out_page); + workspace->out_buf.dst = kmap(out_page); workspace->out_buf.pos = 0; workspace->out_buf.size = min_t(size_t, max_out, PAGE_SIZE); } @@ -531,8 +534,12 @@ int zstd_compress_pages(struct list_head *ws, struct address_space *mapping, out: *out_pages = nr_pages; /* Cleanup */ - if (in_page) + if (in_page) { + kunmap(in_page); put_page(in_page); + } + if (out_page) + kunmap(out_page); return ret; } @@ -556,7 +563,7 @@ int zstd_decompress_bio(struct list_head *ws, struct compressed_bio *cb) goto done; } - workspace->in_buf.src = page_address(pages_in[page_in_index]); + workspace->in_buf.src = kmap(pages_in[page_in_index]); workspace->in_buf.pos = 0; workspace->in_buf.size = min_t(size_t, srclen, PAGE_SIZE); @@ -592,14 +599,14 @@ int zstd_decompress_bio(struct list_head *ws, struct compressed_bio *cb) break; if (workspace->in_buf.pos == workspace->in_buf.size) { - page_in_index++; + kunmap(pages_in[page_in_index++]); if (page_in_index >= total_pages_in) { workspace->in_buf.src = NULL; ret = -EIO; goto done; } srclen -= PAGE_SIZE; - workspace->in_buf.src = page_address(pages_in[page_in_index]); + workspace->in_buf.src = kmap(pages_in[page_in_index]); workspace->in_buf.pos = 0; workspace->in_buf.size = min_t(size_t, srclen, PAGE_SIZE); } @@ -607,6 +614,8 @@ int zstd_decompress_bio(struct list_head *ws, struct compressed_bio *cb) ret = 0; zero_fill_bio(cb->orig_bio); done: + if (workspace->in_buf.src) + kunmap(pages_in[page_in_index]); return ret; } -- cgit v1.2.3 From 55276e14df4324ade34583adef110e11d249fb7b Mon Sep 17 00:00:00 2001 From: David Sterba Date: Wed, 27 Oct 2021 10:42:43 +0200 Subject: Revert "btrfs: compression: drop kmap/kunmap from zlib" This reverts commit 696ab562e6df9fbafd6052d8ce4aafcb2ed16069. The kmaps in compression code are still needed and cause crashes on 32bit machines (ARM, x86). Reproducible eg. by running fstest btrfs/004 with enabled LZO or ZSTD compression. 
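The constraint behind the revert, in simplified terms: on a 32-bit kernel a page-cache page may sit in highmem with no permanent kernel mapping, so page_address() can return NULL for it and the data must be mapped temporarily around each access:

	char *kaddr;

	kaddr = kmap(page);		/* valid for highmem and lowmem pages */
	memcpy(buf, kaddr + offset, len);
	kunmap(page);

Dropping the kmap()/kunmap() pairs in favour of page_address() is what produced the NULL pointer dereferences reported on 32-bit ARM and x86.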
Link: https://lore.kernel.org/all/CAJCQCtT+OuemovPO7GZk8Y8=qtOObr0XTDp8jh4OHD6y84AFxw@mail.gmail.com/ Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=214839 Signed-off-by: David Sterba --- fs/btrfs/zlib.c | 36 +++++++++++++++++++++++++----------- 1 file changed, 25 insertions(+), 11 deletions(-) diff --git a/fs/btrfs/zlib.c b/fs/btrfs/zlib.c index 8afa90074891..767a0c6c9694 100644 --- a/fs/btrfs/zlib.c +++ b/fs/btrfs/zlib.c @@ -126,7 +126,7 @@ int zlib_compress_pages(struct list_head *ws, struct address_space *mapping, ret = -ENOMEM; goto out; } - cpage_out = page_address(out_page); + cpage_out = kmap(out_page); pages[0] = out_page; nr_pages = 1; @@ -148,22 +148,26 @@ int zlib_compress_pages(struct list_head *ws, struct address_space *mapping, int i; for (i = 0; i < in_buf_pages; i++) { - if (in_page) + if (in_page) { + kunmap(in_page); put_page(in_page); + } in_page = find_get_page(mapping, start >> PAGE_SHIFT); - data_in = page_address(in_page); + data_in = kmap(in_page); memcpy(workspace->buf + i * PAGE_SIZE, data_in, PAGE_SIZE); start += PAGE_SIZE; } workspace->strm.next_in = workspace->buf; } else { - if (in_page) + if (in_page) { + kunmap(in_page); put_page(in_page); + } in_page = find_get_page(mapping, start >> PAGE_SHIFT); - data_in = page_address(in_page); + data_in = kmap(in_page); start += PAGE_SIZE; workspace->strm.next_in = data_in; } @@ -192,6 +196,7 @@ int zlib_compress_pages(struct list_head *ws, struct address_space *mapping, * the stream end if required */ if (workspace->strm.avail_out == 0) { + kunmap(out_page); if (nr_pages == nr_dest_pages) { out_page = NULL; ret = -E2BIG; @@ -202,7 +207,7 @@ int zlib_compress_pages(struct list_head *ws, struct address_space *mapping, ret = -ENOMEM; goto out; } - cpage_out = page_address(out_page); + cpage_out = kmap(out_page); pages[nr_pages] = out_page; nr_pages++; workspace->strm.avail_out = PAGE_SIZE; @@ -229,6 +234,7 @@ int zlib_compress_pages(struct list_head *ws, struct address_space *mapping, goto out; } else if (workspace->strm.avail_out == 0) { /* get another page for the stream end */ + kunmap(out_page); if (nr_pages == nr_dest_pages) { out_page = NULL; ret = -E2BIG; @@ -239,7 +245,7 @@ int zlib_compress_pages(struct list_head *ws, struct address_space *mapping, ret = -ENOMEM; goto out; } - cpage_out = page_address(out_page); + cpage_out = kmap(out_page); pages[nr_pages] = out_page; nr_pages++; workspace->strm.avail_out = PAGE_SIZE; @@ -258,8 +264,13 @@ int zlib_compress_pages(struct list_head *ws, struct address_space *mapping, *total_in = workspace->strm.total_in; out: *out_pages = nr_pages; - if (in_page) + if (out_page) + kunmap(out_page); + + if (in_page) { + kunmap(in_page); put_page(in_page); + } return ret; } @@ -276,7 +287,7 @@ int zlib_decompress_bio(struct list_head *ws, struct compressed_bio *cb) unsigned long buf_start; struct page **pages_in = cb->compressed_pages; - data_in = page_address(pages_in[page_in_index]); + data_in = kmap(pages_in[page_in_index]); workspace->strm.next_in = data_in; workspace->strm.avail_in = min_t(size_t, srclen, PAGE_SIZE); workspace->strm.total_in = 0; @@ -298,6 +309,7 @@ int zlib_decompress_bio(struct list_head *ws, struct compressed_bio *cb) if (Z_OK != zlib_inflateInit2(&workspace->strm, wbits)) { pr_warn("BTRFS: inflateInit failed\n"); + kunmap(pages_in[page_in_index]); return -EIO; } while (workspace->strm.total_in < srclen) { @@ -324,13 +336,13 @@ int zlib_decompress_bio(struct list_head *ws, struct compressed_bio *cb) if (workspace->strm.avail_in == 0) { unsigned long 
tmp; - + kunmap(pages_in[page_in_index]); page_in_index++; if (page_in_index >= total_pages_in) { data_in = NULL; break; } - data_in = page_address(pages_in[page_in_index]); + data_in = kmap(pages_in[page_in_index]); workspace->strm.next_in = data_in; tmp = srclen - workspace->strm.total_in; workspace->strm.avail_in = min(tmp, PAGE_SIZE); @@ -342,6 +354,8 @@ int zlib_decompress_bio(struct list_head *ws, struct compressed_bio *cb) ret = 0; done: zlib_inflateEnd(&workspace->strm); + if (data_in) + kunmap(pages_in[page_in_index]); if (!ret) zero_fill_bio(cb->orig_bio); return ret; -- cgit v1.2.3 From ccaa66c8dd277ac02f96914168bb7177f7ea8117 Mon Sep 17 00:00:00 2001 From: David Sterba Date: Wed, 27 Oct 2021 10:44:21 +0200 Subject: Revert "btrfs: compression: drop kmap/kunmap from lzo" This reverts commit 8c945d32e60427cbc0859cf7045bbe6196bb03d8. The kmaps in compression code are still needed and cause crashes on 32bit machines (ARM, x86). Reproducible eg. by running fstest btrfs/004 with enabled LZO or ZSTD compression. The revert does not apply cleanly due to changes in a6e66e6f8c1b ("btrfs: rework lzo_decompress_bio() to make it subpage compatible") that reworked the page iteration so the revert is done to be equivalent to the original code. Link: https://lore.kernel.org/all/CAJCQCtT+OuemovPO7GZk8Y8=qtOObr0XTDp8jh4OHD6y84AFxw@mail.gmail.com/ Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=214839 Tested-by: Qu Wenruo Signed-off-by: Qu Wenruo Signed-off-by: David Sterba --- fs/btrfs/lzo.c | 36 +++++++++++++++++++++++++----------- 1 file changed, 25 insertions(+), 11 deletions(-) diff --git a/fs/btrfs/lzo.c b/fs/btrfs/lzo.c index c25dfd1a8a54..3dbe6eb5fda7 100644 --- a/fs/btrfs/lzo.c +++ b/fs/btrfs/lzo.c @@ -141,7 +141,7 @@ int lzo_compress_pages(struct list_head *ws, struct address_space *mapping, *total_in = 0; in_page = find_get_page(mapping, start >> PAGE_SHIFT); - data_in = page_address(in_page); + data_in = kmap(in_page); /* * store the size of all chunks of compressed data in @@ -152,7 +152,7 @@ int lzo_compress_pages(struct list_head *ws, struct address_space *mapping, ret = -ENOMEM; goto out; } - cpage_out = page_address(out_page); + cpage_out = kmap(out_page); out_offset = LZO_LEN; tot_out = LZO_LEN; pages[0] = out_page; @@ -210,6 +210,7 @@ int lzo_compress_pages(struct list_head *ws, struct address_space *mapping, if (out_len == 0 && tot_in >= len) break; + kunmap(out_page); if (nr_pages == nr_dest_pages) { out_page = NULL; ret = -E2BIG; @@ -221,7 +222,7 @@ int lzo_compress_pages(struct list_head *ws, struct address_space *mapping, ret = -ENOMEM; goto out; } - cpage_out = page_address(out_page); + cpage_out = kmap(out_page); pages[nr_pages++] = out_page; pg_bytes_left = PAGE_SIZE; @@ -243,11 +244,12 @@ int lzo_compress_pages(struct list_head *ws, struct address_space *mapping, break; bytes_left = len - tot_in; + kunmap(in_page); put_page(in_page); start += PAGE_SIZE; in_page = find_get_page(mapping, start >> PAGE_SHIFT); - data_in = page_address(in_page); + data_in = kmap(in_page); in_len = min(bytes_left, PAGE_SIZE); } @@ -257,17 +259,22 @@ int lzo_compress_pages(struct list_head *ws, struct address_space *mapping, } /* store the size of all chunks of compressed data */ - sizes_ptr = page_address(pages[0]); + sizes_ptr = kmap_local_page(pages[0]); write_compress_length(sizes_ptr, tot_out); + kunmap_local(sizes_ptr); ret = 0; *total_out = tot_out; *total_in = tot_in; out: *out_pages = nr_pages; + if (out_page) + kunmap(out_page); - if (in_page) + if (in_page) { + 
kunmap(in_page); put_page(in_page); + } return ret; } @@ -283,6 +290,7 @@ static void copy_compressed_segment(struct compressed_bio *cb, u32 orig_in = *cur_in; while (*cur_in < orig_in + len) { + char *kaddr; struct page *cur_page; u32 copy_len = min_t(u32, PAGE_SIZE - offset_in_page(*cur_in), orig_in + len - *cur_in); @@ -290,9 +298,11 @@ static void copy_compressed_segment(struct compressed_bio *cb, ASSERT(copy_len); cur_page = cb->compressed_pages[*cur_in / PAGE_SIZE]; + kaddr = kmap(cur_page); memcpy(dest + *cur_in - orig_in, - page_address(cur_page) + offset_in_page(*cur_in), + kaddr + offset_in_page(*cur_in), copy_len); + kunmap(cur_page); *cur_in += copy_len; } @@ -303,6 +313,7 @@ int lzo_decompress_bio(struct list_head *ws, struct compressed_bio *cb) struct workspace *workspace = list_entry(ws, struct workspace, list); const struct btrfs_fs_info *fs_info = btrfs_sb(cb->inode->i_sb); const u32 sectorsize = fs_info->sectorsize; + char *kaddr; int ret; /* Compressed data length, can be unaligned */ u32 len_in; @@ -311,7 +322,9 @@ int lzo_decompress_bio(struct list_head *ws, struct compressed_bio *cb) /* Bytes decompressed so far */ u32 cur_out = 0; - len_in = read_compress_length(page_address(cb->compressed_pages[0])); + kaddr = kmap(cb->compressed_pages[0]); + len_in = read_compress_length(kaddr); + kunmap(cb->compressed_pages[0]); cur_in += LZO_LEN; /* @@ -344,9 +357,9 @@ int lzo_decompress_bio(struct list_head *ws, struct compressed_bio *cb) ASSERT(cur_in / sectorsize == (cur_in + LZO_LEN - 1) / sectorsize); cur_page = cb->compressed_pages[cur_in / PAGE_SIZE]; + kaddr = kmap(cur_page); ASSERT(cur_page); - seg_len = read_compress_length(page_address(cur_page) + - offset_in_page(cur_in)); + seg_len = read_compress_length(kaddr + offset_in_page(cur_in)); cur_in += LZO_LEN; /* Copy the compressed segment payload into workspace */ @@ -431,7 +444,7 @@ int lzo_decompress(struct list_head *ws, unsigned char *data_in, destlen = min_t(unsigned long, destlen, PAGE_SIZE); bytes = min_t(unsigned long, destlen, out_len - start_byte); - kaddr = page_address(dest_page); + kaddr = kmap_local_page(dest_page); memcpy(kaddr, workspace->buf + start_byte, bytes); /* @@ -441,6 +454,7 @@ int lzo_decompress(struct list_head *ws, unsigned char *data_in, */ if (bytes < destlen) memset(kaddr+bytes, 0, destlen-bytes); + kunmap_local(kaddr); out: return ret; } -- cgit v1.2.3 From 6130722f1114cac4f3ec858763ad8995d5a2e586 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Fri, 29 Oct 2021 09:52:23 -0400 Subject: ftrace: Fix kernel-doc formatting issues Some functions had kernel-doc that used a comma instead of a hash to separate the function name from the one line description. Also, the "ftrace_is_dead()" had an incomplete description. 
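For reference, the kernel-doc layout being restored separates the symbol name from its one-line summary with " - " rather than a comma, roughly:

	/**
	 * function_name - one-line description of what it does
	 * @arg: description of the parameter
	 *
	 * Longer description, including the return value where relevant.
	 */

(The names here are placeholders; the actual comments touched are shown in the hunks below.)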
Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/ftrace.c | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 635fbdc9d589..feebf57c6458 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -2208,7 +2208,7 @@ static int ftrace_check_record(struct dyn_ftrace *rec, bool enable, bool update) } /** - * ftrace_update_record, set a record that now is tracing or not + * ftrace_update_record - set a record that now is tracing or not * @rec: the record to update * @enable: set to true if the record is tracing, false to force disable * @@ -2221,7 +2221,7 @@ int ftrace_update_record(struct dyn_ftrace *rec, bool enable) } /** - * ftrace_test_record, check if the record has been enabled or not + * ftrace_test_record - check if the record has been enabled or not * @rec: the record to test * @enable: set to true to check if enabled, false if it is disabled * @@ -2574,7 +2574,7 @@ struct ftrace_rec_iter { }; /** - * ftrace_rec_iter_start, start up iterating over traced functions + * ftrace_rec_iter_start - start up iterating over traced functions * * Returns an iterator handle that is used to iterate over all * the records that represent address locations where functions @@ -2605,7 +2605,7 @@ struct ftrace_rec_iter *ftrace_rec_iter_start(void) } /** - * ftrace_rec_iter_next, get the next record to process. + * ftrace_rec_iter_next - get the next record to process. * @iter: The handle to the iterator. * * Returns the next iterator after the given iterator @iter. @@ -2630,7 +2630,7 @@ struct ftrace_rec_iter *ftrace_rec_iter_next(struct ftrace_rec_iter *iter) } /** - * ftrace_rec_iter_record, get the record at the iterator location + * ftrace_rec_iter_record - get the record at the iterator location * @iter: The current iterator location * * Returns the record that the current @iter is at. @@ -2733,7 +2733,7 @@ static int __ftrace_modify_code(void *data) } /** - * ftrace_run_stop_machine, go back to the stop machine method + * ftrace_run_stop_machine - go back to the stop machine method * @command: The command to tell ftrace what to do * * If an arch needs to fall back to the stop machine method, the @@ -2745,7 +2745,7 @@ void ftrace_run_stop_machine(int command) } /** - * arch_ftrace_update_code, modify the code to trace or not trace + * arch_ftrace_update_code - modify the code to trace or not trace * @command: The command that needs to be done * * Archs can override this function if it does not need to @@ -7525,7 +7525,9 @@ void ftrace_kill(void) } /** - * Test if ftrace is dead or not. + * ftrace_is_dead - Test if ftrace is dead or not. + * + * Returns 1 if ftrace is "dead", zero otherwise. */ int ftrace_is_dead(void) { -- cgit v1.2.3 From ddcf906fe5ed842034b2d995064c1de1dcc7e812 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Fri, 29 Oct 2021 09:54:14 -0400 Subject: tracing: Fix misspelling of "missing" My snake instinct was on and I wrote "misssing" instead of "missing". 
Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/trace_eprobe.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/trace/trace_eprobe.c b/kernel/trace/trace_eprobe.c index 5c5f208c15d3..928867f527e7 100644 --- a/kernel/trace/trace_eprobe.c +++ b/kernel/trace/trace_eprobe.c @@ -904,7 +904,7 @@ static int __trace_eprobe_create(int argc, const char *argv[]) if (IS_ERR(ep)) { ret = PTR_ERR(ep); - /* This must return -ENOMEM or misssing event, else there is a bug */ + /* This must return -ENOMEM or missing event, else there is a bug */ WARN_ON_ONCE(ret != -ENOMEM && ret != -ENODEV); ep = NULL; goto error; -- cgit v1.2.3 From cf11d01135ea1ff7fddb612033e3cb5cde279ff2 Mon Sep 17 00:00:00 2001 From: Alexandre Ghiti Date: Fri, 29 Oct 2021 06:59:26 +0200 Subject: riscv: Do not re-populate shadow memory with kasan_populate_early_shadow When calling this function, all the shadow memory is already populated with kasan_early_shadow_pte which has PAGE_KERNEL protection. kasan_populate_early_shadow write-protects the mapping of the range of addresses passed in argument in zero_pte_populate, which actually write-protects all the shadow memory mapping since kasan_early_shadow_pte is used for all the shadow memory at this point. And then when using memblock API to populate the shadow memory, the first write access to the kernel stack triggers a trap. This becomes visible with the next commit that contains a fix for asan-stack. We already manually populate all the shadow memory in kasan_early_init and we write-protect kasan_early_shadow_pte at the end of kasan_init which makes the calls to kasan_populate_early_shadow superfluous so we can remove them. Signed-off-by: Alexandre Ghiti Fixes: e178d670f251 ("riscv/kasan: add KASAN_VMALLOC support") Fixes: 8ad8b72721d0 ("riscv: Add KASAN support") Cc: stable@vger.kernel.org Signed-off-by: Palmer Dabbelt --- arch/riscv/mm/kasan_init.c | 11 ----------- 1 file changed, 11 deletions(-) diff --git a/arch/riscv/mm/kasan_init.c b/arch/riscv/mm/kasan_init.c index d7189c8714a9..89a8376ce44e 100644 --- a/arch/riscv/mm/kasan_init.c +++ b/arch/riscv/mm/kasan_init.c @@ -172,21 +172,10 @@ void __init kasan_init(void) phys_addr_t p_start, p_end; u64 i; - /* - * Populate all kernel virtual address space with kasan_early_shadow_page - * except for the linear mapping and the modules/kernel/BPF mapping. - */ - kasan_populate_early_shadow((void *)KASAN_SHADOW_START, - (void *)kasan_mem_to_shadow((void *) - VMEMMAP_END)); if (IS_ENABLED(CONFIG_KASAN_VMALLOC)) kasan_shallow_populate( (void *)kasan_mem_to_shadow((void *)VMALLOC_START), (void *)kasan_mem_to_shadow((void *)VMALLOC_END)); - else - kasan_populate_early_shadow( - (void *)kasan_mem_to_shadow((void *)VMALLOC_START), - (void *)kasan_mem_to_shadow((void *)VMALLOC_END)); /* Populate the linear mapping */ for_each_mem_range(i, &p_start, &p_end) { -- cgit v1.2.3 From 54c5639d8f507ebefa814f574cb6f763033a72a5 Mon Sep 17 00:00:00 2001 From: Alexandre Ghiti Date: Fri, 29 Oct 2021 06:59:27 +0200 Subject: riscv: Fix asan-stack clang build Nathan reported that because KASAN_SHADOW_OFFSET was not defined in Kconfig, it prevents asan-stack from getting disabled with clang even when CONFIG_KASAN_STACK is disabled: fix this by defining the corresponding config. 
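For context, generic KASAN derives every shadow address from this single constant, and defining it in Kconfig makes the value visible to the build system (and thus to the compiler options that control asan-stack), not just to C code. A simplified sketch of the mapping, adapted from the generic KASAN headers for illustration only:

    /* Simplified sketch (generic KASAN): one shadow byte tracks 8 bytes of
     * kernel memory, so a shadow address is the access address scaled down
     * and rebased by the build-time constant KASAN_SHADOW_OFFSET. */
    #define KASAN_SHADOW_SCALE_SHIFT	3

    static inline void *kasan_mem_to_shadow(const void *addr)
    {
            return (void *)((unsigned long)addr >> KASAN_SHADOW_SCALE_SHIFT)
                    + KASAN_SHADOW_OFFSET;
    }
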
Reported-by: Nathan Chancellor Signed-off-by: Alexandre Ghiti Fixes: 8ad8b72721d0 ("riscv: Add KASAN support") Cc: stable@vger.kernel.org Signed-off-by: Palmer Dabbelt --- arch/riscv/Kconfig | 6 ++++++ arch/riscv/include/asm/kasan.h | 3 +-- arch/riscv/mm/kasan_init.c | 3 +++ 3 files changed, 10 insertions(+), 2 deletions(-) diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig index c3f3fd583e04..6d5b63bd4bd9 100644 --- a/arch/riscv/Kconfig +++ b/arch/riscv/Kconfig @@ -163,6 +163,12 @@ config PAGE_OFFSET default 0xffffffff80000000 if 64BIT && MAXPHYSMEM_2GB default 0xffffffe000000000 if 64BIT && MAXPHYSMEM_128GB +config KASAN_SHADOW_OFFSET + hex + depends on KASAN_GENERIC + default 0xdfffffc800000000 if 64BIT + default 0xffffffff if 32BIT + config ARCH_FLATMEM_ENABLE def_bool !NUMA diff --git a/arch/riscv/include/asm/kasan.h b/arch/riscv/include/asm/kasan.h index a2b3d9cdbc86..b00f503ec124 100644 --- a/arch/riscv/include/asm/kasan.h +++ b/arch/riscv/include/asm/kasan.h @@ -30,8 +30,7 @@ #define KASAN_SHADOW_SIZE (UL(1) << ((CONFIG_VA_BITS - 1) - KASAN_SHADOW_SCALE_SHIFT)) #define KASAN_SHADOW_START KERN_VIRT_START #define KASAN_SHADOW_END (KASAN_SHADOW_START + KASAN_SHADOW_SIZE) -#define KASAN_SHADOW_OFFSET (KASAN_SHADOW_END - (1ULL << \ - (64 - KASAN_SHADOW_SCALE_SHIFT))) +#define KASAN_SHADOW_OFFSET _AC(CONFIG_KASAN_SHADOW_OFFSET, UL) void kasan_init(void); asmlinkage void kasan_early_init(void); diff --git a/arch/riscv/mm/kasan_init.c b/arch/riscv/mm/kasan_init.c index 89a8376ce44e..54294f83513d 100644 --- a/arch/riscv/mm/kasan_init.c +++ b/arch/riscv/mm/kasan_init.c @@ -17,6 +17,9 @@ asmlinkage void __init kasan_early_init(void) uintptr_t i; pgd_t *pgd = early_pg_dir + pgd_index(KASAN_SHADOW_START); + BUILD_BUG_ON(KASAN_SHADOW_OFFSET != + KASAN_SHADOW_END - (1UL << (64 - KASAN_SHADOW_SCALE_SHIFT))); + for (i = 0; i < PTRS_PER_PTE; ++i) set_pte(kasan_early_shadow_pte + i, mk_pte(virt_to_page(kasan_early_shadow_page), -- cgit v1.2.3 From 61a9f252c1c026f84129a7bfa476e880b75e80eb Mon Sep 17 00:00:00 2001 From: "Martin K. Petersen" Date: Wed, 27 Oct 2021 23:42:02 -0400 Subject: scsi: mpt3sas: Fix reference tag handling for WRITE_INSERT Testing revealed a problem with how the reference tag was handled for a WRITE_INSERT operation. The SCSI_PROT_REF_CHECK flag is not set when the controller is asked to generate the protection information (i.e. not DIX). And as a result the initial reference tag would not be set in the WRITE_INSERT case. Separate handling of the REF_CHECK and REF_INCREMENT flags to align with both the DIX spec and the MPI implementation. Link: https://lore.kernel.org/r/20211028034202.24225-1-martin.petersen@oracle.com Fixes: b3e2c72af1d5 ("scsi: mpt3sas: Use the proper SCSI midlayer interfaces for PI") Signed-off-by: Martin K. 
Petersen --- drivers/scsi/mpt3sas/mpt3sas_scsih.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/drivers/scsi/mpt3sas/mpt3sas_scsih.c b/drivers/scsi/mpt3sas/mpt3sas_scsih.c index d383d4a03436..ad1b6c2b37a7 100644 --- a/drivers/scsi/mpt3sas/mpt3sas_scsih.c +++ b/drivers/scsi/mpt3sas/mpt3sas_scsih.c @@ -5065,9 +5065,12 @@ _scsih_setup_eedp(struct MPT3SAS_ADAPTER *ioc, struct scsi_cmnd *scmd, if (scmd->prot_flags & SCSI_PROT_GUARD_CHECK) eedp_flags |= MPI2_SCSIIO_EEDPFLAGS_CHECK_GUARD; - if (scmd->prot_flags & SCSI_PROT_REF_CHECK) { - eedp_flags |= MPI2_SCSIIO_EEDPFLAGS_INC_PRI_REFTAG | - MPI2_SCSIIO_EEDPFLAGS_CHECK_REFTAG; + if (scmd->prot_flags & SCSI_PROT_REF_CHECK) + eedp_flags |= MPI2_SCSIIO_EEDPFLAGS_CHECK_REFTAG; + + if (scmd->prot_flags & SCSI_PROT_REF_INCREMENT) { + eedp_flags |= MPI2_SCSIIO_EEDPFLAGS_INC_PRI_REFTAG; + mpi_request->CDB.EEDP32.PrimaryReferenceTag = cpu_to_be32(scsi_prot_ref_tag(scmd)); } -- cgit v1.2.3 From 09d9e4d041876684d33f21d02bcdaea6586734f1 Mon Sep 17 00:00:00 2001 From: Avri Altman Date: Sat, 30 Oct 2021 09:23:01 +0300 Subject: scsi: ufs: ufshpb: Remove HPB2.0 flows The Host Performance Buffer feature allows UFS read commands to carry the physical media addresses along with the LBAs, thus allowing less internal L2P-table switches in the device. HPB1.0 allowed a single LBA, while HPB2.0 increases this capacity up to 255 blocks. Carrying more than a single record, the read operation is no longer purely of type "read" but a "hybrid" command: Writing the physical address to the device in one operation and reading back the required payload in another. The JEDEC HPB spec defines two commands for this operation: HPB-WRITE-BUFFER (0x2) to write the physical addresses to device, and HPB-READ to read the payload. With the current HPB design the UFS driver has no alternative but to divide the READ request into 2 separate commands: HPB-WRITE-BUFFER and HPB-READ. This causes a great deal of aggravation to the block layer guys who demanded that we completely revert the entire HPB driver regardless of the huge amount of corporate effort already invested in it. As a compromise, remove only the pieces that implement the 2.0 specification. This is done as a matter of urgency for the final 5.15 release. Link: https://lore.kernel.org/r/20211030062301.248-1-avri.altman@wdc.com Tested-by: Avri Altman Tested-by: Bean Huo Reviewed-by: Bart Van Assche Reviewed-by: Bean Huo Co-developed-by: James Bottomley Signed-off-by: James Bottomley Signed-off-by: Avri Altman Signed-off-by: Martin K. 
Petersen --- drivers/scsi/ufs/ufshcd.c | 7 +- drivers/scsi/ufs/ufshpb.c | 283 +--------------------------------------------- drivers/scsi/ufs/ufshpb.h | 2 - 3 files changed, 4 insertions(+), 288 deletions(-) diff --git a/drivers/scsi/ufs/ufshcd.c b/drivers/scsi/ufs/ufshcd.c index 95be7ecdfe10..41f2ff35f82b 100644 --- a/drivers/scsi/ufs/ufshcd.c +++ b/drivers/scsi/ufs/ufshcd.c @@ -2737,12 +2737,7 @@ static int ufshcd_queuecommand(struct Scsi_Host *host, struct scsi_cmnd *cmd) lrbp->req_abort_skip = false; - err = ufshpb_prep(hba, lrbp); - if (err == -EAGAIN) { - lrbp->cmd = NULL; - ufshcd_release(hba); - goto out; - } + ufshpb_prep(hba, lrbp); ufshcd_comp_scsi_upiu(hba, lrbp); diff --git a/drivers/scsi/ufs/ufshpb.c b/drivers/scsi/ufs/ufshpb.c index 589af5f6b940..026a133149dc 100644 --- a/drivers/scsi/ufs/ufshpb.c +++ b/drivers/scsi/ufs/ufshpb.c @@ -84,16 +84,6 @@ static bool ufshpb_is_supported_chunk(struct ufshpb_lu *hpb, int transfer_len) return transfer_len <= hpb->pre_req_max_tr_len; } -/* - * In this driver, WRITE_BUFFER CMD support 36KB (len=9) ~ 1MB (len=256) as - * default. It is possible to change range of transfer_len through sysfs. - */ -static inline bool ufshpb_is_required_wb(struct ufshpb_lu *hpb, int len) -{ - return len > hpb->pre_req_min_tr_len && - len <= hpb->pre_req_max_tr_len; -} - static bool ufshpb_is_general_lun(int lun) { return lun < UFS_UPIU_MAX_UNIT_NUM_ID; @@ -334,7 +324,7 @@ ufshpb_get_pos_from_lpn(struct ufshpb_lu *hpb, unsigned long lpn, int *rgn_idx, static void ufshpb_set_hpb_read_to_upiu(struct ufs_hba *hba, struct ufshcd_lrb *lrbp, - __be64 ppn, u8 transfer_len, int read_id) + __be64 ppn, u8 transfer_len) { unsigned char *cdb = lrbp->cmd->cmnd; __be64 ppn_tmp = ppn; @@ -346,256 +336,11 @@ ufshpb_set_hpb_read_to_upiu(struct ufs_hba *hba, struct ufshcd_lrb *lrbp, /* ppn value is stored as big-endian in the host memory */ memcpy(&cdb[6], &ppn_tmp, sizeof(__be64)); cdb[14] = transfer_len; - cdb[15] = read_id; + cdb[15] = 0; lrbp->cmd->cmd_len = UFS_CDB_SIZE; } -static inline void ufshpb_set_write_buf_cmd(unsigned char *cdb, - unsigned long lpn, unsigned int len, - int read_id) -{ - cdb[0] = UFSHPB_WRITE_BUFFER; - cdb[1] = UFSHPB_WRITE_BUFFER_PREFETCH_ID; - - put_unaligned_be32(lpn, &cdb[2]); - cdb[6] = read_id; - put_unaligned_be16(len * HPB_ENTRY_SIZE, &cdb[7]); - - cdb[9] = 0x00; /* Control = 0x00 */ -} - -static struct ufshpb_req *ufshpb_get_pre_req(struct ufshpb_lu *hpb) -{ - struct ufshpb_req *pre_req; - - if (hpb->num_inflight_pre_req >= hpb->throttle_pre_req) { - dev_info(&hpb->sdev_ufs_lu->sdev_dev, - "pre_req throttle. 
inflight %d throttle %d", - hpb->num_inflight_pre_req, hpb->throttle_pre_req); - return NULL; - } - - pre_req = list_first_entry_or_null(&hpb->lh_pre_req_free, - struct ufshpb_req, list_req); - if (!pre_req) { - dev_info(&hpb->sdev_ufs_lu->sdev_dev, "There is no pre_req"); - return NULL; - } - - list_del_init(&pre_req->list_req); - hpb->num_inflight_pre_req++; - - return pre_req; -} - -static inline void ufshpb_put_pre_req(struct ufshpb_lu *hpb, - struct ufshpb_req *pre_req) -{ - pre_req->req = NULL; - bio_reset(pre_req->bio); - list_add_tail(&pre_req->list_req, &hpb->lh_pre_req_free); - hpb->num_inflight_pre_req--; -} - -static void ufshpb_pre_req_compl_fn(struct request *req, blk_status_t error) -{ - struct ufshpb_req *pre_req = (struct ufshpb_req *)req->end_io_data; - struct ufshpb_lu *hpb = pre_req->hpb; - unsigned long flags; - - if (error) { - struct scsi_cmnd *cmd = blk_mq_rq_to_pdu(req); - struct scsi_sense_hdr sshdr; - - dev_err(&hpb->sdev_ufs_lu->sdev_dev, "block status %d", error); - scsi_command_normalize_sense(cmd, &sshdr); - dev_err(&hpb->sdev_ufs_lu->sdev_dev, - "code %x sense_key %x asc %x ascq %x", - sshdr.response_code, - sshdr.sense_key, sshdr.asc, sshdr.ascq); - dev_err(&hpb->sdev_ufs_lu->sdev_dev, - "byte4 %x byte5 %x byte6 %x additional_len %x", - sshdr.byte4, sshdr.byte5, - sshdr.byte6, sshdr.additional_length); - } - - blk_mq_free_request(req); - spin_lock_irqsave(&hpb->rgn_state_lock, flags); - ufshpb_put_pre_req(pre_req->hpb, pre_req); - spin_unlock_irqrestore(&hpb->rgn_state_lock, flags); -} - -static int ufshpb_prep_entry(struct ufshpb_req *pre_req, struct page *page) -{ - struct ufshpb_lu *hpb = pre_req->hpb; - struct ufshpb_region *rgn; - struct ufshpb_subregion *srgn; - __be64 *addr; - int offset = 0; - int copied; - unsigned long lpn = pre_req->wb.lpn; - int rgn_idx, srgn_idx, srgn_offset; - unsigned long flags; - - addr = page_address(page); - ufshpb_get_pos_from_lpn(hpb, lpn, &rgn_idx, &srgn_idx, &srgn_offset); - - spin_lock_irqsave(&hpb->rgn_state_lock, flags); - -next_offset: - rgn = hpb->rgn_tbl + rgn_idx; - srgn = rgn->srgn_tbl + srgn_idx; - - if (!ufshpb_is_valid_srgn(rgn, srgn)) - goto mctx_error; - - if (!srgn->mctx) - goto mctx_error; - - copied = ufshpb_fill_ppn_from_page(hpb, srgn->mctx, srgn_offset, - pre_req->wb.len - offset, - &addr[offset]); - - if (copied < 0) - goto mctx_error; - - offset += copied; - srgn_offset += copied; - - if (srgn_offset == hpb->entries_per_srgn) { - srgn_offset = 0; - - if (++srgn_idx == hpb->srgns_per_rgn) { - srgn_idx = 0; - rgn_idx++; - } - } - - if (offset < pre_req->wb.len) - goto next_offset; - - spin_unlock_irqrestore(&hpb->rgn_state_lock, flags); - return 0; -mctx_error: - spin_unlock_irqrestore(&hpb->rgn_state_lock, flags); - return -ENOMEM; -} - -static int ufshpb_pre_req_add_bio_page(struct ufshpb_lu *hpb, - struct request_queue *q, - struct ufshpb_req *pre_req) -{ - struct page *page = pre_req->wb.m_page; - struct bio *bio = pre_req->bio; - int entries_bytes, ret; - - if (!page) - return -ENOMEM; - - if (ufshpb_prep_entry(pre_req, page)) - return -ENOMEM; - - entries_bytes = pre_req->wb.len * sizeof(__be64); - - ret = bio_add_pc_page(q, bio, page, entries_bytes, 0); - if (ret != entries_bytes) { - dev_err(&hpb->sdev_ufs_lu->sdev_dev, - "bio_add_pc_page fail: %d", ret); - return -ENOMEM; - } - return 0; -} - -static inline int ufshpb_get_read_id(struct ufshpb_lu *hpb) -{ - if (++hpb->cur_read_id >= MAX_HPB_READ_ID) - hpb->cur_read_id = 1; - return hpb->cur_read_id; -} - -static int 
ufshpb_execute_pre_req(struct ufshpb_lu *hpb, struct scsi_cmnd *cmd, - struct ufshpb_req *pre_req, int read_id) -{ - struct scsi_device *sdev = cmd->device; - struct request_queue *q = sdev->request_queue; - struct request *req; - struct scsi_request *rq; - struct bio *bio = pre_req->bio; - - pre_req->hpb = hpb; - pre_req->wb.lpn = sectors_to_logical(cmd->device, - blk_rq_pos(scsi_cmd_to_rq(cmd))); - pre_req->wb.len = sectors_to_logical(cmd->device, - blk_rq_sectors(scsi_cmd_to_rq(cmd))); - if (ufshpb_pre_req_add_bio_page(hpb, q, pre_req)) - return -ENOMEM; - - req = pre_req->req; - - /* 1. request setup */ - blk_rq_append_bio(req, bio); - req->rq_disk = NULL; - req->end_io_data = (void *)pre_req; - req->end_io = ufshpb_pre_req_compl_fn; - - /* 2. scsi_request setup */ - rq = scsi_req(req); - rq->retries = 1; - - ufshpb_set_write_buf_cmd(rq->cmd, pre_req->wb.lpn, pre_req->wb.len, - read_id); - rq->cmd_len = scsi_command_size(rq->cmd); - - if (blk_insert_cloned_request(q, req) != BLK_STS_OK) - return -EAGAIN; - - hpb->stats.pre_req_cnt++; - - return 0; -} - -static int ufshpb_issue_pre_req(struct ufshpb_lu *hpb, struct scsi_cmnd *cmd, - int *read_id) -{ - struct ufshpb_req *pre_req; - struct request *req = NULL; - unsigned long flags; - int _read_id; - int ret = 0; - - req = blk_get_request(cmd->device->request_queue, - REQ_OP_DRV_OUT | REQ_SYNC, BLK_MQ_REQ_NOWAIT); - if (IS_ERR(req)) - return -EAGAIN; - - spin_lock_irqsave(&hpb->rgn_state_lock, flags); - pre_req = ufshpb_get_pre_req(hpb); - if (!pre_req) { - ret = -EAGAIN; - goto unlock_out; - } - _read_id = ufshpb_get_read_id(hpb); - spin_unlock_irqrestore(&hpb->rgn_state_lock, flags); - - pre_req->req = req; - - ret = ufshpb_execute_pre_req(hpb, cmd, pre_req, _read_id); - if (ret) - goto free_pre_req; - - *read_id = _read_id; - - return ret; -free_pre_req: - spin_lock_irqsave(&hpb->rgn_state_lock, flags); - ufshpb_put_pre_req(hpb, pre_req); -unlock_out: - spin_unlock_irqrestore(&hpb->rgn_state_lock, flags); - blk_put_request(req); - return ret; -} - /* * This function will set up HPB read command using host-side L2P map data. */ @@ -609,7 +354,6 @@ int ufshpb_prep(struct ufs_hba *hba, struct ufshcd_lrb *lrbp) __be64 ppn; unsigned long flags; int transfer_len, rgn_idx, srgn_idx, srgn_offset; - int read_id = 0; int err = 0; hpb = ufshpb_get_hpb_data(cmd->device); @@ -685,24 +429,8 @@ int ufshpb_prep(struct ufs_hba *hba, struct ufshcd_lrb *lrbp) dev_err(hba->dev, "get ppn failed. err %d\n", err); return err; } - if (!ufshpb_is_legacy(hba) && - ufshpb_is_required_wb(hpb, transfer_len)) { - err = ufshpb_issue_pre_req(hpb, cmd, &read_id); - if (err) { - unsigned long timeout; - - timeout = cmd->jiffies_at_alloc + msecs_to_jiffies( - hpb->params.requeue_timeout_ms); - - if (time_before(jiffies, timeout)) - return -EAGAIN; - - hpb->stats.miss_cnt++; - return 0; - } - } - ufshpb_set_hpb_read_to_upiu(hba, lrbp, ppn, transfer_len, read_id); + ufshpb_set_hpb_read_to_upiu(hba, lrbp, ppn, transfer_len); hpb->stats.hit_cnt++; return 0; @@ -1841,16 +1569,11 @@ static void ufshpb_lu_parameter_init(struct ufs_hba *hba, u32 entries_per_rgn; u64 rgn_mem_size, tmp; - /* for pre_req */ - hpb->pre_req_min_tr_len = hpb_dev_info->max_hpb_single_cmd + 1; - if (ufshpb_is_legacy(hba)) hpb->pre_req_max_tr_len = HPB_LEGACY_CHUNK_HIGH; else hpb->pre_req_max_tr_len = HPB_MULTI_CHUNK_HIGH; - hpb->cur_read_id = 0; - hpb->lu_pinned_start = hpb_lu_info->pinned_start; hpb->lu_pinned_end = hpb_lu_info->num_pinned ? 
(hpb_lu_info->pinned_start + hpb_lu_info->num_pinned - 1) diff --git a/drivers/scsi/ufs/ufshpb.h b/drivers/scsi/ufs/ufshpb.h index a79e07398970..f15d8fdbce2e 100644 --- a/drivers/scsi/ufs/ufshpb.h +++ b/drivers/scsi/ufs/ufshpb.h @@ -241,8 +241,6 @@ struct ufshpb_lu { spinlock_t param_lock; struct list_head lh_pre_req_free; - int cur_read_id; - int pre_req_min_tr_len; int pre_req_max_tr_len; /* cached L2P map management worker */ -- cgit v1.2.3 From 095729484efc4aa4604c474792b059bd940addce Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Thu, 30 Sep 2021 09:28:49 +0300 Subject: perf build: Suppress 'rm dlfilter' build message The following build message: rm dlfilters/dlfilter-test-api-v0.o is unwanted. The object file is being treated as an intermediate file and being automatically removed. Mark the object file as .SECONDARY to prevent removal and hence the message. Requested-by: Arnaldo Carvalho de Melo Signed-off-by: Adrian Hunter Cc: Jiri Olsa Link: http://lore.kernel.org/lkml/20210930062849.110416-1-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Makefile.perf | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf index 5cd702062a04..b856afa6eb52 100644 --- a/tools/perf/Makefile.perf +++ b/tools/perf/Makefile.perf @@ -787,6 +787,8 @@ $(OUTPUT)dlfilters/%.o: dlfilters/%.c include/perf/perf_dlfilter.h $(Q)$(MKDIR) -p $(OUTPUT)dlfilters $(QUIET_CC)$(CC) -c -Iinclude $(EXTRA_CFLAGS) -o $@ -fpic $< +.SECONDARY: $(DLFILTERS:.so=.o) + $(OUTPUT)dlfilters/%.so: $(OUTPUT)dlfilters/%.o $(QUIET_LINK)$(CC) $(EXTRA_CFLAGS) -shared -o $@ $< -- cgit v1.2.3 From 29c77550eef31b0d72a45b49eeab03b8963264e8 Mon Sep 17 00:00:00 2001 From: Song Liu Date: Sun, 3 Oct 2021 22:32:38 -0700 Subject: perf script: Check session->header.env.arch before using it When perf.data is not written cleanly, we would like to process existing data as much as possible (please see f_header.data.size == 0 condition in perf_session__read_header). However, perf.data with partial data may crash perf. Specifically, we see crash in 'perf script' for NULL session->header.env.arch. Fix this by checking session->header.env.arch before using it to determine native_arch. Also split the if condition so it is easier to read. Committer notes: If it is a pipe, we already assume is a native arch, so no need to check session->header.env.arch. 
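A compact restatement of the resulting logic, written as a standalone helper for illustration only (the actual change, shown in the diff below, keeps the checks inline in builtin-script.c):

    #include <stdbool.h>
    #include <string.h>

    /* Hypothetical helper mirroring the fixed condition: pipe mode is assumed
     * native, and env_arch is only dereferenced when the header provided it. */
    static bool is_native_arch(bool is_pipe, const char *uts_machine,
                               const char *env_arch)
    {
            if (is_pipe)
                    return true;
            if (!env_arch)
                    return false;   /* truncated perf.data: no header env */
            if (!strcmp(uts_machine, env_arch))
                    return true;
            return !strcmp(uts_machine, "x86_64") && !strcmp(env_arch, "i386");
    }
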
Signed-off-by: Song Liu Cc: Peter Zijlstra Cc: kernel-team@fb.com Cc: stable@vger.kernel.org Link: http://lore.kernel.org/lkml/20211004053238.514936-1-songliubraving@fb.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-script.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c index 6211d0b84b7a..42eb6bdf3c58 100644 --- a/tools/perf/builtin-script.c +++ b/tools/perf/builtin-script.c @@ -4039,11 +4039,15 @@ script_found: goto out_delete; uname(&uts); - if (data.is_pipe || /* assume pipe_mode indicates native_arch */ - !strcmp(uts.machine, session->header.env.arch) || - (!strcmp(uts.machine, "x86_64") && - !strcmp(session->header.env.arch, "i386"))) + if (data.is_pipe) { /* Assume pipe_mode indicates native_arch */ native_arch = true; + } else if (session->header.env.arch) { + if (!strcmp(uts.machine, session->header.env.arch)) + native_arch = true; + else if (!strcmp(uts.machine, "x86_64") && + !strcmp(session->header.env.arch, "i386")) + native_arch = true; + } script.session = session; script__setup_sample_type(&script); -- cgit v1.2.3 From 89ac61ff05a5c79545aa93f7b1da7e4dbc60fc9a Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Tue, 28 Sep 2021 21:52:53 +0200 Subject: perf callchain: Fix compilation on powerpc with gcc11+ MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Got following build fail on powerpc: CC arch/powerpc/util/skip-callchain-idx.o In function ‘check_return_reg’, inlined from ‘check_return_addr’ at arch/powerpc/util/skip-callchain-idx.c:213:7, inlined from ‘arch_skip_callchain_idx’ at arch/powerpc/util/skip-callchain-idx.c:265:7: arch/powerpc/util/skip-callchain-idx.c:54:18: error: ‘dwarf_frame_register’ accessing 96 bytes \ in a region of size 64 [-Werror=stringop-overflow=] 54 | result = dwarf_frame_register(frame, ra_regno, ops_mem, &ops, &nops); | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ arch/powerpc/util/skip-callchain-idx.c: In function ‘arch_skip_callchain_idx’: arch/powerpc/util/skip-callchain-idx.c:54:18: note: referencing argument 3 of type ‘Dwarf_Op *’ In file included from /usr/include/elfutils/libdwfl.h:32, from arch/powerpc/util/skip-callchain-idx.c:10: /usr/include/elfutils/libdw.h:1069:12: note: in a call to function ‘dwarf_frame_register’ 1069 | extern int dwarf_frame_register (Dwarf_Frame *frame, int regno, | ^~~~~~~~~~~~~~~~~~~~ cc1: all warnings being treated as errors The dwarf_frame_register args changed with [1], Updating ops_mem accordingly. 
[1] https://sourceware.org/git/?p=elfutils.git;a=commit;h=5621fe5443da23112170235dd5cac161e5c75e65 Reviewed-by: Kajol Jain Signed-off-by: Jiri Olsa Acked-by: Mark Wieelard Cc: Alexander Shishkin Cc: Ian Rogers Cc: Ingo Molnar Cc: Mark Rutland Cc: Michael Petlan Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Sukadev Bhattiprolu Link: https://lore.kernel.org/r/20210928195253.1267023-1-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/arch/powerpc/util/skip-callchain-idx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/arch/powerpc/util/skip-callchain-idx.c b/tools/perf/arch/powerpc/util/skip-callchain-idx.c index 3018a054526a..20cd6244863b 100644 --- a/tools/perf/arch/powerpc/util/skip-callchain-idx.c +++ b/tools/perf/arch/powerpc/util/skip-callchain-idx.c @@ -45,7 +45,7 @@ static const Dwfl_Callbacks offline_callbacks = { */ static int check_return_reg(int ra_regno, Dwarf_Frame *frame) { - Dwarf_Op ops_mem[2]; + Dwarf_Op ops_mem[3]; Dwarf_Op dummy; Dwarf_Op *ops = &dummy; size_t nops; -- cgit v1.2.3 From 27730c8cd60d1574d8337276e7a9d7d2ca92e0d1 Mon Sep 17 00:00:00 2001 From: Kan Liang Date: Wed, 29 Sep 2021 08:38:13 -0700 Subject: perf script: Fix PERF_SAMPLE_WEIGHT_STRUCT support -F weight in perf script is broken. # ./perf mem record # ./perf script -F weight Samples for 'dummy:HG' event do not have WEIGHT attribute set. Cannot print 'weight' field. The sample type, PERF_SAMPLE_WEIGHT_STRUCT, is an alternative of the PERF_SAMPLE_WEIGHT sample type. They share the same space, weight. The lower 32 bits are exactly the same for both sample type. The higher 32 bits may be different for different architecture. For a new kernel on x86, the PERF_SAMPLE_WEIGHT_STRUCT is used. For an old kernel or other ARCHs, the PERF_SAMPLE_WEIGHT is used. With -F weight, current perf script will only check the input string "weight" with the PERF_SAMPLE_WEIGHT sample type. Because the commit ea8d0ed6eae3 ("perf tools: Support PERF_SAMPLE_WEIGHT_STRUCT") didn't update the PERF_SAMPLE_WEIGHT_STRUCT sample type for perf script. For a new kernel on x86, the check fails. 
Use PERF_SAMPLE_WEIGHT_TYPE, which supports both sample types, to replace PERF_SAMPLE_WEIGHT Fixes: ea8d0ed6eae37b01 ("perf tools: Support PERF_SAMPLE_WEIGHT_STRUCT") Reported-by: Joe Mario Reviewed-by: Kajol Jain Signed-off-by: Kan Liang Tested-by: Jiri Olsa Tested-by: Joe Mario Acked-by: Jiri Olsa Acked-by: Joe Mario Cc: Andi Kleen Link: https://lore.kernel.org/r/1632929894-102778-1-git-send-email-kan.liang@linux.intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-script.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c index 42eb6bdf3c58..c32c2eb16d7d 100644 --- a/tools/perf/builtin-script.c +++ b/tools/perf/builtin-script.c @@ -459,7 +459,7 @@ static int evsel__check_attr(struct evsel *evsel, struct perf_session *session) return -EINVAL; if (PRINT_FIELD(WEIGHT) && - evsel__check_stype(evsel, PERF_SAMPLE_WEIGHT, "WEIGHT", PERF_OUTPUT_WEIGHT)) + evsel__check_stype(evsel, PERF_SAMPLE_WEIGHT_TYPE, "WEIGHT", PERF_OUTPUT_WEIGHT)) return -EINVAL; if (PRINT_FIELD(SYM) && -- cgit v1.2.3 From 8bb7eca972ad531c9b149c0a51ab43a417385813 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 31 Oct 2021 13:53:10 -0700 Subject: Linux 5.15 --- Makefile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Makefile b/Makefile index 30c7c81d0437..ed6e7ec60eff 100644 --- a/Makefile +++ b/Makefile @@ -2,8 +2,8 @@ VERSION = 5 PATCHLEVEL = 15 SUBLEVEL = 0 -EXTRAVERSION = -rc7 -NAME = Opossums on Parade +EXTRAVERSION = +NAME = Trick or Treat # *DOCUMENTATION* # To see a list of typical targets execute "make help" -- cgit v1.2.3