From 82ad759143ed77673db0d93d53c1cde7b99917ee Mon Sep 17 00:00:00 2001 From: Philipp Puschmann Date: Wed, 27 Feb 2019 16:17:33 +0100 Subject: ASoC: tlv320aic3x: fix reset gpio reference counting This patch fixes a bug that prevents freeing the reset gpio on unloading the module. aic3x_i2c_probe is called when loading the module and it calls list_add with a probably uninitialized list entry aic3x->list (next = prev = NULL)). So even if list_del is called it does nothing and in the end the gpio_reset is not freed. Then a repeated module probing fails silently because gpio_request fails. When moving INIT_LIST_HEAD to aic3x_i2c_probe we also have to move list_del to aic3x_i2c_remove because aic3x_remove may be called multiple times without aic3x_i2c_remove being called which leads to a NULL pointer dereference. Signed-off-by: Philipp Puschmann Signed-off-by: Mark Brown --- sound/soc/codecs/tlv320aic3x.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/sound/soc/codecs/tlv320aic3x.c b/sound/soc/codecs/tlv320aic3x.c index 6aa0edf8c5ef..cea3ebecdb12 100644 --- a/sound/soc/codecs/tlv320aic3x.c +++ b/sound/soc/codecs/tlv320aic3x.c @@ -1609,7 +1609,6 @@ static int aic3x_probe(struct snd_soc_component *component) struct aic3x_priv *aic3x = snd_soc_component_get_drvdata(component); int ret, i; - INIT_LIST_HEAD(&aic3x->list); aic3x->component = component; for (i = 0; i < ARRAY_SIZE(aic3x->supplies); i++) { @@ -1692,7 +1691,6 @@ static void aic3x_remove(struct snd_soc_component *component) struct aic3x_priv *aic3x = snd_soc_component_get_drvdata(component); int i; - list_del(&aic3x->list); for (i = 0; i < ARRAY_SIZE(aic3x->supplies); i++) regulator_unregister_notifier(aic3x->supplies[i].consumer, &aic3x->disable_nb[i].nb); @@ -1890,6 +1888,7 @@ static int aic3x_i2c_probe(struct i2c_client *i2c, if (ret != 0) goto err_gpio; + INIT_LIST_HEAD(&aic3x->list); list_add(&aic3x->list, &reset_list); return 0; @@ -1906,6 +1905,8 @@ static int aic3x_i2c_remove(struct i2c_client *client) { struct aic3x_priv *aic3x = i2c_get_clientdata(client); + list_del(&aic3x->list); + if (gpio_is_valid(aic3x->gpio_reset) && !aic3x_is_shared_reset(aic3x)) { gpio_set_value(aic3x->gpio_reset, 0); -- cgit v1.2.3 From f060f46f09bb920d1e0d436d654996033b856e26 Mon Sep 17 00:00:00 2001 From: KaiChieh Chuang Date: Wed, 27 Feb 2019 09:30:44 +0800 Subject: ASoC: mediatek: btcvsd add loopback add direct loopback path from rx to tx Signed-off-by: KaiChieh Chuang Signed-off-by: Mark Brown --- sound/soc/mediatek/common/mtk-btcvsd.c | 69 +++++++++++++++++++++++++++++++++- 1 file changed, 68 insertions(+), 1 deletion(-) diff --git a/sound/soc/mediatek/common/mtk-btcvsd.c b/sound/soc/mediatek/common/mtk-btcvsd.c index 1b8bcdaf02d1..9a163d7064d1 100644 --- a/sound/soc/mediatek/common/mtk-btcvsd.c +++ b/sound/soc/mediatek/common/mtk-btcvsd.c @@ -49,6 +49,7 @@ enum bt_sco_state { BT_SCO_STATE_IDLE, BT_SCO_STATE_RUNNING, BT_SCO_STATE_ENDING, + BT_SCO_STATE_LOOPBACK, }; enum bt_sco_direct { @@ -486,7 +487,8 @@ static irqreturn_t mtk_btcvsd_snd_irq_handler(int irq_id, void *dev) if (bt->rx->state != BT_SCO_STATE_RUNNING && bt->rx->state != BT_SCO_STATE_ENDING && bt->tx->state != BT_SCO_STATE_RUNNING && - bt->tx->state != BT_SCO_STATE_ENDING) { + bt->tx->state != BT_SCO_STATE_ENDING && + bt->tx->state != BT_SCO_STATE_LOOPBACK) { dev_warn(bt->dev, "%s(), in idle state: rx->state: %d, tx->state: %d\n", __func__, bt->rx->state, bt->tx->state); goto irq_handler_exit; @@ -512,6 +514,42 @@ static irqreturn_t mtk_btcvsd_snd_irq_handler(int irq_id, void *dev) buf_cnt_tx = btsco_packet_info[packet_type][2]; buf_cnt_rx = btsco_packet_info[packet_type][3]; + if (bt->tx->state == BT_SCO_STATE_LOOPBACK) { + u8 *src, *dst; + unsigned long connsys_addr_rx, ap_addr_rx; + unsigned long connsys_addr_tx, ap_addr_tx; + + connsys_addr_rx = *bt->bt_reg_pkt_r; + ap_addr_rx = (unsigned long)bt->bt_sram_bank2_base + + (connsys_addr_rx & 0xFFFF); + + connsys_addr_tx = *bt->bt_reg_pkt_w; + ap_addr_tx = (unsigned long)bt->bt_sram_bank2_base + + (connsys_addr_tx & 0xFFFF); + + if (connsys_addr_tx == 0xdeadfeed || + connsys_addr_rx == 0xdeadfeed) { + /* bt return 0xdeadfeed if read reg during bt sleep */ + dev_warn(bt->dev, "%s(), connsys_addr_tx == 0xdeadfeed\n", + __func__); + goto irq_handler_exit; + } + + src = (u8 *)ap_addr_rx; + dst = (u8 *)ap_addr_tx; + + mtk_btcvsd_snd_data_transfer(BT_SCO_DIRECT_BT2ARM, src, + bt->tx->temp_packet_buf, + packet_length, + packet_num); + mtk_btcvsd_snd_data_transfer(BT_SCO_DIRECT_ARM2BT, + bt->tx->temp_packet_buf, dst, + packet_length, + packet_num); + bt->rx->rw_cnt++; + bt->tx->rw_cnt++; + } + if (bt->rx->state == BT_SCO_STATE_RUNNING || bt->rx->state == BT_SCO_STATE_ENDING) { if (bt->rx->xrun) { @@ -1067,6 +1105,33 @@ static int btcvsd_band_set(struct snd_kcontrol *kcontrol, return 0; } +static int btcvsd_loopback_get(struct snd_kcontrol *kcontrol, + struct snd_ctl_elem_value *ucontrol) +{ + struct snd_soc_component *cmpnt = snd_soc_kcontrol_component(kcontrol); + struct mtk_btcvsd_snd *bt = snd_soc_component_get_drvdata(cmpnt); + bool lpbk_en = bt->tx->state == BT_SCO_STATE_LOOPBACK; + + ucontrol->value.integer.value[0] = lpbk_en; + return 0; +} + +static int btcvsd_loopback_set(struct snd_kcontrol *kcontrol, + struct snd_ctl_elem_value *ucontrol) +{ + struct snd_soc_component *cmpnt = snd_soc_kcontrol_component(kcontrol); + struct mtk_btcvsd_snd *bt = snd_soc_component_get_drvdata(cmpnt); + + if (ucontrol->value.integer.value[0]) { + mtk_btcvsd_snd_set_state(bt, bt->tx, BT_SCO_STATE_LOOPBACK); + mtk_btcvsd_snd_set_state(bt, bt->rx, BT_SCO_STATE_LOOPBACK); + } else { + mtk_btcvsd_snd_set_state(bt, bt->tx, BT_SCO_STATE_RUNNING); + mtk_btcvsd_snd_set_state(bt, bt->rx, BT_SCO_STATE_RUNNING); + } + return 0; +} + static int btcvsd_tx_mute_get(struct snd_kcontrol *kcontrol, struct snd_ctl_elem_value *ucontrol) { @@ -1202,6 +1267,8 @@ static int btcvsd_tx_timestamp_get(struct snd_kcontrol *kcontrol, static const struct snd_kcontrol_new mtk_btcvsd_snd_controls[] = { SOC_ENUM_EXT("BTCVSD Band", btcvsd_enum[0], btcvsd_band_get, btcvsd_band_set), + SOC_SINGLE_BOOL_EXT("BTCVSD Loopback Switch", 0, + btcvsd_loopback_get, btcvsd_loopback_set), SOC_SINGLE_BOOL_EXT("BTCVSD Tx Mute Switch", 0, btcvsd_tx_mute_get, btcvsd_tx_mute_set), SOC_SINGLE_BOOL_EXT("BTCVSD Tx Irq Received Switch", 0, -- cgit v1.2.3 From b805d78d300bcf2c83d6df7da0c818b0fee41427 Mon Sep 17 00:00:00 2001 From: YueHaibing Date: Thu, 28 Feb 2019 15:18:59 +0800 Subject: xfrm: policy: Fix out-of-bound array accesses in __xfrm_policy_unlink UBSAN report this: UBSAN: Undefined behaviour in net/xfrm/xfrm_policy.c:1289:24 index 6 is out of range for type 'unsigned int [6]' CPU: 1 PID: 0 Comm: swapper/1 Not tainted 4.4.162-514.55.6.9.x86_64+ #13 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.10.2-1ubuntu1 04/01/2014 0000000000000000 1466cf39b41b23c9 ffff8801f6b07a58 ffffffff81cb35f4 0000000041b58ab3 ffffffff83230f9c ffffffff81cb34e0 ffff8801f6b07a80 ffff8801f6b07a20 1466cf39b41b23c9 ffffffff851706e0 ffff8801f6b07ae8 Call Trace: [] __dump_stack lib/dump_stack.c:15 [inline] [] dump_stack+0x114/0x1a0 lib/dump_stack.c:51 [] ubsan_epilogue+0x12/0x8f lib/ubsan.c:164 [] __ubsan_handle_out_of_bounds+0x16e/0x1b2 lib/ubsan.c:382 [] __xfrm_policy_unlink+0x3dd/0x5b0 net/xfrm/xfrm_policy.c:1289 [] xfrm_policy_delete+0x52/0xb0 net/xfrm/xfrm_policy.c:1309 [] xfrm_policy_timer+0x30b/0x590 net/xfrm/xfrm_policy.c:243 [] call_timer_fn+0x237/0x990 kernel/time/timer.c:1144 [] __run_timers kernel/time/timer.c:1218 [inline] [] run_timer_softirq+0x6ce/0xb80 kernel/time/timer.c:1401 [] __do_softirq+0x299/0xe10 kernel/softirq.c:273 [] invoke_softirq kernel/softirq.c:350 [inline] [] irq_exit+0x216/0x2c0 kernel/softirq.c:391 [] exiting_irq arch/x86/include/asm/apic.h:652 [inline] [] smp_apic_timer_interrupt+0x8b/0xc0 arch/x86/kernel/apic/apic.c:926 [] apic_timer_interrupt+0xa5/0xb0 arch/x86/entry/entry_64.S:735 [] ? native_safe_halt+0x6/0x10 arch/x86/include/asm/irqflags.h:52 [] arch_safe_halt arch/x86/include/asm/paravirt.h:111 [inline] [] default_idle+0x27/0x430 arch/x86/kernel/process.c:446 [] arch_cpu_idle+0x15/0x20 arch/x86/kernel/process.c:437 [] default_idle_call+0x53/0x90 kernel/sched/idle.c:92 [] cpuidle_idle_call kernel/sched/idle.c:156 [inline] [] cpu_idle_loop kernel/sched/idle.c:251 [inline] [] cpu_startup_entry+0x60d/0x9a0 kernel/sched/idle.c:299 [] start_secondary+0x3c9/0x560 arch/x86/kernel/smpboot.c:245 The issue is triggered as this: xfrm_add_policy -->verify_newpolicy_info //check the index provided by user with XFRM_POLICY_MAX //In my case, the index is 0x6E6BB6, so it pass the check. -->xfrm_policy_construct //copy the user's policy and set xfrm_policy_timer -->xfrm_policy_insert --> __xfrm_policy_link //use the orgin dir, in my case is 2 --> xfrm_gen_index //generate policy index, there is 0x6E6BB6 then xfrm_policy_timer be fired xfrm_policy_timer --> xfrm_policy_id2dir //get dir from (policy index & 7), in my case is 6 --> xfrm_policy_delete --> __xfrm_policy_unlink //access policy_count[dir], trigger out of range access Add xfrm_policy_id2dir check in verify_newpolicy_info, make sure the computed dir is valid, to fix the issue. Reported-by: Hulk Robot Fixes: e682adf021be ("xfrm: Try to honor policy index if it's supplied by user") Signed-off-by: YueHaibing Acked-by: Herbert Xu Signed-off-by: Steffen Klassert --- net/xfrm/xfrm_user.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index a131f9ff979e..8d4d52fd457b 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -1424,7 +1424,7 @@ static int verify_newpolicy_info(struct xfrm_userpolicy_info *p) ret = verify_policy_dir(p->dir); if (ret) return ret; - if (p->index && ((p->index & XFRM_POLICY_MAX) != p->dir)) + if (p->index && (xfrm_policy_id2dir(p->index) != p->dir)) return -EINVAL; return 0; -- cgit v1.2.3 From 2e95f984aae4cf0608d0ba2189c756f2bd50b44a Mon Sep 17 00:00:00 2001 From: Russell King Date: Thu, 28 Feb 2019 15:30:34 +0000 Subject: ASoC: hdmi-codec: fix S/PDIF DAI When using the S/PDIF DAI, there is no requirement to call snd_soc_dai_set_fmt() as there is no DAI format definition that defines S/PDIF. In any case, S/PDIF does not have separate clocks, this is embedded into the data stream. Consequently, when attempting to use TDA998x in S/PDIF mode, the attempt to configure TDA998x via the hw_params callback fails as the hdmi_codec_daifmt is left initialised to zero. Since the S/PDIF DAI will only be used by S/PDIF, prepare the hdmi_codec_daifmt structure for this format. Signed-off-by: Russell King Reviewed-by: Jyri Sarha Signed-off-by: Mark Brown --- sound/soc/codecs/hdmi-codec.c | 118 +++++++++++++++++++++--------------------- 1 file changed, 59 insertions(+), 59 deletions(-) diff --git a/sound/soc/codecs/hdmi-codec.c b/sound/soc/codecs/hdmi-codec.c index e5b6769b9797..d5f73c837281 100644 --- a/sound/soc/codecs/hdmi-codec.c +++ b/sound/soc/codecs/hdmi-codec.c @@ -529,73 +529,71 @@ static int hdmi_codec_set_fmt(struct snd_soc_dai *dai, { struct hdmi_codec_priv *hcp = snd_soc_dai_get_drvdata(dai); struct hdmi_codec_daifmt cf = { 0 }; - int ret = 0; dev_dbg(dai->dev, "%s()\n", __func__); - if (dai->id == DAI_ID_SPDIF) { - cf.fmt = HDMI_SPDIF; - } else { - switch (fmt & SND_SOC_DAIFMT_MASTER_MASK) { - case SND_SOC_DAIFMT_CBM_CFM: - cf.bit_clk_master = 1; - cf.frame_clk_master = 1; - break; - case SND_SOC_DAIFMT_CBS_CFM: - cf.frame_clk_master = 1; - break; - case SND_SOC_DAIFMT_CBM_CFS: - cf.bit_clk_master = 1; - break; - case SND_SOC_DAIFMT_CBS_CFS: - break; - default: - return -EINVAL; - } + if (dai->id == DAI_ID_SPDIF) + return 0; + + switch (fmt & SND_SOC_DAIFMT_MASTER_MASK) { + case SND_SOC_DAIFMT_CBM_CFM: + cf.bit_clk_master = 1; + cf.frame_clk_master = 1; + break; + case SND_SOC_DAIFMT_CBS_CFM: + cf.frame_clk_master = 1; + break; + case SND_SOC_DAIFMT_CBM_CFS: + cf.bit_clk_master = 1; + break; + case SND_SOC_DAIFMT_CBS_CFS: + break; + default: + return -EINVAL; + } - switch (fmt & SND_SOC_DAIFMT_INV_MASK) { - case SND_SOC_DAIFMT_NB_NF: - break; - case SND_SOC_DAIFMT_NB_IF: - cf.frame_clk_inv = 1; - break; - case SND_SOC_DAIFMT_IB_NF: - cf.bit_clk_inv = 1; - break; - case SND_SOC_DAIFMT_IB_IF: - cf.frame_clk_inv = 1; - cf.bit_clk_inv = 1; - break; - } + switch (fmt & SND_SOC_DAIFMT_INV_MASK) { + case SND_SOC_DAIFMT_NB_NF: + break; + case SND_SOC_DAIFMT_NB_IF: + cf.frame_clk_inv = 1; + break; + case SND_SOC_DAIFMT_IB_NF: + cf.bit_clk_inv = 1; + break; + case SND_SOC_DAIFMT_IB_IF: + cf.frame_clk_inv = 1; + cf.bit_clk_inv = 1; + break; + } - switch (fmt & SND_SOC_DAIFMT_FORMAT_MASK) { - case SND_SOC_DAIFMT_I2S: - cf.fmt = HDMI_I2S; - break; - case SND_SOC_DAIFMT_DSP_A: - cf.fmt = HDMI_DSP_A; - break; - case SND_SOC_DAIFMT_DSP_B: - cf.fmt = HDMI_DSP_B; - break; - case SND_SOC_DAIFMT_RIGHT_J: - cf.fmt = HDMI_RIGHT_J; - break; - case SND_SOC_DAIFMT_LEFT_J: - cf.fmt = HDMI_LEFT_J; - break; - case SND_SOC_DAIFMT_AC97: - cf.fmt = HDMI_AC97; - break; - default: - dev_err(dai->dev, "Invalid DAI interface format\n"); - return -EINVAL; - } + switch (fmt & SND_SOC_DAIFMT_FORMAT_MASK) { + case SND_SOC_DAIFMT_I2S: + cf.fmt = HDMI_I2S; + break; + case SND_SOC_DAIFMT_DSP_A: + cf.fmt = HDMI_DSP_A; + break; + case SND_SOC_DAIFMT_DSP_B: + cf.fmt = HDMI_DSP_B; + break; + case SND_SOC_DAIFMT_RIGHT_J: + cf.fmt = HDMI_RIGHT_J; + break; + case SND_SOC_DAIFMT_LEFT_J: + cf.fmt = HDMI_LEFT_J; + break; + case SND_SOC_DAIFMT_AC97: + cf.fmt = HDMI_AC97; + break; + default: + dev_err(dai->dev, "Invalid DAI interface format\n"); + return -EINVAL; } hcp->daifmt[dai->id] = cf; - return ret; + return 0; } static int hdmi_codec_digital_mute(struct snd_soc_dai *dai, int mute) @@ -792,8 +790,10 @@ static int hdmi_codec_probe(struct platform_device *pdev) i++; } - if (hcd->spdif) + if (hcd->spdif) { hcp->daidrv[i] = hdmi_spdif_dai; + hcp->daifmt[DAI_ID_SPDIF].fmt = HDMI_SPDIF; + } dev_set_drvdata(dev, hcp); -- cgit v1.2.3 From 102cefc8e879b707be0024fdc7bce1deeb359a5f Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Fri, 1 Mar 2019 14:43:10 -0600 Subject: ASoC: ab8500: Mark expected switch fall-through MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In preparation to enabling -Wimplicit-fallthrough, mark switch cases where we are expecting to fall through. This patch fixes the following warning: In file included from sound/soc/codecs/ab8500-codec.c:24: sound/soc/codecs/ab8500-codec.c: In function ‘ab8500_codec_set_dai_fmt’: ./include/linux/device.h:1485:2: warning: this statement may fall through [-Wimplicit-fallthrough=] _dev_err(dev, dev_fmt(fmt), ##__VA_ARGS__) ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ sound/soc/codecs/ab8500-codec.c:2129:3: note: in expansion of macro ‘dev_err’ dev_err(dai->component->dev, ^~~~~~~ sound/soc/codecs/ab8500-codec.c:2132:2: note: here default: ^~~~~~~ Warning level 3 was used: -Wimplicit-fallthrough=3 This patch is part of the ongoing efforts to enable -Wimplicit-fallthrough. Signed-off-by: Gustavo A. R. Silva Signed-off-by: Mark Brown --- sound/soc/codecs/ab8500-codec.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/soc/codecs/ab8500-codec.c b/sound/soc/codecs/ab8500-codec.c index 03bbbcd3b6c1..87616b126018 100644 --- a/sound/soc/codecs/ab8500-codec.c +++ b/sound/soc/codecs/ab8500-codec.c @@ -2129,6 +2129,7 @@ static int ab8500_codec_set_dai_fmt(struct snd_soc_dai *dai, unsigned int fmt) dev_err(dai->component->dev, "%s: ERROR: The device is either a master or a slave.\n", __func__); + /* fall through */ default: dev_err(dai->component->dev, "%s: ERROR: Unsupporter master mask 0x%x\n", -- cgit v1.2.3 From 5f8a1000c3e630c3ac06f1d664eeaa755bce8823 Mon Sep 17 00:00:00 2001 From: Olivier Moysan Date: Thu, 28 Feb 2019 14:19:21 +0100 Subject: ASoC: stm32: sai: fix iec958 controls indexation Allow indexation of sai iec958 controls according to device id. Signed-off-by: Olivier Moysan Signed-off-by: Mark Brown --- sound/soc/stm/stm32_sai_sub.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/sound/soc/stm/stm32_sai_sub.c b/sound/soc/stm/stm32_sai_sub.c index f9297228c41c..506360b7bc01 100644 --- a/sound/soc/stm/stm32_sai_sub.c +++ b/sound/soc/stm/stm32_sai_sub.c @@ -100,7 +100,7 @@ * @slot_mask: rx or tx active slots mask. set at init or at runtime * @data_size: PCM data width. corresponds to PCM substream width. * @spdif_frm_cnt: S/PDIF playback frame counter - * @snd_aes_iec958: iec958 data + * @iec958: iec958 data * @ctrl_lock: control lock */ struct stm32_sai_sub_data { @@ -1068,11 +1068,12 @@ static int stm32_sai_pcm_new(struct snd_soc_pcm_runtime *rtd, struct snd_soc_dai *cpu_dai) { struct stm32_sai_sub_data *sai = dev_get_drvdata(cpu_dai->dev); + struct snd_kcontrol_new knew = iec958_ctls; if (STM_SAI_PROTOCOL_IS_SPDIF(sai)) { dev_dbg(&sai->pdev->dev, "%s: register iec controls", __func__); - return snd_ctl_add(rtd->pcm->card, - snd_ctl_new1(&iec958_ctls, sai)); + knew.device = rtd->pcm->device; + return snd_ctl_add(rtd->pcm->card, snd_ctl_new1(&knew, sai)); } return 0; -- cgit v1.2.3 From b8468192971807c43a80d6e2c41f83141cb7b211 Mon Sep 17 00:00:00 2001 From: Olivier Moysan Date: Thu, 28 Feb 2019 14:19:22 +0100 Subject: ASoC: stm32: sai: fix exposed capabilities in spdif mode Change capabilities exposed in SAI S/PDIF mode, to match actually supported formats. In S/PDIF mode only 32 bits stereo is supported. Signed-off-by: Olivier Moysan Signed-off-by: Mark Brown --- sound/soc/stm/stm32_sai_sub.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/sound/soc/stm/stm32_sai_sub.c b/sound/soc/stm/stm32_sai_sub.c index 506360b7bc01..e418f446e03b 100644 --- a/sound/soc/stm/stm32_sai_sub.c +++ b/sound/soc/stm/stm32_sai_sub.c @@ -682,6 +682,14 @@ static int stm32_sai_startup(struct snd_pcm_substream *substream, sai->substream = substream; + if (STM_SAI_PROTOCOL_IS_SPDIF(sai)) { + snd_pcm_hw_constraint_mask64(substream->runtime, + SNDRV_PCM_HW_PARAM_FORMAT, + SNDRV_PCM_FMTBIT_S32_LE); + snd_pcm_hw_constraint_single(substream->runtime, + SNDRV_PCM_HW_PARAM_CHANNELS, 2); + } + ret = clk_prepare_enable(sai->sai_ck); if (ret < 0) { dev_err(cpu_dai->dev, "Failed to enable clock: %d\n", ret); -- cgit v1.2.3 From 26f98e82dd49b7c3cc5ef0edd882aa732a62b672 Mon Sep 17 00:00:00 2001 From: Olivier Moysan Date: Thu, 28 Feb 2019 14:19:23 +0100 Subject: ASoC: stm32: sai: fix race condition in irq handler When snd_pcm_stop_xrun() is called in interrupt routine, substream context may have already been released. Add protection on substream context. Signed-off-by: Olivier Moysan Signed-off-by: Mark Brown --- sound/soc/stm/stm32_sai_sub.c | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/sound/soc/stm/stm32_sai_sub.c b/sound/soc/stm/stm32_sai_sub.c index e418f446e03b..cad415e03b5e 100644 --- a/sound/soc/stm/stm32_sai_sub.c +++ b/sound/soc/stm/stm32_sai_sub.c @@ -102,6 +102,7 @@ * @spdif_frm_cnt: S/PDIF playback frame counter * @iec958: iec958 data * @ctrl_lock: control lock + * @irq_lock: prevent race condition with IRQ */ struct stm32_sai_sub_data { struct platform_device *pdev; @@ -133,6 +134,7 @@ struct stm32_sai_sub_data { unsigned int spdif_frm_cnt; struct snd_aes_iec958 iec958; struct mutex ctrl_lock; /* protect resources accessed by controls */ + spinlock_t irq_lock; /* used to prevent race condition with IRQ */ }; enum stm32_sai_fifo_th { @@ -474,8 +476,10 @@ static irqreturn_t stm32_sai_isr(int irq, void *devid) status = SNDRV_PCM_STATE_XRUN; } - if (status != SNDRV_PCM_STATE_RUNNING) + spin_lock(&sai->irq_lock); + if (status != SNDRV_PCM_STATE_RUNNING && sai->substream) snd_pcm_stop_xrun(sai->substream); + spin_unlock(&sai->irq_lock); return IRQ_HANDLED; } @@ -679,8 +683,11 @@ static int stm32_sai_startup(struct snd_pcm_substream *substream, { struct stm32_sai_sub_data *sai = snd_soc_dai_get_drvdata(cpu_dai); int imr, cr2, ret; + unsigned long flags; + spin_lock_irqsave(&sai->irq_lock, flags); sai->substream = substream; + spin_unlock_irqrestore(&sai->irq_lock, flags); if (STM_SAI_PROTOCOL_IS_SPDIF(sai)) { snd_pcm_hw_constraint_mask64(substream->runtime, @@ -1059,6 +1066,7 @@ static void stm32_sai_shutdown(struct snd_pcm_substream *substream, struct snd_soc_dai *cpu_dai) { struct stm32_sai_sub_data *sai = snd_soc_dai_get_drvdata(cpu_dai); + unsigned long flags; regmap_update_bits(sai->regmap, STM_SAI_IMR_REGX, SAI_XIMR_MASK, 0); @@ -1069,7 +1077,9 @@ static void stm32_sai_shutdown(struct snd_pcm_substream *substream, clk_rate_exclusive_put(sai->sai_mclk); + spin_lock_irqsave(&sai->irq_lock, flags); sai->substream = NULL; + spin_unlock_irqrestore(&sai->irq_lock, flags); } static int stm32_sai_pcm_new(struct snd_soc_pcm_runtime *rtd, @@ -1433,6 +1443,7 @@ static int stm32_sai_sub_probe(struct platform_device *pdev) sai->pdev = pdev; mutex_init(&sai->ctrl_lock); + spin_lock_init(&sai->irq_lock); platform_set_drvdata(pdev, sai); sai->pdata = dev_get_drvdata(pdev->dev.parent); -- cgit v1.2.3 From 71d9537fada47762a1a1b33a8a1f95a92d7edc11 Mon Sep 17 00:00:00 2001 From: Olivier Moysan Date: Thu, 28 Feb 2019 14:19:24 +0100 Subject: ASoC: stm32: sai: fix oversampling mode Set OSR bit if mclk/fs ratio is 512. Signed-off-by: Olivier Moysan Signed-off-by: Mark Brown --- sound/soc/stm/stm32_sai_sub.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/sound/soc/stm/stm32_sai_sub.c b/sound/soc/stm/stm32_sai_sub.c index cad415e03b5e..cb658463ccd1 100644 --- a/sound/soc/stm/stm32_sai_sub.c +++ b/sound/soc/stm/stm32_sai_sub.c @@ -913,7 +913,7 @@ static int stm32_sai_configure_clock(struct snd_soc_dai *cpu_dai, struct snd_pcm_hw_params *params) { struct stm32_sai_sub_data *sai = snd_soc_dai_get_drvdata(cpu_dai); - int div = 0; + int div = 0, cr1 = 0; int sai_clk_rate, mclk_ratio, den; unsigned int rate = params_rate(params); @@ -958,13 +958,19 @@ static int stm32_sai_configure_clock(struct snd_soc_dai *cpu_dai, } else { if (sai->mclk_rate) { mclk_ratio = sai->mclk_rate / rate; - if ((mclk_ratio != 512) && - (mclk_ratio != 256)) { + if (mclk_ratio == 512) { + cr1 = SAI_XCR1_OSR; + } else if (mclk_ratio != 256) { dev_err(cpu_dai->dev, "Wrong mclk ratio %d\n", mclk_ratio); return -EINVAL; } + + regmap_update_bits(sai->regmap, + STM_SAI_CR1_REGX, + SAI_XCR1_OSR, cr1); + div = stm32_sai_get_clk_div(sai, sai_clk_rate, sai->mclk_rate); if (div < 0) -- cgit v1.2.3 From d4180b4c02e7b04b8479f6237b2bd98b4c5fd19c Mon Sep 17 00:00:00 2001 From: Olivier Moysan Date: Thu, 28 Feb 2019 14:19:25 +0100 Subject: ASoC: stm32: sai: fix set_sync service Add error check on set_sync function return. Add of_node_put() as of_get_parent() takes a reference which has to be released. Signed-off-by: Olivier Moysan Signed-off-by: Mark Brown --- sound/soc/stm/stm32_sai.c | 8 +++++--- sound/soc/stm/stm32_sai_sub.c | 8 +++++--- 2 files changed, 10 insertions(+), 6 deletions(-) diff --git a/sound/soc/stm/stm32_sai.c b/sound/soc/stm/stm32_sai.c index 14c9591aae42..d68d62f12df5 100644 --- a/sound/soc/stm/stm32_sai.c +++ b/sound/soc/stm/stm32_sai.c @@ -105,6 +105,7 @@ static int stm32_sai_set_sync(struct stm32_sai_data *sai_client, if (!pdev) { dev_err(&sai_client->pdev->dev, "Device not found for node %pOFn\n", np_provider); + of_node_put(np_provider); return -ENODEV; } @@ -113,19 +114,20 @@ static int stm32_sai_set_sync(struct stm32_sai_data *sai_client, dev_err(&sai_client->pdev->dev, "SAI sync provider data not found\n"); ret = -EINVAL; - goto out_put_dev; + goto error; } /* Configure sync client */ ret = stm32_sai_sync_conf_client(sai_client, synci); if (ret < 0) - goto out_put_dev; + goto error; /* Configure sync provider */ ret = stm32_sai_sync_conf_provider(sai_provider, synco); -out_put_dev: +error: put_device(&pdev->dev); + of_node_put(np_provider); return ret; } diff --git a/sound/soc/stm/stm32_sai_sub.c b/sound/soc/stm/stm32_sai_sub.c index cb658463ccd1..55d802f51c15 100644 --- a/sound/soc/stm/stm32_sai_sub.c +++ b/sound/soc/stm/stm32_sai_sub.c @@ -1106,7 +1106,7 @@ static int stm32_sai_pcm_new(struct snd_soc_pcm_runtime *rtd, static int stm32_sai_dai_probe(struct snd_soc_dai *cpu_dai) { struct stm32_sai_sub_data *sai = dev_get_drvdata(cpu_dai->dev); - int cr1 = 0, cr1_mask; + int cr1 = 0, cr1_mask, ret; sai->cpu_dai = cpu_dai; @@ -1136,8 +1136,10 @@ static int stm32_sai_dai_probe(struct snd_soc_dai *cpu_dai) /* Configure synchronization */ if (sai->sync == SAI_SYNC_EXTERNAL) { /* Configure synchro client and provider */ - sai->pdata->set_sync(sai->pdata, sai->np_sync_provider, - sai->synco, sai->synci); + ret = sai->pdata->set_sync(sai->pdata, sai->np_sync_provider, + sai->synco, sai->synci); + if (ret) + return ret; } cr1_mask |= SAI_XCR1_SYNCEN_MASK; -- cgit v1.2.3 From bbf62563d8622434c761cb96569c132467f88597 Mon Sep 17 00:00:00 2001 From: Russell King Date: Thu, 28 Feb 2019 15:30:40 +0000 Subject: ASoC: hdmi-codec: avoid limiting params->msbits in hw_params() Limiting the value of the passed in params->msbits in the hw_params() callback is redundant on three counts: 1. We already specify in the DAI driver that we can only handle up to 24 bits. This means msbits will be limited to 24 via the ALSA constraints imposed by the ASoC core, unless we have multiple codecs that can handle more bits. 2. Nothing in our hw_params() implementation uses this value. 3. The copy of the params that we are passed by the ASoC core never reads back the msbits value. Consequently, this code is unnecessary and does nothing useful. Remove it. Signed-off-by: Russell King Reviewed-by: Jyri Sarha Signed-off-by: Mark Brown --- sound/soc/codecs/hdmi-codec.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/sound/soc/codecs/hdmi-codec.c b/sound/soc/codecs/hdmi-codec.c index d5f73c837281..35df73e42cbc 100644 --- a/sound/soc/codecs/hdmi-codec.c +++ b/sound/soc/codecs/hdmi-codec.c @@ -484,9 +484,6 @@ static int hdmi_codec_hw_params(struct snd_pcm_substream *substream, params_width(params), params_rate(params), params_channels(params)); - if (params_width(params) > 24) - params->msbits = 24; - ret = snd_pcm_create_iec958_consumer_hw_params(params, hp.iec.status, sizeof(hp.iec.status)); if (ret < 0) { -- cgit v1.2.3 From c342febcde452f817cbd3896dc40953ab17c309d Mon Sep 17 00:00:00 2001 From: Jonathan Hunter Date: Mon, 4 Mar 2019 13:31:14 +0000 Subject: ASoC: soc-core: Fix probe deferral following prelink failure Commit 78a24e10cd94 ("ASoC: soc-core: clear platform pointers on error") re-worked the clean-up of any platform pointers that may have been initialised by the function snd_soc_init_platform(). This commit missed one error path where if any of the prelinks for a soundcard failed to initialise, then these platform pointers would not be cleaned-up. This then prevents the soundcard from being initialised following a probe deferral when any of the soundcard prelinks cannot be found. Fix this by ensuring that soc_cleanup_platform() is called when initialising the soundcard prelinks fails. Fixes: 78a24e10cd94 ("ASoC: soc-core: clear platform pointers on error") Signed-off-by: Jonathan Hunter Signed-off-by: Mark Brown --- sound/soc/soc-core.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/soc/soc-core.c b/sound/soc/soc-core.c index 93d316d5bf8e..5a5764dba147 100644 --- a/sound/soc/soc-core.c +++ b/sound/soc/soc-core.c @@ -2797,6 +2797,7 @@ int snd_soc_register_card(struct snd_soc_card *card) ret = soc_init_dai_link(card, link); if (ret) { + soc_cleanup_platform(card); dev_err(card->dev, "ASoC: failed to init link %s\n", link->name); mutex_unlock(&client_mutex); -- cgit v1.2.3 From 6ed69184ed9c43873b8a1ee721e3bf3c08c2c6be Mon Sep 17 00:00:00 2001 From: Myungho Jung Date: Thu, 7 Mar 2019 10:23:08 +0900 Subject: xfrm: Reset secpath in xfrm failure In esp4_gro_receive() and esp6_gro_receive(), secpath can be allocated without adding xfrm state to xvec. Then, sp->xvec[sp->len - 1] would fail and result in dereferencing invalid pointer in esp4_gso_segment() and esp6_gso_segment(). Reset secpath if xfrm function returns error. Fixes: 7785bba299a8 ("esp: Add a software GRO codepath") Reported-by: syzbot+b69368fd933c6c592f4c@syzkaller.appspotmail.com Signed-off-by: Myungho Jung Signed-off-by: Steffen Klassert --- net/ipv4/esp4_offload.c | 8 +++++--- net/ipv6/esp6_offload.c | 8 +++++--- 2 files changed, 10 insertions(+), 6 deletions(-) diff --git a/net/ipv4/esp4_offload.c b/net/ipv4/esp4_offload.c index 8756e0e790d2..d3170a8001b2 100644 --- a/net/ipv4/esp4_offload.c +++ b/net/ipv4/esp4_offload.c @@ -52,13 +52,13 @@ static struct sk_buff *esp4_gro_receive(struct list_head *head, goto out; if (sp->len == XFRM_MAX_DEPTH) - goto out; + goto out_reset; x = xfrm_state_lookup(dev_net(skb->dev), skb->mark, (xfrm_address_t *)&ip_hdr(skb)->daddr, spi, IPPROTO_ESP, AF_INET); if (!x) - goto out; + goto out_reset; sp->xvec[sp->len++] = x; sp->olen++; @@ -66,7 +66,7 @@ static struct sk_buff *esp4_gro_receive(struct list_head *head, xo = xfrm_offload(skb); if (!xo) { xfrm_state_put(x); - goto out; + goto out_reset; } } @@ -82,6 +82,8 @@ static struct sk_buff *esp4_gro_receive(struct list_head *head, xfrm_input(skb, IPPROTO_ESP, spi, -2); return ERR_PTR(-EINPROGRESS); +out_reset: + secpath_reset(skb); out: skb_push(skb, offset); NAPI_GRO_CB(skb)->same_flow = 0; diff --git a/net/ipv6/esp6_offload.c b/net/ipv6/esp6_offload.c index d46b4eb645c2..cb99f6fb79b7 100644 --- a/net/ipv6/esp6_offload.c +++ b/net/ipv6/esp6_offload.c @@ -74,13 +74,13 @@ static struct sk_buff *esp6_gro_receive(struct list_head *head, goto out; if (sp->len == XFRM_MAX_DEPTH) - goto out; + goto out_reset; x = xfrm_state_lookup(dev_net(skb->dev), skb->mark, (xfrm_address_t *)&ipv6_hdr(skb)->daddr, spi, IPPROTO_ESP, AF_INET6); if (!x) - goto out; + goto out_reset; sp->xvec[sp->len++] = x; sp->olen++; @@ -88,7 +88,7 @@ static struct sk_buff *esp6_gro_receive(struct list_head *head, xo = xfrm_offload(skb); if (!xo) { xfrm_state_put(x); - goto out; + goto out_reset; } } @@ -109,6 +109,8 @@ static struct sk_buff *esp6_gro_receive(struct list_head *head, xfrm_input(skb, IPPROTO_ESP, spi, -2); return ERR_PTR(-EINPROGRESS); +out_reset: + secpath_reset(skb); out: skb_push(skb, offset); NAPI_GRO_CB(skb)->same_flow = 0; -- cgit v1.2.3 From f10e0010fae8174dc20bdc872bcaa85baa925cb7 Mon Sep 17 00:00:00 2001 From: Su Yanjun Date: Wed, 6 Mar 2019 20:54:08 -0500 Subject: net: xfrm: Add '_rcu' tag for rcu protected pointer in netns_xfrm For rcu protected pointers, we'd better add '__rcu' for them. Once added '__rcu' tag for rcu protected pointer, the sparse tool reports warnings. net/xfrm/xfrm_user.c:1198:39: sparse: expected struct sock *sk net/xfrm/xfrm_user.c:1198:39: sparse: got struct sock [noderef] *nlsk [...] So introduce a new wrapper function of nlmsg_unicast to handle type conversions. This patch also fixes a direct access of a rcu protected socket. Fixes: be33690d8fcf("[XFRM]: Fix aevent related crash") Signed-off-by: Su Yanjun Signed-off-by: Steffen Klassert --- include/net/netns/xfrm.h | 2 +- net/xfrm/xfrm_user.c | 30 +++++++++++++++++++++++------- 2 files changed, 24 insertions(+), 8 deletions(-) diff --git a/include/net/netns/xfrm.h b/include/net/netns/xfrm.h index 59f45b1e9dac..d2a36fb9f92a 100644 --- a/include/net/netns/xfrm.h +++ b/include/net/netns/xfrm.h @@ -57,7 +57,7 @@ struct netns_xfrm { struct list_head inexact_bins; - struct sock *nlsk; + struct sock __rcu *nlsk; struct sock *nlsk_stash; u32 sysctl_aevent_etime; diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index 8d4d52fd457b..944589832343 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -1071,6 +1071,22 @@ static inline int xfrm_nlmsg_multicast(struct net *net, struct sk_buff *skb, return nlmsg_multicast(nlsk, skb, pid, group, GFP_ATOMIC); } +/* A similar wrapper like xfrm_nlmsg_multicast checking that nlsk is still + * available. + */ +static inline int xfrm_nlmsg_unicast(struct net *net, struct sk_buff *skb, + u32 pid) +{ + struct sock *nlsk = rcu_dereference(net->xfrm.nlsk); + + if (!nlsk) { + kfree_skb(skb); + return -EPIPE; + } + + return nlmsg_unicast(nlsk, skb, pid); +} + static inline unsigned int xfrm_spdinfo_msgsize(void) { return NLMSG_ALIGN(4) @@ -1195,7 +1211,7 @@ static int xfrm_get_spdinfo(struct sk_buff *skb, struct nlmsghdr *nlh, err = build_spdinfo(r_skb, net, sportid, seq, *flags); BUG_ON(err < 0); - return nlmsg_unicast(net->xfrm.nlsk, r_skb, sportid); + return xfrm_nlmsg_unicast(net, r_skb, sportid); } static inline unsigned int xfrm_sadinfo_msgsize(void) @@ -1254,7 +1270,7 @@ static int xfrm_get_sadinfo(struct sk_buff *skb, struct nlmsghdr *nlh, err = build_sadinfo(r_skb, net, sportid, seq, *flags); BUG_ON(err < 0); - return nlmsg_unicast(net->xfrm.nlsk, r_skb, sportid); + return xfrm_nlmsg_unicast(net, r_skb, sportid); } static int xfrm_get_sa(struct sk_buff *skb, struct nlmsghdr *nlh, @@ -1274,7 +1290,7 @@ static int xfrm_get_sa(struct sk_buff *skb, struct nlmsghdr *nlh, if (IS_ERR(resp_skb)) { err = PTR_ERR(resp_skb); } else { - err = nlmsg_unicast(net->xfrm.nlsk, resp_skb, NETLINK_CB(skb).portid); + err = xfrm_nlmsg_unicast(net, resp_skb, NETLINK_CB(skb).portid); } xfrm_state_put(x); out_noput: @@ -1337,7 +1353,7 @@ static int xfrm_alloc_userspi(struct sk_buff *skb, struct nlmsghdr *nlh, goto out; } - err = nlmsg_unicast(net->xfrm.nlsk, resp_skb, NETLINK_CB(skb).portid); + err = xfrm_nlmsg_unicast(net, resp_skb, NETLINK_CB(skb).portid); out: xfrm_state_put(x); @@ -1903,8 +1919,8 @@ static int xfrm_get_policy(struct sk_buff *skb, struct nlmsghdr *nlh, if (IS_ERR(resp_skb)) { err = PTR_ERR(resp_skb); } else { - err = nlmsg_unicast(net->xfrm.nlsk, resp_skb, - NETLINK_CB(skb).portid); + err = xfrm_nlmsg_unicast(net, resp_skb, + NETLINK_CB(skb).portid); } } else { xfrm_audit_policy_delete(xp, err ? 0 : 1, true); @@ -2062,7 +2078,7 @@ static int xfrm_get_ae(struct sk_buff *skb, struct nlmsghdr *nlh, err = build_aevent(r_skb, x, &c); BUG_ON(err < 0); - err = nlmsg_unicast(net->xfrm.nlsk, r_skb, NETLINK_CB(skb).portid); + err = xfrm_nlmsg_unicast(net, r_skb, NETLINK_CB(skb).portid); spin_unlock_bh(&x->lock); xfrm_state_put(x); return err; -- cgit v1.2.3 From 0a8a29be499cbb67df79370aaf5109085509feb8 Mon Sep 17 00:00:00 2001 From: Leonard Pollak Date: Wed, 13 Feb 2019 11:19:52 +0100 Subject: Staging: iio: meter: fixed typo This patch fixes an obvious typo, which will cause erroneously returning the Peak Voltage instead of the Peak Current. Signed-off-by: Leonard Pollak Cc: Acked-by: Michael Hennerich Signed-off-by: Jonathan Cameron --- drivers/staging/iio/meter/ade7854.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/staging/iio/meter/ade7854.c b/drivers/staging/iio/meter/ade7854.c index 029c3bf42d4d..07774c000c5a 100644 --- a/drivers/staging/iio/meter/ade7854.c +++ b/drivers/staging/iio/meter/ade7854.c @@ -269,7 +269,7 @@ static IIO_DEV_ATTR_VPEAK(0644, static IIO_DEV_ATTR_IPEAK(0644, ade7854_read_32bit, ade7854_write_32bit, - ADE7854_VPEAK); + ADE7854_IPEAK); static IIO_DEV_ATTR_APHCAL(0644, ade7854_read_16bit, ade7854_write_16bit, -- cgit v1.2.3 From 40a7198a4a01037003c7ca714f0d048a61e729ac Mon Sep 17 00:00:00 2001 From: Mike Looijmans Date: Wed, 13 Feb 2019 08:41:47 +0100 Subject: iio/gyro/bmg160: Use millidegrees for temperature scale Standard unit for temperature is millidegrees Celcius, whereas this driver was reporting in degrees. Fix the scale factor in the driver. Signed-off-by: Mike Looijmans Cc: Signed-off-by: Jonathan Cameron --- drivers/iio/gyro/bmg160_core.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/iio/gyro/bmg160_core.c b/drivers/iio/gyro/bmg160_core.c index 63ca31628a93..92c07ab826eb 100644 --- a/drivers/iio/gyro/bmg160_core.c +++ b/drivers/iio/gyro/bmg160_core.c @@ -582,11 +582,10 @@ static int bmg160_read_raw(struct iio_dev *indio_dev, case IIO_CHAN_INFO_LOW_PASS_FILTER_3DB_FREQUENCY: return bmg160_get_filter(data, val); case IIO_CHAN_INFO_SCALE: - *val = 0; switch (chan->type) { case IIO_TEMP: - *val2 = 500000; - return IIO_VAL_INT_PLUS_MICRO; + *val = 500; + return IIO_VAL_INT; case IIO_ANGL_VEL: { int i; @@ -594,6 +593,7 @@ static int bmg160_read_raw(struct iio_dev *indio_dev, for (i = 0; i < ARRAY_SIZE(bmg160_scale_table); ++i) { if (bmg160_scale_table[i].dps_range == data->dps_range) { + *val = 0; *val2 = bmg160_scale_table[i].scale; return IIO_VAL_INT_PLUS_MICRO; } -- cgit v1.2.3 From 7ce0f216221856a17fc4934b39284678a5fef2e9 Mon Sep 17 00:00:00 2001 From: Mircea Caprioru Date: Wed, 20 Feb 2019 13:08:20 +0200 Subject: staging: iio: ad7192: Fix ad7193 channel address This patch fixes the differential channels addresses for the ad7193. Signed-off-by: Mircea Caprioru Cc: Signed-off-by: Jonathan Cameron --- drivers/staging/iio/adc/ad7192.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/staging/iio/adc/ad7192.c b/drivers/staging/iio/adc/ad7192.c index acdbc07fd259..2fc8bc22b57b 100644 --- a/drivers/staging/iio/adc/ad7192.c +++ b/drivers/staging/iio/adc/ad7192.c @@ -109,10 +109,10 @@ #define AD7192_CH_AIN3 BIT(6) /* AIN3 - AINCOM */ #define AD7192_CH_AIN4 BIT(7) /* AIN4 - AINCOM */ -#define AD7193_CH_AIN1P_AIN2M 0x000 /* AIN1(+) - AIN2(-) */ -#define AD7193_CH_AIN3P_AIN4M 0x001 /* AIN3(+) - AIN4(-) */ -#define AD7193_CH_AIN5P_AIN6M 0x002 /* AIN5(+) - AIN6(-) */ -#define AD7193_CH_AIN7P_AIN8M 0x004 /* AIN7(+) - AIN8(-) */ +#define AD7193_CH_AIN1P_AIN2M 0x001 /* AIN1(+) - AIN2(-) */ +#define AD7193_CH_AIN3P_AIN4M 0x002 /* AIN3(+) - AIN4(-) */ +#define AD7193_CH_AIN5P_AIN6M 0x004 /* AIN5(+) - AIN6(-) */ +#define AD7193_CH_AIN7P_AIN8M 0x008 /* AIN7(+) - AIN8(-) */ #define AD7193_CH_TEMP 0x100 /* Temp senseor */ #define AD7193_CH_AIN2P_AIN2M 0x200 /* AIN2(+) - AIN2(-) */ #define AD7193_CH_AIN1 0x401 /* AIN1 - AINCOM */ -- cgit v1.2.3 From 20ea39ef9f2f911bd01c69519e7d69cfec79fde3 Mon Sep 17 00:00:00 2001 From: Lars-Peter Clausen Date: Wed, 20 Feb 2019 17:11:32 +0200 Subject: iio: Fix scan mask selection The trialmask is expected to have all bits set to 0 after allocation. Currently kmalloc_array() is used which does not zero the memory and so random bits are set. This results in random channels being enabled when they shouldn't. Replace kmalloc_array() with kcalloc() which has the same interface but zeros the memory. Note the fix is actually required earlier than the below fixes tag, but will require a manual backport due to move from kmalloc to kmalloc_array. Signed-off-by: Lars-Peter Clausen Signed-off-by: Alexandru Ardelean Fixes commit 057ac1acdfc4 ("iio: Use kmalloc_array() in iio_scan_mask_set()"). Cc: Signed-off-by: Jonathan Cameron --- drivers/iio/industrialio-buffer.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/iio/industrialio-buffer.c b/drivers/iio/industrialio-buffer.c index cd5bfe39591b..dadd921a4a30 100644 --- a/drivers/iio/industrialio-buffer.c +++ b/drivers/iio/industrialio-buffer.c @@ -320,9 +320,8 @@ static int iio_scan_mask_set(struct iio_dev *indio_dev, const unsigned long *mask; unsigned long *trialmask; - trialmask = kmalloc_array(BITS_TO_LONGS(indio_dev->masklength), - sizeof(*trialmask), - GFP_KERNEL); + trialmask = kcalloc(BITS_TO_LONGS(indio_dev->masklength), + sizeof(*trialmask), GFP_KERNEL); if (trialmask == NULL) return -ENOMEM; if (!indio_dev->masklength) { -- cgit v1.2.3 From 409a51e0a4a5f908763191fae2c29008632eb712 Mon Sep 17 00:00:00 2001 From: Sergey Larin Date: Sat, 2 Mar 2019 19:54:55 +0300 Subject: iio: gyro: mpu3050: fix chip ID reading According to the datasheet, the last bit of CHIP_ID register controls I2C bus, and the first one is unused. Handle this correctly. Note that there are chips out there that have a value such that the id check currently fails. Signed-off-by: Sergey Larin Reviewed-by: Linus Walleij Cc: Signed-off-by: Jonathan Cameron --- drivers/iio/gyro/mpu3050-core.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/iio/gyro/mpu3050-core.c b/drivers/iio/gyro/mpu3050-core.c index 77fac81a3adc..5ddebede31a6 100644 --- a/drivers/iio/gyro/mpu3050-core.c +++ b/drivers/iio/gyro/mpu3050-core.c @@ -29,7 +29,8 @@ #include "mpu3050.h" -#define MPU3050_CHIP_ID 0x69 +#define MPU3050_CHIP_ID 0x68 +#define MPU3050_CHIP_ID_MASK 0x7E /* * Register map: anything suffixed *_H is a big-endian high byte and always @@ -1176,8 +1177,9 @@ int mpu3050_common_probe(struct device *dev, goto err_power_down; } - if (val != MPU3050_CHIP_ID) { - dev_err(dev, "unsupported chip id %02x\n", (u8)val); + if ((val & MPU3050_CHIP_ID_MASK) != MPU3050_CHIP_ID) { + dev_err(dev, "unsupported chip id %02x\n", + (u8)(val & MPU3050_CHIP_ID_MASK)); ret = -ENODEV; goto err_power_down; } -- cgit v1.2.3 From 09c6bdee51183a575bf7546890c8c137a75a2b44 Mon Sep 17 00:00:00 2001 From: Georg Ottinger Date: Wed, 30 Jan 2019 14:42:02 +0100 Subject: iio: adc: at91: disable adc channel interrupt in timeout case Having a brief look at at91_adc_read_raw() it is obvious that in the case of a timeout the setting of AT91_ADC_CHDR and AT91_ADC_IDR registers is omitted. If 2 different channels are queried we can end up with a situation where two interrupts are enabled, but only one interrupt is cleared in the interrupt handler. Resulting in a interrupt loop and a system hang. Signed-off-by: Georg Ottinger Acked-by: Ludovic Desroches Cc: Signed-off-by: Jonathan Cameron --- drivers/iio/adc/at91_adc.c | 28 +++++++++++++++++----------- 1 file changed, 17 insertions(+), 11 deletions(-) diff --git a/drivers/iio/adc/at91_adc.c b/drivers/iio/adc/at91_adc.c index 75d2f73582a3..596841a3c4db 100644 --- a/drivers/iio/adc/at91_adc.c +++ b/drivers/iio/adc/at91_adc.c @@ -704,23 +704,29 @@ static int at91_adc_read_raw(struct iio_dev *idev, ret = wait_event_interruptible_timeout(st->wq_data_avail, st->done, msecs_to_jiffies(1000)); - if (ret == 0) - ret = -ETIMEDOUT; - if (ret < 0) { - mutex_unlock(&st->lock); - return ret; - } - - *val = st->last_value; + /* Disable interrupts, regardless if adc conversion was + * successful or not + */ at91_adc_writel(st, AT91_ADC_CHDR, AT91_ADC_CH(chan->channel)); at91_adc_writel(st, AT91_ADC_IDR, BIT(chan->channel)); - st->last_value = 0; - st->done = false; + if (ret > 0) { + /* a valid conversion took place */ + *val = st->last_value; + st->last_value = 0; + st->done = false; + ret = IIO_VAL_INT; + } else if (ret == 0) { + /* conversion timeout */ + dev_err(&idev->dev, "ADC Channel %d timeout.\n", + chan->channel); + ret = -ETIMEDOUT; + } + mutex_unlock(&st->lock); - return IIO_VAL_INT; + return ret; case IIO_CHAN_INFO_SCALE: *val = st->vref_mv; -- cgit v1.2.3 From 831d2fefdfce757fcea86742220f8cbe7ca51ddd Mon Sep 17 00:00:00 2001 From: Jonathan Cameron Date: Sat, 9 Mar 2019 16:59:03 +0000 Subject: iio: chemical: fix missing Kconfig block for sgp30 I clearly messed up applying this patch. Not sure how but the entire Kconfig block is missing. This patch puts it back as it was in the original patch. Reported-by: Andreas Brauchli Fixes: ce514124161a ("iio: chemical: sgp30: Support Sensirion SGP30/SGPC3 sensors") Signed-off-by: Jonathan Cameron --- drivers/iio/chemical/Kconfig | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/drivers/iio/chemical/Kconfig b/drivers/iio/chemical/Kconfig index d5d146e9e372..5248e180b442 100644 --- a/drivers/iio/chemical/Kconfig +++ b/drivers/iio/chemical/Kconfig @@ -71,6 +71,19 @@ config PMS7003 To compile this driver as a module, choose M here: the module will be called pms7003. +config SENSIRION_SGP30 + tristate "Sensirion SGPxx gas sensors" + depends on I2C + select CRC8 + help + Say Y here to build I2C interface support for the following + Sensirion SGP gas sensors: + * SGP30 gas sensor + * SGPC3 low power gas sensor + + To compile this driver as module, choose M here: the + module will be called sgp30. + config SPS30 tristate "SPS30 particulate matter sensor" depends on I2C -- cgit v1.2.3 From 9436f45dd53595e21566a8c6627411077dfdb776 Mon Sep 17 00:00:00 2001 From: Mike Looijmans Date: Wed, 6 Mar 2019 08:31:47 +0100 Subject: iio:chemical:bme680: Fix, report temperature in millidegrees The standard unit for temperature is millidegrees Celcius. Adapt the driver to report in millidegrees instead of degrees. Signed-off-by: Mike Looijmans Fixes: 1b3bd8592780 ("iio: chemical: Add support for Bosch BME680 sensor"); Cc: Signed-off-by: Jonathan Cameron --- drivers/iio/chemical/bme680_core.c | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/drivers/iio/chemical/bme680_core.c b/drivers/iio/chemical/bme680_core.c index 70c1fe4366f4..fefe32b5b69d 100644 --- a/drivers/iio/chemical/bme680_core.c +++ b/drivers/iio/chemical/bme680_core.c @@ -583,8 +583,7 @@ static int bme680_gas_config(struct bme680_data *data) return ret; } -static int bme680_read_temp(struct bme680_data *data, - int *val, int *val2) +static int bme680_read_temp(struct bme680_data *data, int *val) { struct device *dev = regmap_get_device(data->regmap); int ret; @@ -617,10 +616,9 @@ static int bme680_read_temp(struct bme680_data *data, * compensate_press/compensate_humid to get compensated * pressure/humidity readings. */ - if (val && val2) { - *val = comp_temp; - *val2 = 100; - return IIO_VAL_FRACTIONAL; + if (val) { + *val = comp_temp * 10; /* Centidegrees to millidegrees */ + return IIO_VAL_INT; } return ret; @@ -635,7 +633,7 @@ static int bme680_read_press(struct bme680_data *data, s32 adc_press; /* Read and compensate temperature to get a reading of t_fine */ - ret = bme680_read_temp(data, NULL, NULL); + ret = bme680_read_temp(data, NULL); if (ret < 0) return ret; @@ -668,7 +666,7 @@ static int bme680_read_humid(struct bme680_data *data, u32 comp_humidity; /* Read and compensate temperature to get a reading of t_fine */ - ret = bme680_read_temp(data, NULL, NULL); + ret = bme680_read_temp(data, NULL); if (ret < 0) return ret; @@ -761,7 +759,7 @@ static int bme680_read_raw(struct iio_dev *indio_dev, case IIO_CHAN_INFO_PROCESSED: switch (chan->type) { case IIO_TEMP: - return bme680_read_temp(data, val, val2); + return bme680_read_temp(data, val); case IIO_PRESSURE: return bme680_read_press(data, val, val2); case IIO_HUMIDITYRELATIVE: -- cgit v1.2.3 From 73f3bc6da506711302bb67572440eb84b1ec4a2c Mon Sep 17 00:00:00 2001 From: Mike Looijmans Date: Wed, 6 Mar 2019 08:31:48 +0100 Subject: iio:chemical:bme680: Fix SPI read interface The SPI interface implementation was completely broken. When using the SPI interface, there are only 7 address bits, the upper bit is controlled by a page select register. The core needs access to both ranges, so implement register read/write for both regions. The regmap paging functionality didn't agree with a register that needs to be read and modified, so I implemented a custom paging algorithm. This fixes that the device wouldn't even probe in SPI mode. The SPI interface then isn't different from I2C, merged them into the core, and the I2C/SPI named registers are no longer needed. Implemented register value caching for the registers to reduce the I2C/SPI data transfers considerably. The calibration set reads as all zeroes until some undefined point in time, and I couldn't determine what makes it valid. The datasheet mentions these registers but does not provide any hints on when they become valid, and they aren't even enumerated in the memory map. So check the calibration and retry reading it from the device after each measurement until it provides something valid. Despite the size this is suitable for a stable backport given that it seems the SPI support never worked. Signed-off-by: Mike Looijmans Fixes: 1b3bd8592780 ("iio: chemical: Add support for Bosch BME680 sensor"); Cc: Signed-off-by: Jonathan Cameron --- drivers/iio/chemical/bme680.h | 6 +- drivers/iio/chemical/bme680_core.c | 38 ++++++++++++ drivers/iio/chemical/bme680_i2c.c | 21 ------- drivers/iio/chemical/bme680_spi.c | 115 +++++++++++++++++++++++++------------ 4 files changed, 118 insertions(+), 62 deletions(-) diff --git a/drivers/iio/chemical/bme680.h b/drivers/iio/chemical/bme680.h index 0ae89b87e2d6..4edc5d21cb9f 100644 --- a/drivers/iio/chemical/bme680.h +++ b/drivers/iio/chemical/bme680.h @@ -2,11 +2,9 @@ #ifndef BME680_H_ #define BME680_H_ -#define BME680_REG_CHIP_I2C_ID 0xD0 -#define BME680_REG_CHIP_SPI_ID 0x50 +#define BME680_REG_CHIP_ID 0xD0 #define BME680_CHIP_ID_VAL 0x61 -#define BME680_REG_SOFT_RESET_I2C 0xE0 -#define BME680_REG_SOFT_RESET_SPI 0x60 +#define BME680_REG_SOFT_RESET 0xE0 #define BME680_CMD_SOFTRESET 0xB6 #define BME680_REG_STATUS 0x73 #define BME680_SPI_MEM_PAGE_BIT BIT(4) diff --git a/drivers/iio/chemical/bme680_core.c b/drivers/iio/chemical/bme680_core.c index fefe32b5b69d..ccde4c65ff93 100644 --- a/drivers/iio/chemical/bme680_core.c +++ b/drivers/iio/chemical/bme680_core.c @@ -63,9 +63,23 @@ struct bme680_data { s32 t_fine; }; +static const struct regmap_range bme680_volatile_ranges[] = { + regmap_reg_range(BME680_REG_MEAS_STAT_0, BME680_REG_GAS_R_LSB), + regmap_reg_range(BME680_REG_STATUS, BME680_REG_STATUS), + regmap_reg_range(BME680_T2_LSB_REG, BME680_GH3_REG), +}; + +static const struct regmap_access_table bme680_volatile_table = { + .yes_ranges = bme680_volatile_ranges, + .n_yes_ranges = ARRAY_SIZE(bme680_volatile_ranges), +}; + const struct regmap_config bme680_regmap_config = { .reg_bits = 8, .val_bits = 8, + .max_register = 0xef, + .volatile_table = &bme680_volatile_table, + .cache_type = REGCACHE_RBTREE, }; EXPORT_SYMBOL(bme680_regmap_config); @@ -316,6 +330,10 @@ static s16 bme680_compensate_temp(struct bme680_data *data, s64 var1, var2, var3; s16 calc_temp; + /* If the calibration is invalid, attempt to reload it */ + if (!calib->par_t2) + bme680_read_calib(data, calib); + var1 = (adc_temp >> 3) - (calib->par_t1 << 1); var2 = (var1 * calib->par_t2) >> 11; var3 = ((var1 >> 1) * (var1 >> 1)) >> 12; @@ -865,8 +883,28 @@ int bme680_core_probe(struct device *dev, struct regmap *regmap, { struct iio_dev *indio_dev; struct bme680_data *data; + unsigned int val; int ret; + ret = regmap_write(regmap, BME680_REG_SOFT_RESET, + BME680_CMD_SOFTRESET); + if (ret < 0) { + dev_err(dev, "Failed to reset chip\n"); + return ret; + } + + ret = regmap_read(regmap, BME680_REG_CHIP_ID, &val); + if (ret < 0) { + dev_err(dev, "Error reading chip ID\n"); + return ret; + } + + if (val != BME680_CHIP_ID_VAL) { + dev_err(dev, "Wrong chip ID, got %x expected %x\n", + val, BME680_CHIP_ID_VAL); + return -ENODEV; + } + indio_dev = devm_iio_device_alloc(dev, sizeof(*data)); if (!indio_dev) return -ENOMEM; diff --git a/drivers/iio/chemical/bme680_i2c.c b/drivers/iio/chemical/bme680_i2c.c index b2f805b6b36a..de9c9e3d23ea 100644 --- a/drivers/iio/chemical/bme680_i2c.c +++ b/drivers/iio/chemical/bme680_i2c.c @@ -23,8 +23,6 @@ static int bme680_i2c_probe(struct i2c_client *client, { struct regmap *regmap; const char *name = NULL; - unsigned int val; - int ret; regmap = devm_regmap_init_i2c(client, &bme680_regmap_config); if (IS_ERR(regmap)) { @@ -33,25 +31,6 @@ static int bme680_i2c_probe(struct i2c_client *client, return PTR_ERR(regmap); } - ret = regmap_write(regmap, BME680_REG_SOFT_RESET_I2C, - BME680_CMD_SOFTRESET); - if (ret < 0) { - dev_err(&client->dev, "Failed to reset chip\n"); - return ret; - } - - ret = regmap_read(regmap, BME680_REG_CHIP_I2C_ID, &val); - if (ret < 0) { - dev_err(&client->dev, "Error reading I2C chip ID\n"); - return ret; - } - - if (val != BME680_CHIP_ID_VAL) { - dev_err(&client->dev, "Wrong chip ID, got %x expected %x\n", - val, BME680_CHIP_ID_VAL); - return -ENODEV; - } - if (id) name = id->name; diff --git a/drivers/iio/chemical/bme680_spi.c b/drivers/iio/chemical/bme680_spi.c index d0b7bdd3f066..3b838068a7e4 100644 --- a/drivers/iio/chemical/bme680_spi.c +++ b/drivers/iio/chemical/bme680_spi.c @@ -12,28 +12,93 @@ #include "bme680.h" +struct bme680_spi_bus_context { + struct spi_device *spi; + u8 current_page; +}; + +/* + * In SPI mode there are only 7 address bits, a "page" register determines + * which part of the 8-bit range is active. This function looks at the address + * and writes the page selection bit if needed + */ +static int bme680_regmap_spi_select_page( + struct bme680_spi_bus_context *ctx, u8 reg) +{ + struct spi_device *spi = ctx->spi; + int ret; + u8 buf[2]; + u8 page = (reg & 0x80) ? 0 : 1; /* Page "1" is low range */ + + if (page == ctx->current_page) + return 0; + + /* + * Data sheet claims we're only allowed to change bit 4, so we must do + * a read-modify-write on each and every page select + */ + buf[0] = BME680_REG_STATUS; + ret = spi_write_then_read(spi, buf, 1, buf + 1, 1); + if (ret < 0) { + dev_err(&spi->dev, "failed to set page %u\n", page); + return ret; + } + + buf[0] = BME680_REG_STATUS; + if (page) + buf[1] |= BME680_SPI_MEM_PAGE_BIT; + else + buf[1] &= ~BME680_SPI_MEM_PAGE_BIT; + + ret = spi_write(spi, buf, 2); + if (ret < 0) { + dev_err(&spi->dev, "failed to set page %u\n", page); + return ret; + } + + ctx->current_page = page; + + return 0; +} + static int bme680_regmap_spi_write(void *context, const void *data, size_t count) { - struct spi_device *spi = context; + struct bme680_spi_bus_context *ctx = context; + struct spi_device *spi = ctx->spi; + int ret; u8 buf[2]; memcpy(buf, data, 2); + + ret = bme680_regmap_spi_select_page(ctx, buf[0]); + if (ret) + return ret; + /* * The SPI register address (= full register address without bit 7) * and the write command (bit7 = RW = '0') */ buf[0] &= ~0x80; - return spi_write_then_read(spi, buf, 2, NULL, 0); + return spi_write(spi, buf, 2); } static int bme680_regmap_spi_read(void *context, const void *reg, size_t reg_size, void *val, size_t val_size) { - struct spi_device *spi = context; + struct bme680_spi_bus_context *ctx = context; + struct spi_device *spi = ctx->spi; + int ret; + u8 addr = *(const u8 *)reg; + + ret = bme680_regmap_spi_select_page(ctx, addr); + if (ret) + return ret; - return spi_write_then_read(spi, reg, reg_size, val, val_size); + addr |= 0x80; /* bit7 = RW = '1' */ + + return spi_write_then_read(spi, &addr, 1, val, val_size); } static struct regmap_bus bme680_regmap_bus = { @@ -46,8 +111,8 @@ static struct regmap_bus bme680_regmap_bus = { static int bme680_spi_probe(struct spi_device *spi) { const struct spi_device_id *id = spi_get_device_id(spi); + struct bme680_spi_bus_context *bus_context; struct regmap *regmap; - unsigned int val; int ret; spi->bits_per_word = 8; @@ -57,45 +122,21 @@ static int bme680_spi_probe(struct spi_device *spi) return ret; } + bus_context = devm_kzalloc(&spi->dev, sizeof(*bus_context), GFP_KERNEL); + if (!bus_context) + return -ENOMEM; + + bus_context->spi = spi; + bus_context->current_page = 0xff; /* Undefined on warm boot */ + regmap = devm_regmap_init(&spi->dev, &bme680_regmap_bus, - &spi->dev, &bme680_regmap_config); + bus_context, &bme680_regmap_config); if (IS_ERR(regmap)) { dev_err(&spi->dev, "Failed to register spi regmap %d\n", (int)PTR_ERR(regmap)); return PTR_ERR(regmap); } - ret = regmap_write(regmap, BME680_REG_SOFT_RESET_SPI, - BME680_CMD_SOFTRESET); - if (ret < 0) { - dev_err(&spi->dev, "Failed to reset chip\n"); - return ret; - } - - /* after power-on reset, Page 0(0x80-0xFF) of spi_mem_page is active */ - ret = regmap_read(regmap, BME680_REG_CHIP_SPI_ID, &val); - if (ret < 0) { - dev_err(&spi->dev, "Error reading SPI chip ID\n"); - return ret; - } - - if (val != BME680_CHIP_ID_VAL) { - dev_err(&spi->dev, "Wrong chip ID, got %x expected %x\n", - val, BME680_CHIP_ID_VAL); - return -ENODEV; - } - /* - * select Page 1 of spi_mem_page to enable access to - * to registers from address 0x00 to 0x7F. - */ - ret = regmap_write_bits(regmap, BME680_REG_STATUS, - BME680_SPI_MEM_PAGE_BIT, - BME680_SPI_MEM_PAGE_1_VAL); - if (ret < 0) { - dev_err(&spi->dev, "failed to set page 1 of spi_mem_page\n"); - return ret; - } - return bme680_core_probe(&spi->dev, regmap, id->name); } -- cgit v1.2.3 From fe2d3df639a7940a125a33d6460529b9689c5406 Mon Sep 17 00:00:00 2001 From: "he, bo" Date: Wed, 6 Mar 2019 10:32:20 +0800 Subject: io: accel: kxcjk1013: restore the range after resume. On some laptops, kxcjk1013 is powered off when system enters S3. We need restore the range regiter during resume. Otherwise, the sensor doesn't work properly after S3. Signed-off-by: he, bo Signed-off-by: Chen, Hu Reviewed-by: Hans de Goede Cc: Signed-off-by: Jonathan Cameron --- drivers/iio/accel/kxcjk-1013.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/iio/accel/kxcjk-1013.c b/drivers/iio/accel/kxcjk-1013.c index 7096e577b23f..50f3ff386bea 100644 --- a/drivers/iio/accel/kxcjk-1013.c +++ b/drivers/iio/accel/kxcjk-1013.c @@ -1437,6 +1437,8 @@ static int kxcjk1013_resume(struct device *dev) mutex_lock(&data->mutex); ret = kxcjk1013_set_mode(data, OPERATION); + if (ret == 0) + ret = kxcjk1013_set_range(data, data->range); mutex_unlock(&data->mutex); return ret; -- cgit v1.2.3 From 7d01427aaa78fa611f84c1a05fde66a41a6598be Mon Sep 17 00:00:00 2001 From: Louis Taylor Date: Wed, 27 Feb 2019 11:07:20 +0000 Subject: HID: quirks: use correct format chars in dbg_hid When building with -Wformat, clang warns: drivers/hid/hid-quirks.c:1075:27: warning: format specifies type 'unsigned short' but the argument has type '__u32' (aka 'unsigned int') [-Wformat] bl_entry->driver_data, bl_entry->vendor, ^~~~~~~~~~~~~~~~ ./include/linux/hid.h:1170:48: note: expanded from macro 'dbg_hid' printk(KERN_DEBUG "%s: " format, __FILE__, ##arg); \ ~~~~~~ ^~~ drivers/hid/hid-quirks.c:1076:4: warning: format specifies type 'unsigned short' but the argument has type '__u32' (aka 'unsigned int') [-Wformat] bl_entry->product); ^~~~~~~~~~~~~~~~~ ./include/linux/hid.h:1170:48: note: expanded from macro 'dbg_hid' printk(KERN_DEBUG "%s: " format, __FILE__, ##arg); \ ~~~~~~ ^~~ drivers/hid/hid-quirks.c:1242:12: warning: format specifies type 'unsigned short' but the argument has type '__u32' (aka 'unsigned int') [-Wformat] quirks, hdev->vendor, hdev->product); ^~~~~~~~~~~~ ./include/linux/hid.h:1170:48: note: expanded from macro 'dbg_hid' printk(KERN_DEBUG "%s: " format, __FILE__, ##arg); \ ~~~~~~ ^~~ drivers/hid/hid-quirks.c:1242:26: warning: format specifies type 'unsigned short' but the argument has type '__u32' (aka 'unsigned int') [-Wformat] quirks, hdev->vendor, hdev->product); ^~~~~~~~~~~~~ ./include/linux/hid.h:1170:48: note: expanded from macro 'dbg_hid' printk(KERN_DEBUG "%s: " format, __FILE__, ##arg); \ ~~~~~~ ^~~ 4 warnings generated. This patch fixes the format strings to use the correct format type for unsigned ints. Link: https://github.com/ClangBuiltLinux/linux/issues/378 Signed-off-by: Louis Taylor Reviewed-by: Nick Desaulniers Signed-off-by: Benjamin Tissoires --- drivers/hid/hid-quirks.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/hid/hid-quirks.c b/drivers/hid/hid-quirks.c index 953908f2267c..2f9a9bf55208 100644 --- a/drivers/hid/hid-quirks.c +++ b/drivers/hid/hid-quirks.c @@ -1042,7 +1042,7 @@ static struct hid_device_id *hid_exists_dquirk(const struct hid_device *hdev) } if (bl_entry != NULL) - dbg_hid("Found dynamic quirk 0x%lx for HID device 0x%hx:0x%hx\n", + dbg_hid("Found dynamic quirk 0x%lx for HID device 0x%04x:0x%04x\n", bl_entry->driver_data, bl_entry->vendor, bl_entry->product); @@ -1209,7 +1209,7 @@ static unsigned long hid_gets_squirk(const struct hid_device *hdev) quirks |= bl_entry->driver_data; if (quirks) - dbg_hid("Found squirk 0x%lx for HID device 0x%hx:0x%hx\n", + dbg_hid("Found squirk 0x%lx for HID device 0x%04x:0x%04x\n", quirks, hdev->vendor, hdev->product); return quirks; } -- cgit v1.2.3 From a23eab893476f67bd7572cdbf24498d647c86e48 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 4 Mar 2019 20:54:43 +0100 Subject: HID: hid-asus: select CONFIG_POWER_SUPPLY The newly added power supply code fails to link when the power supply core code is disabled: drivers/hid/hid-asus.o: In function `asus_battery_get_property': hid-asus.c:(.text+0x11de): undefined reference to `power_supply_get_drvdata' drivers/hid/hid-asus.o: In function `asus_probe': hid-asus.c:(.text+0x170c): undefined reference to `devm_power_supply_register' hid-asus.c:(.text+0x1734): undefined reference to `power_supply_powers' drivers/hid/hid-asus.o: In function `asus_raw_event': hid-asus.c:(.text+0x1914): undefined reference to `power_supply_changed' Select the subsystem from Kconfig as we do for other hid drivers already. Fixes: 6311d329e12a ("HID: hid-asus: Add BT keyboard dock battery monitoring support") Signed-off-by: Arnd Bergmann Signed-off-by: Benjamin Tissoires --- drivers/hid/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/hid/Kconfig b/drivers/hid/Kconfig index 6ca8d322b487..4ca0cdfa6b33 100644 --- a/drivers/hid/Kconfig +++ b/drivers/hid/Kconfig @@ -150,6 +150,7 @@ config HID_ASUS tristate "Asus" depends on LEDS_CLASS depends on ASUS_WMI || ASUS_WMI=n + select POWER_SUPPLY ---help--- Support for Asus notebook built-in keyboard and touchpad via i2c, and the Asus Republic of Gamers laptop keyboard special keys. -- cgit v1.2.3 From 78b92f5f00cb3dbca0553f50847232eef60ccff4 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Tue, 5 Mar 2019 14:15:25 +0300 Subject: HID: quirks: Drop misused kernel-doc annotation The kernel-doc annotation is misused for hid_mouse_ignore_list. The script complains about it: drivers/hid/hid-quirks.c:894: warning: cannot understand function prototype: 'const struct hid_device_id hid_mouse_ignore_list[] = ' Drop the annotation to make script happy. Signed-off-by: Andy Shevchenko Signed-off-by: Benjamin Tissoires --- drivers/hid/hid-quirks.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/hid/hid-quirks.c b/drivers/hid/hid-quirks.c index 2f9a9bf55208..1148d8c0816a 100644 --- a/drivers/hid/hid-quirks.c +++ b/drivers/hid/hid-quirks.c @@ -855,7 +855,7 @@ static const struct hid_device_id hid_ignore_list[] = { { } }; -/** +/* * hid_mouse_ignore_list - mouse devices which should not be handled by the hid layer * * There are composite devices for which we want to ignore only a certain -- cgit v1.2.3 From 1cbbd85fbcdce186649ce778ff1e08e3df35d285 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Sat, 2 Mar 2019 22:23:38 +0000 Subject: HID: uclogic: remove redudant duplicated null check on ver_ptr Currently ver_ptr is being null checked twice, once before calling usb_string and once afterwards. The second null check is redundant and can be removed, remove it. Detected by CoverityScan, CID#1477308 ("Logically dead code") Signed-off-by: Colin Ian King Signed-off-by: Benjamin Tissoires --- drivers/hid/hid-uclogic-params.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/drivers/hid/hid-uclogic-params.c b/drivers/hid/hid-uclogic-params.c index 7710d9f957da..0187c9f8fc22 100644 --- a/drivers/hid/hid-uclogic-params.c +++ b/drivers/hid/hid-uclogic-params.c @@ -735,10 +735,6 @@ static int uclogic_params_huion_init(struct uclogic_params *params, goto cleanup; } rc = usb_string(udev, 201, ver_ptr, ver_len); - if (ver_ptr == NULL) { - rc = -ENOMEM; - goto cleanup; - } if (rc == -EPIPE) { *ver_ptr = '\0'; } else if (rc < 0) { -- cgit v1.2.3 From 42e4cedd67e4eb2abaa5e684353a55d4a01a913e Mon Sep 17 00:00:00 2001 From: Sylwester Nawrocki Date: Wed, 6 Mar 2019 11:24:45 +0100 Subject: ASoC: samsung: i2s: Fix DAPM routes for capture stream This patch sets missing stream_name of capture part of the DAI driver so we can define DAPM routing properly also for the capture stream. While at it "Playback" suffix is added to the playback stream names to clearly identify playback/capture. Together with related dts patch this fixes NULL pointer dereference when opening ALSA device for recording on Odroid XU3. Fixes: 64aba9bca5bd ("ASoC: samsung: i2s: Add widgets and routes for DPCM support") Signed-off-by: Sylwester Nawrocki Signed-off-by: Mark Brown --- sound/soc/samsung/i2s.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/sound/soc/samsung/i2s.c b/sound/soc/samsung/i2s.c index 4231001226f4..ab471d550d17 100644 --- a/sound/soc/samsung/i2s.c +++ b/sound/soc/samsung/i2s.c @@ -1130,11 +1130,11 @@ static const struct snd_soc_dapm_widget samsung_i2s_widgets[] = { }; static const struct snd_soc_dapm_route samsung_i2s_dapm_routes[] = { - { "Playback Mixer", NULL, "Primary" }, - { "Playback Mixer", NULL, "Secondary" }, + { "Playback Mixer", NULL, "Primary Playback" }, + { "Playback Mixer", NULL, "Secondary Playback" }, { "Mixer DAI TX", NULL, "Playback Mixer" }, - { "Playback Mixer", NULL, "Mixer DAI RX" }, + { "Primary Capture", NULL, "Mixer DAI RX" }, }; static const struct snd_soc_component_driver samsung_i2s_component = { @@ -1155,7 +1155,8 @@ static int i2s_alloc_dais(struct samsung_i2s_priv *priv, int num_dais) { static const char *dai_names[] = { "samsung-i2s", "samsung-i2s-sec" }; - static const char *stream_names[] = { "Primary", "Secondary" }; + static const char *stream_names[] = { "Primary Playback", + "Secondary Playback" }; struct snd_soc_dai_driver *dai_drv; struct i2s_dai *dai; int i; @@ -1201,6 +1202,7 @@ static int i2s_alloc_dais(struct samsung_i2s_priv *priv, dai_drv->capture.channels_max = 2; dai_drv->capture.rates = i2s_dai_data->pcm_rates; dai_drv->capture.formats = SAMSUNG_I2S_FMTS; + dai_drv->capture.stream_name = "Primary Capture"; return 0; } -- cgit v1.2.3 From 570f18b6a8d1f0e60e8caf30e66161b6438dcc91 Mon Sep 17 00:00:00 2001 From: Rander Wang Date: Fri, 8 Mar 2019 16:38:57 +0800 Subject: ASoC:soc-pcm:fix a codec fixup issue in TDM case On HDaudio platforms, if playback is started when capture is working, there is no audible output. This can be root-caused to the use of the rx|tx_mask to store an HDaudio stream tag. If capture is stared before playback, rx_mask would be non-zero on HDaudio platform, then the channel number of playback, which is in the same codec dai with the capture, would be changed by soc_pcm_codec_params_fixup based on the tx_mask at first, then overwritten by this function based on rx_mask at last. According to the author of tx|rx_mask, tx_mask is for playback and rx_mask is for capture. And stream direction is checked at all other references of tx|rx_mask in ASoC, so here should be an error. This patch checks stream direction for tx|rx_mask for fixup function. This issue would affect not only HDaudio+ASoC, but also I2S codecs if the channel number based on rx_mask is not equal to the one for tx_mask. It could be rarely reproduecd because most drivers in kernel set the same channel number to tx|rx_mask or rx_mask is zero. Tested on all platforms using stream_tag & HDaudio and intel I2S platforms. Signed-off-by: Rander Wang Acked-by: Pierre-Louis Bossart Signed-off-by: Mark Brown --- sound/soc/soc-pcm.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/sound/soc/soc-pcm.c b/sound/soc/soc-pcm.c index a5b40e82dea4..e70555a4ea06 100644 --- a/sound/soc/soc-pcm.c +++ b/sound/soc/soc-pcm.c @@ -954,10 +954,13 @@ static int soc_pcm_hw_params(struct snd_pcm_substream *substream, codec_params = *params; /* fixup params based on TDM slot masks */ - if (codec_dai->tx_mask) + if (substream->stream == SNDRV_PCM_STREAM_PLAYBACK && + codec_dai->tx_mask) soc_pcm_codec_params_fixup(&codec_params, codec_dai->tx_mask); - if (codec_dai->rx_mask) + + if (substream->stream == SNDRV_PCM_STREAM_CAPTURE && + codec_dai->rx_mask) soc_pcm_codec_params_fixup(&codec_params, codec_dai->rx_mask); -- cgit v1.2.3 From 03d0aa4d4fddce4a5d865d819a4d98bfc3d451e6 Mon Sep 17 00:00:00 2001 From: Rander Wang Date: Fri, 8 Mar 2019 16:38:58 +0800 Subject: ASoC:hdac_hda:use correct format to setup hda codec The current implementation of the hdac_hda codec results in zero-valued samples on capture and noise with headset playback when SOF is used on platforms with an on-board HDaudio codec. This is root-caused to SOF using be_hw_params_fixup, and the prepare() call using invalid runtime fields to determine the format. This patch moves the format handling to the hw_params() callback, as done already for hdac_hdmi, to make sure the fixed-up information is taken into account but keeps the codec initialization in prepare() as the stream_tag is only available at that time. Moving everything in the prepare() callback is possible but the code is less elegant so this two-step solution was chosen. The solution was tested with the SST driver with no regressions, and all the issues with SOF playback and capture are solved. Signed-off-by: Rander Wang Acked-by: Pierre-Louis Bossart Signed-off-by: Mark Brown --- sound/soc/codecs/hdac_hda.c | 53 +++++++++++++++++++++++++++++++++------------ sound/soc/codecs/hdac_hda.h | 1 + 2 files changed, 40 insertions(+), 14 deletions(-) diff --git a/sound/soc/codecs/hdac_hda.c b/sound/soc/codecs/hdac_hda.c index ffecdaaa8cf2..f889d94c8e3c 100644 --- a/sound/soc/codecs/hdac_hda.c +++ b/sound/soc/codecs/hdac_hda.c @@ -38,6 +38,9 @@ static void hdac_hda_dai_close(struct snd_pcm_substream *substream, struct snd_soc_dai *dai); static int hdac_hda_dai_prepare(struct snd_pcm_substream *substream, struct snd_soc_dai *dai); +static int hdac_hda_dai_hw_params(struct snd_pcm_substream *substream, + struct snd_pcm_hw_params *params, + struct snd_soc_dai *dai); static int hdac_hda_dai_hw_free(struct snd_pcm_substream *substream, struct snd_soc_dai *dai); static int hdac_hda_dai_set_tdm_slot(struct snd_soc_dai *dai, @@ -50,6 +53,7 @@ static const struct snd_soc_dai_ops hdac_hda_dai_ops = { .startup = hdac_hda_dai_open, .shutdown = hdac_hda_dai_close, .prepare = hdac_hda_dai_prepare, + .hw_params = hdac_hda_dai_hw_params, .hw_free = hdac_hda_dai_hw_free, .set_tdm_slot = hdac_hda_dai_set_tdm_slot, }; @@ -139,6 +143,39 @@ static int hdac_hda_dai_set_tdm_slot(struct snd_soc_dai *dai, return 0; } +static int hdac_hda_dai_hw_params(struct snd_pcm_substream *substream, + struct snd_pcm_hw_params *params, + struct snd_soc_dai *dai) +{ + struct snd_soc_component *component = dai->component; + struct hdac_hda_priv *hda_pvt; + unsigned int format_val; + unsigned int maxbps; + + if (substream->stream == SNDRV_PCM_STREAM_PLAYBACK) + maxbps = dai->driver->playback.sig_bits; + else + maxbps = dai->driver->capture.sig_bits; + + hda_pvt = snd_soc_component_get_drvdata(component); + format_val = snd_hdac_calc_stream_format(params_rate(params), + params_channels(params), + params_format(params), + maxbps, + 0); + if (!format_val) { + dev_err(dai->dev, + "invalid format_val, rate=%d, ch=%d, format=%d, maxbps=%d\n", + params_rate(params), params_channels(params), + params_format(params), maxbps); + + return -EINVAL; + } + + hda_pvt->pcm[dai->id].format_val[substream->stream] = format_val; + return 0; +} + static int hdac_hda_dai_hw_free(struct snd_pcm_substream *substream, struct snd_soc_dai *dai) { @@ -162,10 +199,9 @@ static int hdac_hda_dai_prepare(struct snd_pcm_substream *substream, struct snd_soc_dai *dai) { struct snd_soc_component *component = dai->component; + struct hda_pcm_stream *hda_stream; struct hdac_hda_priv *hda_pvt; - struct snd_pcm_runtime *runtime = substream->runtime; struct hdac_device *hdev; - struct hda_pcm_stream *hda_stream; unsigned int format_val; struct hda_pcm *pcm; unsigned int stream; @@ -179,19 +215,8 @@ static int hdac_hda_dai_prepare(struct snd_pcm_substream *substream, hda_stream = &pcm->stream[substream->stream]; - format_val = snd_hdac_calc_stream_format(runtime->rate, - runtime->channels, - runtime->format, - hda_stream->maxbps, - 0); - if (!format_val) { - dev_err(&hdev->dev, - "invalid format_val, rate=%d, ch=%d, format=%d\n", - runtime->rate, runtime->channels, runtime->format); - return -EINVAL; - } - stream = hda_pvt->pcm[dai->id].stream_tag[substream->stream]; + format_val = hda_pvt->pcm[dai->id].format_val[substream->stream]; ret = snd_hda_codec_prepare(&hda_pvt->codec, hda_stream, stream, format_val, substream); diff --git a/sound/soc/codecs/hdac_hda.h b/sound/soc/codecs/hdac_hda.h index e444ef593360..6b1bd4f428e7 100644 --- a/sound/soc/codecs/hdac_hda.h +++ b/sound/soc/codecs/hdac_hda.h @@ -8,6 +8,7 @@ struct hdac_hda_pcm { int stream_tag[2]; + unsigned int format_val[2]; }; struct hdac_hda_priv { -- cgit v1.2.3 From c899df3e9b0bf7b76e642aed1a214582ea7012d5 Mon Sep 17 00:00:00 2001 From: Rander Wang Date: Fri, 8 Mar 2019 16:38:59 +0800 Subject: ASoC:intel:skl:fix a simultaneous playback & capture issue on hda platform If playback and capture are enabled concurrently, when the capture stops the output becomes inaudile. The playback application will become stuck and underrun after a timeout. This is caused by mistaken use of the stream_id, which should only be set for playback and not for capture Tested on Apollolake and Kabylake with SST driver. Signed-off-by: Rander Wang Acked-by: Pierre-Louis Bossart Signed-off-by: Mark Brown --- sound/soc/intel/skylake/skl-pcm.c | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) diff --git a/sound/soc/intel/skylake/skl-pcm.c b/sound/soc/intel/skylake/skl-pcm.c index a4284778f117..bb463ee6ff84 100644 --- a/sound/soc/intel/skylake/skl-pcm.c +++ b/sound/soc/intel/skylake/skl-pcm.c @@ -181,6 +181,7 @@ int skl_pcm_link_dma_prepare(struct device *dev, struct skl_pipe_params *params) struct hdac_stream *hstream; struct hdac_ext_stream *stream; struct hdac_ext_link *link; + unsigned char stream_tag; hstream = snd_hdac_get_stream(bus, params->stream, params->link_dma_id + 1); @@ -199,10 +200,13 @@ int skl_pcm_link_dma_prepare(struct device *dev, struct skl_pipe_params *params) snd_hdac_ext_link_stream_setup(stream, format_val); - list_for_each_entry(link, &bus->hlink_list, list) { - if (link->index == params->link_index) - snd_hdac_ext_link_set_stream_id(link, - hstream->stream_tag); + stream_tag = hstream->stream_tag; + if (stream->hstream.direction == SNDRV_PCM_STREAM_PLAYBACK) { + list_for_each_entry(link, &bus->hlink_list, list) { + if (link->index == params->link_index) + snd_hdac_ext_link_set_stream_id(link, + stream_tag); + } } stream->link_prepared = 1; @@ -645,6 +649,7 @@ static int skl_link_hw_free(struct snd_pcm_substream *substream, struct hdac_ext_stream *link_dev = snd_soc_dai_get_dma_data(dai, substream); struct hdac_ext_link *link; + unsigned char stream_tag; dev_dbg(dai->dev, "%s: %s\n", __func__, dai->name); @@ -654,7 +659,11 @@ static int skl_link_hw_free(struct snd_pcm_substream *substream, if (!link) return -EINVAL; - snd_hdac_ext_link_clear_stream_id(link, hdac_stream(link_dev)->stream_tag); + if (substream->stream == SNDRV_PCM_STREAM_PLAYBACK) { + stream_tag = hdac_stream(link_dev)->stream_tag; + snd_hdac_ext_link_clear_stream_id(link, stream_tag); + } + snd_hdac_ext_stream_release(link_dev, HDAC_EXT_STREAM_TYPE_LINK); return 0; } -- cgit v1.2.3 From a9764869779081e8bf24da07ac040e8f3efcf13a Mon Sep 17 00:00:00 2001 From: KaiChieh Chuang Date: Fri, 8 Mar 2019 13:05:53 +0800 Subject: ASoC: dpcm: prevent snd_soc_dpcm use after free The dpcm get from fe_clients/be_clients may be free before use Add a spin lock at snd_soc_card level, to protect the dpcm instance. The lock may be used in atomic context, so use spin lock. Use irq spin lock version, since the lock may be used in interrupts. possible race condition between void dpcm_be_disconnect( ... list_del(&dpcm->list_be); list_del(&dpcm->list_fe); kfree(dpcm); ... and for_each_dpcm_fe() for_each_dpcm_be*() race condition example Thread 1: snd_soc_dapm_mixer_update_power() -> soc_dpcm_runtime_update() -> dpcm_be_disconnect() -> kfree(dpcm); Thread 2: dpcm_fe_dai_trigger() -> dpcm_be_dai_trigger() -> snd_soc_dpcm_can_be_free_stop() -> if (dpcm->fe == fe) Excpetion Scenario: two FE link to same BE FE1 -> BE FE2 -> Thread 1: switch of mixer between FE2 -> BE Thread 2: pcm_stop FE1 Exception: Unable to handle kernel paging request at virtual address dead0000000000e0 pc=<> [] dpcm_be_dai_trigger+0x29c/0x47c sound/soc/soc-pcm.c:3226 if (dpcm->fe == fe) lr=<> [] dpcm_fe_dai_do_trigger+0x94/0x26c Backtrace: [] notify_die+0x68/0xb8 [] die+0x118/0x2a8 [] __do_kernel_fault+0x13c/0x14c [] do_translation_fault+0x64/0xa0 [] do_mem_abort+0x4c/0xd0 [] el1_da+0x24/0x40 [] dpcm_be_dai_trigger+0x29c/0x47c [] dpcm_fe_dai_do_trigger+0x94/0x26c [] dpcm_fe_dai_trigger+0x3c/0x44 [] snd_pcm_do_stop+0x50/0x5c [] snd_pcm_action+0xb4/0x13c [] snd_pcm_drop+0xa0/0x128 [] snd_pcm_common_ioctl+0x9d8/0x30f0 [] snd_pcm_ioctl_compat+0x29c/0x2f14 [] compat_SyS_ioctl+0x128/0x244 [] el0_svc_naked+0x34/0x38 [] 0xffffffffffffffff Signed-off-by: KaiChieh Chuang Signed-off-by: Mark Brown --- include/sound/soc.h | 2 ++ sound/soc/soc-core.c | 1 + sound/soc/soc-pcm.c | 40 +++++++++++++++++++++++++++++++++------- 3 files changed, 36 insertions(+), 7 deletions(-) diff --git a/include/sound/soc.h b/include/sound/soc.h index eb7db605955b..1e2be35ed36f 100644 --- a/include/sound/soc.h +++ b/include/sound/soc.h @@ -1083,6 +1083,8 @@ struct snd_soc_card { struct mutex mutex; struct mutex dapm_mutex; + spinlock_t dpcm_lock; + bool instantiated; bool topology_shortname_created; diff --git a/sound/soc/soc-core.c b/sound/soc/soc-core.c index 5a5764dba147..d88757659729 100644 --- a/sound/soc/soc-core.c +++ b/sound/soc/soc-core.c @@ -2820,6 +2820,7 @@ int snd_soc_register_card(struct snd_soc_card *card) card->instantiated = 0; mutex_init(&card->mutex); mutex_init(&card->dapm_mutex); + spin_lock_init(&card->dpcm_lock); return snd_soc_bind_card(card); } diff --git a/sound/soc/soc-pcm.c b/sound/soc/soc-pcm.c index e70555a4ea06..90504c149931 100644 --- a/sound/soc/soc-pcm.c +++ b/sound/soc/soc-pcm.c @@ -1216,6 +1216,7 @@ static int dpcm_be_connect(struct snd_soc_pcm_runtime *fe, struct snd_soc_pcm_runtime *be, int stream) { struct snd_soc_dpcm *dpcm; + unsigned long flags; /* only add new dpcms */ for_each_dpcm_be(fe, stream, dpcm) { @@ -1231,8 +1232,10 @@ static int dpcm_be_connect(struct snd_soc_pcm_runtime *fe, dpcm->fe = fe; be->dpcm[stream].runtime = fe->dpcm[stream].runtime; dpcm->state = SND_SOC_DPCM_LINK_STATE_NEW; + spin_lock_irqsave(&fe->card->dpcm_lock, flags); list_add(&dpcm->list_be, &fe->dpcm[stream].be_clients); list_add(&dpcm->list_fe, &be->dpcm[stream].fe_clients); + spin_unlock_irqrestore(&fe->card->dpcm_lock, flags); dev_dbg(fe->dev, "connected new DPCM %s path %s %s %s\n", stream ? "capture" : "playback", fe->dai_link->name, @@ -1278,6 +1281,7 @@ static void dpcm_be_reparent(struct snd_soc_pcm_runtime *fe, void dpcm_be_disconnect(struct snd_soc_pcm_runtime *fe, int stream) { struct snd_soc_dpcm *dpcm, *d; + unsigned long flags; for_each_dpcm_be_safe(fe, stream, dpcm, d) { dev_dbg(fe->dev, "ASoC: BE %s disconnect check for %s\n", @@ -1297,8 +1301,10 @@ void dpcm_be_disconnect(struct snd_soc_pcm_runtime *fe, int stream) #ifdef CONFIG_DEBUG_FS debugfs_remove(dpcm->debugfs_state); #endif + spin_lock_irqsave(&fe->card->dpcm_lock, flags); list_del(&dpcm->list_be); list_del(&dpcm->list_fe); + spin_unlock_irqrestore(&fe->card->dpcm_lock, flags); kfree(dpcm); } } @@ -1550,10 +1556,13 @@ int dpcm_process_paths(struct snd_soc_pcm_runtime *fe, void dpcm_clear_pending_state(struct snd_soc_pcm_runtime *fe, int stream) { struct snd_soc_dpcm *dpcm; + unsigned long flags; + spin_lock_irqsave(&fe->card->dpcm_lock, flags); for_each_dpcm_be(fe, stream, dpcm) dpcm->be->dpcm[stream].runtime_update = SND_SOC_DPCM_UPDATE_NO; + spin_unlock_irqrestore(&fe->card->dpcm_lock, flags); } static void dpcm_be_dai_startup_unwind(struct snd_soc_pcm_runtime *fe, @@ -2574,6 +2583,7 @@ static int dpcm_run_update_startup(struct snd_soc_pcm_runtime *fe, int stream) struct snd_soc_dpcm *dpcm; enum snd_soc_dpcm_trigger trigger = fe->dai_link->trigger[stream]; int ret; + unsigned long flags; dev_dbg(fe->dev, "ASoC: runtime %s open on FE %s\n", stream ? "capture" : "playback", fe->dai_link->name); @@ -2643,11 +2653,13 @@ close: dpcm_be_dai_shutdown(fe, stream); disconnect: /* disconnect any non started BEs */ + spin_lock_irqsave(&fe->card->dpcm_lock, flags); for_each_dpcm_be(fe, stream, dpcm) { struct snd_soc_pcm_runtime *be = dpcm->be; if (be->dpcm[stream].state != SND_SOC_DPCM_STATE_START) dpcm->state = SND_SOC_DPCM_LINK_STATE_FREE; } + spin_unlock_irqrestore(&fe->card->dpcm_lock, flags); return ret; } @@ -3223,7 +3235,10 @@ int snd_soc_dpcm_can_be_free_stop(struct snd_soc_pcm_runtime *fe, { struct snd_soc_dpcm *dpcm; int state; + int ret = 1; + unsigned long flags; + spin_lock_irqsave(&fe->card->dpcm_lock, flags); for_each_dpcm_fe(be, stream, dpcm) { if (dpcm->fe == fe) @@ -3232,12 +3247,15 @@ int snd_soc_dpcm_can_be_free_stop(struct snd_soc_pcm_runtime *fe, state = dpcm->fe->dpcm[stream].state; if (state == SND_SOC_DPCM_STATE_START || state == SND_SOC_DPCM_STATE_PAUSED || - state == SND_SOC_DPCM_STATE_SUSPEND) - return 0; + state == SND_SOC_DPCM_STATE_SUSPEND) { + ret = 0; + break; + } } + spin_unlock_irqrestore(&fe->card->dpcm_lock, flags); /* it's safe to free/stop this BE DAI */ - return 1; + return ret; } EXPORT_SYMBOL_GPL(snd_soc_dpcm_can_be_free_stop); @@ -3250,7 +3268,10 @@ int snd_soc_dpcm_can_be_params(struct snd_soc_pcm_runtime *fe, { struct snd_soc_dpcm *dpcm; int state; + int ret = 1; + unsigned long flags; + spin_lock_irqsave(&fe->card->dpcm_lock, flags); for_each_dpcm_fe(be, stream, dpcm) { if (dpcm->fe == fe) @@ -3260,12 +3281,15 @@ int snd_soc_dpcm_can_be_params(struct snd_soc_pcm_runtime *fe, if (state == SND_SOC_DPCM_STATE_START || state == SND_SOC_DPCM_STATE_PAUSED || state == SND_SOC_DPCM_STATE_SUSPEND || - state == SND_SOC_DPCM_STATE_PREPARE) - return 0; + state == SND_SOC_DPCM_STATE_PREPARE) { + ret = 0; + break; + } } + spin_unlock_irqrestore(&fe->card->dpcm_lock, flags); /* it's safe to change hw_params */ - return 1; + return ret; } EXPORT_SYMBOL_GPL(snd_soc_dpcm_can_be_params); @@ -3304,6 +3328,7 @@ static ssize_t dpcm_show_state(struct snd_soc_pcm_runtime *fe, struct snd_pcm_hw_params *params = &fe->dpcm[stream].hw_params; struct snd_soc_dpcm *dpcm; ssize_t offset = 0; + unsigned long flags; /* FE state */ offset += snprintf(buf + offset, size - offset, @@ -3331,6 +3356,7 @@ static ssize_t dpcm_show_state(struct snd_soc_pcm_runtime *fe, goto out; } + spin_lock_irqsave(&fe->card->dpcm_lock, flags); for_each_dpcm_be(fe, stream, dpcm) { struct snd_soc_pcm_runtime *be = dpcm->be; params = &dpcm->hw_params; @@ -3351,7 +3377,7 @@ static ssize_t dpcm_show_state(struct snd_soc_pcm_runtime *fe, params_channels(params), params_rate(params)); } - + spin_unlock_irqrestore(&fe->card->dpcm_lock, flags); out: return offset; } -- cgit v1.2.3 From 844a4a362dbec166b44d6b9b3dd45b08cb273703 Mon Sep 17 00:00:00 2001 From: John Hsu Date: Mon, 11 Mar 2019 09:36:45 +0800 Subject: ASoC: nau8824: fix the issue of the widget with prefix name The driver has two issues when machine add prefix name for codec. (1)The stream name of DAI can't find the AIF widgets. (2)The drivr can enable/disalbe the MICBIAS and SAR widgets. The patch will fix these issues caused by prefixed name added. Signed-off-by: John Hsu Signed-off-by: Mark Brown --- sound/soc/codecs/nau8824.c | 46 ++++++++++++++++++++++++++++++++++++++-------- 1 file changed, 38 insertions(+), 8 deletions(-) diff --git a/sound/soc/codecs/nau8824.c b/sound/soc/codecs/nau8824.c index 87ed3dc496dc..5ab05e75edea 100644 --- a/sound/soc/codecs/nau8824.c +++ b/sound/soc/codecs/nau8824.c @@ -681,8 +681,8 @@ static const struct snd_soc_dapm_widget nau8824_dapm_widgets[] = { SND_SOC_DAPM_ADC("ADCR", NULL, NAU8824_REG_ANALOG_ADC_2, NAU8824_ADCR_EN_SFT, 0), - SND_SOC_DAPM_AIF_OUT("AIFTX", "HiFi Capture", 0, SND_SOC_NOPM, 0, 0), - SND_SOC_DAPM_AIF_IN("AIFRX", "HiFi Playback", 0, SND_SOC_NOPM, 0, 0), + SND_SOC_DAPM_AIF_OUT("AIFTX", "Capture", 0, SND_SOC_NOPM, 0, 0), + SND_SOC_DAPM_AIF_IN("AIFRX", "Playback", 0, SND_SOC_NOPM, 0, 0), SND_SOC_DAPM_DAC("DACL", NULL, NAU8824_REG_RDAC, NAU8824_DACL_EN_SFT, 0), @@ -831,6 +831,36 @@ static void nau8824_int_status_clear_all(struct regmap *regmap) } } +static void nau8824_dapm_disable_pin(struct nau8824 *nau8824, const char *pin) +{ + struct snd_soc_dapm_context *dapm = nau8824->dapm; + const char *prefix = dapm->component->name_prefix; + char prefixed_pin[80]; + + if (prefix) { + snprintf(prefixed_pin, sizeof(prefixed_pin), "%s %s", + prefix, pin); + snd_soc_dapm_disable_pin(dapm, prefixed_pin); + } else { + snd_soc_dapm_disable_pin(dapm, pin); + } +} + +static void nau8824_dapm_enable_pin(struct nau8824 *nau8824, const char *pin) +{ + struct snd_soc_dapm_context *dapm = nau8824->dapm; + const char *prefix = dapm->component->name_prefix; + char prefixed_pin[80]; + + if (prefix) { + snprintf(prefixed_pin, sizeof(prefixed_pin), "%s %s", + prefix, pin); + snd_soc_dapm_force_enable_pin(dapm, prefixed_pin); + } else { + snd_soc_dapm_force_enable_pin(dapm, pin); + } +} + static void nau8824_eject_jack(struct nau8824 *nau8824) { struct snd_soc_dapm_context *dapm = nau8824->dapm; @@ -839,8 +869,8 @@ static void nau8824_eject_jack(struct nau8824 *nau8824) /* Clear all interruption status */ nau8824_int_status_clear_all(regmap); - snd_soc_dapm_disable_pin(dapm, "SAR"); - snd_soc_dapm_disable_pin(dapm, "MICBIAS"); + nau8824_dapm_disable_pin(nau8824, "SAR"); + nau8824_dapm_disable_pin(nau8824, "MICBIAS"); snd_soc_dapm_sync(dapm); /* Enable the insertion interruption, disable the ejection @@ -870,8 +900,8 @@ static void nau8824_jdet_work(struct work_struct *work) struct regmap *regmap = nau8824->regmap; int adc_value, event = 0, event_mask = 0; - snd_soc_dapm_force_enable_pin(dapm, "MICBIAS"); - snd_soc_dapm_force_enable_pin(dapm, "SAR"); + nau8824_dapm_enable_pin(nau8824, "MICBIAS"); + nau8824_dapm_enable_pin(nau8824, "SAR"); snd_soc_dapm_sync(dapm); msleep(100); @@ -882,8 +912,8 @@ static void nau8824_jdet_work(struct work_struct *work) if (adc_value < HEADSET_SARADC_THD) { event |= SND_JACK_HEADPHONE; - snd_soc_dapm_disable_pin(dapm, "SAR"); - snd_soc_dapm_disable_pin(dapm, "MICBIAS"); + nau8824_dapm_disable_pin(nau8824, "SAR"); + nau8824_dapm_disable_pin(nau8824, "MICBIAS"); snd_soc_dapm_sync(dapm); } else { event |= SND_JACK_HEADSET; -- cgit v1.2.3 From a39fe6e2061615496c12825d6d249fedf1974f8a Mon Sep 17 00:00:00 2001 From: Olivier Moysan Date: Mon, 11 Mar 2019 16:39:28 +0100 Subject: ASoC: stm32: i2s: fix registers declaration in regmap - Declare SR as volatile, as it is changed by hardware. - Remove TXDR from readable and volatile register list, as it is intended for write accesses only. Signed-off-by: Olivier Moysan Signed-off-by: Mark Brown --- sound/soc/stm/stm32_i2s.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/sound/soc/stm/stm32_i2s.c b/sound/soc/stm/stm32_i2s.c index 47c334de6b09..8968458eec62 100644 --- a/sound/soc/stm/stm32_i2s.c +++ b/sound/soc/stm/stm32_i2s.c @@ -281,7 +281,6 @@ static bool stm32_i2s_readable_reg(struct device *dev, unsigned int reg) case STM32_I2S_CFG2_REG: case STM32_I2S_IER_REG: case STM32_I2S_SR_REG: - case STM32_I2S_TXDR_REG: case STM32_I2S_RXDR_REG: case STM32_I2S_CGFR_REG: return true; @@ -293,7 +292,7 @@ static bool stm32_i2s_readable_reg(struct device *dev, unsigned int reg) static bool stm32_i2s_volatile_reg(struct device *dev, unsigned int reg) { switch (reg) { - case STM32_I2S_TXDR_REG: + case STM32_I2S_SR_REG: case STM32_I2S_RXDR_REG: return true; default: -- cgit v1.2.3 From ba164a49f8f7390b036713bf8a70a150a938c670 Mon Sep 17 00:00:00 2001 From: Jiada Wang Date: Thu, 7 Mar 2019 15:15:53 +0900 Subject: ASoC: rsnd: src: Avoid a potential deadlock lockdep warns us that priv->lock and k->k_lock can cause a deadlock when after acquire of k->k_lock, process is interrupted by src, while in another routine of src .init, k->k_lock is acquired with priv->lock held. This patch avoids a potential deadlock by not calling soc_device_match() in SRC .init callback, instead it adds new soc fields in priv->flags to differentiate SoCs. Fixes: linux-next commit 7674bec4fc09 ("ASoC: rsnd: update BSDSR/BSDISR handling") Signed-off-by: Jiada Wang Acked-by: Kuninori Morimoto Signed-off-by: Mark Brown --- sound/soc/sh/rcar/core.c | 2 ++ sound/soc/sh/rcar/rsnd.h | 5 +++++ sound/soc/sh/rcar/src.c | 9 +-------- 3 files changed, 8 insertions(+), 8 deletions(-) diff --git a/sound/soc/sh/rcar/core.c b/sound/soc/sh/rcar/core.c index 9834474684b1..8d3758f862f1 100644 --- a/sound/soc/sh/rcar/core.c +++ b/sound/soc/sh/rcar/core.c @@ -110,6 +110,8 @@ static const struct of_device_id rsnd_of_match[] = { { .compatible = "renesas,rcar_sound-gen1", .data = (void *)RSND_GEN1 }, { .compatible = "renesas,rcar_sound-gen2", .data = (void *)RSND_GEN2 }, { .compatible = "renesas,rcar_sound-gen3", .data = (void *)RSND_GEN3 }, + /* Special Handling */ + { .compatible = "renesas,rcar_sound-r8a77990", .data = (void *)(RSND_GEN3 | RSND_SOC_E) }, {}, }; MODULE_DEVICE_TABLE(of, rsnd_of_match); diff --git a/sound/soc/sh/rcar/rsnd.h b/sound/soc/sh/rcar/rsnd.h index 90625c57847b..0e6ef4e18400 100644 --- a/sound/soc/sh/rcar/rsnd.h +++ b/sound/soc/sh/rcar/rsnd.h @@ -607,6 +607,8 @@ struct rsnd_priv { #define RSND_GEN1 (1 << 0) #define RSND_GEN2 (2 << 0) #define RSND_GEN3 (3 << 0) +#define RSND_SOC_MASK (0xFF << 4) +#define RSND_SOC_E (1 << 4) /* E1/E2/E3 */ /* * below value will be filled on rsnd_gen_probe() @@ -679,6 +681,9 @@ struct rsnd_priv { #define rsnd_is_gen1(priv) (((priv)->flags & RSND_GEN_MASK) == RSND_GEN1) #define rsnd_is_gen2(priv) (((priv)->flags & RSND_GEN_MASK) == RSND_GEN2) #define rsnd_is_gen3(priv) (((priv)->flags & RSND_GEN_MASK) == RSND_GEN3) +#define rsnd_is_e3(priv) (((priv)->flags & \ + (RSND_GEN_MASK | RSND_SOC_MASK)) == \ + (RSND_GEN3 | RSND_SOC_E)) #define rsnd_flags_has(p, f) ((p)->flags & (f)) #define rsnd_flags_set(p, f) ((p)->flags |= (f)) diff --git a/sound/soc/sh/rcar/src.c b/sound/soc/sh/rcar/src.c index db81e066b92e..45096a66c074 100644 --- a/sound/soc/sh/rcar/src.c +++ b/sound/soc/sh/rcar/src.c @@ -14,7 +14,6 @@ */ #include "rsnd.h" -#include #define SRC_NAME "src" @@ -189,18 +188,12 @@ const static u32 chan222222[] = { 0x00000006, /* 1 to 2 */ }; -static const struct soc_device_attribute ov_soc[] = { - { .soc_id = "r8a77990" }, /* E3 */ - { /* sentinel */ } -}; - static void rsnd_src_set_convert_rate(struct rsnd_dai_stream *io, struct rsnd_mod *mod) { struct rsnd_priv *priv = rsnd_mod_to_priv(mod); struct device *dev = rsnd_priv_to_dev(priv); struct snd_pcm_runtime *runtime = rsnd_io_to_runtime(io); - const struct soc_device_attribute *soc = soc_device_match(ov_soc); int is_play = rsnd_io_is_play(io); int use_src = 0; u32 fin, fout; @@ -307,7 +300,7 @@ static void rsnd_src_set_convert_rate(struct rsnd_dai_stream *io, /* * E3 need to overwrite */ - if (soc) + if (rsnd_is_e3(priv)) switch (rsnd_mod_id(mod)) { case 0: case 4: -- cgit v1.2.3 From 399706df420ea8bc8b31283a919e6475f737d0ea Mon Sep 17 00:00:00 2001 From: Jiada Wang Date: Thu, 7 Mar 2019 15:15:54 +0900 Subject: ASoC: rsnd: src: fix compiler warnings compiler complains about following declarations sound/soc/sh/rcar/src.c:174:1: warning: 'static' is not at beginning of declaration [-Wold-style-declaration] const static u32 bsdsr_table_pattern1[] = { ^~~~~ sound/soc/sh/rcar/src.c:183:1: warning: 'static' is not at beginning of declaration [-Wold-style-declaration] const static u32 bsdsr_table_pattern2[] = { ^~~~~ sound/soc/sh/rcar/src.c:192:1: warning: 'static' is not at beginning of declaration [-Wold-style-declaration] const static u32 bsisr_table[] = { ^~~~~ sound/soc/sh/rcar/src.c:201:1: warning: 'static' is not at beginning of declaration [-Wold-style-declaration] const static u32 chan288888[] = { ^~~~~ sound/soc/sh/rcar/src.c:210:1: warning: 'static' is not at beginning of declaration [-Wold-style-declaration] const static u32 chan244888[] = { ^~~~~ sound/soc/sh/rcar/src.c:219:1: warning: 'static' is not at beginning of declaration [-Wold-style-declaration] const static u32 chan222222[] = { ^~~~~ This patch moves the 'static' keyword to the front of the declaration to fix the compiler warnings Fixes: linux-next commit 7674bec4fc09 ("ASoC: rsnd: update BSDSR/BSDISR handling") Signed-off-by: Jiada Wang Acked-by: Kuninori Morimoto Signed-off-by: Mark Brown --- sound/soc/sh/rcar/src.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/sound/soc/sh/rcar/src.c b/sound/soc/sh/rcar/src.c index 45096a66c074..585ffba0244b 100644 --- a/sound/soc/sh/rcar/src.c +++ b/sound/soc/sh/rcar/src.c @@ -134,7 +134,7 @@ unsigned int rsnd_src_get_rate(struct rsnd_priv *priv, return rate; } -const static u32 bsdsr_table_pattern1[] = { +static const u32 bsdsr_table_pattern1[] = { 0x01800000, /* 6 - 1/6 */ 0x01000000, /* 6 - 1/4 */ 0x00c00000, /* 6 - 1/3 */ @@ -143,7 +143,7 @@ const static u32 bsdsr_table_pattern1[] = { 0x00400000, /* 6 - 1 */ }; -const static u32 bsdsr_table_pattern2[] = { +static const u32 bsdsr_table_pattern2[] = { 0x02400000, /* 6 - 1/6 */ 0x01800000, /* 6 - 1/4 */ 0x01200000, /* 6 - 1/3 */ @@ -152,7 +152,7 @@ const static u32 bsdsr_table_pattern2[] = { 0x00600000, /* 6 - 1 */ }; -const static u32 bsisr_table[] = { +static const u32 bsisr_table[] = { 0x00100060, /* 6 - 1/6 */ 0x00100040, /* 6 - 1/4 */ 0x00100030, /* 6 - 1/3 */ @@ -161,7 +161,7 @@ const static u32 bsisr_table[] = { 0x00100020, /* 6 - 1 */ }; -const static u32 chan288888[] = { +static const u32 chan288888[] = { 0x00000006, /* 1 to 2 */ 0x000001fe, /* 1 to 8 */ 0x000001fe, /* 1 to 8 */ @@ -170,7 +170,7 @@ const static u32 chan288888[] = { 0x000001fe, /* 1 to 8 */ }; -const static u32 chan244888[] = { +static const u32 chan244888[] = { 0x00000006, /* 1 to 2 */ 0x0000001e, /* 1 to 4 */ 0x0000001e, /* 1 to 4 */ @@ -179,7 +179,7 @@ const static u32 chan244888[] = { 0x000001fe, /* 1 to 8 */ }; -const static u32 chan222222[] = { +static const u32 chan222222[] = { 0x00000006, /* 1 to 2 */ 0x00000006, /* 1 to 2 */ 0x00000006, /* 1 to 2 */ -- cgit v1.2.3 From 54d1cf78b0f4ba348a7c7fb8b7d0708d71b6cc8a Mon Sep 17 00:00:00 2001 From: John Hsu Date: Wed, 13 Mar 2019 16:23:44 +0800 Subject: ASoC: nau8810: fix the issue of widget with prefixed name The driver changes the stream name of DAC and ADC to avoid the issue of widget with prefixed name. When the machine adds prefixed name for codec, the stream name of DAI may not find the widgets. Signed-off-by: John Hsu Signed-off-by: Mark Brown --- sound/soc/codecs/nau8810.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sound/soc/codecs/nau8810.c b/sound/soc/codecs/nau8810.c index bfd74b86c9d2..645aa0794123 100644 --- a/sound/soc/codecs/nau8810.c +++ b/sound/soc/codecs/nau8810.c @@ -411,9 +411,9 @@ static const struct snd_soc_dapm_widget nau8810_dapm_widgets[] = { SND_SOC_DAPM_MIXER("Mono Mixer", NAU8810_REG_POWER3, NAU8810_MOUTMX_EN_SFT, 0, &nau8810_mono_mixer_controls[0], ARRAY_SIZE(nau8810_mono_mixer_controls)), - SND_SOC_DAPM_DAC("DAC", "HiFi Playback", NAU8810_REG_POWER3, + SND_SOC_DAPM_DAC("DAC", "Playback", NAU8810_REG_POWER3, NAU8810_DAC_EN_SFT, 0), - SND_SOC_DAPM_ADC("ADC", "HiFi Capture", NAU8810_REG_POWER2, + SND_SOC_DAPM_ADC("ADC", "Capture", NAU8810_REG_POWER2, NAU8810_ADC_EN_SFT, 0), SND_SOC_DAPM_PGA("SpkN Out", NAU8810_REG_POWER3, NAU8810_NSPK_EN_SFT, 0, NULL, 0), -- cgit v1.2.3 From 2b13bee3884926cba22061efa75bd315e871de24 Mon Sep 17 00:00:00 2001 From: Sylwester Nawrocki Date: Tue, 12 Mar 2019 18:40:06 +0100 Subject: ASoC: samsung: odroid: Fix clock configuration for 44100 sample rate After commit fbeec965b8d1c ("ASoC: samsung: odroid: Fix 32000 sample rate handling") the audio root clock frequency is configured improperly for 44100 sample rate. Due to clock rate rounding it's 20070401 Hz instead of 22579000 Hz. This results in a too low value of the PSR clock divider in the CPU DAI driver and too fast actual sample rate for fs=44100. E.g. 1 kHz tone has actual 1780 Hz frequency (1 kHz * 20070401/22579000 * 2). Fix this by increasing the correction passed to clk_set_rate() to take into account inaccuracy of the EPLL frequency properly. Fixes: fbeec965b8d1c ("ASoC: samsung: odroid: Fix 32000 sample rate handling") Reported-by: JaeChul Lee Signed-off-by: Sylwester Nawrocki Signed-off-by: Mark Brown --- sound/soc/samsung/odroid.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sound/soc/samsung/odroid.c b/sound/soc/samsung/odroid.c index 694512f980fd..1dc54c4206f0 100644 --- a/sound/soc/samsung/odroid.c +++ b/sound/soc/samsung/odroid.c @@ -91,11 +91,11 @@ static int odroid_card_be_hw_params(struct snd_pcm_substream *substream, return ret; /* - * We add 1 to the rclk_freq value in order to avoid too low clock + * We add 2 to the rclk_freq value in order to avoid too low clock * frequency values due to the EPLL output frequency not being exact * multiple of the audio sampling rate. */ - rclk_freq = params_rate(params) * rfs + 1; + rclk_freq = params_rate(params) * rfs + 2; ret = clk_set_rate(priv->sclk_i2s, rclk_freq); if (ret < 0) -- cgit v1.2.3 From 11cf9d863dcb583345723b0ed72173348761e9c0 Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Sat, 9 Mar 2019 17:37:21 +0530 Subject: fs/dax: Deposit pagetable even when installing zero page Architectures like ppc64 use the deposited page table to store hardware page table slot information. Make sure we deposit a page table when using zero page at the pmd level for hash. Without this we hit Unable to handle kernel paging request for data at address 0x00000000 Faulting instruction address: 0xc000000000082a74 Oops: Kernel access of bad area, sig: 11 [#1] .... NIP [c000000000082a74] __hash_page_thp+0x224/0x5b0 LR [c0000000000829a4] __hash_page_thp+0x154/0x5b0 Call Trace: hash_page_mm+0x43c/0x740 do_hash_page+0x2c/0x3c copy_from_iter_flushcache+0xa4/0x4a0 pmem_copy_from_iter+0x2c/0x50 [nd_pmem] dax_copy_from_iter+0x40/0x70 dax_iomap_actor+0x134/0x360 iomap_apply+0xfc/0x1b0 dax_iomap_rw+0xac/0x130 ext4_file_write_iter+0x254/0x460 [ext4] __vfs_write+0x120/0x1e0 vfs_write+0xd8/0x220 SyS_write+0x6c/0x110 system_call+0x3c/0x130 Fixes: b5beae5e224f ("powerpc/pseries: Add driver for PAPR SCM regions") Cc: Reviewed-by: Jan Kara Signed-off-by: Aneesh Kumar K.V Signed-off-by: Dan Williams --- fs/dax.c | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/fs/dax.c b/fs/dax.c index ca0671d55aa6..e5e54da1715f 100644 --- a/fs/dax.c +++ b/fs/dax.c @@ -33,6 +33,7 @@ #include #include #include +#include #include "internal.h" #define CREATE_TRACE_POINTS @@ -1407,7 +1408,9 @@ static vm_fault_t dax_pmd_load_hole(struct xa_state *xas, struct vm_fault *vmf, { struct address_space *mapping = vmf->vma->vm_file->f_mapping; unsigned long pmd_addr = vmf->address & PMD_MASK; + struct vm_area_struct *vma = vmf->vma; struct inode *inode = mapping->host; + pgtable_t pgtable = NULL; struct page *zero_page; spinlock_t *ptl; pmd_t pmd_entry; @@ -1422,12 +1425,22 @@ static vm_fault_t dax_pmd_load_hole(struct xa_state *xas, struct vm_fault *vmf, *entry = dax_insert_entry(xas, mapping, vmf, *entry, pfn, DAX_PMD | DAX_ZERO_PAGE, false); + if (arch_needs_pgtable_deposit()) { + pgtable = pte_alloc_one(vma->vm_mm); + if (!pgtable) + return VM_FAULT_OOM; + } + ptl = pmd_lock(vmf->vma->vm_mm, vmf->pmd); if (!pmd_none(*(vmf->pmd))) { spin_unlock(ptl); goto fallback; } + if (pgtable) { + pgtable_trans_huge_deposit(vma->vm_mm, vmf->pmd, pgtable); + mm_inc_nr_ptes(vma->vm_mm); + } pmd_entry = mk_pmd(zero_page, vmf->vma->vm_page_prot); pmd_entry = pmd_mkhuge(pmd_entry); set_pmd_at(vmf->vma->vm_mm, pmd_addr, vmf->pmd, pmd_entry); @@ -1436,6 +1449,8 @@ static vm_fault_t dax_pmd_load_hole(struct xa_state *xas, struct vm_fault *vmf, return VM_FAULT_NOPAGE; fallback: + if (pgtable) + pte_free(vma->vm_mm, pgtable); trace_dax_pmd_load_hole_fallback(inode, vmf, zero_page, *entry); return VM_FAULT_FALLBACK; } -- cgit v1.2.3 From 6ee02a54ef990a71bf542b6f0a4e3321de9d9c66 Mon Sep 17 00:00:00 2001 From: Su Yanjun Date: Thu, 14 Mar 2019 14:59:42 +0800 Subject: xfrm6_tunnel: Fix potential panic when unloading xfrm6_tunnel module When unloading xfrm6_tunnel module, xfrm6_tunnel_fini directly frees the xfrm6_tunnel_spi_kmem. Maybe someone has gotten the xfrm6_tunnel_spi, so need to wait it. Fixes: 91cc3bb0b04ff("xfrm6_tunnel: RCU conversion") Signed-off-by: Su Yanjun Acked-by: Herbert Xu Signed-off-by: Steffen Klassert --- net/ipv6/xfrm6_tunnel.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/net/ipv6/xfrm6_tunnel.c b/net/ipv6/xfrm6_tunnel.c index bc65db782bfb..12cb3aa990af 100644 --- a/net/ipv6/xfrm6_tunnel.c +++ b/net/ipv6/xfrm6_tunnel.c @@ -402,6 +402,10 @@ static void __exit xfrm6_tunnel_fini(void) xfrm6_tunnel_deregister(&xfrm6_tunnel_handler, AF_INET6); xfrm_unregister_type(&xfrm6_tunnel_type, AF_INET6); unregister_pernet_subsys(&xfrm6_tunnel_net_ops); + /* Someone maybe has gotten the xfrm6_tunnel_spi. + * So need to wait it. + */ + rcu_barrier(); kmem_cache_destroy(xfrm6_tunnel_spi_kmem); } -- cgit v1.2.3 From 06003531502d06bc89d32528f6ec96bf978790f9 Mon Sep 17 00:00:00 2001 From: Jean-Francois Dagenais Date: Wed, 6 Mar 2019 15:56:06 -0500 Subject: iio: dac: mcp4725: add missing powerdown bits in store eeprom When issuing the write DAC register and write eeprom command, the two powerdown bits (PD0 and PD1) are assumed by the chip to be present in the bytes sent. Leaving them at 0 implies "powerdown disabled" which is a different state that the current one. By adding the current state of the powerdown in the i2c write, the chip will correctly power-on exactly like as it is at the moment of store_eeprom call. This is documented in MCP4725's datasheet, FIGURE 6-2: "Write Commands for DAC Input Register and EEPROM" and MCP4726's datasheet, FIGURE 6-3: "Write All Memory Command". Signed-off-by: Jean-Francois Dagenais Acked-by: Peter Meerwald-Stadler Cc: Signed-off-by: Jonathan Cameron --- drivers/iio/dac/mcp4725.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/iio/dac/mcp4725.c b/drivers/iio/dac/mcp4725.c index 6d71fd905e29..c701a45469f6 100644 --- a/drivers/iio/dac/mcp4725.c +++ b/drivers/iio/dac/mcp4725.c @@ -92,6 +92,7 @@ static ssize_t mcp4725_store_eeprom(struct device *dev, inoutbuf[0] = 0x60; /* write EEPROM */ inoutbuf[0] |= data->ref_mode << 3; + inoutbuf[0] |= data->powerdown ? ((data->powerdown_mode + 1) << 1) : 0; inoutbuf[1] = data->dac_value >> 4; inoutbuf[2] = (data->dac_value & 0xf) << 4; -- cgit v1.2.3 From 62039b6aef63380ba7a37c113bbaeee8a55c5342 Mon Sep 17 00:00:00 2001 From: Sven Van Asbroeck Date: Sun, 10 Mar 2019 14:58:24 -0400 Subject: iio: adc: xilinx: fix potential use-after-free on remove When cancel_delayed_work() returns, the delayed work may still be running. This means that the core could potentially free the private structure (struct xadc) while the delayed work is still using it. This is a potential use-after-free. Fix by calling cancel_delayed_work_sync(), which waits for any residual work to finish before returning. Signed-off-by: Sven Van Asbroeck Signed-off-by: Jonathan Cameron --- drivers/iio/adc/xilinx-xadc-core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iio/adc/xilinx-xadc-core.c b/drivers/iio/adc/xilinx-xadc-core.c index b13c61539d46..ef3afaeed194 100644 --- a/drivers/iio/adc/xilinx-xadc-core.c +++ b/drivers/iio/adc/xilinx-xadc-core.c @@ -1322,7 +1322,7 @@ static int xadc_remove(struct platform_device *pdev) } free_irq(xadc->irq, indio_dev); clk_disable_unprepare(xadc->clk); - cancel_delayed_work(&xadc->zynq_unmask_work); + cancel_delayed_work_sync(&xadc->zynq_unmask_work); kfree(xadc->data); kfree(indio_dev->channels); -- cgit v1.2.3 From 862e4644fd2d7df8998edc65e0963ea2f567bde9 Mon Sep 17 00:00:00 2001 From: Sven Van Asbroeck Date: Sun, 10 Mar 2019 14:58:25 -0400 Subject: iio: adc: xilinx: fix potential use-after-free on probe If probe errors out after request_irq(), its error path does not explicitly cancel the delayed work, which may have been scheduled by the interrupt handler. This means the delayed work may still be running when the core frees the private structure (struct xadc). This is a potential use-after-free. Fix by inserting cancel_delayed_work_sync() in the probe error path. Signed-off-by: Sven Van Asbroeck Signed-off-by: Jonathan Cameron --- drivers/iio/adc/xilinx-xadc-core.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/iio/adc/xilinx-xadc-core.c b/drivers/iio/adc/xilinx-xadc-core.c index ef3afaeed194..5a1b63f9d041 100644 --- a/drivers/iio/adc/xilinx-xadc-core.c +++ b/drivers/iio/adc/xilinx-xadc-core.c @@ -1292,6 +1292,7 @@ static int xadc_probe(struct platform_device *pdev) err_free_irq: free_irq(xadc->irq, indio_dev); + cancel_delayed_work_sync(&xadc->zynq_unmask_work); err_clk_disable_unprepare: clk_disable_unprepare(xadc->clk); err_free_samplerate_trigger: -- cgit v1.2.3 From 2e4b88f73966adead360e47621df0183586fac32 Mon Sep 17 00:00:00 2001 From: Sven Van Asbroeck Date: Sun, 10 Mar 2019 14:58:26 -0400 Subject: iio: adc: xilinx: prevent touching unclocked h/w on remove In remove, the clock is disabled before canceling the delayed work. This means that the delayed work may be touching unclocked hardware. Fix by disabling the clock after the delayed work is fully canceled. This is consistent with the probe error path order. Signed-off-by: Sven Van Asbroeck Signed-off-by: Jonathan Cameron --- drivers/iio/adc/xilinx-xadc-core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iio/adc/xilinx-xadc-core.c b/drivers/iio/adc/xilinx-xadc-core.c index 5a1b63f9d041..6401ca7a9a20 100644 --- a/drivers/iio/adc/xilinx-xadc-core.c +++ b/drivers/iio/adc/xilinx-xadc-core.c @@ -1322,8 +1322,8 @@ static int xadc_remove(struct platform_device *pdev) iio_triggered_buffer_cleanup(indio_dev); } free_irq(xadc->irq, indio_dev); - clk_disable_unprepare(xadc->clk); cancel_delayed_work_sync(&xadc->zynq_unmask_work); + clk_disable_unprepare(xadc->clk); kfree(xadc->data); kfree(indio_dev->channels); -- cgit v1.2.3 From 3d02d7082e5823598090530c3988a35f69689943 Mon Sep 17 00:00:00 2001 From: Gwendal Grignou Date: Wed, 13 Mar 2019 12:40:02 +0100 Subject: iio: cros_ec: Fix the maths for gyro scale calculation Calculation did not use IIO_DEGREE_TO_RAD and implemented a variant to avoid precision loss as we aim a nano value. The offset added to avoid rounding error, though, doesn't give us a close result to the expected value. E.g. For 1000dps, the result should be: (1000 * pi ) / 180 >> 15 ~= 0.000532632218 But with current calculation we get $ cat scale 0.000547890 Fix the calculation by just doing the maths involved for a nano value val * pi * 10e12 / (180 * 2^15) so we get a closer result. $ cat scale 0.000532632 Fixes: c14dca07a31d ("iio: cros_ec_sensors: add ChromeOS EC Contiguous Sensors driver") Signed-off-by: Gwendal Grignou Signed-off-by: Enric Balletbo i Serra Cc: Signed-off-by: Jonathan Cameron --- drivers/iio/common/cros_ec_sensors/cros_ec_sensors.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/iio/common/cros_ec_sensors/cros_ec_sensors.c b/drivers/iio/common/cros_ec_sensors/cros_ec_sensors.c index 89cb0066a6e0..8d76afb87d87 100644 --- a/drivers/iio/common/cros_ec_sensors/cros_ec_sensors.c +++ b/drivers/iio/common/cros_ec_sensors/cros_ec_sensors.c @@ -103,9 +103,10 @@ static int cros_ec_sensors_read(struct iio_dev *indio_dev, * Do not use IIO_DEGREE_TO_RAD to avoid precision * loss. Round to the nearest integer. */ - *val = div_s64(val64 * 314159 + 9000000ULL, 1000); - *val2 = 18000 << (CROS_EC_SENSOR_BITS - 1); - ret = IIO_VAL_FRACTIONAL; + *val = 0; + *val2 = div_s64(val64 * 3141592653ULL, + 180 << (CROS_EC_SENSOR_BITS - 1)); + ret = IIO_VAL_INT_PLUS_NANO; break; case MOTIONSENSE_TYPE_MAG: /* -- cgit v1.2.3 From f6a7bf2ccf22db3e1b936ebc03898f9c025c051e Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Thu, 14 Mar 2019 10:00:52 +0100 Subject: iio: pms7003: select IIO_TRIGGERED_BUFFER Without IIO_TRIGGERED_BUFFER, this driver fails to link: drivers/iio/chemical/pms7003.o: In function `pms7003_probe': pms7003.c:(.text+0x21c): undefined reference to `devm_iio_triggered_buffer_setup' pms7003.c:(.text+0x21c): relocation truncated to fit: R_AARCH64_CALL26 against undefined symbol `devm_iio_triggered_buffer_setup' Fixes: a1d642266c14 ("iio: chemical: add support for Plantower PMS7003 sensor") Signed-off-by: Arnd Bergmann Acked-by: Tomasz Duszynski Signed-off-by: Jonathan Cameron --- drivers/iio/chemical/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/iio/chemical/Kconfig b/drivers/iio/chemical/Kconfig index 5248e180b442..92c684d2b67e 100644 --- a/drivers/iio/chemical/Kconfig +++ b/drivers/iio/chemical/Kconfig @@ -64,6 +64,7 @@ config IAQCORE config PMS7003 tristate "Plantower PMS7003 particulate matter sensor" depends on SERIAL_DEV_BUS + select IIO_TRIGGERED_BUFFER help Say Y here to build support for the Plantower PMS7003 particulate matter sensor. -- cgit v1.2.3 From 924726888f660b2a86382a5dd051ec9ca1b18190 Mon Sep 17 00:00:00 2001 From: "Leonidas P. Papadakos" Date: Fri, 1 Mar 2019 00:29:23 +0200 Subject: arm64: dts: rockchip: fix rk3328-roc-cc gmac2io tx/rx_delay The rk3328-roc-cc board exhibits tx stability issues with large packets, as does the rock64 board, which was fixed with this patch https://patchwork.kernel.org/patch/10178969/ A similar patch was merged for the rk3328-roc-cc here https://patchwork.kernel.org/patch/10804863/ but it doesn't include the tx/rx_delay tweaks, and I find that they help with an issue where large transfers would bring the ethernet link down, causing a link reset regularly. Signed-off-by: Leonidas P. Papadakos Signed-off-by: Heiko Stuebner --- arch/arm64/boot/dts/rockchip/rk3328-roc-cc.dts | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm64/boot/dts/rockchip/rk3328-roc-cc.dts b/arch/arm64/boot/dts/rockchip/rk3328-roc-cc.dts index 33c44e857247..0e34354b2092 100644 --- a/arch/arm64/boot/dts/rockchip/rk3328-roc-cc.dts +++ b/arch/arm64/boot/dts/rockchip/rk3328-roc-cc.dts @@ -108,8 +108,8 @@ snps,reset-gpio = <&gpio1 RK_PC2 GPIO_ACTIVE_LOW>; snps,reset-active-low; snps,reset-delays-us = <0 10000 50000>; - tx_delay = <0x25>; - rx_delay = <0x11>; + tx_delay = <0x24>; + rx_delay = <0x18>; status = "okay"; }; -- cgit v1.2.3 From eb523a4960b6d61bfda1229907ea58841f0340ae Mon Sep 17 00:00:00 2001 From: Ezequiel Garcia Date: Mon, 18 Feb 2019 15:59:26 -0300 Subject: arm64: dts: rockchip: add DDC bus on Rock Pi 4 A DDC I2C bus specifier is required for DDC EDID probing to work properly. Fixes: 1b5715c602fda ("arm64: dts: rockchip: add ROCK Pi 4 DTS support") Signed-off-by: Ezequiel Garcia Signed-off-by: Heiko Stuebner --- arch/arm64/boot/dts/rockchip/rk3399-rock-pi-4.dts | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm64/boot/dts/rockchip/rk3399-rock-pi-4.dts b/arch/arm64/boot/dts/rockchip/rk3399-rock-pi-4.dts index 4a543f2117d4..844eac939a97 100644 --- a/arch/arm64/boot/dts/rockchip/rk3399-rock-pi-4.dts +++ b/arch/arm64/boot/dts/rockchip/rk3399-rock-pi-4.dts @@ -158,6 +158,7 @@ }; &hdmi { + ddc-i2c-bus = <&i2c3>; pinctrl-names = "default"; pinctrl-0 = <&hdmi_cec>; status = "okay"; -- cgit v1.2.3 From 6b2fde3dbfab6ebc45b0cd605e17ca5057ff9a3b Mon Sep 17 00:00:00 2001 From: Jonas Karlman Date: Sun, 24 Feb 2019 21:51:22 +0000 Subject: ARM: dts: rockchip: fix rk3288 cpu opp node reference The following error can be seen during boot: of: /cpus/cpu@501: Couldn't find opp node Change cpu nodes to use operating-points-v2 in order to fix this. Fixes: ce76de984649 ("ARM: dts: rockchip: convert rk3288 to operating-points-v2") Cc: stable@vger.kernel.org Signed-off-by: Jonas Karlman Signed-off-by: Heiko Stuebner --- arch/arm/boot/dts/rk3288.dtsi | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/arm/boot/dts/rk3288.dtsi b/arch/arm/boot/dts/rk3288.dtsi index ca7d52daa8fb..09868dcee34b 100644 --- a/arch/arm/boot/dts/rk3288.dtsi +++ b/arch/arm/boot/dts/rk3288.dtsi @@ -70,7 +70,7 @@ compatible = "arm,cortex-a12"; reg = <0x501>; resets = <&cru SRST_CORE1>; - operating-points = <&cpu_opp_table>; + operating-points-v2 = <&cpu_opp_table>; #cooling-cells = <2>; /* min followed by max */ clock-latency = <40000>; clocks = <&cru ARMCLK>; @@ -80,7 +80,7 @@ compatible = "arm,cortex-a12"; reg = <0x502>; resets = <&cru SRST_CORE2>; - operating-points = <&cpu_opp_table>; + operating-points-v2 = <&cpu_opp_table>; #cooling-cells = <2>; /* min followed by max */ clock-latency = <40000>; clocks = <&cru ARMCLK>; @@ -90,7 +90,7 @@ compatible = "arm,cortex-a12"; reg = <0x503>; resets = <&cru SRST_CORE3>; - operating-points = <&cpu_opp_table>; + operating-points-v2 = <&cpu_opp_table>; #cooling-cells = <2>; /* min followed by max */ clock-latency = <40000>; clocks = <&cru ARMCLK>; -- cgit v1.2.3 From a8772e5d826d0f61f8aa9c284b3ab49035d5273d Mon Sep 17 00:00:00 2001 From: Tomohiro Mayama Date: Sun, 10 Mar 2019 01:10:12 +0900 Subject: arm64: dts: rockchip: Fix vcc_host1_5v GPIO polarity on rk3328-rock64 This patch makes USB ports functioning again. Fixes: 955bebde057e ("arm64: dts: rockchip: add rk3328-rock64 board") Cc: stable@vger.kernel.org Suggested-by: Robin Murphy Signed-off-by: Tomohiro Mayama Tested-by: Katsuhiro Suzuki Signed-off-by: Heiko Stuebner --- arch/arm64/boot/dts/rockchip/rk3328-rock64.dts | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/arch/arm64/boot/dts/rockchip/rk3328-rock64.dts b/arch/arm64/boot/dts/rockchip/rk3328-rock64.dts index 2157a528276b..79b4d1d4b5d6 100644 --- a/arch/arm64/boot/dts/rockchip/rk3328-rock64.dts +++ b/arch/arm64/boot/dts/rockchip/rk3328-rock64.dts @@ -46,8 +46,7 @@ vcc_host1_5v: vcc_otg_5v: vcc-host1-5v-regulator { compatible = "regulator-fixed"; - enable-active-high; - gpio = <&gpio0 RK_PA2 GPIO_ACTIVE_HIGH>; + gpio = <&gpio0 RK_PA2 GPIO_ACTIVE_LOW>; pinctrl-names = "default"; pinctrl-0 = <&usb20_host_drv>; regulator-name = "vcc_host1_5v"; -- cgit v1.2.3 From 8dbc4d5ddb59f49cb3e85bccf42a4720b27a6576 Mon Sep 17 00:00:00 2001 From: David Summers Date: Sat, 9 Mar 2019 15:39:21 +0000 Subject: ARM: dts: rockchip: Fix SD card detection on rk3288-tinker The Problem: On ASUS Tinker Board S, when booting from the eMMC, and there is card in the sd slot, there are constant errors. Also when warm reboot, uboot can not access the sd slot Cause: Identified by Robin Murphy @ ARM. The Card Detect on rk3288 devices is pulled up by vccio-sd; so when the regulator powers this off, card detect gives spurious errors. A second problem, is during power down, vccio-sd apprears to be powered down. This causes a problem when warm rebooting from the sd card. This was identified by Jonas Karlman. History: A common fault on these rk3288 board, which impliment the reference design. When this arose before: http://lists.infradead.org/pipermail/linux-arm-kernel/2014-August/281153.html And Ulf and Jaehoon clearly said this was a broken card detect design, which should be solved via polling Solution: Hence broken-cd is set as a property. This cures the errors. The powering down of vccio-sd during reboot is cured by adding regulator-boot-on. This solutions has been fairly widely reviewed and tested. Fixes: e58c5e739d6f ("ARM: dts: rockchip: move shared tinker-board nodes to a common dtsi") Cc: stable@vger.kernel.org [Heiko: slightly inaccurate fixes but tinker is a sbc (aka like a Pi) where we can hopefully expect people not to rely on overly old stable kernels] Signed-off-by: David Summers Reviewed-by: Jonas Karlman Tested-by: Jonas Karlman Reviewed-by: Robin Murphy Signed-off-by: Heiko Stuebner --- arch/arm/boot/dts/rk3288-tinker.dtsi | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/arm/boot/dts/rk3288-tinker.dtsi b/arch/arm/boot/dts/rk3288-tinker.dtsi index aa107ee41b8b..ef653c3209bc 100644 --- a/arch/arm/boot/dts/rk3288-tinker.dtsi +++ b/arch/arm/boot/dts/rk3288-tinker.dtsi @@ -254,6 +254,7 @@ }; vccio_sd: LDO_REG5 { + regulator-boot-on; regulator-min-microvolt = <1800000>; regulator-max-microvolt = <3300000>; regulator-name = "vccio_sd"; @@ -430,7 +431,7 @@ bus-width = <4>; cap-mmc-highspeed; cap-sd-highspeed; - card-detect-delay = <200>; + broken-cd; disable-wp; /* wp not hooked up */ pinctrl-names = "default"; pinctrl-0 = <&sdmmc_clk &sdmmc_cmd &sdmmc_cd &sdmmc_bus4>; -- cgit v1.2.3 From 6fd8b9780ec1a49ac46e0aaf8775247205e66231 Mon Sep 17 00:00:00 2001 From: Peter Geis Date: Wed, 13 Mar 2019 18:45:36 +0000 Subject: arm64: dts: rockchip: fix rk3328 rgmii high tx error rate Several rk3328 based boards experience high rgmii tx error rates. This is due to several pins in the rk3328.dtsi rgmii pinmux that are missing a defined pull strength setting. This causes the pinmux driver to default to 2ma (bit mask 00). These pins are only defined in the rk3328.dtsi, and are not listed in the rk3328 specification. The TRM only lists them as "Reserved" (RK3328 TRM V1.1, 3.3.3 Detail Register Description, GRF_GPIO0B_IOMUX, GRF_GPIO0C_IOMUX, GRF_GPIO0D_IOMUX). However, removal of these pins from the rgmii pinmux definition causes the interface to fail to transmit. Also, the rgmii tx and rx pins defined in the dtsi are not consistent with the rk3328 specification, with tx pins currently set to 12ma and rx pins set to 2ma. Fix this by setting tx pins to 8ma and the rx pins to 4ma, consistent with the specification. Defining the drive strength for the undefined pins eliminated the high tx packet error rate observed under heavy data transfers. Aligning the drive strength to the TRM values eliminated the occasional packet retry errors under iperf3 testing. This allows much higher data rates with no recorded tx errors. Tested on the rk3328-roc-cc board. Fixes: 52e02d377a72 ("arm64: dts: rockchip: add core dtsi file for RK3328 SoCs") Cc: stable@vger.kernel.org Signed-off-by: Peter Geis Signed-off-by: Heiko Stuebner --- arch/arm64/boot/dts/rockchip/rk3328.dtsi | 44 ++++++++++++++++---------------- 1 file changed, 22 insertions(+), 22 deletions(-) diff --git a/arch/arm64/boot/dts/rockchip/rk3328.dtsi b/arch/arm64/boot/dts/rockchip/rk3328.dtsi index 84f14b132e8f..c55a3f1a87ff 100644 --- a/arch/arm64/boot/dts/rockchip/rk3328.dtsi +++ b/arch/arm64/boot/dts/rockchip/rk3328.dtsi @@ -1642,50 +1642,50 @@ rgmiim1_pins: rgmiim1-pins { rockchip,pins = /* mac_txclk */ - <1 RK_PB4 2 &pcfg_pull_none_12ma>, + <1 RK_PB4 2 &pcfg_pull_none_8ma>, /* mac_rxclk */ - <1 RK_PB5 2 &pcfg_pull_none_2ma>, + <1 RK_PB5 2 &pcfg_pull_none_4ma>, /* mac_mdio */ - <1 RK_PC3 2 &pcfg_pull_none_2ma>, + <1 RK_PC3 2 &pcfg_pull_none_4ma>, /* mac_txen */ - <1 RK_PD1 2 &pcfg_pull_none_12ma>, + <1 RK_PD1 2 &pcfg_pull_none_8ma>, /* mac_clk */ - <1 RK_PC5 2 &pcfg_pull_none_2ma>, + <1 RK_PC5 2 &pcfg_pull_none_4ma>, /* mac_rxdv */ - <1 RK_PC6 2 &pcfg_pull_none_2ma>, + <1 RK_PC6 2 &pcfg_pull_none_4ma>, /* mac_mdc */ - <1 RK_PC7 2 &pcfg_pull_none_2ma>, + <1 RK_PC7 2 &pcfg_pull_none_4ma>, /* mac_rxd1 */ - <1 RK_PB2 2 &pcfg_pull_none_2ma>, + <1 RK_PB2 2 &pcfg_pull_none_4ma>, /* mac_rxd0 */ - <1 RK_PB3 2 &pcfg_pull_none_2ma>, + <1 RK_PB3 2 &pcfg_pull_none_4ma>, /* mac_txd1 */ - <1 RK_PB0 2 &pcfg_pull_none_12ma>, + <1 RK_PB0 2 &pcfg_pull_none_8ma>, /* mac_txd0 */ - <1 RK_PB1 2 &pcfg_pull_none_12ma>, + <1 RK_PB1 2 &pcfg_pull_none_8ma>, /* mac_rxd3 */ - <1 RK_PB6 2 &pcfg_pull_none_2ma>, + <1 RK_PB6 2 &pcfg_pull_none_4ma>, /* mac_rxd2 */ - <1 RK_PB7 2 &pcfg_pull_none_2ma>, + <1 RK_PB7 2 &pcfg_pull_none_4ma>, /* mac_txd3 */ - <1 RK_PC0 2 &pcfg_pull_none_12ma>, + <1 RK_PC0 2 &pcfg_pull_none_8ma>, /* mac_txd2 */ - <1 RK_PC1 2 &pcfg_pull_none_12ma>, + <1 RK_PC1 2 &pcfg_pull_none_8ma>, /* mac_txclk */ - <0 RK_PB0 1 &pcfg_pull_none>, + <0 RK_PB0 1 &pcfg_pull_none_8ma>, /* mac_txen */ - <0 RK_PB4 1 &pcfg_pull_none>, + <0 RK_PB4 1 &pcfg_pull_none_8ma>, /* mac_clk */ - <0 RK_PD0 1 &pcfg_pull_none>, + <0 RK_PD0 1 &pcfg_pull_none_4ma>, /* mac_txd1 */ - <0 RK_PC0 1 &pcfg_pull_none>, + <0 RK_PC0 1 &pcfg_pull_none_8ma>, /* mac_txd0 */ - <0 RK_PC1 1 &pcfg_pull_none>, + <0 RK_PC1 1 &pcfg_pull_none_8ma>, /* mac_txd3 */ - <0 RK_PC7 1 &pcfg_pull_none>, + <0 RK_PC7 1 &pcfg_pull_none_8ma>, /* mac_txd2 */ - <0 RK_PC6 1 &pcfg_pull_none>; + <0 RK_PC6 1 &pcfg_pull_none_8ma>; }; rmiim1_pins: rmiim1-pins { -- cgit v1.2.3 From 09f91381fa5de1d44bc323d8bf345f5d57b3d9b5 Mon Sep 17 00:00:00 2001 From: Peter Geis Date: Wed, 13 Mar 2019 19:02:30 +0000 Subject: arm64: dts: rockchip: fix rk3328 sdmmc0 write errors Various rk3328 based boards experience occasional sdmmc0 write errors. This is due to the rk3328.dtsi tx drive levels being set to 4ma, vs 8ma per the rk3328 datasheet default settings. Fix this by setting the tx signal pins to 8ma. Inspiration from tonymac32's patch, https://github.com/ayufan-rock64/linux-kernel/commit/dc1212b347e0da17c5460bcc0a56b07d02bac3f8 Fixes issues on the rk3328-roc-cc and the rk3328-rock64 (as per the above commit message). Tested on the rk3328-roc-cc board. Fixes: 52e02d377a72 ("arm64: dts: rockchip: add core dtsi file for RK3328 SoCs") Cc: stable@vger.kernel.org Signed-off-by: Peter Geis Signed-off-by: Heiko Stuebner --- arch/arm64/boot/dts/rockchip/rk3328.dtsi | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/arch/arm64/boot/dts/rockchip/rk3328.dtsi b/arch/arm64/boot/dts/rockchip/rk3328.dtsi index c55a3f1a87ff..dabef1a21649 100644 --- a/arch/arm64/boot/dts/rockchip/rk3328.dtsi +++ b/arch/arm64/boot/dts/rockchip/rk3328.dtsi @@ -1445,11 +1445,11 @@ sdmmc0 { sdmmc0_clk: sdmmc0-clk { - rockchip,pins = <1 RK_PA6 1 &pcfg_pull_none_4ma>; + rockchip,pins = <1 RK_PA6 1 &pcfg_pull_none_8ma>; }; sdmmc0_cmd: sdmmc0-cmd { - rockchip,pins = <1 RK_PA4 1 &pcfg_pull_up_4ma>; + rockchip,pins = <1 RK_PA4 1 &pcfg_pull_up_8ma>; }; sdmmc0_dectn: sdmmc0-dectn { @@ -1461,14 +1461,14 @@ }; sdmmc0_bus1: sdmmc0-bus1 { - rockchip,pins = <1 RK_PA0 1 &pcfg_pull_up_4ma>; + rockchip,pins = <1 RK_PA0 1 &pcfg_pull_up_8ma>; }; sdmmc0_bus4: sdmmc0-bus4 { - rockchip,pins = <1 RK_PA0 1 &pcfg_pull_up_4ma>, - <1 RK_PA1 1 &pcfg_pull_up_4ma>, - <1 RK_PA2 1 &pcfg_pull_up_4ma>, - <1 RK_PA3 1 &pcfg_pull_up_4ma>; + rockchip,pins = <1 RK_PA0 1 &pcfg_pull_up_8ma>, + <1 RK_PA1 1 &pcfg_pull_up_8ma>, + <1 RK_PA2 1 &pcfg_pull_up_8ma>, + <1 RK_PA3 1 &pcfg_pull_up_8ma>; }; sdmmc0_gpio: sdmmc0-gpio { -- cgit v1.2.3 From 6b538cc21334b83f09b25dec4aa2d2726bf07ed0 Mon Sep 17 00:00:00 2001 From: Rodrigo Rivas Costa Date: Fri, 15 Mar 2019 20:09:10 +0100 Subject: HID: steam: fix deadlock with input devices. When using this driver with the wireless dongle and some usermode program that monitors every input device (acpid, for example), while another usermode client opens and closes the low-level device repeadedly, the system eventually deadlocks. The reason is that steam_input_register_device() must not be called with the mutex held, because the input subsystem has its own synchronization that clashes with this one: it is possible that steam_input_open() is called before input_register_device() returns, and since steam_input_open() needs to lock the mutex, it deadlocks. However we must hold the mutex when calling any function that sends commands to the controller. If not, random commands end up falling fail. Reported-by: Simon Gene Gottlieb Signed-off-by: Rodrigo Rivas Costa Tested-by: Simon Gene Gottlieb Signed-off-by: Jiri Kosina --- drivers/hid/hid-steam.c | 26 +++++++++++++++++++------- 1 file changed, 19 insertions(+), 7 deletions(-) diff --git a/drivers/hid/hid-steam.c b/drivers/hid/hid-steam.c index 8141cadfca0e..8dae0f9b819e 100644 --- a/drivers/hid/hid-steam.c +++ b/drivers/hid/hid-steam.c @@ -499,6 +499,7 @@ static void steam_battery_unregister(struct steam_device *steam) static int steam_register(struct steam_device *steam) { int ret; + bool client_opened; /* * This function can be called several times in a row with the @@ -511,9 +512,11 @@ static int steam_register(struct steam_device *steam) * Unlikely, but getting the serial could fail, and it is not so * important, so make up a serial number and go on. */ + mutex_lock(&steam->mutex); if (steam_get_serial(steam) < 0) strlcpy(steam->serial_no, "XXXXXXXXXX", sizeof(steam->serial_no)); + mutex_unlock(&steam->mutex); hid_info(steam->hdev, "Steam Controller '%s' connected", steam->serial_no); @@ -528,13 +531,15 @@ static int steam_register(struct steam_device *steam) } mutex_lock(&steam->mutex); - if (!steam->client_opened) { + client_opened = steam->client_opened; + if (!client_opened) steam_set_lizard_mode(steam, lizard_mode); + mutex_unlock(&steam->mutex); + + if (!client_opened) ret = steam_input_register(steam); - } else { + else ret = 0; - } - mutex_unlock(&steam->mutex); return ret; } @@ -630,14 +635,21 @@ static void steam_client_ll_close(struct hid_device *hdev) { struct steam_device *steam = hdev->driver_data; + unsigned long flags; + bool connected; + + spin_lock_irqsave(&steam->lock, flags); + connected = steam->connected; + spin_unlock_irqrestore(&steam->lock, flags); + mutex_lock(&steam->mutex); steam->client_opened = false; + if (connected) + steam_set_lizard_mode(steam, lizard_mode); mutex_unlock(&steam->mutex); - if (steam->connected) { - steam_set_lizard_mode(steam, lizard_mode); + if (connected) steam_input_register(steam); - } } static int steam_client_ll_raw_request(struct hid_device *hdev, -- cgit v1.2.3 From 94a9992f7dbdfb28976b565af220e0c4a117144a Mon Sep 17 00:00:00 2001 From: Kai-Heng Feng Date: Fri, 8 Mar 2019 13:11:17 +0800 Subject: HID: Increase maximum report size allowed by hid_field_extract() Commit 71f6fa90a353 ("HID: increase maximum global item tag report size to 256") increases the max report size from 128 to 256. We also need to update the report size in hid_field_extract() otherwise it complains and truncates now valid report size: [ 406.165461] hid-sensor-hub 001F:8086:22D8.0002: hid_field_extract() called with n (192) > 32! (kworker/5:1) BugLink: https://bugs.launchpad.net/bugs/1818547 Fixes: 71f6fa90a353 ("HID: increase maximum global item tag report size to 256") Signed-off-by: Kai-Heng Feng Signed-off-by: Jiri Kosina --- drivers/hid/hid-core.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/hid/hid-core.c b/drivers/hid/hid-core.c index 9993b692598f..860e21ec6a49 100644 --- a/drivers/hid/hid-core.c +++ b/drivers/hid/hid-core.c @@ -1301,10 +1301,10 @@ static u32 __extract(u8 *report, unsigned offset, int n) u32 hid_field_extract(const struct hid_device *hid, u8 *report, unsigned offset, unsigned n) { - if (n > 32) { - hid_warn(hid, "hid_field_extract() called with n (%d) > 32! (%s)\n", + if (n > 256) { + hid_warn(hid, "hid_field_extract() called with n (%d) > 256! (%s)\n", n, current->comm); - n = 32; + n = 256; } return __extract(report, offset, n); -- cgit v1.2.3 From 9729e3b65a64dfe210972223624d8152ba502e98 Mon Sep 17 00:00:00 2001 From: Tzung-Bi Shih Date: Thu, 7 Mar 2019 10:35:58 +0800 Subject: ASoC: mediatek: mt8183: skip for i2s5 in mck_disable Skip for i2s5 in mck_disable which is also bypassed in mck_enable. Signed-off-by: Tzung-Bi Shih Signed-off-by: Mark Brown --- sound/soc/mediatek/mt8183/mt8183-afe-clk.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/sound/soc/mediatek/mt8183/mt8183-afe-clk.c b/sound/soc/mediatek/mt8183/mt8183-afe-clk.c index f523ad103acc..48e81c5d52fc 100644 --- a/sound/soc/mediatek/mt8183/mt8183-afe-clk.c +++ b/sound/soc/mediatek/mt8183/mt8183-afe-clk.c @@ -605,6 +605,10 @@ void mt8183_mck_disable(struct mtk_base_afe *afe, int mck_id) int m_sel_id = mck_div[mck_id].m_sel_id; int div_clk_id = mck_div[mck_id].div_clk_id; + /* i2s5 mck not support */ + if (mck_id == MT8183_I2S5_MCK) + return; + clk_disable_unprepare(afe_priv->clk[div_clk_id]); if (m_sel_id >= 0) clk_disable_unprepare(afe_priv->clk[m_sel_id]); -- cgit v1.2.3 From 4834d7070c85a5fb69637265dbbb05d13043280c Mon Sep 17 00:00:00 2001 From: Shuming Fan Date: Fri, 8 Mar 2019 11:36:08 +0800 Subject: ASoC: rt5682: Check JD status when system resume The IRQ function may not work when system suspend. We remove snd_soc_dapm_force_enable_pin function call to make sure the bias off when idle and run into suspend/resume function. Signed-off-by: Shuming Fan Signed-off-by: Mark Brown --- sound/soc/codecs/rt5682.c | 34 +++++++++++++++++----------------- 1 file changed, 17 insertions(+), 17 deletions(-) diff --git a/sound/soc/codecs/rt5682.c b/sound/soc/codecs/rt5682.c index 9d5acd2d04ab..b7e2e9937e6d 100644 --- a/sound/soc/codecs/rt5682.c +++ b/sound/soc/codecs/rt5682.c @@ -910,13 +910,20 @@ static int rt5682_headset_detect(struct snd_soc_component *component, int jack_insert) { struct rt5682_priv *rt5682 = snd_soc_component_get_drvdata(component); - struct snd_soc_dapm_context *dapm = - snd_soc_component_get_dapm(component); unsigned int val, count; if (jack_insert) { - snd_soc_dapm_force_enable_pin(dapm, "CBJ Power"); - snd_soc_dapm_sync(dapm); + + snd_soc_component_update_bits(component, RT5682_PWR_ANLG_1, + RT5682_PWR_VREF2, RT5682_PWR_VREF2); + snd_soc_component_update_bits(component, + RT5682_PWR_ANLG_1, RT5682_PWR_FV2, 0); + usleep_range(15000, 20000); + snd_soc_component_update_bits(component, + RT5682_PWR_ANLG_1, RT5682_PWR_FV2, RT5682_PWR_FV2); + snd_soc_component_update_bits(component, RT5682_PWR_ANLG_3, + RT5682_PWR_CBJ, RT5682_PWR_CBJ); + snd_soc_component_update_bits(component, RT5682_CBJ_CTRL_1, RT5682_TRIG_JD_MASK, RT5682_TRIG_JD_HIGH); @@ -944,8 +951,10 @@ static int rt5682_headset_detect(struct snd_soc_component *component, rt5682_enable_push_button_irq(component, false); snd_soc_component_update_bits(component, RT5682_CBJ_CTRL_1, RT5682_TRIG_JD_MASK, RT5682_TRIG_JD_LOW); - snd_soc_dapm_disable_pin(dapm, "CBJ Power"); - snd_soc_dapm_sync(dapm); + snd_soc_component_update_bits(component, RT5682_PWR_ANLG_1, + RT5682_PWR_VREF2, 0); + snd_soc_component_update_bits(component, RT5682_PWR_ANLG_3, + RT5682_PWR_CBJ, 0); rt5682->jack_type = 0; } @@ -1591,8 +1600,6 @@ static const struct snd_soc_dapm_widget rt5682_dapm_widgets[] = { 0, NULL, 0), SND_SOC_DAPM_SUPPLY("Vref1", RT5682_PWR_ANLG_1, RT5682_PWR_VREF1_BIT, 0, rt5655_set_verf, SND_SOC_DAPM_PRE_PMU | SND_SOC_DAPM_POST_PMU), - SND_SOC_DAPM_SUPPLY("Vref2", RT5682_PWR_ANLG_1, RT5682_PWR_VREF2_BIT, 0, - rt5655_set_verf, SND_SOC_DAPM_PRE_PMU | SND_SOC_DAPM_POST_PMU), /* ASRC */ SND_SOC_DAPM_SUPPLY_S("DAC STO1 ASRC", 1, RT5682_PLL_TRACK_1, @@ -1627,9 +1634,6 @@ static const struct snd_soc_dapm_widget rt5682_dapm_widgets[] = { SND_SOC_DAPM_PGA("BST1 CBJ", SND_SOC_NOPM, 0, 0, NULL, 0), - SND_SOC_DAPM_SUPPLY("CBJ Power", RT5682_PWR_ANLG_3, - RT5682_PWR_CBJ_BIT, 0, NULL, 0), - /* REC Mixer */ SND_SOC_DAPM_MIXER("RECMIX1L", SND_SOC_NOPM, 0, 0, rt5682_rec1_l_mix, ARRAY_SIZE(rt5682_rec1_l_mix)), @@ -1792,17 +1796,13 @@ static const struct snd_soc_dapm_route rt5682_dapm_routes[] = { /*Vref*/ {"MICBIAS1", NULL, "Vref1"}, - {"MICBIAS1", NULL, "Vref2"}, {"MICBIAS2", NULL, "Vref1"}, - {"MICBIAS2", NULL, "Vref2"}, {"CLKDET SYS", NULL, "CLKDET"}, {"IN1P", NULL, "LDO2"}, {"BST1 CBJ", NULL, "IN1P"}, - {"BST1 CBJ", NULL, "CBJ Power"}, - {"CBJ Power", NULL, "Vref2"}, {"RECMIX1L", "CBJ Switch", "BST1 CBJ"}, {"RECMIX1L", NULL, "RECMIX1L Power"}, @@ -1912,9 +1912,7 @@ static const struct snd_soc_dapm_route rt5682_dapm_routes[] = { {"HP Amp", NULL, "Capless"}, {"HP Amp", NULL, "Charge Pump"}, {"HP Amp", NULL, "CLKDET SYS"}, - {"HP Amp", NULL, "CBJ Power"}, {"HP Amp", NULL, "Vref1"}, - {"HP Amp", NULL, "Vref2"}, {"HPOL Playback", "Switch", "HP Amp"}, {"HPOR Playback", "Switch", "HP Amp"}, {"HPOL", NULL, "HPOL Playback"}, @@ -2363,6 +2361,8 @@ static int rt5682_resume(struct snd_soc_component *component) regcache_cache_only(rt5682->regmap, false); regcache_sync(rt5682->regmap); + rt5682_irq(0, rt5682); + return 0; } #else -- cgit v1.2.3 From 675212bfb23394514b7f68ebf3954ba936281ccc Mon Sep 17 00:00:00 2001 From: Shuming Fan Date: Mon, 18 Mar 2019 15:17:13 +0800 Subject: ASoC: rt5682: fix jack type detection issue The jack type detection needs the main bias power of analog. The modification makes sure the main bias power on/off while jack plug/unplug. Signed-off-by: Shuming Fan Signed-off-by: Mark Brown --- sound/soc/codecs/rt5682.c | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/sound/soc/codecs/rt5682.c b/sound/soc/codecs/rt5682.c index b7e2e9937e6d..b98974edf6ad 100644 --- a/sound/soc/codecs/rt5682.c +++ b/sound/soc/codecs/rt5682.c @@ -915,7 +915,8 @@ static int rt5682_headset_detect(struct snd_soc_component *component, if (jack_insert) { snd_soc_component_update_bits(component, RT5682_PWR_ANLG_1, - RT5682_PWR_VREF2, RT5682_PWR_VREF2); + RT5682_PWR_VREF2 | RT5682_PWR_MB, + RT5682_PWR_VREF2 | RT5682_PWR_MB); snd_soc_component_update_bits(component, RT5682_PWR_ANLG_1, RT5682_PWR_FV2, 0); usleep_range(15000, 20000); @@ -952,7 +953,7 @@ static int rt5682_headset_detect(struct snd_soc_component *component, snd_soc_component_update_bits(component, RT5682_CBJ_CTRL_1, RT5682_TRIG_JD_MASK, RT5682_TRIG_JD_LOW); snd_soc_component_update_bits(component, RT5682_PWR_ANLG_1, - RT5682_PWR_VREF2, 0); + RT5682_PWR_VREF2 | RT5682_PWR_MB, 0); snd_soc_component_update_bits(component, RT5682_PWR_ANLG_3, RT5682_PWR_CBJ, 0); @@ -2301,16 +2302,13 @@ static int rt5682_set_bias_level(struct snd_soc_component *component, switch (level) { case SND_SOC_BIAS_PREPARE: regmap_update_bits(rt5682->regmap, RT5682_PWR_ANLG_1, - RT5682_PWR_MB | RT5682_PWR_BG, - RT5682_PWR_MB | RT5682_PWR_BG); + RT5682_PWR_BG, RT5682_PWR_BG); regmap_update_bits(rt5682->regmap, RT5682_PWR_DIG_1, RT5682_DIG_GATE_CTRL | RT5682_PWR_LDO, RT5682_DIG_GATE_CTRL | RT5682_PWR_LDO); break; case SND_SOC_BIAS_STANDBY: - regmap_update_bits(rt5682->regmap, RT5682_PWR_ANLG_1, - RT5682_PWR_MB, RT5682_PWR_MB); regmap_update_bits(rt5682->regmap, RT5682_PWR_DIG_1, RT5682_DIG_GATE_CTRL, RT5682_DIG_GATE_CTRL); break; @@ -2318,7 +2316,7 @@ static int rt5682_set_bias_level(struct snd_soc_component *component, regmap_update_bits(rt5682->regmap, RT5682_PWR_DIG_1, RT5682_DIG_GATE_CTRL | RT5682_PWR_LDO, 0); regmap_update_bits(rt5682->regmap, RT5682_PWR_ANLG_1, - RT5682_PWR_MB | RT5682_PWR_BG, 0); + RT5682_PWR_BG, 0); break; default: -- cgit v1.2.3 From 1c5b6a27e432e4fe170a924c8b41012271496a4c Mon Sep 17 00:00:00 2001 From: Shuming Fan Date: Mon, 18 Mar 2019 15:17:42 +0800 Subject: ASoC: rt5682: recording has no sound after booting If ASRC turns on, HW will use clk_dac as the reference clock whether recording or playback. Both of clk_dac and clk_adc should set proper clock while using ASRC. Signed-off-by: Shuming Fan Signed-off-by: Mark Brown --- sound/soc/codecs/rt5682.c | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) diff --git a/sound/soc/codecs/rt5682.c b/sound/soc/codecs/rt5682.c index b98974edf6ad..86a7fa31c294 100644 --- a/sound/soc/codecs/rt5682.c +++ b/sound/soc/codecs/rt5682.c @@ -1208,7 +1208,7 @@ static int set_filter_clk(struct snd_soc_dapm_widget *w, struct snd_soc_component *component = snd_soc_dapm_to_component(w->dapm); struct rt5682_priv *rt5682 = snd_soc_component_get_drvdata(component); - int ref, val, reg, sft, mask, idx = -EINVAL; + int ref, val, reg, idx = -EINVAL; static const int div_f[] = {1, 2, 3, 4, 6, 8, 12, 16, 24, 32, 48}; static const int div_o[] = {1, 2, 4, 6, 8, 12, 16, 24, 32, 48}; @@ -1222,15 +1222,10 @@ static int set_filter_clk(struct snd_soc_dapm_widget *w, idx = rt5682_div_sel(rt5682, ref, div_f, ARRAY_SIZE(div_f)); - if (w->shift == RT5682_PWR_ADC_S1F_BIT) { + if (w->shift == RT5682_PWR_ADC_S1F_BIT) reg = RT5682_PLL_TRACK_3; - sft = RT5682_ADC_OSR_SFT; - mask = RT5682_ADC_OSR_MASK; - } else { + else reg = RT5682_PLL_TRACK_2; - sft = RT5682_DAC_OSR_SFT; - mask = RT5682_DAC_OSR_MASK; - } snd_soc_component_update_bits(component, reg, RT5682_FILTER_CLK_DIV_MASK, idx << RT5682_FILTER_CLK_DIV_SFT); @@ -1242,7 +1237,8 @@ static int set_filter_clk(struct snd_soc_dapm_widget *w, } snd_soc_component_update_bits(component, RT5682_ADDA_CLK_1, - mask, idx << sft); + RT5682_ADC_OSR_MASK | RT5682_DAC_OSR_MASK, + (idx << RT5682_ADC_OSR_SFT) | (idx << RT5682_DAC_OSR_SFT)); return 0; } -- cgit v1.2.3 From 00206a69ee32f03e6f40837684dcbe475ea02266 Mon Sep 17 00:00:00 2001 From: Matteo Croce Date: Mon, 18 Mar 2019 02:32:36 +0100 Subject: percpu: stop printing kernel addresses Since commit ad67b74d2469d9b8 ("printk: hash addresses printed with %p"), at boot "____ptrval____" is printed instead of actual addresses: percpu: Embedded 38 pages/cpu @(____ptrval____) s124376 r0 d31272 u524288 Instead of changing the print to "%px", and leaking kernel addresses, just remove the print completely, cfr. e.g. commit 071929dbdd865f77 ("arm64: Stop printing the virtual memory layout"). Signed-off-by: Matteo Croce Signed-off-by: Dennis Zhou --- mm/percpu.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/mm/percpu.c b/mm/percpu.c index 2e6fc8d552c9..68dd2e7e73b5 100644 --- a/mm/percpu.c +++ b/mm/percpu.c @@ -2567,8 +2567,8 @@ int __init pcpu_embed_first_chunk(size_t reserved_size, size_t dyn_size, ai->groups[group].base_offset = areas[group] - base; } - pr_info("Embedded %zu pages/cpu @%p s%zu r%zu d%zu u%zu\n", - PFN_DOWN(size_sum), base, ai->static_size, ai->reserved_size, + pr_info("Embedded %zu pages/cpu s%zu r%zu d%zu u%zu\n", + PFN_DOWN(size_sum), ai->static_size, ai->reserved_size, ai->dyn_size, ai->unit_size); rc = pcpu_setup_first_chunk(ai, base); @@ -2692,8 +2692,8 @@ int __init pcpu_page_first_chunk(size_t reserved_size, } /* we're ready, commit */ - pr_info("%d %s pages/cpu @%p s%zu r%zu d%zu\n", - unit_pages, psize_str, vm.addr, ai->static_size, + pr_info("%d %s pages/cpu s%zu r%zu d%zu\n", + unit_pages, psize_str, ai->static_size, ai->reserved_size, ai->dyn_size); rc = pcpu_setup_first_chunk(ai, vm.addr); -- cgit v1.2.3 From 45b06682113b102bdf38678311da93a689b0b78d Mon Sep 17 00:00:00 2001 From: Matthias Wieloch Date: Mon, 18 Mar 2019 11:50:45 +0100 Subject: clk: at91: fix programmable clock for sama5d2 The prescaler formula of the programmable clock has changed for sama5d2. Update the driver accordingly. Fixes: a2038077de9a ("clk: at91: add sama5d2 PMC driver") Cc: # v4.20+ Signed-off-by: Nicolas Ferre [nicolas.ferre@microchip.com: adapt the prescaler range, fix clk_programmable_recalc_rate, split patch] Signed-off-by: Matthias Wieloch Signed-off-by: Alexandre Belloni Signed-off-by: Stephen Boyd --- drivers/clk/at91/clk-programmable.c | 57 ++++++++++++++++++++++++++++--------- drivers/clk/at91/pmc.h | 2 ++ drivers/clk/at91/sama5d2.c | 10 ++++++- 3 files changed, 54 insertions(+), 15 deletions(-) diff --git a/drivers/clk/at91/clk-programmable.c b/drivers/clk/at91/clk-programmable.c index 89d6f3736dbf..f8edbb65eda3 100644 --- a/drivers/clk/at91/clk-programmable.c +++ b/drivers/clk/at91/clk-programmable.c @@ -20,8 +20,7 @@ #define PROG_ID_MAX 7 #define PROG_STATUS_MASK(id) (1 << ((id) + 8)) -#define PROG_PRES_MASK 0x7 -#define PROG_PRES(layout, pckr) ((pckr >> layout->pres_shift) & PROG_PRES_MASK) +#define PROG_PRES(layout, pckr) ((pckr >> layout->pres_shift) & layout->pres_mask) #define PROG_MAX_RM9200_CSS 3 struct clk_programmable { @@ -37,20 +36,29 @@ static unsigned long clk_programmable_recalc_rate(struct clk_hw *hw, unsigned long parent_rate) { struct clk_programmable *prog = to_clk_programmable(hw); + const struct clk_programmable_layout *layout = prog->layout; unsigned int pckr; + unsigned long rate; regmap_read(prog->regmap, AT91_PMC_PCKR(prog->id), &pckr); - return parent_rate >> PROG_PRES(prog->layout, pckr); + if (layout->is_pres_direct) + rate = parent_rate / (PROG_PRES(layout, pckr) + 1); + else + rate = parent_rate >> PROG_PRES(layout, pckr); + + return rate; } static int clk_programmable_determine_rate(struct clk_hw *hw, struct clk_rate_request *req) { + struct clk_programmable *prog = to_clk_programmable(hw); + const struct clk_programmable_layout *layout = prog->layout; struct clk_hw *parent; long best_rate = -EINVAL; unsigned long parent_rate; - unsigned long tmp_rate; + unsigned long tmp_rate = 0; int shift; int i; @@ -60,10 +68,18 @@ static int clk_programmable_determine_rate(struct clk_hw *hw, continue; parent_rate = clk_hw_get_rate(parent); - for (shift = 0; shift < PROG_PRES_MASK; shift++) { - tmp_rate = parent_rate >> shift; - if (tmp_rate <= req->rate) - break; + if (layout->is_pres_direct) { + for (shift = 0; shift <= layout->pres_mask; shift++) { + tmp_rate = parent_rate / (shift + 1); + if (tmp_rate <= req->rate) + break; + } + } else { + for (shift = 0; shift < layout->pres_mask; shift++) { + tmp_rate = parent_rate >> shift; + if (tmp_rate <= req->rate) + break; + } } if (tmp_rate > req->rate) @@ -137,16 +153,23 @@ static int clk_programmable_set_rate(struct clk_hw *hw, unsigned long rate, if (!div) return -EINVAL; - shift = fls(div) - 1; + if (layout->is_pres_direct) { + shift = div - 1; - if (div != (1 << shift)) - return -EINVAL; + if (shift > layout->pres_mask) + return -EINVAL; + } else { + shift = fls(div) - 1; - if (shift >= PROG_PRES_MASK) - return -EINVAL; + if (div != (1 << shift)) + return -EINVAL; + + if (shift >= layout->pres_mask) + return -EINVAL; + } regmap_update_bits(prog->regmap, AT91_PMC_PCKR(prog->id), - PROG_PRES_MASK << layout->pres_shift, + layout->pres_mask << layout->pres_shift, shift << layout->pres_shift); return 0; @@ -202,19 +225,25 @@ at91_clk_register_programmable(struct regmap *regmap, } const struct clk_programmable_layout at91rm9200_programmable_layout = { + .pres_mask = 0x7, .pres_shift = 2, .css_mask = 0x3, .have_slck_mck = 0, + .is_pres_direct = 0, }; const struct clk_programmable_layout at91sam9g45_programmable_layout = { + .pres_mask = 0x7, .pres_shift = 2, .css_mask = 0x3, .have_slck_mck = 1, + .is_pres_direct = 0, }; const struct clk_programmable_layout at91sam9x5_programmable_layout = { + .pres_mask = 0x7, .pres_shift = 4, .css_mask = 0x7, .have_slck_mck = 0, + .is_pres_direct = 0, }; diff --git a/drivers/clk/at91/pmc.h b/drivers/clk/at91/pmc.h index 672a79bda88c..a0e5ce9c9b9e 100644 --- a/drivers/clk/at91/pmc.h +++ b/drivers/clk/at91/pmc.h @@ -71,9 +71,11 @@ struct clk_pll_characteristics { }; struct clk_programmable_layout { + u8 pres_mask; u8 pres_shift; u8 css_mask; u8 have_slck_mck; + u8 is_pres_direct; }; extern const struct clk_programmable_layout at91rm9200_programmable_layout; diff --git a/drivers/clk/at91/sama5d2.c b/drivers/clk/at91/sama5d2.c index 1f70cb164b06..81943fac4537 100644 --- a/drivers/clk/at91/sama5d2.c +++ b/drivers/clk/at91/sama5d2.c @@ -125,6 +125,14 @@ static const struct { .pll = true }, }; +static const struct clk_programmable_layout sama5d2_programmable_layout = { + .pres_mask = 0xff, + .pres_shift = 4, + .css_mask = 0x7, + .have_slck_mck = 0, + .is_pres_direct = 1, +}; + static void __init sama5d2_pmc_setup(struct device_node *np) { struct clk_range range = CLK_RANGE(0, 0); @@ -249,7 +257,7 @@ static void __init sama5d2_pmc_setup(struct device_node *np) hw = at91_clk_register_programmable(regmap, name, parent_names, 6, i, - &at91sam9x5_programmable_layout); + &sama5d2_programmable_layout); if (IS_ERR(hw)) goto err_free; } -- cgit v1.2.3 From 3df64d7b0a4f70f1797f23cfd4cca5c4d48131fe Mon Sep 17 00:00:00 2001 From: CK Hu Date: Mon, 14 Jan 2019 17:36:48 +0800 Subject: drm/mediatek: Implement gem prime vmap/vunmap function For some application which need kernel virtual address, such as fbcon, implement these function to map/unmap kernel virtual address of prime buffer. Signed-off-by: CK Hu --- drivers/gpu/drm/mediatek/mtk_drm_drv.c | 2 ++ drivers/gpu/drm/mediatek/mtk_drm_gem.c | 46 ++++++++++++++++++++++++++++++++++ drivers/gpu/drm/mediatek/mtk_drm_gem.h | 3 +++ 3 files changed, 51 insertions(+) diff --git a/drivers/gpu/drm/mediatek/mtk_drm_drv.c b/drivers/gpu/drm/mediatek/mtk_drm_drv.c index cf59ea9bccfd..4c3375cfb68f 100644 --- a/drivers/gpu/drm/mediatek/mtk_drm_drv.c +++ b/drivers/gpu/drm/mediatek/mtk_drm_drv.c @@ -341,6 +341,8 @@ static struct drm_driver mtk_drm_driver = { .gem_prime_get_sg_table = mtk_gem_prime_get_sg_table, .gem_prime_import_sg_table = mtk_gem_prime_import_sg_table, .gem_prime_mmap = mtk_drm_gem_mmap_buf, + .gem_prime_vmap = mtk_drm_gem_prime_vmap, + .gem_prime_vunmap = mtk_drm_gem_prime_vunmap, .fops = &mtk_drm_fops, .name = DRIVER_NAME, diff --git a/drivers/gpu/drm/mediatek/mtk_drm_gem.c b/drivers/gpu/drm/mediatek/mtk_drm_gem.c index 259b7b0de1d2..38483e9ee071 100644 --- a/drivers/gpu/drm/mediatek/mtk_drm_gem.c +++ b/drivers/gpu/drm/mediatek/mtk_drm_gem.c @@ -241,3 +241,49 @@ err_gem_free: kfree(mtk_gem); return ERR_PTR(ret); } + +void *mtk_drm_gem_prime_vmap(struct drm_gem_object *obj) +{ + struct mtk_drm_gem_obj *mtk_gem = to_mtk_gem_obj(obj); + struct sg_table *sgt; + struct sg_page_iter iter; + unsigned int npages; + unsigned int i = 0; + + if (mtk_gem->kvaddr) + return mtk_gem->kvaddr; + + sgt = mtk_gem_prime_get_sg_table(obj); + if (IS_ERR(sgt)) + return NULL; + + npages = obj->size >> PAGE_SHIFT; + mtk_gem->pages = kcalloc(npages, sizeof(*mtk_gem->pages), GFP_KERNEL); + if (!mtk_gem->pages) + goto out; + + for_each_sg_page(sgt->sgl, &iter, sgt->orig_nents, 0) { + mtk_gem->pages[i++] = sg_page_iter_page(&iter); + if (i > npages) + break; + } + mtk_gem->kvaddr = vmap(mtk_gem->pages, npages, VM_MAP, + pgprot_writecombine(PAGE_KERNEL)); + +out: + kfree((void *)sgt); + + return mtk_gem->kvaddr; +} + +void mtk_drm_gem_prime_vunmap(struct drm_gem_object *obj, void *vaddr) +{ + struct mtk_drm_gem_obj *mtk_gem = to_mtk_gem_obj(obj); + + if (!mtk_gem->pages) + return; + + vunmap(vaddr); + mtk_gem->kvaddr = 0; + kfree((void *)mtk_gem->pages); +} diff --git a/drivers/gpu/drm/mediatek/mtk_drm_gem.h b/drivers/gpu/drm/mediatek/mtk_drm_gem.h index 534639b43a1c..c047a7ef294f 100644 --- a/drivers/gpu/drm/mediatek/mtk_drm_gem.h +++ b/drivers/gpu/drm/mediatek/mtk_drm_gem.h @@ -37,6 +37,7 @@ struct mtk_drm_gem_obj { dma_addr_t dma_addr; unsigned long dma_attrs; struct sg_table *sg; + struct page **pages; }; #define to_mtk_gem_obj(x) container_of(x, struct mtk_drm_gem_obj, base) @@ -52,5 +53,7 @@ int mtk_drm_gem_mmap_buf(struct drm_gem_object *obj, struct sg_table *mtk_gem_prime_get_sg_table(struct drm_gem_object *obj); struct drm_gem_object *mtk_gem_prime_import_sg_table(struct drm_device *dev, struct dma_buf_attachment *attach, struct sg_table *sg); +void *mtk_drm_gem_prime_vmap(struct drm_gem_object *obj); +void mtk_drm_gem_prime_vunmap(struct drm_gem_object *obj, void *vaddr); #endif -- cgit v1.2.3 From d6db988a44294d7a91f60ecb87914694ba106305 Mon Sep 17 00:00:00 2001 From: CK Hu Date: Mon, 14 Jan 2019 17:44:44 +0800 Subject: drm/mediatek: Add Mediatek framebuffer device For Mediatek drm driver, use fbdev emulation to create a framebuffer device. Signed-off-by: CK Hu --- drivers/gpu/drm/mediatek/mtk_drm_drv.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/gpu/drm/mediatek/mtk_drm_drv.c b/drivers/gpu/drm/mediatek/mtk_drm_drv.c index 4c3375cfb68f..57ce4708ef1b 100644 --- a/drivers/gpu/drm/mediatek/mtk_drm_drv.c +++ b/drivers/gpu/drm/mediatek/mtk_drm_drv.c @@ -15,6 +15,7 @@ #include #include #include +#include #include #include #include @@ -378,6 +379,10 @@ static int mtk_drm_bind(struct device *dev) if (ret < 0) goto err_deinit; + ret = drm_fbdev_generic_setup(drm, 32); + if (ret) + DRM_ERROR("Failed to initialize fbdev: %d\n", ret); + return 0; err_deinit: -- cgit v1.2.3 From 6c44b15e1c9076d925d5236ddadf1318b0a25ce2 Mon Sep 17 00:00:00 2001 From: Kangjie Lu Date: Thu, 14 Mar 2019 00:24:02 -0500 Subject: HID: logitech: check the return value of create_singlethread_workqueue create_singlethread_workqueue may fail and return NULL. The fix checks if it is NULL to avoid NULL pointer dereference. Also, the fix moves the call of create_singlethread_workqueue earlier to avoid resource-release issues. Signed-off-by: Kangjie Lu Signed-off-by: Jiri Kosina --- drivers/hid/hid-logitech-hidpp.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/hid/hid-logitech-hidpp.c b/drivers/hid/hid-logitech-hidpp.c index 15ed6177a7a3..0a243247b231 100644 --- a/drivers/hid/hid-logitech-hidpp.c +++ b/drivers/hid/hid-logitech-hidpp.c @@ -2111,6 +2111,13 @@ static int hidpp_ff_init(struct hidpp_device *hidpp, u8 feature_index) kfree(data); return -ENOMEM; } + data->wq = create_singlethread_workqueue("hidpp-ff-sendqueue"); + if (!data->wq) { + kfree(data->effect_ids); + kfree(data); + return -ENOMEM; + } + data->hidpp = hidpp; data->feature_index = feature_index; data->version = version; @@ -2155,7 +2162,6 @@ static int hidpp_ff_init(struct hidpp_device *hidpp, u8 feature_index) /* ignore boost value at response.fap.params[2] */ /* init the hardware command queue */ - data->wq = create_singlethread_workqueue("hidpp-ff-sendqueue"); atomic_set(&data->workqueue_size, 0); /* initialize with zero autocenter to get wheel in usable state */ -- cgit v1.2.3 From 639e5eb3c7d67e407f2a71fccd95323751398f6f Mon Sep 17 00:00:00 2001 From: Charles Keepax Date: Tue, 19 Mar 2019 11:52:04 +0000 Subject: ASoC: wm_adsp: Correct handling of compressed streams that restart Previously support was added to allow streams to be stopped and started again without the DSP being power cycled and this was done by clearing the buffer state in trigger start. Another supported use-case is using the DSP for a trigger event then opening the compressed stream later to receive the audio, unfortunately clearing the buffer state in trigger start destroys the data received from such a trigger. Correct this issue by moving the call to wm_adsp_buffer_clear to be in trigger stop instead. Fixes: 61fc060c40e6 ("ASoC: wm_adsp: Support streams which can start/stop with DSP active") Signed-off-by: Charles Keepax Signed-off-by: Mark Brown --- sound/soc/codecs/wm_adsp.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/sound/soc/codecs/wm_adsp.c b/sound/soc/codecs/wm_adsp.c index b93fdc8d2d6f..0de36b7eeeb2 100644 --- a/sound/soc/codecs/wm_adsp.c +++ b/sound/soc/codecs/wm_adsp.c @@ -3571,8 +3571,6 @@ int wm_adsp_compr_trigger(struct snd_compr_stream *stream, int cmd) if (ret < 0) break; - wm_adsp_buffer_clear(compr->buf); - /* Trigger the IRQ at one fragment of data */ ret = wm_adsp_buffer_write(compr->buf, HOST_BUFFER_FIELD(high_water_mark), @@ -3584,6 +3582,7 @@ int wm_adsp_compr_trigger(struct snd_compr_stream *stream, int cmd) } break; case SNDRV_PCM_TRIGGER_STOP: + wm_adsp_buffer_clear(compr->buf); break; default: ret = -EINVAL; -- cgit v1.2.3 From 48ead31ce247dc8c0b01ad99d1a97da35421493b Mon Sep 17 00:00:00 2001 From: Charles Keepax Date: Tue, 19 Mar 2019 11:52:05 +0000 Subject: ASoC: wm_adsp: Correct error messages in wm_adsp_buffer_get_error During recent logging improvements it seems two error messages lost their updates during patch application/rebasing. Add these back in. Fixes: 0d3fba3e7a56 ("ASoC: wm_adsp: Improve logging messages") Signed-off-by: Charles Keepax Signed-off-by: Mark Brown --- sound/soc/codecs/wm_adsp.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sound/soc/codecs/wm_adsp.c b/sound/soc/codecs/wm_adsp.c index 0de36b7eeeb2..b3348a213c18 100644 --- a/sound/soc/codecs/wm_adsp.c +++ b/sound/soc/codecs/wm_adsp.c @@ -3535,11 +3535,11 @@ static int wm_adsp_buffer_get_error(struct wm_adsp_compr_buf *buf) ret = wm_adsp_buffer_read(buf, HOST_BUFFER_FIELD(error), &buf->error); if (ret < 0) { - adsp_err(buf->dsp, "Failed to check buffer error: %d\n", ret); + compr_err(buf, "Failed to check buffer error: %d\n", ret); return ret; } if (buf->error != 0) { - adsp_err(buf->dsp, "Buffer error occurred: %d\n", buf->error); + compr_err(buf, "Buffer error occurred: %d\n", buf->error); return -EIO; } -- cgit v1.2.3 From a2225a6d155fcb247fe4c6d87f7c91807462966d Mon Sep 17 00:00:00 2001 From: Charles Keepax Date: Tue, 19 Mar 2019 11:52:06 +0000 Subject: ASoC: wm_adsp: Add locking to wm_adsp2_bus_error Best to lock across handling the bus error to ensure the DSP doesn't change power state as we are reading the status registers. Signed-off-by: Charles Keepax Signed-off-by: Mark Brown --- sound/soc/codecs/wm_adsp.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/sound/soc/codecs/wm_adsp.c b/sound/soc/codecs/wm_adsp.c index b3348a213c18..c19a8a041d4d 100644 --- a/sound/soc/codecs/wm_adsp.c +++ b/sound/soc/codecs/wm_adsp.c @@ -3922,11 +3922,13 @@ irqreturn_t wm_adsp2_bus_error(struct wm_adsp *dsp) struct regmap *regmap = dsp->regmap; int ret = 0; + mutex_lock(&dsp->pwr_lock); + ret = regmap_read(regmap, dsp->base + ADSP2_LOCK_REGION_CTRL, &val); if (ret) { adsp_err(dsp, "Failed to read Region Lock Ctrl register: %d\n", ret); - return IRQ_HANDLED; + goto error; } if (val & ADSP2_WDT_TIMEOUT_STS_MASK) { @@ -3945,7 +3947,7 @@ irqreturn_t wm_adsp2_bus_error(struct wm_adsp *dsp) adsp_err(dsp, "Failed to read Bus Err Addr register: %d\n", ret); - return IRQ_HANDLED; + goto error; } adsp_err(dsp, "bus error address = 0x%x\n", @@ -3958,7 +3960,7 @@ irqreturn_t wm_adsp2_bus_error(struct wm_adsp *dsp) adsp_err(dsp, "Failed to read Pmem Xmem Err Addr register: %d\n", ret); - return IRQ_HANDLED; + goto error; } adsp_err(dsp, "xmem error address = 0x%x\n", @@ -3971,6 +3973,9 @@ irqreturn_t wm_adsp2_bus_error(struct wm_adsp *dsp) regmap_update_bits(regmap, dsp->base + ADSP2_LOCK_REGION_CTRL, ADSP2_CTRL_ERR_EINT, ADSP2_CTRL_ERR_EINT); +error: + mutex_unlock(&dsp->pwr_lock); + return IRQ_HANDLED; } EXPORT_SYMBOL_GPL(wm_adsp2_bus_error); -- cgit v1.2.3 From a2bcbc1b9ac2f982a438081a9f1b5d823332d514 Mon Sep 17 00:00:00 2001 From: Charles Keepax Date: Tue, 19 Mar 2019 11:52:07 +0000 Subject: ASoC: wm_adsp: Shutdown any compressed streams on DSP watchdog timeout If a watchdog timeout is received from the DSP it is safe to assume the DSP is not functioning anymore and as such any active compressed streams should be put into an error state. Signed-off-by: Charles Keepax Signed-off-by: Mark Brown --- sound/soc/codecs/wm_adsp.c | 21 +++++++++++++++++++++ sound/soc/codecs/wm_adsp.h | 1 + 2 files changed, 22 insertions(+) diff --git a/sound/soc/codecs/wm_adsp.c b/sound/soc/codecs/wm_adsp.c index c19a8a041d4d..5608ed5decca 100644 --- a/sound/soc/codecs/wm_adsp.c +++ b/sound/soc/codecs/wm_adsp.c @@ -2905,6 +2905,8 @@ int wm_adsp2_event(struct snd_soc_dapm_widget *w, if (wm_adsp_fw[dsp->fw].num_caps != 0) wm_adsp_buffer_free(dsp); + dsp->fatal_error = false; + mutex_unlock(&dsp->pwr_lock); adsp_dbg(dsp, "Execution stopped\n"); @@ -3000,6 +3002,9 @@ static int wm_adsp_compr_attach(struct wm_adsp_compr *compr) { struct wm_adsp_compr_buf *buf = NULL, *tmp; + if (compr->dsp->fatal_error) + return -EINVAL; + list_for_each_entry(tmp, &compr->dsp->buffer_list, list) { if (!tmp->name || !strcmp(compr->name, tmp->name)) { buf = tmp; @@ -3916,6 +3921,21 @@ int wm_adsp2_lock(struct wm_adsp *dsp, unsigned int lock_regions) } EXPORT_SYMBOL_GPL(wm_adsp2_lock); +static void wm_adsp_fatal_error(struct wm_adsp *dsp) +{ + struct wm_adsp_compr *compr; + + dsp->fatal_error = true; + + list_for_each_entry(compr, &dsp->compr_list, list) { + if (compr->stream) { + snd_compr_stop_error(compr->stream, + SNDRV_PCM_STATE_XRUN); + snd_compr_fragment_elapsed(compr->stream); + } + } +} + irqreturn_t wm_adsp2_bus_error(struct wm_adsp *dsp) { unsigned int val; @@ -3934,6 +3954,7 @@ irqreturn_t wm_adsp2_bus_error(struct wm_adsp *dsp) if (val & ADSP2_WDT_TIMEOUT_STS_MASK) { adsp_err(dsp, "watchdog timeout error\n"); wm_adsp_stop_watchdog(dsp); + wm_adsp_fatal_error(dsp); } if (val & (ADSP2_SLAVE_ERR_MASK | ADSP2_REGION_LOCK_ERR_MASK)) { diff --git a/sound/soc/codecs/wm_adsp.h b/sound/soc/codecs/wm_adsp.h index 59e07ad16329..8f09b4419a91 100644 --- a/sound/soc/codecs/wm_adsp.h +++ b/sound/soc/codecs/wm_adsp.h @@ -85,6 +85,7 @@ struct wm_adsp { bool preloaded; bool booted; bool running; + bool fatal_error; struct list_head ctl_list; -- cgit v1.2.3 From cef0d4948cb0a02db37ebfdc320e127c77ab1637 Mon Sep 17 00:00:00 2001 From: "He, Bo" Date: Thu, 14 Mar 2019 02:28:21 +0000 Subject: HID: debug: fix race condition with between rdesc_show() and device removal There is a race condition that could happen if hid_debug_rdesc_show() is running while hdev is in the process of going away (device removal, system suspend, etc) which could result in NULL pointer dereference: BUG: unable to handle kernel paging request at 0000000783316040 CPU: 1 PID: 1512 Comm: getevent Tainted: G U O 4.19.20-quilt-2e5dc0ac-00029-gc455a447dd55 #1 RIP: 0010:hid_dump_device+0x9b/0x160 Call Trace: hid_debug_rdesc_show+0x72/0x1d0 seq_read+0xe0/0x410 full_proxy_read+0x5f/0x90 __vfs_read+0x3a/0x170 vfs_read+0xa0/0x150 ksys_read+0x58/0xc0 __x64_sys_read+0x1a/0x20 do_syscall_64+0x55/0x110 entry_SYSCALL_64_after_hwframe+0x49/0xbe Grab driver_input_lock to make sure the input device exists throughout the whole process of dumping the rdesc. [jkosina@suse.cz: update changelog a bit] Signed-off-by: he, bo Signed-off-by: "Zhang, Jun" Signed-off-by: Jiri Kosina --- drivers/hid/hid-debug.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/hid/hid-debug.c b/drivers/hid/hid-debug.c index ac9fda1b5a72..1384e57182af 100644 --- a/drivers/hid/hid-debug.c +++ b/drivers/hid/hid-debug.c @@ -1060,10 +1060,15 @@ static int hid_debug_rdesc_show(struct seq_file *f, void *p) seq_printf(f, "\n\n"); /* dump parsed data and input mappings */ + if (down_interruptible(&hdev->driver_input_lock)) + return 0; + hid_dump_device(hdev, f); seq_printf(f, "\n"); hid_dump_input_mapping(hdev, f); + up(&hdev->driver_input_lock); + return 0; } -- cgit v1.2.3 From 9b70c697e87286ade406e6a02091757307dd4b7c Mon Sep 17 00:00:00 2001 From: Maxime Jourdan Date: Tue, 19 Mar 2019 11:25:37 +0100 Subject: clk: meson-gxbb: round the vdec dividers to closest We want the video decoder clocks to always round to closest. While the muxes are already using CLK_MUX_ROUND_CLOSEST, the corresponding CLK_DIVIDER_ROUND_CLOSEST was forgotten for the dividers. Fix this by adding the flag to the two vdec dividers. Fixes: a565242eb9fc ("clk: meson: gxbb: add the video decoder clocks") Signed-off-by: Maxime Jourdan Acked-by: Neil Armstrong Signed-off-by: Neil Armstrong Link: https://lkml.kernel.org/r/20190319102537.2043-1-mjourdan@baylibre.com --- drivers/clk/meson/gxbb.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/clk/meson/gxbb.c b/drivers/clk/meson/gxbb.c index 04df2e208ed6..29ffb4fde714 100644 --- a/drivers/clk/meson/gxbb.c +++ b/drivers/clk/meson/gxbb.c @@ -2216,6 +2216,7 @@ static struct clk_regmap gxbb_vdec_1_div = { .offset = HHI_VDEC_CLK_CNTL, .shift = 0, .width = 7, + .flags = CLK_DIVIDER_ROUND_CLOSEST, }, .hw.init = &(struct clk_init_data){ .name = "vdec_1_div", @@ -2261,6 +2262,7 @@ static struct clk_regmap gxbb_vdec_hevc_div = { .offset = HHI_VDEC2_CLK_CNTL, .shift = 16, .width = 7, + .flags = CLK_DIVIDER_ROUND_CLOSEST, }, .hw.init = &(struct clk_init_data){ .name = "vdec_hevc_div", -- cgit v1.2.3 From f53b9f146fa1d5c5bb6dc34e27176434b26cd0a7 Mon Sep 17 00:00:00 2001 From: Maxime Jourdan Date: Tue, 19 Mar 2019 09:26:11 +0100 Subject: clk: meson: g12a: fix VPU clock muxes mask There are 8 parents, use 0x7 Fixes: 085a4ea93d54 ("clk: meson: g12a: add peripheral clock controller") Signed-off-by: Maxime Jourdan Acked-by: Neil Armstrong Signed-off-by: Neil Armstrong Link: https://lkml.kernel.org/r/20190319082611.6215-1-mjourdan@baylibre.com --- drivers/clk/meson/g12a.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/clk/meson/g12a.c b/drivers/clk/meson/g12a.c index 0e1ce8c03259..683769f6e90d 100644 --- a/drivers/clk/meson/g12a.c +++ b/drivers/clk/meson/g12a.c @@ -967,7 +967,7 @@ static const char * const g12a_vpu_parent_names[] = { static struct clk_regmap g12a_vpu_0_sel = { .data = &(struct clk_regmap_mux_data){ .offset = HHI_VPU_CLK_CNTL, - .mask = 0x3, + .mask = 0x7, .shift = 9, }, .hw.init = &(struct clk_init_data){ @@ -1011,7 +1011,7 @@ static struct clk_regmap g12a_vpu_0 = { static struct clk_regmap g12a_vpu_1_sel = { .data = &(struct clk_regmap_mux_data){ .offset = HHI_VPU_CLK_CNTL, - .mask = 0x3, + .mask = 0x7, .shift = 25, }, .hw.init = &(struct clk_init_data){ -- cgit v1.2.3 From 9e05e49c29fde3f5e0d82542cb89e26c0bc828d0 Mon Sep 17 00:00:00 2001 From: Neil Armstrong Date: Wed, 13 Mar 2019 14:55:03 +0100 Subject: clk: meson-g12a: fix VPU clock parents First two VPU clock parents are wrong, fix it here. Fixes: 085a4ea93d54 ("clk: meson: g12a: add peripheral clock controller") Signed-off-by: Neil Armstrong Acked-by: Jerome Brunet Link: https://lkml.kernel.org/r/20190313135503.3198-1-narmstrong@baylibre.com --- drivers/clk/meson/g12a.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/clk/meson/g12a.c b/drivers/clk/meson/g12a.c index 683769f6e90d..f7b11e1eeebe 100644 --- a/drivers/clk/meson/g12a.c +++ b/drivers/clk/meson/g12a.c @@ -960,7 +960,7 @@ static struct clk_regmap g12a_sd_emmc_c_clk0 = { /* VPU Clock */ static const char * const g12a_vpu_parent_names[] = { - "fclk_div4", "fclk_div3", "fclk_div5", "fclk_div7", + "fclk_div3", "fclk_div4", "fclk_div5", "fclk_div7", "mpll1", "vid_pll", "hifi_pll", "gp0_pll", }; -- cgit v1.2.3 From 0fcc4c8c044e117ac126ab6df4138ea9a67fa2a9 Mon Sep 17 00:00:00 2001 From: Jann Horn Date: Tue, 19 Mar 2019 02:36:59 +0100 Subject: device_cgroup: fix RCU imbalance in error case When dev_exception_add() returns an error (due to a failed memory allocation), make sure that we move the RCU preemption count back to where it was before we were called. We dropped the RCU read lock inside the loop body, so we can't just "break". sparse complains about this, too: $ make -s C=2 security/device_cgroup.o ./include/linux/rcupdate.h:647:9: warning: context imbalance in 'propagate_exception' - unexpected unlock Fixes: d591fb56618f ("device_cgroup: simplify cgroup tree walk in propagate_exception()") Cc: stable@vger.kernel.org Signed-off-by: Jann Horn Acked-by: Michal Hocko Signed-off-by: Tejun Heo --- security/device_cgroup.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/security/device_cgroup.c b/security/device_cgroup.c index cd97929fac66..dc28914fa72e 100644 --- a/security/device_cgroup.c +++ b/security/device_cgroup.c @@ -560,7 +560,7 @@ static int propagate_exception(struct dev_cgroup *devcg_root, devcg->behavior == DEVCG_DEFAULT_ALLOW) { rc = dev_exception_add(devcg, ex); if (rc) - break; + return rc; } else { /* * in the other possible cases: -- cgit v1.2.3 From d6752e185c3168771787a02dc6a55f32260943cc Mon Sep 17 00:00:00 2001 From: Stephen Boyd Date: Fri, 15 Mar 2019 11:51:12 -0700 Subject: rtc: cros-ec: Fail suspend/resume if wake IRQ can't be configured If we encounter a failure during suspend where this RTC was programmed to wakeup the system from suspend, but that wakeup couldn't be configured because the system didn't support wakeup interrupts, we'll run into the following warning: Unbalanced IRQ 166 wake disable WARNING: CPU: 7 PID: 3071 at kernel/irq/manage.c:669 irq_set_irq_wake+0x108/0x278 This happens because the suspend process isn't aborted when the RTC fails to configure the wakeup IRQ. Instead, we continue suspending the system and then another suspend callback fails the suspend process and "unwinds" the previously suspended drivers by calling their resume callbacks. When we get back to resuming this RTC driver, we'll call disable_irq_wake() on an IRQ that hasn't been configured for wake. Let's just fail suspend/resume here if we can't configure the system to wake and the user has chosen to wakeup with this device. This fixes this warning and makes the code more robust in case there are systems out there that can't wakeup from suspend on this line but the user has chosen to do so. Cc: Enric Balletbo i Serra Cc: Evan Green Cc: Benson Leung Cc: Guenter Roeck Signed-off-by: Stephen Boyd Acked-By: Benson Leung Signed-off-by: Alexandre Belloni --- drivers/rtc/rtc-cros-ec.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/rtc/rtc-cros-ec.c b/drivers/rtc/rtc-cros-ec.c index e5444296075e..4d6bf9304ceb 100644 --- a/drivers/rtc/rtc-cros-ec.c +++ b/drivers/rtc/rtc-cros-ec.c @@ -298,7 +298,7 @@ static int cros_ec_rtc_suspend(struct device *dev) struct cros_ec_rtc *cros_ec_rtc = dev_get_drvdata(&pdev->dev); if (device_may_wakeup(dev)) - enable_irq_wake(cros_ec_rtc->cros_ec->irq); + return enable_irq_wake(cros_ec_rtc->cros_ec->irq); return 0; } @@ -309,7 +309,7 @@ static int cros_ec_rtc_resume(struct device *dev) struct cros_ec_rtc *cros_ec_rtc = dev_get_drvdata(&pdev->dev); if (device_may_wakeup(dev)) - disable_irq_wake(cros_ec_rtc->cros_ec->irq); + return disable_irq_wake(cros_ec_rtc->cros_ec->irq); return 0; } -- cgit v1.2.3 From 15d82d22498784966df8e4696174a16b02cc1052 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Wed, 20 Mar 2019 11:32:14 +0100 Subject: rtc: sh: Fix invalid alarm warning for non-enabled alarm When no alarm has been programmed on RSK-RZA1, an error message is printed during boot: rtc rtc0: invalid alarm value: 2019-03-14T255:255:255 sh_rtc_read_alarm_value() returns 0xff when querying a hardware alarm field that is not enabled. __rtc_read_alarm() validates the received alarm values, and fills in missing fields when needed. While 0xff is handled fine for the year, month, and day fields, and corrected as considered being out-of-range, this is not the case for the hour, minute, and second fields, where -1 is expected for missing fields. Fix this by returning -1 instead, as this value is handled fine for all fields. Signed-off-by: Geert Uytterhoeven Signed-off-by: Alexandre Belloni --- drivers/rtc/rtc-sh.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/rtc/rtc-sh.c b/drivers/rtc/rtc-sh.c index d417b203cbc5..1d3de2a3d1a4 100644 --- a/drivers/rtc/rtc-sh.c +++ b/drivers/rtc/rtc-sh.c @@ -374,7 +374,7 @@ static int sh_rtc_set_time(struct device *dev, struct rtc_time *tm) static inline int sh_rtc_read_alarm_value(struct sh_rtc *rtc, int reg_off) { unsigned int byte; - int value = 0xff; /* return 0xff for ignored values */ + int value = -1; /* return -1 for ignored values */ byte = readb(rtc->regbase + reg_off); if (byte & AR_ENB) { -- cgit v1.2.3 From 22e7d5148d9a0b2e5bd20913f55cce89b58cd990 Mon Sep 17 00:00:00 2001 From: Alexandre Belloni Date: Wed, 20 Mar 2019 13:10:23 +0100 Subject: rtc: sd3078: fix manufacturer name The proper manufacturer name is Shenzhen whwave. Signed-off-by: Alexandre Belloni --- drivers/rtc/Kconfig | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/rtc/Kconfig b/drivers/rtc/Kconfig index a71734c41693..f933c06bff4f 100644 --- a/drivers/rtc/Kconfig +++ b/drivers/rtc/Kconfig @@ -667,9 +667,9 @@ config RTC_DRV_S5M will be called rtc-s5m. config RTC_DRV_SD3078 - tristate "ZXW Crystal SD3078" + tristate "ZXW Shenzhen whwave SD3078" help - If you say yes here you get support for the ZXW Crystal + If you say yes here you get support for the ZXW Shenzhen whwave SD3078 RTC chips. This driver can also be built as a module. If so, the module -- cgit v1.2.3 From fd35759ce32b60d3eb52436894bab996dbf8cffa Mon Sep 17 00:00:00 2001 From: Peter Hutterer Date: Wed, 20 Mar 2019 08:48:23 +1000 Subject: HID: logitech: Handle 0 scroll events for the m560 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit hidpp_scroll_counter_handle_scroll() doesn't expect a 0-value scroll event, it gets interpreted as a negative scroll direction event. This can cause scroll direction resets and thus broken scrolling. Fixes: 4435ff2f09a2fc ("HID: logitech: Enable high-resolution scrolling on Logitech mice") Cc: stable@vger.kernel.org # v5.0 Reported-and-tested-by: Aimo Metsälä Signed-off-by: Peter Hutterer Signed-off-by: Benjamin Tissoires --- drivers/hid/hid-logitech-hidpp.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/hid/hid-logitech-hidpp.c b/drivers/hid/hid-logitech-hidpp.c index 0a243247b231..199cc256e9d9 100644 --- a/drivers/hid/hid-logitech-hidpp.c +++ b/drivers/hid/hid-logitech-hidpp.c @@ -2614,8 +2614,9 @@ static int m560_raw_event(struct hid_device *hdev, u8 *data, int size) input_report_rel(mydata->input, REL_Y, v); v = hid_snto32(data[6], 8); - hidpp_scroll_counter_handle_scroll( - &hidpp->vertical_wheel_counter, v); + if (v != 0) + hidpp_scroll_counter_handle_scroll( + &hidpp->vertical_wheel_counter, v); input_sync(mydata->input); } -- cgit v1.2.3 From bfc01ddff2b0c33de21af436324a669e95ac7e78 Mon Sep 17 00:00:00 2001 From: Steffen Klassert Date: Wed, 20 Mar 2019 17:54:44 +0100 Subject: Revert "net: xfrm: Add '_rcu' tag for rcu protected pointer in netns_xfrm" This reverts commit f10e0010fae8174dc20bdc872bcaa85baa925cb7. This commit was just wrong. It caused a lot of syzbot warnings, so just revert it. Signed-off-by: Steffen Klassert --- include/net/netns/xfrm.h | 2 +- net/xfrm/xfrm_user.c | 30 +++++++----------------------- 2 files changed, 8 insertions(+), 24 deletions(-) diff --git a/include/net/netns/xfrm.h b/include/net/netns/xfrm.h index d2a36fb9f92a..59f45b1e9dac 100644 --- a/include/net/netns/xfrm.h +++ b/include/net/netns/xfrm.h @@ -57,7 +57,7 @@ struct netns_xfrm { struct list_head inexact_bins; - struct sock __rcu *nlsk; + struct sock *nlsk; struct sock *nlsk_stash; u32 sysctl_aevent_etime; diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index 944589832343..8d4d52fd457b 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -1071,22 +1071,6 @@ static inline int xfrm_nlmsg_multicast(struct net *net, struct sk_buff *skb, return nlmsg_multicast(nlsk, skb, pid, group, GFP_ATOMIC); } -/* A similar wrapper like xfrm_nlmsg_multicast checking that nlsk is still - * available. - */ -static inline int xfrm_nlmsg_unicast(struct net *net, struct sk_buff *skb, - u32 pid) -{ - struct sock *nlsk = rcu_dereference(net->xfrm.nlsk); - - if (!nlsk) { - kfree_skb(skb); - return -EPIPE; - } - - return nlmsg_unicast(nlsk, skb, pid); -} - static inline unsigned int xfrm_spdinfo_msgsize(void) { return NLMSG_ALIGN(4) @@ -1211,7 +1195,7 @@ static int xfrm_get_spdinfo(struct sk_buff *skb, struct nlmsghdr *nlh, err = build_spdinfo(r_skb, net, sportid, seq, *flags); BUG_ON(err < 0); - return xfrm_nlmsg_unicast(net, r_skb, sportid); + return nlmsg_unicast(net->xfrm.nlsk, r_skb, sportid); } static inline unsigned int xfrm_sadinfo_msgsize(void) @@ -1270,7 +1254,7 @@ static int xfrm_get_sadinfo(struct sk_buff *skb, struct nlmsghdr *nlh, err = build_sadinfo(r_skb, net, sportid, seq, *flags); BUG_ON(err < 0); - return xfrm_nlmsg_unicast(net, r_skb, sportid); + return nlmsg_unicast(net->xfrm.nlsk, r_skb, sportid); } static int xfrm_get_sa(struct sk_buff *skb, struct nlmsghdr *nlh, @@ -1290,7 +1274,7 @@ static int xfrm_get_sa(struct sk_buff *skb, struct nlmsghdr *nlh, if (IS_ERR(resp_skb)) { err = PTR_ERR(resp_skb); } else { - err = xfrm_nlmsg_unicast(net, resp_skb, NETLINK_CB(skb).portid); + err = nlmsg_unicast(net->xfrm.nlsk, resp_skb, NETLINK_CB(skb).portid); } xfrm_state_put(x); out_noput: @@ -1353,7 +1337,7 @@ static int xfrm_alloc_userspi(struct sk_buff *skb, struct nlmsghdr *nlh, goto out; } - err = xfrm_nlmsg_unicast(net, resp_skb, NETLINK_CB(skb).portid); + err = nlmsg_unicast(net->xfrm.nlsk, resp_skb, NETLINK_CB(skb).portid); out: xfrm_state_put(x); @@ -1919,8 +1903,8 @@ static int xfrm_get_policy(struct sk_buff *skb, struct nlmsghdr *nlh, if (IS_ERR(resp_skb)) { err = PTR_ERR(resp_skb); } else { - err = xfrm_nlmsg_unicast(net, resp_skb, - NETLINK_CB(skb).portid); + err = nlmsg_unicast(net->xfrm.nlsk, resp_skb, + NETLINK_CB(skb).portid); } } else { xfrm_audit_policy_delete(xp, err ? 0 : 1, true); @@ -2078,7 +2062,7 @@ static int xfrm_get_ae(struct sk_buff *skb, struct nlmsghdr *nlh, err = build_aevent(r_skb, x, &c); BUG_ON(err < 0); - err = xfrm_nlmsg_unicast(net, r_skb, NETLINK_CB(skb).portid); + err = nlmsg_unicast(net->xfrm.nlsk, r_skb, NETLINK_CB(skb).portid); spin_unlock_bh(&x->lock); xfrm_state_put(x); return err; -- cgit v1.2.3 From 19441e35a43b616ea6afad91ed0d9e77268d8f6a Mon Sep 17 00:00:00 2001 From: Olivier Moysan Date: Mon, 4 Mar 2019 15:52:43 +0100 Subject: ASoC: stm32: dfsdm: manage multiple prepare The DFSDM must be stopped when a new setting is applied. restart systematically DFSDM on multiple prepare calls, to apply changes. Signed-off-by: Olivier Moysan Signed-off-by: Mark Brown --- sound/soc/stm/stm32_adfsdm.c | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/sound/soc/stm/stm32_adfsdm.c b/sound/soc/stm/stm32_adfsdm.c index 47901983a6ff..d77f0421e4f0 100644 --- a/sound/soc/stm/stm32_adfsdm.c +++ b/sound/soc/stm/stm32_adfsdm.c @@ -9,6 +9,7 @@ #include #include +#include #include #include @@ -37,6 +38,8 @@ struct stm32_adfsdm_priv { /* PCM buffer */ unsigned char *pcm_buff; unsigned int pos; + + struct mutex lock; /* protect against race condition on iio state */ }; static const struct snd_pcm_hardware stm32_adfsdm_pcm_hw = { @@ -62,10 +65,12 @@ static void stm32_adfsdm_shutdown(struct snd_pcm_substream *substream, { struct stm32_adfsdm_priv *priv = snd_soc_dai_get_drvdata(dai); + mutex_lock(&priv->lock); if (priv->iio_active) { iio_channel_stop_all_cb(priv->iio_cb); priv->iio_active = false; } + mutex_unlock(&priv->lock); } static int stm32_adfsdm_dai_prepare(struct snd_pcm_substream *substream, @@ -74,13 +79,19 @@ static int stm32_adfsdm_dai_prepare(struct snd_pcm_substream *substream, struct stm32_adfsdm_priv *priv = snd_soc_dai_get_drvdata(dai); int ret; + mutex_lock(&priv->lock); + if (priv->iio_active) { + iio_channel_stop_all_cb(priv->iio_cb); + priv->iio_active = false; + } + ret = iio_write_channel_attribute(priv->iio_ch, substream->runtime->rate, 0, IIO_CHAN_INFO_SAMP_FREQ); if (ret < 0) { dev_err(dai->dev, "%s: Failed to set %d sampling rate\n", __func__, substream->runtime->rate); - return ret; + goto out; } if (!priv->iio_active) { @@ -92,6 +103,9 @@ static int stm32_adfsdm_dai_prepare(struct snd_pcm_substream *substream, __func__, ret); } +out: + mutex_unlock(&priv->lock); + return ret; } @@ -299,6 +313,7 @@ static int stm32_adfsdm_probe(struct platform_device *pdev) priv->dev = &pdev->dev; priv->dai_drv = stm32_adfsdm_dai; + mutex_init(&priv->lock); dev_set_drvdata(&pdev->dev, priv); -- cgit v1.2.3 From c47255b61129857b74b0d86eaf59335348be05e0 Mon Sep 17 00:00:00 2001 From: Olivier Moysan Date: Mon, 4 Mar 2019 15:52:44 +0100 Subject: ASoC: stm32: dfsdm: fix debugfs warnings on entry creation Register platform component with a prefix, to avoid warnings on debugfs entries creation, due to component name redundancy. Signed-off-by: Olivier Moysan Signed-off-by: Mark Brown --- sound/soc/stm/stm32_adfsdm.c | 21 ++++++++++++++++++--- 1 file changed, 18 insertions(+), 3 deletions(-) diff --git a/sound/soc/stm/stm32_adfsdm.c b/sound/soc/stm/stm32_adfsdm.c index d77f0421e4f0..78bed9734713 100644 --- a/sound/soc/stm/stm32_adfsdm.c +++ b/sound/soc/stm/stm32_adfsdm.c @@ -305,6 +305,7 @@ MODULE_DEVICE_TABLE(of, stm32_adfsdm_of_match); static int stm32_adfsdm_probe(struct platform_device *pdev) { struct stm32_adfsdm_priv *priv; + struct snd_soc_component *component; int ret; priv = devm_kzalloc(&pdev->dev, sizeof(*priv), GFP_KERNEL); @@ -332,9 +333,15 @@ static int stm32_adfsdm_probe(struct platform_device *pdev) if (IS_ERR(priv->iio_cb)) return PTR_ERR(priv->iio_cb); - ret = devm_snd_soc_register_component(&pdev->dev, - &stm32_adfsdm_soc_platform, - NULL, 0); + component = devm_kzalloc(&pdev->dev, sizeof(*component), GFP_KERNEL); + if (!component) + return -ENOMEM; +#ifdef CONFIG_DEBUG_FS + component->debugfs_prefix = "pcm"; +#endif + + ret = snd_soc_add_component(&pdev->dev, component, + &stm32_adfsdm_soc_platform, NULL, 0); if (ret < 0) dev_err(&pdev->dev, "%s: Failed to register PCM platform\n", __func__); @@ -342,12 +349,20 @@ static int stm32_adfsdm_probe(struct platform_device *pdev) return ret; } +static int stm32_adfsdm_remove(struct platform_device *pdev) +{ + snd_soc_unregister_component(&pdev->dev); + + return 0; +} + static struct platform_driver stm32_adfsdm_driver = { .driver = { .name = STM32_ADFSDM_DRV_NAME, .of_match_table = stm32_adfsdm_of_match, }, .probe = stm32_adfsdm_probe, + .remove = stm32_adfsdm_remove, }; module_platform_driver(stm32_adfsdm_driver); -- cgit v1.2.3 From df2f677dee3ce4afda7ee561dd4f321c40320afd Mon Sep 17 00:00:00 2001 From: Len Brown Date: Fri, 15 Feb 2019 21:58:23 -0800 Subject: tools/power turbostat: Restore ability to execute in topology-order turbostat executes on CPUs in "topology order". This is an optimization for measuring profoundly idle systems -- as the closest hardware is woken next... Fix a typo that was added with the sub-die-node support, that broke topology ordering on multi-node systems. Signed-off-by: Len Brown --- tools/power/x86/turbostat/turbostat.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index 9327c0ddc3a5..78d88b7d4e98 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -314,9 +314,8 @@ int for_all_cpus(int (func)(struct thread_data *, struct core_data *, struct pkg int retval, pkg_no, core_no, thread_no, node_no; for (pkg_no = 0; pkg_no < topo.num_packages; ++pkg_no) { - for (core_no = 0; core_no < topo.cores_per_node; ++core_no) { - for (node_no = 0; node_no < topo.nodes_per_pkg; - node_no++) { + for (node_no = 0; node_no < topo.nodes_per_pkg; node_no++) { + for (core_no = 0; core_no < topo.cores_per_node; ++core_no) { for (thread_no = 0; thread_no < topo.threads_per_core; ++thread_no) { struct thread_data *t; -- cgit v1.2.3 From 562855eeb1136009bc9d597116ac5829bf82dd06 Mon Sep 17 00:00:00 2001 From: Len Brown Date: Tue, 19 Mar 2019 17:52:32 -0400 Subject: tools/power turbostat: Cleanup CC3-skip code no functional change Signed-off-by: Len Brown --- tools/power/x86/turbostat/turbostat.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index 78d88b7d4e98..861cb795679b 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -666,7 +666,7 @@ void print_header(char *delim) if (DO_BIC(BIC_CPU_c1)) outp += sprintf(outp, "%sCPU%%c1", (printed++ ? delim : "")); - if (DO_BIC(BIC_CPU_c3) && !do_slm_cstates && !do_knl_cstates && !do_cnl_cstates) + if (DO_BIC(BIC_CPU_c3)) outp += sprintf(outp, "%sCPU%%c3", (printed++ ? delim : "")); if (DO_BIC(BIC_CPU_c6)) outp += sprintf(outp, "%sCPU%%c6", (printed++ ? delim : "")); @@ -1002,7 +1002,7 @@ int format_counters(struct thread_data *t, struct core_data *c, if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE)) goto done; - if (DO_BIC(BIC_CPU_c3) && !do_slm_cstates && !do_knl_cstates && !do_cnl_cstates) + if (DO_BIC(BIC_CPU_c3)) outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * c->c3/tsc); if (DO_BIC(BIC_CPU_c6)) outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * c->c6/tsc); @@ -1817,7 +1817,7 @@ retry: if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE)) goto done; - if (DO_BIC(BIC_CPU_c3) && !do_slm_cstates && !do_knl_cstates && !do_cnl_cstates) { + if (DO_BIC(BIC_CPU_c3)) { if (get_msr(cpu, MSR_CORE_C3_RESIDENCY, &c->c3)) return -6; } @@ -4703,6 +4703,9 @@ void process_cpuid() do_knl_cstates = is_knl(family, model); do_cnl_cstates = is_cnl(family, model); + if (do_slm_cstates || do_knl_cstates || do_cnl_cstates) + BIC_NOT_PRESENT(BIC_CPU_c3); + if (!quiet) decode_misc_pwr_mgmt_msr(); -- cgit v1.2.3 From 31a1f15cea5e56fe8151ddf01278cb60c325626b Mon Sep 17 00:00:00 2001 From: Len Brown Date: Tue, 19 Mar 2019 17:55:06 -0400 Subject: tools/power turbostat: Cleanup CNL-specific code no functional change. Signed-off-by: Len Brown --- tools/power/x86/turbostat/turbostat.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index 861cb795679b..f2384ee98cdc 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -63,7 +63,6 @@ unsigned int dump_only; unsigned int do_snb_cstates; unsigned int do_knl_cstates; unsigned int do_slm_cstates; -unsigned int do_cnl_cstates; unsigned int use_c1_residency_msr; unsigned int has_aperf; unsigned int has_epb; @@ -4701,9 +4700,8 @@ void process_cpuid() } do_slm_cstates = is_slm(family, model); do_knl_cstates = is_knl(family, model); - do_cnl_cstates = is_cnl(family, model); - if (do_slm_cstates || do_knl_cstates || do_cnl_cstates) + if (do_slm_cstates || do_knl_cstates || is_cnl(family, model)) BIC_NOT_PRESENT(BIC_CPU_c3); if (!quiet) -- cgit v1.2.3 From 937807d355a375393557674e3233662a7131c46b Mon Sep 17 00:00:00 2001 From: Len Brown Date: Tue, 19 Mar 2019 19:09:07 -0400 Subject: tools/power turbostat: Add Icelake support From a turbostat point of view, Iceland is like Cannonlake. Signed-off-by: Len Brown --- tools/power/x86/turbostat/turbostat.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index f2384ee98cdc..4f81b52aef84 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -4449,6 +4449,9 @@ unsigned int intel_model_duplicates(unsigned int model) case INTEL_FAM6_KABYLAKE_MOBILE: case INTEL_FAM6_KABYLAKE_DESKTOP: return INTEL_FAM6_SKYLAKE_MOBILE; + + case INTEL_FAM6_ICELAKE_MOBILE: + return INTEL_FAM6_CANNONLAKE_MOBILE; } return model; } -- cgit v1.2.3 From 6de68fe15a0fcd0e887d73bd7a549e4dc6446481 Mon Sep 17 00:00:00 2001 From: Len Brown Date: Thu, 14 Feb 2019 19:17:40 -0800 Subject: tools/power turbostat: Add Die column If the system has more than one software visible die per package, print a Die column. Signed-off-by: Len Brown --- tools/power/x86/turbostat/turbostat.c | 42 ++++++++++++++++++++++++++++++++--- 1 file changed, 39 insertions(+), 3 deletions(-) diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index 4f81b52aef84..7d6c14ecf35a 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -272,6 +272,7 @@ struct system_summary { struct cpu_topology { int physical_package_id; + int die_id; int logical_cpu_id; int physical_node_id; int logical_node_id; /* 0-based count within the package */ @@ -282,6 +283,7 @@ struct cpu_topology { struct topo_params { int num_packages; + int num_die; int num_cpus; int num_cores; int max_cpu_num; @@ -440,6 +442,7 @@ struct msr_counter bic[] = { { 0x0, "CPU" }, { 0x0, "APIC" }, { 0x0, "X2APIC" }, + { 0x0, "Die" }, }; #define MAX_BIC (sizeof(bic) / sizeof(struct msr_counter)) @@ -493,6 +496,7 @@ struct msr_counter bic[] = { #define BIC_CPU (1ULL << 47) #define BIC_APIC (1ULL << 48) #define BIC_X2APIC (1ULL << 49) +#define BIC_Die (1ULL << 50) #define BIC_DISABLED_BY_DEFAULT (BIC_USEC | BIC_TOD | BIC_APIC | BIC_X2APIC) @@ -619,6 +623,8 @@ void print_header(char *delim) outp += sprintf(outp, "%sTime_Of_Day_Seconds", (printed++ ? delim : "")); if (DO_BIC(BIC_Package)) outp += sprintf(outp, "%sPackage", (printed++ ? delim : "")); + if (DO_BIC(BIC_Die)) + outp += sprintf(outp, "%sDie", (printed++ ? delim : "")); if (DO_BIC(BIC_Node)) outp += sprintf(outp, "%sNode", (printed++ ? delim : "")); if (DO_BIC(BIC_Core)) @@ -902,6 +908,8 @@ int format_counters(struct thread_data *t, struct core_data *c, if (t == &average.threads) { if (DO_BIC(BIC_Package)) outp += sprintf(outp, "%s-", (printed++ ? delim : "")); + if (DO_BIC(BIC_Die)) + outp += sprintf(outp, "%s-", (printed++ ? delim : "")); if (DO_BIC(BIC_Node)) outp += sprintf(outp, "%s-", (printed++ ? delim : "")); if (DO_BIC(BIC_Core)) @@ -919,6 +927,12 @@ int format_counters(struct thread_data *t, struct core_data *c, else outp += sprintf(outp, "%s-", (printed++ ? delim : "")); } + if (DO_BIC(BIC_Die)) { + if (c) + outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), cpus[t->cpu_id].die_id); + else + outp += sprintf(outp, "%s-", (printed++ ? delim : "")); + } if (DO_BIC(BIC_Node)) { if (t) outp += sprintf(outp, "%s%d", @@ -2454,6 +2468,8 @@ void free_all_buffers(void) /* * Parse a file containing a single int. + * Return 0 if file can not be opened + * Exit if file can be opened, but can not be parsed */ int parse_int_file(const char *fmt, ...) { @@ -2465,7 +2481,9 @@ int parse_int_file(const char *fmt, ...) va_start(args, fmt); vsnprintf(path, sizeof(path), fmt, args); va_end(args); - filep = fopen_or_die(path, "r"); + filep = fopen(path, "r"); + if (!filep) + return 0; if (fscanf(filep, "%d", &value) != 1) err(1, "%s: failed to parse number from file", path); fclose(filep); @@ -2486,6 +2504,11 @@ int get_physical_package_id(int cpu) return parse_int_file("/sys/devices/system/cpu/cpu%d/topology/physical_package_id", cpu); } +int get_die_id(int cpu) +{ + return parse_int_file("/sys/devices/system/cpu/cpu%d/topology/die_id", cpu); +} + int get_core_id(int cpu) { return parse_int_file("/sys/devices/system/cpu/cpu%d/topology/core_id", cpu); @@ -4772,6 +4795,7 @@ void topology_probe() int i; int max_core_id = 0; int max_package_id = 0; + int max_die_id = 0; int max_siblings = 0; /* Initialize num_cpus, max_cpu_num */ @@ -4838,6 +4862,11 @@ void topology_probe() if (cpus[i].physical_package_id > max_package_id) max_package_id = cpus[i].physical_package_id; + /* get die information */ + cpus[i].die_id = get_die_id(i); + if (cpus[i].die_id > max_die_id) + max_die_id = cpus[i].die_id; + /* get numa node information */ cpus[i].physical_node_id = get_physical_node_id(&cpus[i]); if (cpus[i].physical_node_id > topo.max_node_num) @@ -4863,6 +4892,13 @@ void topology_probe() if (!summary_only && topo.cores_per_node > 1) BIC_PRESENT(BIC_Core); + topo.num_die = max_die_id + 1; + if (debug > 1) + fprintf(outf, "max_die_id %d, sizing for %d die\n", + max_die_id, topo.num_die); + if (!summary_only && topo.num_die > 1) + BIC_PRESENT(BIC_Die); + topo.num_packages = max_package_id + 1; if (debug > 1) fprintf(outf, "max_package_id %d, sizing for %d packages\n", @@ -4887,8 +4923,8 @@ void topology_probe() if (cpu_is_not_present(i)) continue; fprintf(outf, - "cpu %d pkg %d node %d lnode %d core %d thread %d\n", - i, cpus[i].physical_package_id, + "cpu %d pkg %d die %d node %d lnode %d core %d thread %d\n", + i, cpus[i].physical_package_id, cpus[i].die_id, cpus[i].physical_node_id, cpus[i].logical_node_id, cpus[i].physical_core_id, -- cgit v1.2.3 From 0a42d235e50d677775056324d0762dd102a9ebb0 Mon Sep 17 00:00:00 2001 From: Prarit Bhargava Date: Mon, 13 Aug 2018 08:45:01 -0400 Subject: tools/power turbostat: Do not display an error on systems without a cpufreq driver Running without a cpufreq driver is a valid case so warnings output in this case should not be to stderr. Use outf instead of stderr for these warnings. Signed-off-by: Prarit Bhargava Signed-off-by: Len Brown --- tools/power/x86/turbostat/turbostat.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index 7d6c14ecf35a..0716abdb1bd9 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -3465,7 +3465,7 @@ dump_sysfs_pstate_config(void) base_cpu); input = fopen(path, "r"); if (input == NULL) { - fprintf(stderr, "NSFOD %s\n", path); + fprintf(outf, "NSFOD %s\n", path); return; } fgets(driver_buf, sizeof(driver_buf), input); @@ -3475,7 +3475,7 @@ dump_sysfs_pstate_config(void) base_cpu); input = fopen(path, "r"); if (input == NULL) { - fprintf(stderr, "NSFOD %s\n", path); + fprintf(outf, "NSFOD %s\n", path); return; } fgets(governor_buf, sizeof(governor_buf), input); -- cgit v1.2.3 From 9392bd98bba760be96ee67f51cb040dcf7aa190e Mon Sep 17 00:00:00 2001 From: Calvin Walton Date: Fri, 17 Aug 2018 12:34:41 -0400 Subject: tools/power turbostat: Add support for AMD Fam 17h (Zen) RAPL Based on the Open-Source Register Reference for AMD Family 17h Processors Models 00h-2Fh: https://support.amd.com/TechDocs/56255_OSRR.pdf These processors report RAPL support in bit 14 of CPUID 0x80000007 EDX, and the following MSRs are present: 0xc0010299 (RAPL_PWR_UNIT), like Intel's RAPL_POWER_UNIT 0xc001029a (CORE_ENERGY_STAT), kind of like Intel's PP0_ENERGY_STATUS 0xc001029b (PKG_ENERGY_STAT), like Intel's PKG_ENERGY_STATUS A notable difference from the Intel implementation is that AMD reports the "Cores" energy usage separately for each core, rather than a per-package total. The code has been adjusted to handle either case in a generic way. I haven't yet enabled collection of package power, due to being unable to test it on multi-node systems (TR, EPYC). Signed-off-by: Calvin Walton Signed-off-by: Len Brown --- tools/power/x86/turbostat/turbostat.c | 159 +++++++++++++++++++++++++++------- 1 file changed, 130 insertions(+), 29 deletions(-) diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index 0716abdb1bd9..538878b9deb8 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -44,6 +44,7 @@ #include #include #include +#include char *proc_stat = "/proc/stat"; FILE *outf; @@ -140,9 +141,21 @@ unsigned int first_counter_read = 1; #define RAPL_CORES_ENERGY_STATUS (1 << 9) /* 0x639 MSR_PP0_ENERGY_STATUS */ +#define RAPL_PER_CORE_ENERGY (1 << 10) + /* Indicates cores energy collection is per-core, + * not per-package. */ +#define RAPL_AMD_F17H (1 << 11) + /* 0xc0010299 MSR_RAPL_PWR_UNIT */ + /* 0xc001029a MSR_CORE_ENERGY_STAT */ + /* 0xc001029b MSR_PKG_ENERGY_STAT */ #define RAPL_CORES (RAPL_CORES_ENERGY_STATUS | RAPL_CORES_POWER_LIMIT) #define TJMAX_DEFAULT 100 +/* MSRs that are not yet in the kernel-provided header. */ +#define MSR_RAPL_PWR_UNIT 0xc0010299 +#define MSR_CORE_ENERGY_STAT 0xc001029a +#define MSR_PKG_ENERGY_STAT 0xc001029b + #define MAX(a, b) ((a) > (b) ? (a) : (b)) /* @@ -186,6 +199,7 @@ struct core_data { unsigned long long c7; unsigned long long mc6_us; /* duplicate as per-core for now, even though per module */ unsigned int core_temp_c; + unsigned int core_energy; /* MSR_CORE_ENERGY_STAT */ unsigned int core_id; unsigned long long counter[MAX_ADDED_COUNTERS]; } *core_even, *core_odd; @@ -684,6 +698,14 @@ void print_header(char *delim) if (DO_BIC(BIC_CoreTmp)) outp += sprintf(outp, "%sCoreTmp", (printed++ ? delim : "")); + if (do_rapl && !rapl_joules) { + if (DO_BIC(BIC_CorWatt) && (do_rapl & RAPL_PER_CORE_ENERGY)) + outp += sprintf(outp, "%sCorWatt", (printed++ ? delim : "")); + } else if (do_rapl && rapl_joules) { + if (DO_BIC(BIC_Cor_J) && (do_rapl & RAPL_PER_CORE_ENERGY)) + outp += sprintf(outp, "%sCor_J", (printed++ ? delim : "")); + } + for (mp = sys.cp; mp; mp = mp->next) { if (mp->format == FORMAT_RAW) { if (mp->width == 64) @@ -738,7 +760,7 @@ void print_header(char *delim) if (do_rapl && !rapl_joules) { if (DO_BIC(BIC_PkgWatt)) outp += sprintf(outp, "%sPkgWatt", (printed++ ? delim : "")); - if (DO_BIC(BIC_CorWatt)) + if (DO_BIC(BIC_CorWatt) && !(do_rapl & RAPL_PER_CORE_ENERGY)) outp += sprintf(outp, "%sCorWatt", (printed++ ? delim : "")); if (DO_BIC(BIC_GFXWatt)) outp += sprintf(outp, "%sGFXWatt", (printed++ ? delim : "")); @@ -751,7 +773,7 @@ void print_header(char *delim) } else if (do_rapl && rapl_joules) { if (DO_BIC(BIC_Pkg_J)) outp += sprintf(outp, "%sPkg_J", (printed++ ? delim : "")); - if (DO_BIC(BIC_Cor_J)) + if (DO_BIC(BIC_Cor_J) && !(do_rapl & RAPL_PER_CORE_ENERGY)) outp += sprintf(outp, "%sCor_J", (printed++ ? delim : "")); if (DO_BIC(BIC_GFX_J)) outp += sprintf(outp, "%sGFX_J", (printed++ ? delim : "")); @@ -812,6 +834,7 @@ int dump_counters(struct thread_data *t, struct core_data *c, outp += sprintf(outp, "c6: %016llX\n", c->c6); outp += sprintf(outp, "c7: %016llX\n", c->c7); outp += sprintf(outp, "DTS: %dC\n", c->core_temp_c); + outp += sprintf(outp, "Joules: %0X\n", c->core_energy); for (i = 0, mp = sys.cp; mp; i++, mp = mp->next) { outp += sprintf(outp, "cADDED [%d] msr0x%x: %08llX\n", @@ -1045,6 +1068,20 @@ int format_counters(struct thread_data *t, struct core_data *c, } } + /* + * If measurement interval exceeds minimum RAPL Joule Counter range, + * indicate that results are suspect by printing "**" in fraction place. + */ + if (interval_float < rapl_joule_counter_range) + fmt8 = "%s%.2f"; + else + fmt8 = "%6.0f**"; + + if (DO_BIC(BIC_CorWatt) && (do_rapl & RAPL_PER_CORE_ENERGY)) + outp += sprintf(outp, fmt8, (printed++ ? delim : ""), c->core_energy * rapl_energy_units / interval_float); + if (DO_BIC(BIC_Cor_J) && (do_rapl & RAPL_PER_CORE_ENERGY)) + outp += sprintf(outp, fmt8, (printed++ ? delim : ""), c->core_energy * rapl_energy_units); + /* print per-package data only for 1st core in package */ if (!(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE)) goto done; @@ -1097,18 +1134,9 @@ int format_counters(struct thread_data *t, struct core_data *c, if (DO_BIC(BIC_SYS_LPI)) outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->sys_lpi / 1000000.0 / interval_float); - /* - * If measurement interval exceeds minimum RAPL Joule Counter range, - * indicate that results are suspect by printing "**" in fraction place. - */ - if (interval_float < rapl_joule_counter_range) - fmt8 = "%s%.2f"; - else - fmt8 = "%6.0f**"; - if (DO_BIC(BIC_PkgWatt)) outp += sprintf(outp, fmt8, (printed++ ? delim : ""), p->energy_pkg * rapl_energy_units / interval_float); - if (DO_BIC(BIC_CorWatt)) + if (DO_BIC(BIC_CorWatt) && !(do_rapl & RAPL_PER_CORE_ENERGY)) outp += sprintf(outp, fmt8, (printed++ ? delim : ""), p->energy_cores * rapl_energy_units / interval_float); if (DO_BIC(BIC_GFXWatt)) outp += sprintf(outp, fmt8, (printed++ ? delim : ""), p->energy_gfx * rapl_energy_units / interval_float); @@ -1116,7 +1144,7 @@ int format_counters(struct thread_data *t, struct core_data *c, outp += sprintf(outp, fmt8, (printed++ ? delim : ""), p->energy_dram * rapl_dram_energy_units / interval_float); if (DO_BIC(BIC_Pkg_J)) outp += sprintf(outp, fmt8, (printed++ ? delim : ""), p->energy_pkg * rapl_energy_units); - if (DO_BIC(BIC_Cor_J)) + if (DO_BIC(BIC_Cor_J) && !(do_rapl & RAPL_PER_CORE_ENERGY)) outp += sprintf(outp, fmt8, (printed++ ? delim : ""), p->energy_cores * rapl_energy_units); if (DO_BIC(BIC_GFX_J)) outp += sprintf(outp, fmt8, (printed++ ? delim : ""), p->energy_gfx * rapl_energy_units); @@ -1261,6 +1289,8 @@ delta_core(struct core_data *new, struct core_data *old) old->core_temp_c = new->core_temp_c; old->mc6_us = new->mc6_us - old->mc6_us; + DELTA_WRAP32(new->core_energy, old->core_energy); + for (i = 0, mp = sys.cp; mp; i++, mp = mp->next) { if (mp->format == FORMAT_RAW) old->counter[i] = new->counter[i]; @@ -1403,6 +1433,7 @@ void clear_counters(struct thread_data *t, struct core_data *c, struct pkg_data c->c7 = 0; c->mc6_us = 0; c->core_temp_c = 0; + c->core_energy = 0; p->pkg_wtd_core_c0 = 0; p->pkg_any_core_c0 = 0; @@ -1485,6 +1516,8 @@ int sum_counters(struct thread_data *t, struct core_data *c, average.cores.core_temp_c = MAX(average.cores.core_temp_c, c->core_temp_c); + average.cores.core_energy += c->core_energy; + for (i = 0, mp = sys.cp; mp; i++, mp = mp->next) { if (mp->format == FORMAT_RAW) continue; @@ -1857,6 +1890,12 @@ retry: c->core_temp_c = tcc_activation_temp - ((msr >> 16) & 0x7F); } + if (do_rapl & RAPL_AMD_F17H) { + if (get_msr(cpu, MSR_CORE_ENERGY_STAT, &msr)) + return -14; + c->core_energy = msr & 0xFFFFFFFF; + } + for (i = 0, mp = sys.cp; mp; i++, mp = mp->next) { if (get_mp(cpu, mp, &c->counter[i])) return -10; @@ -3739,7 +3778,7 @@ int print_perf_limit(struct thread_data *t, struct core_data *c, struct pkg_data #define RAPL_POWER_GRANULARITY 0x7FFF /* 15 bit power granularity */ #define RAPL_TIME_GRANULARITY 0x3F /* 6 bit time granularity */ -double get_tdp(unsigned int model) +double get_tdp_intel(unsigned int model) { unsigned long long msr; @@ -3756,6 +3795,16 @@ double get_tdp(unsigned int model) } } +double get_tdp_amd(unsigned int family) +{ + switch (family) { + case 0x17: + default: + /* This is the max stock TDP of HEDT/Server Fam17h chips */ + return 250.0; + } +} + /* * rapl_dram_energy_units_probe() * Energy units are either hard-coded, or come from RAPL Energy Unit MSR. @@ -3775,21 +3824,12 @@ rapl_dram_energy_units_probe(int model, double rapl_energy_units) } } - -/* - * rapl_probe() - * - * sets do_rapl, rapl_power_units, rapl_energy_units, rapl_time_units - */ -void rapl_probe(unsigned int family, unsigned int model) +void rapl_probe_intel(unsigned int family, unsigned int model) { unsigned long long msr; unsigned int time_unit; double tdp; - if (!genuine_intel) - return; - if (family != 6) return; @@ -3913,13 +3953,66 @@ void rapl_probe(unsigned int family, unsigned int model) rapl_time_units = 1.0 / (1 << (time_unit)); - tdp = get_tdp(model); + tdp = get_tdp_intel(model); rapl_joule_counter_range = 0xFFFFFFFF * rapl_energy_units / tdp; if (!quiet) fprintf(outf, "RAPL: %.0f sec. Joule Counter Range, at %.0f Watts\n", rapl_joule_counter_range, tdp); +} - return; +void rapl_probe_amd(unsigned int family, unsigned int model) +{ + unsigned long long msr; + unsigned int eax, ebx, ecx, edx; + unsigned int has_rapl = 0; + double tdp; + + if (max_extended_level >= 0x80000007) { + __cpuid(0x80000007, eax, ebx, ecx, edx); + /* RAPL (Fam 17h) */ + has_rapl = edx & (1 << 14); + } + + if (!has_rapl) + return; + + switch (family) { + case 0x17: /* Zen, Zen+ */ + do_rapl = RAPL_AMD_F17H | RAPL_PER_CORE_ENERGY; + if (rapl_joules) + BIC_PRESENT(BIC_Cor_J); + else + BIC_PRESENT(BIC_CorWatt); + break; + default: + return; + } + + if (get_msr(base_cpu, MSR_RAPL_PWR_UNIT, &msr)) + return; + + rapl_time_units = ldexp(1.0, -(msr >> 16 & 0xf)); + rapl_energy_units = ldexp(1.0, -(msr >> 8 & 0x1f)); + rapl_power_units = ldexp(1.0, -(msr & 0xf)); + + tdp = get_tdp_amd(model); + + rapl_joule_counter_range = 0xFFFFFFFF * rapl_energy_units / tdp; + if (!quiet) + fprintf(outf, "RAPL: %.0f sec. Joule Counter Range, at %.0f Watts\n", rapl_joule_counter_range, tdp); +} + +/* + * rapl_probe() + * + * sets do_rapl, rapl_power_units, rapl_energy_units, rapl_time_units + */ +void rapl_probe(unsigned int family, unsigned int model) +{ + if (genuine_intel) + rapl_probe_intel(family, model); + if (authentic_amd) + rapl_probe_amd(family, model); } void perf_limit_reasons_probe(unsigned int family, unsigned int model) @@ -4024,6 +4117,7 @@ void print_power_limit_msr(int cpu, unsigned long long msr, char *label) int print_rapl(struct thread_data *t, struct core_data *c, struct pkg_data *p) { unsigned long long msr; + const char *msr_name; int cpu; if (!do_rapl) @@ -4039,10 +4133,17 @@ int print_rapl(struct thread_data *t, struct core_data *c, struct pkg_data *p) return -1; } - if (get_msr(cpu, MSR_RAPL_POWER_UNIT, &msr)) - return -1; + if (do_rapl & RAPL_AMD_F17H) { + msr_name = "MSR_RAPL_PWR_UNIT"; + if (get_msr(cpu, MSR_RAPL_PWR_UNIT, &msr)) + return -1; + } else { + msr_name = "MSR_RAPL_POWER_UNIT"; + if (get_msr(cpu, MSR_RAPL_POWER_UNIT, &msr)) + return -1; + } - fprintf(outf, "cpu%d: MSR_RAPL_POWER_UNIT: 0x%08llx (%f Watts, %f Joules, %f sec.)\n", cpu, msr, + fprintf(outf, "cpu%d: %s: 0x%08llx (%f Watts, %f Joules, %f sec.)\n", cpu, msr_name, msr, rapl_power_units, rapl_energy_units, rapl_time_units); if (do_rapl & RAPL_PKG_POWER_INFO) { -- cgit v1.2.3 From 3316f99a9f1b68c578c57e76792bd19da1c7d423 Mon Sep 17 00:00:00 2001 From: Calvin Walton Date: Fri, 17 Aug 2018 12:34:42 -0400 Subject: tools/power turbostat: Also read package power on AMD F17h (Zen) The package power can also be read from an MSR. It's not clear exactly what is included, and whether it's aggregated over all nodes or reported separately. It does look like this is reported separately per CCX (I get a single value on the Ryzen R7 1700), but it might be reported separately per- die (node?) on larger processors. If that's the case, it would have to be recorded per node and aggregated for the socket. Note that although Zen has these MSRs reporting power, it looks like the actual RAPL configuration (power limits, configured TDP) is done through PCI configuration space. I have not yet found any public documentation for this. Signed-off-by: Calvin Walton Signed-off-by: Len Brown --- tools/power/x86/turbostat/turbostat.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index 538878b9deb8..ee9aaeef662e 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -1985,6 +1985,11 @@ retry: return -16; p->rapl_dram_perf_status = msr & 0xFFFFFFFF; } + if (do_rapl & RAPL_AMD_F17H) { + if (get_msr(cpu, MSR_PKG_ENERGY_STAT, &msr)) + return -13; + p->energy_pkg = msr & 0xFFFFFFFF; + } if (DO_BIC(BIC_PkgTmp)) { if (get_msr(cpu, MSR_IA32_PACKAGE_THERM_STATUS, &msr)) return -17; @@ -3979,10 +3984,13 @@ void rapl_probe_amd(unsigned int family, unsigned int model) switch (family) { case 0x17: /* Zen, Zen+ */ do_rapl = RAPL_AMD_F17H | RAPL_PER_CORE_ENERGY; - if (rapl_joules) + if (rapl_joules) { + BIC_PRESENT(BIC_Pkg_J); BIC_PRESENT(BIC_Cor_J); - else + } else { + BIC_PRESENT(BIC_PkgWatt); BIC_PRESENT(BIC_CorWatt); + } break; default: return; -- cgit v1.2.3 From 8173c336989c1a12290cd023969df2775b2df34d Mon Sep 17 00:00:00 2001 From: Ben Hutchings Date: Wed, 20 Mar 2019 23:01:03 -0400 Subject: tools/power turbostat: Add checks for failure of fgets() and fscanf() Most calls to fgets() and fscanf() are followed by error checks. Add an exit-on-error in the remaining cases. Signed-off-by: Ben Hutchings Signed-off-by: Len Brown --- tools/power/x86/turbostat/turbostat.c | 28 ++++++++++++++++++---------- 1 file changed, 18 insertions(+), 10 deletions(-) diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index ee9aaeef662e..9817abd215b9 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -2643,7 +2643,8 @@ int get_thread_siblings(struct cpu_topology *thiscpu) filep = fopen_or_die(path, "r"); do { offset -= BITMASK_SIZE; - fscanf(filep, "%lx%c", &map, &character); + if (fscanf(filep, "%lx%c", &map, &character) != 2) + err(1, "%s: failed to parse file", path); for (shift = 0; shift < BITMASK_SIZE; shift++) { if ((map >> shift) & 0x1) { so = shift + offset; @@ -3475,14 +3476,14 @@ dump_sysfs_cstate_config(void) input = fopen(path, "r"); if (input == NULL) continue; - fgets(name_buf, sizeof(name_buf), input); + if (!fgets(name_buf, sizeof(name_buf), input)) + err(1, "%s: failed to read file", path); /* truncate "C1-HSW\n" to "C1", or truncate "C1\n" to "C1" */ sp = strchr(name_buf, '-'); if (!sp) sp = strchrnul(name_buf, '\n'); *sp = '\0'; - fclose(input); sprintf(path, "/sys/devices/system/cpu/cpu%d/cpuidle/state%d/desc", @@ -3490,7 +3491,8 @@ dump_sysfs_cstate_config(void) input = fopen(path, "r"); if (input == NULL) continue; - fgets(desc, sizeof(desc), input); + if (!fgets(desc, sizeof(desc), input)) + err(1, "%s: failed to read file", path); fprintf(outf, "cpu%d: %s: %s", base_cpu, name_buf, desc); fclose(input); @@ -3512,7 +3514,8 @@ dump_sysfs_pstate_config(void) fprintf(outf, "NSFOD %s\n", path); return; } - fgets(driver_buf, sizeof(driver_buf), input); + if (!fgets(driver_buf, sizeof(driver_buf), input)) + err(1, "%s: failed to read file", path); fclose(input); sprintf(path, "/sys/devices/system/cpu/cpu%d/cpufreq/scaling_governor", @@ -3522,7 +3525,8 @@ dump_sysfs_pstate_config(void) fprintf(outf, "NSFOD %s\n", path); return; } - fgets(governor_buf, sizeof(governor_buf), input); + if (!fgets(governor_buf, sizeof(governor_buf), input)) + err(1, "%s: failed to read file", path); fclose(input); fprintf(outf, "cpu%d: cpufreq driver: %s", base_cpu, driver_buf); @@ -3531,7 +3535,8 @@ dump_sysfs_pstate_config(void) sprintf(path, "/sys/devices/system/cpu/cpufreq/boost"); input = fopen(path, "r"); if (input != NULL) { - fscanf(input, "%d", &turbo); + if (fscanf(input, "%d", &turbo) != 1) + err(1, "%s: failed to parse number from file", path); fprintf(outf, "cpufreq boost: %d\n", turbo); fclose(input); } @@ -3539,7 +3544,8 @@ dump_sysfs_pstate_config(void) sprintf(path, "/sys/devices/system/cpu/intel_pstate/no_turbo"); input = fopen(path, "r"); if (input != NULL) { - fscanf(input, "%d", &turbo); + if (fscanf(input, "%d", &turbo) != 1) + err(1, "%s: failed to parse number from file", path); fprintf(outf, "cpufreq intel_pstate no_turbo: %d\n", turbo); fclose(input); } @@ -5464,7 +5470,8 @@ void probe_sysfs(void) input = fopen(path, "r"); if (input == NULL) continue; - fgets(name_buf, sizeof(name_buf), input); + if (!fgets(name_buf, sizeof(name_buf), input)) + err(1, "%s: failed to read file", path); /* truncate "C1-HSW\n" to "C1", or truncate "C1\n" to "C1" */ sp = strchr(name_buf, '-'); @@ -5491,7 +5498,8 @@ void probe_sysfs(void) input = fopen(path, "r"); if (input == NULL) continue; - fgets(name_buf, sizeof(name_buf), input); + if (!fgets(name_buf, sizeof(name_buf), input)) + err(1, "%s: failed to read file", path); /* truncate "C1-HSW\n" to "C1", or truncate "C1\n" to "C1" */ sp = strchr(name_buf, '-'); if (!sp) -- cgit v1.2.3 From 5ea7647b333f3580697edaaf2b17a2f6d29a82f1 Mon Sep 17 00:00:00 2001 From: Prarit Bhargava Date: Tue, 25 Sep 2018 08:59:26 -0400 Subject: tools/power turbostat: Warn on bad ACPI LPIT data On some systems /sys/devices/system/cpu/cpuidle/low_power_idle_cpu_residency_us or /sys/devices/system/cpu/cpuidle/low_power_idle_system_residency_us return a file error because of bad ACPI LPIT data from a misconfigured BIOS. turbostat interprets this failure as a fatal error and outputs turbostat: CPU LPI: No data available If the ACPI LPIT sysfs files return an error output a warning instead of a fatal error, disable the ACPI LPIT evaluation code, and continue. Signed-off-by: Prarit Bhargava Signed-off-by: Len Brown --- tools/power/x86/turbostat/turbostat.c | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index 9817abd215b9..442af7892402 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -2921,8 +2921,11 @@ int snapshot_cpu_lpi_us(void) fp = fopen_or_die("/sys/devices/system/cpu/cpuidle/low_power_idle_cpu_residency_us", "r"); retval = fscanf(fp, "%lld", &cpuidle_cur_cpu_lpi_us); - if (retval != 1) - err(1, "CPU LPI"); + if (retval != 1) { + fprintf(stderr, "Disabling Low Power Idle CPU output\n"); + BIC_NOT_PRESENT(BIC_CPU_LPI); + return -1; + } fclose(fp); @@ -2944,9 +2947,11 @@ int snapshot_sys_lpi_us(void) fp = fopen_or_die("/sys/devices/system/cpu/cpuidle/low_power_idle_system_residency_us", "r"); retval = fscanf(fp, "%lld", &cpuidle_cur_sys_lpi_us); - if (retval != 1) - err(1, "SYS LPI"); - + if (retval != 1) { + fprintf(stderr, "Disabling Low Power Idle System output\n"); + BIC_NOT_PRESENT(BIC_SYS_LPI); + return -1; + } fclose(fp); return 0; -- cgit v1.2.3 From 0f71d089c912769251c992b8f7dcd508a472fe10 Mon Sep 17 00:00:00 2001 From: Len Brown Date: Wed, 20 Mar 2019 23:23:25 -0400 Subject: tools/power turbostat: update version number Signed-off-by: Len Brown --- tools/power/x86/turbostat/turbostat.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index 442af7892402..8d176b10daec 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -5278,7 +5278,7 @@ int get_and_dump_counters(void) } void print_version() { - fprintf(outf, "turbostat version 18.07.27" + fprintf(outf, "turbostat version 19.03.20" " - Len Brown \n"); } -- cgit v1.2.3 From 5483844c3fc18474de29f5d6733003526e0a9f78 Mon Sep 17 00:00:00 2001 From: Jeremy Sowden Date: Tue, 19 Mar 2019 15:39:20 +0000 Subject: vti4: ipip tunnel deregistration fixes. If tunnel registration failed during module initialization, the module would fail to deregister the IPPROTO_COMP protocol and would attempt to deregister the tunnel. The tunnel was not deregistered during module-exit. Fixes: dd9ee3444014e ("vti4: Fix a ipip packet processing bug in 'IPCOMP' virtual tunnel") Signed-off-by: Jeremy Sowden Signed-off-by: Steffen Klassert --- net/ipv4/ip_vti.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/net/ipv4/ip_vti.c b/net/ipv4/ip_vti.c index 68a21bf75dd0..b6235ca09fa5 100644 --- a/net/ipv4/ip_vti.c +++ b/net/ipv4/ip_vti.c @@ -659,9 +659,9 @@ static int __init vti_init(void) return err; rtnl_link_failed: - xfrm4_protocol_deregister(&vti_ipcomp4_protocol, IPPROTO_COMP); -xfrm_tunnel_failed: xfrm4_tunnel_deregister(&ipip_handler, AF_INET); +xfrm_tunnel_failed: + xfrm4_protocol_deregister(&vti_ipcomp4_protocol, IPPROTO_COMP); xfrm_proto_comp_failed: xfrm4_protocol_deregister(&vti_ah4_protocol, IPPROTO_AH); xfrm_proto_ah_failed: @@ -676,6 +676,7 @@ pernet_dev_failed: static void __exit vti_fini(void) { rtnl_link_unregister(&vti_link_ops); + xfrm4_tunnel_deregister(&ipip_handler, AF_INET); xfrm4_protocol_deregister(&vti_ipcomp4_protocol, IPPROTO_COMP); xfrm4_protocol_deregister(&vti_ah4_protocol, IPPROTO_AH); xfrm4_protocol_deregister(&vti_esp4_protocol, IPPROTO_ESP); -- cgit v1.2.3 From 01ce31c57b3f07c91c9d45bbaf126124cce83a5d Mon Sep 17 00:00:00 2001 From: Jeremy Sowden Date: Tue, 19 Mar 2019 15:39:21 +0000 Subject: vti4: removed duplicate log message. Removed info log-message if ipip tunnel registration fails during module-initialization: it adds nothing to the error message that is written on all failures. Fixes: dd9ee3444014e ("vti4: Fix a ipip packet processing bug in 'IPCOMP' virtual tunnel") Signed-off-by: Jeremy Sowden Signed-off-by: Steffen Klassert --- net/ipv4/ip_vti.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/net/ipv4/ip_vti.c b/net/ipv4/ip_vti.c index b6235ca09fa5..35d8346742e2 100644 --- a/net/ipv4/ip_vti.c +++ b/net/ipv4/ip_vti.c @@ -646,10 +646,8 @@ static int __init vti_init(void) msg = "ipip tunnel"; err = xfrm4_tunnel_register(&ipip_handler, AF_INET); - if (err < 0) { - pr_info("%s: cant't register tunnel\n",__func__); + if (err < 0) goto xfrm_tunnel_failed; - } msg = "netlink interface"; err = rtnl_link_register(&vti_link_ops); -- cgit v1.2.3 From f0f2338a9cfaf71db895fa989ea7234e8a9b471d Mon Sep 17 00:00:00 2001 From: Daniel Mack Date: Wed, 20 Mar 2019 22:41:56 +0100 Subject: ASoC: cs4270: Set auto-increment bit for register writes The CS4270 does not by default increment the register address on consecutive writes. During normal operation it doesn't matter as all register accesses are done individually. At resume time after suspend, however, the regcache code gathers the biggest possible block of registers to sync and sends them one on one go. To fix this, set the INCR bit in all cases. Signed-off-by: Daniel Mack Signed-off-by: Mark Brown --- sound/soc/codecs/cs4270.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/soc/codecs/cs4270.c b/sound/soc/codecs/cs4270.c index 33d74f163bd7..793a14d58667 100644 --- a/sound/soc/codecs/cs4270.c +++ b/sound/soc/codecs/cs4270.c @@ -642,6 +642,7 @@ static const struct regmap_config cs4270_regmap = { .reg_defaults = cs4270_reg_defaults, .num_reg_defaults = ARRAY_SIZE(cs4270_reg_defaults), .cache_type = REGCACHE_RBTREE, + .write_flag_mask = CS4270_I2C_INCR, .readable_reg = cs4270_reg_is_readable, .volatile_reg = cs4270_reg_is_volatile, -- cgit v1.2.3 From 53f67a78663811968f426d480bc55887d787bd94 Mon Sep 17 00:00:00 2001 From: "S.j. Wang" Date: Sat, 2 Mar 2019 05:52:19 +0000 Subject: ASoC: fsl_asrc: add constraint for the asrc of older version There is a constraint for the channel number setting on the asrc of older version (e.g. imx35), the channel number should be even, odd number isn't valid. So add this constraint when the asrc of older version is used. Acked-by: Nicolin Chen Signed-off-by: Shengjiu Wang Signed-off-by: Mark Brown --- sound/soc/fsl/fsl_asrc.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/sound/soc/fsl/fsl_asrc.c b/sound/soc/fsl/fsl_asrc.c index 528e8b108422..0b937924d2e4 100644 --- a/sound/soc/fsl/fsl_asrc.c +++ b/sound/soc/fsl/fsl_asrc.c @@ -445,6 +445,19 @@ struct dma_chan *fsl_asrc_get_dma_channel(struct fsl_asrc_pair *pair, bool dir) } EXPORT_SYMBOL_GPL(fsl_asrc_get_dma_channel); +static int fsl_asrc_dai_startup(struct snd_pcm_substream *substream, + struct snd_soc_dai *dai) +{ + struct fsl_asrc *asrc_priv = snd_soc_dai_get_drvdata(dai); + + /* Odd channel number is not valid for older ASRC (channel_bits==3) */ + if (asrc_priv->channel_bits == 3) + snd_pcm_hw_constraint_step(substream->runtime, 0, + SNDRV_PCM_HW_PARAM_CHANNELS, 2); + + return 0; +} + static int fsl_asrc_dai_hw_params(struct snd_pcm_substream *substream, struct snd_pcm_hw_params *params, struct snd_soc_dai *dai) @@ -539,6 +552,7 @@ static int fsl_asrc_dai_trigger(struct snd_pcm_substream *substream, int cmd, } static const struct snd_soc_dai_ops fsl_asrc_dai_ops = { + .startup = fsl_asrc_dai_startup, .hw_params = fsl_asrc_dai_hw_params, .hw_free = fsl_asrc_dai_hw_free, .trigger = fsl_asrc_dai_trigger, -- cgit v1.2.3 From 0ff4e8c61b794a4bf6c854ab071a1abaaa80f358 Mon Sep 17 00:00:00 2001 From: "S.j. Wang" Date: Wed, 27 Feb 2019 06:31:12 +0000 Subject: ASoC: fsl_esai: fix channel swap issue when stream starts There is very low possibility ( < 0.1% ) that channel swap happened in beginning when multi output/input pin is enabled. The issue is that hardware can't send data to correct pin in the beginning with the normal enable flow. This is hardware issue, but there is no errata, the workaround flow is that: Each time playback/recording, firstly clear the xSMA/xSMB, then enable TE/RE, then enable xSMB and xSMA (xSMB must be enabled before xSMA). Which is to use the xSMA as the trigger start register, previously the xCR_TE or xCR_RE is the bit for starting. Fixes commit 43d24e76b698 ("ASoC: fsl_esai: Add ESAI CPU DAI driver") Cc: Reviewed-by: Fabio Estevam Acked-by: Nicolin Chen Signed-off-by: Shengjiu Wang Signed-off-by: Mark Brown --- sound/soc/fsl/fsl_esai.c | 47 +++++++++++++++++++++++++++++++++++++---------- 1 file changed, 37 insertions(+), 10 deletions(-) diff --git a/sound/soc/fsl/fsl_esai.c b/sound/soc/fsl/fsl_esai.c index afe67c865330..3623aa9a6f2e 100644 --- a/sound/soc/fsl/fsl_esai.c +++ b/sound/soc/fsl/fsl_esai.c @@ -54,6 +54,8 @@ struct fsl_esai { u32 fifo_depth; u32 slot_width; u32 slots; + u32 tx_mask; + u32 rx_mask; u32 hck_rate[2]; u32 sck_rate[2]; bool hck_dir[2]; @@ -361,21 +363,13 @@ static int fsl_esai_set_dai_tdm_slot(struct snd_soc_dai *dai, u32 tx_mask, regmap_update_bits(esai_priv->regmap, REG_ESAI_TCCR, ESAI_xCCR_xDC_MASK, ESAI_xCCR_xDC(slots)); - regmap_update_bits(esai_priv->regmap, REG_ESAI_TSMA, - ESAI_xSMA_xS_MASK, ESAI_xSMA_xS(tx_mask)); - regmap_update_bits(esai_priv->regmap, REG_ESAI_TSMB, - ESAI_xSMB_xS_MASK, ESAI_xSMB_xS(tx_mask)); - regmap_update_bits(esai_priv->regmap, REG_ESAI_RCCR, ESAI_xCCR_xDC_MASK, ESAI_xCCR_xDC(slots)); - regmap_update_bits(esai_priv->regmap, REG_ESAI_RSMA, - ESAI_xSMA_xS_MASK, ESAI_xSMA_xS(rx_mask)); - regmap_update_bits(esai_priv->regmap, REG_ESAI_RSMB, - ESAI_xSMB_xS_MASK, ESAI_xSMB_xS(rx_mask)); - esai_priv->slot_width = slot_width; esai_priv->slots = slots; + esai_priv->tx_mask = tx_mask; + esai_priv->rx_mask = rx_mask; return 0; } @@ -596,6 +590,7 @@ static int fsl_esai_trigger(struct snd_pcm_substream *substream, int cmd, bool tx = substream->stream == SNDRV_PCM_STREAM_PLAYBACK; u8 i, channels = substream->runtime->channels; u32 pins = DIV_ROUND_UP(channels, esai_priv->slots); + u32 mask; switch (cmd) { case SNDRV_PCM_TRIGGER_START: @@ -608,15 +603,38 @@ static int fsl_esai_trigger(struct snd_pcm_substream *substream, int cmd, for (i = 0; tx && i < channels; i++) regmap_write(esai_priv->regmap, REG_ESAI_ETDR, 0x0); + /* + * When set the TE/RE in the end of enablement flow, there + * will be channel swap issue for multi data line case. + * In order to workaround this issue, we switch the bit + * enablement sequence to below sequence + * 1) clear the xSMB & xSMA: which is done in probe and + * stop state. + * 2) set TE/RE + * 3) set xSMB + * 4) set xSMA: xSMA is the last one in this flow, which + * will trigger esai to start. + */ regmap_update_bits(esai_priv->regmap, REG_ESAI_xCR(tx), tx ? ESAI_xCR_TE_MASK : ESAI_xCR_RE_MASK, tx ? ESAI_xCR_TE(pins) : ESAI_xCR_RE(pins)); + mask = tx ? esai_priv->tx_mask : esai_priv->rx_mask; + + regmap_update_bits(esai_priv->regmap, REG_ESAI_xSMB(tx), + ESAI_xSMB_xS_MASK, ESAI_xSMB_xS(mask)); + regmap_update_bits(esai_priv->regmap, REG_ESAI_xSMA(tx), + ESAI_xSMA_xS_MASK, ESAI_xSMA_xS(mask)); + break; case SNDRV_PCM_TRIGGER_SUSPEND: case SNDRV_PCM_TRIGGER_STOP: case SNDRV_PCM_TRIGGER_PAUSE_PUSH: regmap_update_bits(esai_priv->regmap, REG_ESAI_xCR(tx), tx ? ESAI_xCR_TE_MASK : ESAI_xCR_RE_MASK, 0); + regmap_update_bits(esai_priv->regmap, REG_ESAI_xSMA(tx), + ESAI_xSMA_xS_MASK, 0); + regmap_update_bits(esai_priv->regmap, REG_ESAI_xSMB(tx), + ESAI_xSMB_xS_MASK, 0); /* Disable and reset FIFO */ regmap_update_bits(esai_priv->regmap, REG_ESAI_xFCR(tx), @@ -906,6 +924,15 @@ static int fsl_esai_probe(struct platform_device *pdev) return ret; } + esai_priv->tx_mask = 0xFFFFFFFF; + esai_priv->rx_mask = 0xFFFFFFFF; + + /* Clear the TSMA, TSMB, RSMA, RSMB */ + regmap_write(esai_priv->regmap, REG_ESAI_TSMA, 0); + regmap_write(esai_priv->regmap, REG_ESAI_TSMB, 0); + regmap_write(esai_priv->regmap, REG_ESAI_RSMA, 0); + regmap_write(esai_priv->regmap, REG_ESAI_RSMB, 0); + ret = devm_snd_soc_register_component(&pdev->dev, &fsl_esai_component, &fsl_esai_dai, 1); if (ret) { -- cgit v1.2.3 From 8efd6365417a044db03009724ecc1a9521524913 Mon Sep 17 00:00:00 2001 From: Dinh Nguyen Date: Wed, 13 Mar 2019 17:28:37 -0500 Subject: arm64: dts: stratix10: add the sysmgr-syscon property from the gmac's The gmac ethernet driver uses the "altr,sysmgr-syscon" property to configure phy settings for the gmac controller. Add the "altr,sysmgr-syscon" property to all gmac nodes. This patch fixes: [ 0.917530] socfpga-dwmac ff800000.ethernet: No sysmgr-syscon node found [ 0.924209] socfpga-dwmac ff800000.ethernet: Unable to parse OF data Cc: stable@vger.kernel.org Reported-by: Ley Foon Tan Signed-off-by: Dinh Nguyen --- arch/arm64/boot/dts/altera/socfpga_stratix10.dtsi | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/arm64/boot/dts/altera/socfpga_stratix10.dtsi b/arch/arm64/boot/dts/altera/socfpga_stratix10.dtsi index 7c649f6b14cb..cd7c76e58b09 100644 --- a/arch/arm64/boot/dts/altera/socfpga_stratix10.dtsi +++ b/arch/arm64/boot/dts/altera/socfpga_stratix10.dtsi @@ -162,6 +162,7 @@ rx-fifo-depth = <16384>; snps,multicast-filter-bins = <256>; iommus = <&smmu 1>; + altr,sysmgr-syscon = <&sysmgr 0x44 0>; status = "disabled"; }; @@ -179,6 +180,7 @@ rx-fifo-depth = <16384>; snps,multicast-filter-bins = <256>; iommus = <&smmu 2>; + altr,sysmgr-syscon = <&sysmgr 0x48 0>; status = "disabled"; }; @@ -196,6 +198,7 @@ rx-fifo-depth = <16384>; snps,multicast-filter-bins = <256>; iommus = <&smmu 3>; + altr,sysmgr-syscon = <&sysmgr 0x4c 0>; status = "disabled"; }; -- cgit v1.2.3 From 475c6bde7228e2d624153626941f290a314e4672 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 6 Feb 2019 13:54:17 +0100 Subject: iwlwifi: mvm: fix TX crypto on 22560+ devices In the old days, we could transmit with HW crypto with an arbitrary key by filling it into TX_CMD. This was broken first with the advent of CCMP/GCMP-256 keys which don't fit there. This was broken *again* with the newer TX_CMD format on 22560+, where we simply cannot pass key material anymore. However, we forgot to update all the cases when we get a key from mac80211 and don't program it into the hardware but still return 0 for HW crypto on TX. In AP mode with WEP, we tried to fix this by programming the keys separately for each station later, but this ultimately turns out to be buggy, for example now it leaks memory when we have more than one WEP key. Fix this by simply using only SW crypto for WEP in newer devices by returning -EOPNOTSUPP instead of trying to program WEP keys later. Signed-off-by: Johannes Berg Signed-off-by: Luca Coelho --- drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c | 64 ++++++----------------- drivers/net/wireless/intel/iwlwifi/mvm/mvm.h | 1 - drivers/net/wireless/intel/iwlwifi/mvm/sta.c | 41 +-------------- drivers/net/wireless/intel/iwlwifi/mvm/sta.h | 7 +-- 4 files changed, 19 insertions(+), 94 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c b/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c index 3a92c09d4692..eeaeb8475666 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c @@ -2714,9 +2714,6 @@ static void iwl_mvm_stop_ap_ibss(struct ieee80211_hw *hw, iwl_mvm_mac_ctxt_remove(mvm, vif); - kfree(mvmvif->ap_wep_key); - mvmvif->ap_wep_key = NULL; - mutex_unlock(&mvm->mutex); } @@ -3183,24 +3180,7 @@ static int iwl_mvm_mac_sta_state(struct ieee80211_hw *hw, ret = iwl_mvm_update_sta(mvm, vif, sta); } else if (old_state == IEEE80211_STA_ASSOC && new_state == IEEE80211_STA_AUTHORIZED) { - /* if wep is used, need to set the key for the station now */ - if (vif->type == NL80211_IFTYPE_AP && mvmvif->ap_wep_key) { - mvm_sta->wep_key = - kmemdup(mvmvif->ap_wep_key, - sizeof(*mvmvif->ap_wep_key) + - mvmvif->ap_wep_key->keylen, - GFP_KERNEL); - if (!mvm_sta->wep_key) { - ret = -ENOMEM; - goto out_unlock; - } - - ret = iwl_mvm_set_sta_key(mvm, vif, sta, - mvm_sta->wep_key, - STA_KEY_IDX_INVALID); - } else { - ret = 0; - } + ret = 0; /* we don't support TDLS during DCM */ if (iwl_mvm_phy_ctx_count(mvm) > 1) @@ -3242,17 +3222,6 @@ static int iwl_mvm_mac_sta_state(struct ieee80211_hw *hw, NL80211_TDLS_DISABLE_LINK); } - /* Remove STA key if this is an AP using WEP */ - if (vif->type == NL80211_IFTYPE_AP && mvmvif->ap_wep_key) { - int rm_ret = iwl_mvm_remove_sta_key(mvm, vif, sta, - mvm_sta->wep_key); - - if (!ret) - ret = rm_ret; - kfree(mvm_sta->wep_key); - mvm_sta->wep_key = NULL; - } - if (unlikely(ret && test_bit(IWL_MVM_STATUS_HW_RESTART_REQUESTED, &mvm->status))) @@ -3439,20 +3408,12 @@ static int iwl_mvm_mac_set_key(struct ieee80211_hw *hw, break; case WLAN_CIPHER_SUITE_WEP40: case WLAN_CIPHER_SUITE_WEP104: - if (vif->type == NL80211_IFTYPE_AP) { - struct iwl_mvm_vif *mvmvif = - iwl_mvm_vif_from_mac80211(vif); - - mvmvif->ap_wep_key = kmemdup(key, - sizeof(*key) + key->keylen, - GFP_KERNEL); - if (!mvmvif->ap_wep_key) - return -ENOMEM; - } - - if (vif->type != NL80211_IFTYPE_STATION) - return 0; - break; + if (vif->type == NL80211_IFTYPE_STATION) + break; + if (iwl_mvm_has_new_tx_api(mvm)) + return -EOPNOTSUPP; + /* support HW crypto on TX */ + return 0; default: /* currently FW supports only one optional cipher scheme */ if (hw->n_cipher_schemes && @@ -3540,12 +3501,17 @@ static int iwl_mvm_mac_set_key(struct ieee80211_hw *hw, ret = iwl_mvm_set_sta_key(mvm, vif, sta, key, key_offset); if (ret) { IWL_WARN(mvm, "set key failed\n"); + key->hw_key_idx = STA_KEY_IDX_INVALID; /* * can't add key for RX, but we don't need it - * in the device for TX so still return 0 + * in the device for TX so still return 0, + * unless we have new TX API where we cannot + * put key material into the TX_CMD */ - key->hw_key_idx = STA_KEY_IDX_INVALID; - ret = 0; + if (iwl_mvm_has_new_tx_api(mvm)) + ret = -EOPNOTSUPP; + else + ret = 0; } break; diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h b/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h index bca6f6b536d9..a50dc53df086 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h +++ b/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h @@ -498,7 +498,6 @@ struct iwl_mvm_vif { netdev_features_t features; struct iwl_probe_resp_data __rcu *probe_resp_data; - struct ieee80211_key_conf *ap_wep_key; }; static inline struct iwl_mvm_vif * diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/sta.c b/drivers/net/wireless/intel/iwlwifi/mvm/sta.c index 498c315291cf..db26f0041a81 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/sta.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/sta.c @@ -8,7 +8,7 @@ * Copyright(c) 2012 - 2015 Intel Corporation. All rights reserved. * Copyright(c) 2013 - 2015 Intel Mobile Communications GmbH * Copyright(c) 2016 - 2017 Intel Deutschland GmbH - * Copyright(c) 2018 Intel Corporation + * Copyright(c) 2018 - 2019 Intel Corporation * * This program is free software; you can redistribute it and/or modify * it under the terms of version 2 of the GNU General Public License as @@ -31,7 +31,7 @@ * Copyright(c) 2012 - 2015 Intel Corporation. All rights reserved. * Copyright(c) 2013 - 2015 Intel Mobile Communications GmbH * Copyright(c) 2016 - 2017 Intel Deutschland GmbH - * Copyright(c) 2018 Intel Corporation + * Copyright(c) 2018 - 2019 Intel Corporation * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -2333,21 +2333,6 @@ int iwl_mvm_add_mcast_sta(struct iwl_mvm *mvm, struct ieee80211_vif *vif) iwl_mvm_enable_txq(mvm, NULL, mvmvif->cab_queue, 0, &cfg, timeout); - if (mvmvif->ap_wep_key) { - u8 key_offset = iwl_mvm_set_fw_key_idx(mvm); - - __set_bit(key_offset, mvm->fw_key_table); - - if (key_offset == STA_KEY_IDX_INVALID) - return -ENOSPC; - - ret = iwl_mvm_send_sta_key(mvm, mvmvif->mcast_sta.sta_id, - mvmvif->ap_wep_key, true, 0, NULL, 0, - key_offset, 0); - if (ret) - return ret; - } - return 0; } @@ -2419,28 +2404,6 @@ int iwl_mvm_rm_mcast_sta(struct iwl_mvm *mvm, struct ieee80211_vif *vif) iwl_mvm_disable_txq(mvm, NULL, mvmvif->cab_queue, 0, 0); - if (mvmvif->ap_wep_key) { - int i; - - if (!__test_and_clear_bit(mvmvif->ap_wep_key->hw_key_idx, - mvm->fw_key_table)) { - IWL_ERR(mvm, "offset %d not used in fw key table.\n", - mvmvif->ap_wep_key->hw_key_idx); - return -ENOENT; - } - - /* track which key was deleted last */ - for (i = 0; i < STA_KEY_MAX_NUM; i++) { - if (mvm->fw_key_deleted[i] < U8_MAX) - mvm->fw_key_deleted[i]++; - } - mvm->fw_key_deleted[mvmvif->ap_wep_key->hw_key_idx] = 0; - ret = __iwl_mvm_remove_sta_key(mvm, mvmvif->mcast_sta.sta_id, - mvmvif->ap_wep_key, true); - if (ret) - return ret; - } - ret = iwl_mvm_rm_sta_common(mvm, mvmvif->mcast_sta.sta_id); if (ret) IWL_WARN(mvm, "Failed sending remove station\n"); diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/sta.h b/drivers/net/wireless/intel/iwlwifi/mvm/sta.h index 79700c7310a1..b4d4071b865d 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/sta.h +++ b/drivers/net/wireless/intel/iwlwifi/mvm/sta.h @@ -8,7 +8,7 @@ * Copyright(c) 2012 - 2014 Intel Corporation. All rights reserved. * Copyright(c) 2013 - 2014 Intel Mobile Communications GmbH * Copyright(c) 2015 - 2016 Intel Deutschland GmbH - * Copyright(c) 2018 Intel Corporation + * Copyright(c) 2018 - 2019 Intel Corporation * * This program is free software; you can redistribute it and/or modify * it under the terms of version 2 of the GNU General Public License as @@ -31,7 +31,7 @@ * Copyright(c) 2012 - 2014 Intel Corporation. All rights reserved. * Copyright(c) 2013 - 2014 Intel Mobile Communications GmbH * Copyright(c) 2015 - 2016 Intel Deutschland GmbH - * Copyright(c) 2018 Intel Corporation + * Copyright(c) 2018 - 2019 Intel Corporation * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -394,7 +394,6 @@ struct iwl_mvm_rxq_dup_data { * the BA window. To be used for UAPSD only. * @ptk_pn: per-queue PTK PN data structures * @dup_data: per queue duplicate packet detection data - * @wep_key: used in AP mode. Is a duplicate of the WEP key. * @deferred_traffic_tid_map: indication bitmap of deferred traffic per-TID * @tx_ant: the index of the antenna to use for data tx to this station. Only * used during connection establishment (e.g. for the 4 way handshake @@ -426,8 +425,6 @@ struct iwl_mvm_sta { struct iwl_mvm_key_pn __rcu *ptk_pn[4]; struct iwl_mvm_rxq_dup_data *dup_data; - struct ieee80211_key_conf *wep_key; - u8 reserved_queue; /* Temporary, until the new TLC will control the Tx protection */ -- cgit v1.2.3 From d1967ce641772dd5e27b2a7a97fd625700cc589c Mon Sep 17 00:00:00 2001 From: Shahar S Matityahu Date: Sun, 10 Feb 2019 10:39:59 +0200 Subject: iwlwifi: add sync_nmi to trans ops Allow modules from outside pcie to call sync_nmi. Signed-off-by: Shahar S Matityahu Signed-off-by: Luca Coelho --- drivers/net/wireless/intel/iwlwifi/iwl-trans.h | 7 +++++++ drivers/net/wireless/intel/iwlwifi/pcie/internal.h | 2 +- drivers/net/wireless/intel/iwlwifi/pcie/trans.c | 5 +++-- drivers/net/wireless/intel/iwlwifi/pcie/tx-gen2.c | 2 +- drivers/net/wireless/intel/iwlwifi/pcie/tx.c | 2 +- 5 files changed, 13 insertions(+), 5 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-trans.h b/drivers/net/wireless/intel/iwlwifi/iwl-trans.h index bbebbf3efd57..fa750711d889 100644 --- a/drivers/net/wireless/intel/iwlwifi/iwl-trans.h +++ b/drivers/net/wireless/intel/iwlwifi/iwl-trans.h @@ -618,6 +618,7 @@ struct iwl_trans_ops { struct iwl_trans_dump_data *(*dump_data)(struct iwl_trans *trans, u32 dump_mask); void (*debugfs_cleanup)(struct iwl_trans *trans); + void (*sync_nmi)(struct iwl_trans *trans); }; /** @@ -1245,6 +1246,12 @@ static inline void iwl_trans_fw_error(struct iwl_trans *trans) } +static inline void iwl_trans_sync_nmi(struct iwl_trans *trans) +{ + if (trans->ops->sync_nmi) + trans->ops->sync_nmi(trans); +} + /***************************************************** * transport helper functions *****************************************************/ diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/internal.h b/drivers/net/wireless/intel/iwlwifi/pcie/internal.h index bf8b61a476c5..59213164f35e 100644 --- a/drivers/net/wireless/intel/iwlwifi/pcie/internal.h +++ b/drivers/net/wireless/intel/iwlwifi/pcie/internal.h @@ -1043,7 +1043,7 @@ static inline bool iwl_pcie_dbg_on(struct iwl_trans *trans) void iwl_trans_pcie_rf_kill(struct iwl_trans *trans, bool state); void iwl_trans_pcie_dump_regs(struct iwl_trans *trans); -void iwl_trans_sync_nmi(struct iwl_trans *trans); +void iwl_trans_pcie_sync_nmi(struct iwl_trans *trans); #ifdef CONFIG_IWLWIFI_DEBUGFS int iwl_trans_pcie_dbgfs_register(struct iwl_trans *trans); diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/trans.c b/drivers/net/wireless/intel/iwlwifi/pcie/trans.c index fe8269d023de..28d6ae8c9336 100644 --- a/drivers/net/wireless/intel/iwlwifi/pcie/trans.c +++ b/drivers/net/wireless/intel/iwlwifi/pcie/trans.c @@ -3318,7 +3318,8 @@ static void iwl_trans_pcie_resume(struct iwl_trans *trans) .unref = iwl_trans_pcie_unref, \ .dump_data = iwl_trans_pcie_dump_data, \ .d3_suspend = iwl_trans_pcie_d3_suspend, \ - .d3_resume = iwl_trans_pcie_d3_resume + .d3_resume = iwl_trans_pcie_d3_resume, \ + .sync_nmi = iwl_trans_pcie_sync_nmi #ifdef CONFIG_PM_SLEEP #define IWL_TRANS_PM_OPS \ @@ -3637,7 +3638,7 @@ out_no_pci: return ERR_PTR(ret); } -void iwl_trans_sync_nmi(struct iwl_trans *trans) +void iwl_trans_pcie_sync_nmi(struct iwl_trans *trans) { unsigned long timeout = jiffies + IWL_TRANS_NMI_TIMEOUT; diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/tx-gen2.c b/drivers/net/wireless/intel/iwlwifi/pcie/tx-gen2.c index 88530d9f4a54..38d110338987 100644 --- a/drivers/net/wireless/intel/iwlwifi/pcie/tx-gen2.c +++ b/drivers/net/wireless/intel/iwlwifi/pcie/tx-gen2.c @@ -965,7 +965,7 @@ static int iwl_pcie_gen2_send_hcmd_sync(struct iwl_trans *trans, cmd_str); ret = -ETIMEDOUT; - iwl_trans_sync_nmi(trans); + iwl_trans_pcie_sync_nmi(trans); goto cancel; } diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/tx.c b/drivers/net/wireless/intel/iwlwifi/pcie/tx.c index 9fbd37d23e85..7be73e2c4681 100644 --- a/drivers/net/wireless/intel/iwlwifi/pcie/tx.c +++ b/drivers/net/wireless/intel/iwlwifi/pcie/tx.c @@ -1960,7 +1960,7 @@ static int iwl_pcie_send_hcmd_sync(struct iwl_trans *trans, iwl_get_cmd_string(trans, cmd->id)); ret = -ETIMEDOUT; - iwl_trans_sync_nmi(trans); + iwl_trans_pcie_sync_nmi(trans); goto cancel; } -- cgit v1.2.3 From 8625794e363946d153c5bc57ed30ab7616a9995a Mon Sep 17 00:00:00 2001 From: Shahar S Matityahu Date: Sun, 10 Feb 2019 09:37:22 +0200 Subject: iwlwifi: dbg_ini: in case of region dump failure set memory to 0 In case the driver fails to dump a memory region, and this is the last region, then partial region would be extracted. Solve this by setting the data to zero in case of failure. This will cause dump to be a list of consecutive successful memory regions and trailing zeros with no partial memories extracted. Signed-off-by: Shahar S Matityahu Signed-off-by: Luca Coelho --- drivers/net/wireless/intel/iwlwifi/fw/dbg.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/wireless/intel/iwlwifi/fw/dbg.c b/drivers/net/wireless/intel/iwlwifi/fw/dbg.c index f119c49cd39c..da5d9d79c372 100644 --- a/drivers/net/wireless/intel/iwlwifi/fw/dbg.c +++ b/drivers/net/wireless/intel/iwlwifi/fw/dbg.c @@ -1614,6 +1614,7 @@ iwl_dump_ini_mem(struct iwl_fw_runtime *fwrt, if (!range) { IWL_ERR(fwrt, "Failed to fill region header: id=%d, type=%d\n", le32_to_cpu(reg->region_id), type); + memset(*data, 0, le32_to_cpu((*data)->len)); return; } @@ -1623,6 +1624,7 @@ iwl_dump_ini_mem(struct iwl_fw_runtime *fwrt, if (range_size < 0) { IWL_ERR(fwrt, "Failed to dump region: id=%d, type=%d\n", le32_to_cpu(reg->region_id), type); + memset(*data, 0, le32_to_cpu((*data)->len)); return; } range = range + range_size; -- cgit v1.2.3 From b05d57c9b647b77edf9ac4550c493cf77c3923c7 Mon Sep 17 00:00:00 2001 From: Shahar S Matityahu Date: Tue, 12 Feb 2019 09:56:49 +0200 Subject: iwlwifi: dbg_ini: fix bad dump size calculation The driver initiates the size value with the size of the struct and then adds the size of the data and checks if the size is zero so size can not be equal to zero. Solve this by getting the data size, check that it is not equal to zero and only then add the struct size. Signed-off-by: Shahar S Matityahu Fixes: 7a14c23dcdee ("iwlwifi: dbg: dump data according to the new ini TLVs") Signed-off-by: Luca Coelho --- drivers/net/wireless/intel/iwlwifi/fw/dbg.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/fw/dbg.c b/drivers/net/wireless/intel/iwlwifi/fw/dbg.c index da5d9d79c372..b99d38b37014 100644 --- a/drivers/net/wireless/intel/iwlwifi/fw/dbg.c +++ b/drivers/net/wireless/intel/iwlwifi/fw/dbg.c @@ -1809,12 +1809,12 @@ _iwl_fw_error_ini_dump(struct iwl_fw_runtime *fwrt, trigger = fwrt->dump.active_trigs[id].trig; - size = sizeof(*dump_file); - size += iwl_fw_ini_get_trigger_len(fwrt, trigger); - + size = iwl_fw_ini_get_trigger_len(fwrt, trigger); if (!size) return NULL; + size += sizeof(*dump_file); + dump_file = vzalloc(size); if (!dump_file) return NULL; -- cgit v1.2.3 From 07d35b4270efd08e27cdad9a8d5fa31158cfc4e8 Mon Sep 17 00:00:00 2001 From: Shahar S Matityahu Date: Sun, 10 Feb 2019 10:42:16 +0200 Subject: iwlwifi: use sync nmi in case of init flow failure In case of alive interrupt timeout or any failure in the init flow the driver generates FW nmi. The driver assumes that the nmi will generate SW interrupt. This assumption does not hold and leads to faulty behavior in the recovery flow. Solve this by using sync nmi, this way, even if the driver does not receive SW interrupt, it still starts the recovery flow. Also remove the wait queue from iwl_fwrt_stop_device since the driver is handling the SW interrupt synchronously. Signed-off-by: Shahar S Matityahu Signed-off-by: Luca Coelho --- drivers/net/wireless/intel/iwlwifi/fw/dbg.c | 26 +++----------------------- drivers/net/wireless/intel/iwlwifi/fw/init.c | 1 - drivers/net/wireless/intel/iwlwifi/iwl-trans.h | 7 ------- 3 files changed, 3 insertions(+), 31 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/fw/dbg.c b/drivers/net/wireless/intel/iwlwifi/fw/dbg.c index b99d38b37014..d7380016f1c0 100644 --- a/drivers/net/wireless/intel/iwlwifi/fw/dbg.c +++ b/drivers/net/wireless/intel/iwlwifi/fw/dbg.c @@ -1944,14 +1944,10 @@ int iwl_fw_dbg_error_collect(struct iwl_fw_runtime *fwrt, iwl_dump_error_desc->len = 0; ret = iwl_fw_dbg_collect_desc(fwrt, iwl_dump_error_desc, false, 0); - if (ret) { + if (ret) kfree(iwl_dump_error_desc); - } else { - set_bit(STATUS_FW_WAIT_DUMP, &fwrt->trans->status); - - /* trigger nmi to halt the fw */ - iwl_force_nmi(fwrt->trans); - } + else + iwl_trans_sync_nmi(fwrt->trans); return ret; } @@ -2491,22 +2487,6 @@ IWL_EXPORT_SYMBOL(iwl_fw_dbg_apply_point); void iwl_fwrt_stop_device(struct iwl_fw_runtime *fwrt) { - /* if the wait event timeout elapses instead of wake up then - * the driver did not receive NMI interrupt and can not assume the FW - * is halted - */ - int ret = wait_event_timeout(fwrt->trans->fw_halt_waitq, - !test_bit(STATUS_FW_WAIT_DUMP, - &fwrt->trans->status), - msecs_to_jiffies(2000)); - if (!ret) { - /* failed to receive NMI interrupt, assuming the FW is stuck */ - set_bit(STATUS_FW_ERROR, &fwrt->trans->status); - - clear_bit(STATUS_FW_WAIT_DUMP, &fwrt->trans->status); - } - - /* Assuming the op mode mutex is held at this point */ iwl_fw_dbg_collect_sync(fwrt); iwl_trans_stop_device(fwrt->trans); diff --git a/drivers/net/wireless/intel/iwlwifi/fw/init.c b/drivers/net/wireless/intel/iwlwifi/fw/init.c index 7adf4e4e841a..12310e3d2fc5 100644 --- a/drivers/net/wireless/intel/iwlwifi/fw/init.c +++ b/drivers/net/wireless/intel/iwlwifi/fw/init.c @@ -76,7 +76,6 @@ void iwl_fw_runtime_init(struct iwl_fw_runtime *fwrt, struct iwl_trans *trans, fwrt->ops_ctx = ops_ctx; INIT_DELAYED_WORK(&fwrt->dump.wk, iwl_fw_error_dump_wk); iwl_fwrt_dbgfs_register(fwrt, dbgfs_dir); - init_waitqueue_head(&fwrt->trans->fw_halt_waitq); } IWL_EXPORT_SYMBOL(iwl_fw_runtime_init); diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-trans.h b/drivers/net/wireless/intel/iwlwifi/iwl-trans.h index fa750711d889..d8690acee40c 100644 --- a/drivers/net/wireless/intel/iwlwifi/iwl-trans.h +++ b/drivers/net/wireless/intel/iwlwifi/iwl-trans.h @@ -338,7 +338,6 @@ enum iwl_d3_status { * are sent * @STATUS_TRANS_IDLE: the trans is idle - general commands are not to be sent * @STATUS_TRANS_DEAD: trans is dead - avoid any read/write operation - * @STATUS_FW_WAIT_DUMP: if set, wait until cleared before collecting dump */ enum iwl_trans_status { STATUS_SYNC_HCMD_ACTIVE, @@ -351,7 +350,6 @@ enum iwl_trans_status { STATUS_TRANS_GOING_IDLE, STATUS_TRANS_IDLE, STATUS_TRANS_DEAD, - STATUS_FW_WAIT_DUMP, }; static inline int @@ -832,7 +830,6 @@ struct iwl_trans { u32 lmac_error_event_table[2]; u32 umac_error_event_table; unsigned int error_event_table_tlv_status; - wait_queue_head_t fw_halt_waitq; /* pointer to trans specific struct */ /*Ensure that this pointer will always be aligned to sizeof pointer */ @@ -1240,10 +1237,6 @@ static inline void iwl_trans_fw_error(struct iwl_trans *trans) /* prevent double restarts due to the same erroneous FW */ if (!test_and_set_bit(STATUS_FW_ERROR, &trans->status)) iwl_op_mode_nic_error(trans->op_mode); - - if (test_and_clear_bit(STATUS_FW_WAIT_DUMP, &trans->status)) - wake_up(&trans->fw_halt_waitq); - } static inline void iwl_trans_sync_nmi(struct iwl_trans *trans) -- cgit v1.2.3 From 0d5bad14226af0712c6eed06059a596fc89a4605 Mon Sep 17 00:00:00 2001 From: Ihab Zhaika Date: Mon, 25 Feb 2019 07:35:42 +0200 Subject: iwlwifi: rename structs to fit the new names rename few structs to fit the new marketing names Signed-off-by: Ihab Zhaika Signed-off-by: Luca Coelho --- drivers/net/wireless/intel/iwlwifi/cfg/22000.c | 12 ++++++------ drivers/net/wireless/intel/iwlwifi/iwl-config.h | 2 +- drivers/net/wireless/intel/iwlwifi/pcie/drv.c | 12 ++++++------ drivers/net/wireless/intel/iwlwifi/pcie/trans.c | 2 +- 4 files changed, 14 insertions(+), 14 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/cfg/22000.c b/drivers/net/wireless/intel/iwlwifi/cfg/22000.c index fdc56f821b5a..d5c29298ae3e 100644 --- a/drivers/net/wireless/intel/iwlwifi/cfg/22000.c +++ b/drivers/net/wireless/intel/iwlwifi/cfg/22000.c @@ -235,8 +235,8 @@ const struct iwl_cfg iwl_ax101_cfg_qu_hr = { .max_tx_agg_size = IEEE80211_MAX_AMPDU_BUF_HT, }; -const struct iwl_cfg iwl22260_2ax_cfg = { - .name = "Intel(R) Wireless-AX 22260", +const struct iwl_cfg iwl_ax200_cfg_cc = { + .name = "Intel(R) Wi-Fi 6 AX200 160MHz", .fw_name_pre = IWL_CC_A_FW_PRE, IWL_DEVICE_22500, /* @@ -249,7 +249,7 @@ const struct iwl_cfg iwl22260_2ax_cfg = { }; const struct iwl_cfg killer1650x_2ax_cfg = { - .name = "Killer(R) Wireless-AX 1650x Wireless Network Adapter (200NGW)", + .name = "Killer(R) Wi-Fi 6 AX1650x 160MHz Wireless Network Adapter (200NGW)", .fw_name_pre = IWL_CC_A_FW_PRE, IWL_DEVICE_22500, /* @@ -262,7 +262,7 @@ const struct iwl_cfg killer1650x_2ax_cfg = { }; const struct iwl_cfg killer1650w_2ax_cfg = { - .name = "Killer(R) Wireless-AX 1650w Wireless Network Adapter (200D2W)", + .name = "Killer(R) Wi-Fi 6 AX1650w 160MHz Wireless Network Adapter (200D2W)", .fw_name_pre = IWL_CC_A_FW_PRE, IWL_DEVICE_22500, /* @@ -328,7 +328,7 @@ const struct iwl_cfg killer1550s_2ac_cfg_qu_b0_jf_b0 = { }; const struct iwl_cfg killer1650s_2ax_cfg_qu_b0_hr_b0 = { - .name = "Killer(R) Wireless-AX 1650i Wireless Network Adapter (22560NGW)", + .name = "Killer(R) Wi-Fi 6 AX1650i 160MHz Wireless Network Adapter (201NGW)", .fw_name_pre = IWL_22000_QU_B_HR_B_FW_PRE, IWL_DEVICE_22500, /* @@ -340,7 +340,7 @@ const struct iwl_cfg killer1650s_2ax_cfg_qu_b0_hr_b0 = { }; const struct iwl_cfg killer1650i_2ax_cfg_qu_b0_hr_b0 = { - .name = "Killer(R) Wireless-AX 1650s Wireless Network Adapter (22560D2W)", + .name = "Killer(R) Wi-Fi 6 AX1650s 160MHz Wireless Network Adapter (201D2W)", .fw_name_pre = IWL_22000_QU_B_HR_B_FW_PRE, IWL_DEVICE_22500, /* diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-config.h b/drivers/net/wireless/intel/iwlwifi/iwl-config.h index f5f87773667b..c91b537fa7ff 100644 --- a/drivers/net/wireless/intel/iwlwifi/iwl-config.h +++ b/drivers/net/wireless/intel/iwlwifi/iwl-config.h @@ -550,7 +550,7 @@ extern const struct iwl_cfg iwl22000_2ac_cfg_hr_cdb; extern const struct iwl_cfg iwl22000_2ac_cfg_jf; extern const struct iwl_cfg iwl_ax101_cfg_qu_hr; extern const struct iwl_cfg iwl22000_2ax_cfg_hr; -extern const struct iwl_cfg iwl22260_2ax_cfg; +extern const struct iwl_cfg iwl_ax200_cfg_cc; extern const struct iwl_cfg killer1650s_2ax_cfg_qu_b0_hr_b0; extern const struct iwl_cfg killer1650i_2ax_cfg_qu_b0_hr_b0; extern const struct iwl_cfg killer1650x_2ax_cfg; diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/drv.c b/drivers/net/wireless/intel/iwlwifi/pcie/drv.c index 2b94e4cef56c..c35e50359d2c 100644 --- a/drivers/net/wireless/intel/iwlwifi/pcie/drv.c +++ b/drivers/net/wireless/intel/iwlwifi/pcie/drv.c @@ -953,14 +953,14 @@ static const struct pci_device_id iwl_hw_card_ids[] = { {IWL_PCI_DEVICE(0xA0F0, 0x1652, killer1650i_2ax_cfg_qu_b0_hr_b0)}, {IWL_PCI_DEVICE(0xA0F0, 0x4070, iwl_ax101_cfg_qu_hr)}, - {IWL_PCI_DEVICE(0x2723, 0x0080, iwl22260_2ax_cfg)}, - {IWL_PCI_DEVICE(0x2723, 0x0084, iwl22260_2ax_cfg)}, - {IWL_PCI_DEVICE(0x2723, 0x0088, iwl22260_2ax_cfg)}, - {IWL_PCI_DEVICE(0x2723, 0x008C, iwl22260_2ax_cfg)}, + {IWL_PCI_DEVICE(0x2723, 0x0080, iwl_ax200_cfg_cc)}, + {IWL_PCI_DEVICE(0x2723, 0x0084, iwl_ax200_cfg_cc)}, + {IWL_PCI_DEVICE(0x2723, 0x0088, iwl_ax200_cfg_cc)}, + {IWL_PCI_DEVICE(0x2723, 0x008C, iwl_ax200_cfg_cc)}, {IWL_PCI_DEVICE(0x2723, 0x1653, killer1650w_2ax_cfg)}, {IWL_PCI_DEVICE(0x2723, 0x1654, killer1650x_2ax_cfg)}, - {IWL_PCI_DEVICE(0x2723, 0x4080, iwl22260_2ax_cfg)}, - {IWL_PCI_DEVICE(0x2723, 0x4088, iwl22260_2ax_cfg)}, + {IWL_PCI_DEVICE(0x2723, 0x4080, iwl_ax200_cfg_cc)}, + {IWL_PCI_DEVICE(0x2723, 0x4088, iwl_ax200_cfg_cc)}, {IWL_PCI_DEVICE(0x1a56, 0x1653, killer1650w_2ax_cfg)}, {IWL_PCI_DEVICE(0x1a56, 0x1654, killer1650x_2ax_cfg)}, diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/trans.c b/drivers/net/wireless/intel/iwlwifi/pcie/trans.c index 28d6ae8c9336..1d6f3053f233 100644 --- a/drivers/net/wireless/intel/iwlwifi/pcie/trans.c +++ b/drivers/net/wireless/intel/iwlwifi/pcie/trans.c @@ -3561,7 +3561,7 @@ struct iwl_trans *iwl_trans_pcie_alloc(struct pci_dev *pdev, } } else if (CSR_HW_RF_ID_TYPE_CHIP_ID(trans->hw_rf_id) == CSR_HW_RF_ID_TYPE_CHIP_ID(CSR_HW_RF_ID_TYPE_HR) && - (trans->cfg != &iwl22260_2ax_cfg || + (trans->cfg != &iwl_ax200_cfg_cc || trans->hw_rev == CSR_HW_REV_TYPE_QNJ_B0)) { u32 hw_status; -- cgit v1.2.3 From 972d8e1377795556024e948357e82532890f2f7d Mon Sep 17 00:00:00 2001 From: Ihab Zhaika Date: Mon, 25 Feb 2019 08:07:03 +0200 Subject: iwlwifi: add new 0x2723/0x2080 card for 22000 add new PCI ID 0x2723/0x2080 for 22000 series Signed-off-by: Ihab Zhaika Signed-off-by: Luca Coelho --- drivers/net/wireless/intel/iwlwifi/pcie/drv.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/drv.c b/drivers/net/wireless/intel/iwlwifi/pcie/drv.c index c35e50359d2c..9f1af8da9dc1 100644 --- a/drivers/net/wireless/intel/iwlwifi/pcie/drv.c +++ b/drivers/net/wireless/intel/iwlwifi/pcie/drv.c @@ -959,6 +959,7 @@ static const struct pci_device_id iwl_hw_card_ids[] = { {IWL_PCI_DEVICE(0x2723, 0x008C, iwl_ax200_cfg_cc)}, {IWL_PCI_DEVICE(0x2723, 0x1653, killer1650w_2ax_cfg)}, {IWL_PCI_DEVICE(0x2723, 0x1654, killer1650x_2ax_cfg)}, + {IWL_PCI_DEVICE(0x2723, 0x2080, iwl_ax200_cfg_cc)}, {IWL_PCI_DEVICE(0x2723, 0x4080, iwl_ax200_cfg_cc)}, {IWL_PCI_DEVICE(0x2723, 0x4088, iwl_ax200_cfg_cc)}, -- cgit v1.2.3 From 3e2cf62efec52fb49daed437cc486c3cb9a0afa2 Mon Sep 17 00:00:00 2001 From: Janusz Krzysztofik Date: Tue, 19 Mar 2019 21:19:52 +0100 Subject: ARM: OMAP1: ams-delta: Fix broken GPIO ID allocation In order to request dynamic allocationn of GPIO IDs, a negative number should be passed as a base GPIO ID via platform data. Unfortuntely, commit 771e53c4d1a1 ("ARM: OMAP1: ams-delta: Drop board specific global GPIO numbers") didn't follow that rule while switching to dynamically allocated GPIO IDs for Amstrad Delta latches, making their IDs overlapping with those already assigned to OMAP GPIO devices. Fix it. Fixes: 771e53c4d1a1 ("ARM: OMAP1: ams-delta: Drop board specific global GPIO numbers") Signed-off-by: Janusz Krzysztofik Cc: stable@vger.kernel.org Acked-by: Aaro Koskinen Signed-off-by: Tony Lindgren --- arch/arm/mach-omap1/board-ams-delta.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/arm/mach-omap1/board-ams-delta.c b/arch/arm/mach-omap1/board-ams-delta.c index be30c3c061b4..1b15d593837e 100644 --- a/arch/arm/mach-omap1/board-ams-delta.c +++ b/arch/arm/mach-omap1/board-ams-delta.c @@ -182,6 +182,7 @@ static struct resource latch1_resources[] = { static struct bgpio_pdata latch1_pdata = { .label = LATCH1_LABEL, + .base = -1, .ngpio = LATCH1_NGPIO, }; @@ -219,6 +220,7 @@ static struct resource latch2_resources[] = { static struct bgpio_pdata latch2_pdata = { .label = LATCH2_LABEL, + .base = -1, .ngpio = LATCH2_NGPIO, }; -- cgit v1.2.3 From 30645307e5d2c8a4caf978558c66121ac91ad17e Mon Sep 17 00:00:00 2001 From: Julia Lawall Date: Sat, 23 Feb 2019 14:20:42 +0100 Subject: ARM: OMAP2+: add missing of_node_put after of_device_is_available Add an of_node_put when a tested device node is not available. The semantic patch that fixes this problem is as follows (http://coccinelle.lip6.fr): // @@ identifier f; local idexpression e; expression x; @@ e = f(...); ... when != of_node_put(e) when != x = e when != e = x when any if (<+...of_device_is_available(e)...+>) { ... when != of_node_put(e) ( return e; | + of_node_put(e); return ...; ) } // Fixes: e0c827aca0730 ("drm/omap: Populate DSS children in omapdss driver") Signed-off-by: Julia Lawall Signed-off-by: Tony Lindgren --- arch/arm/mach-omap2/display.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/arch/arm/mach-omap2/display.c b/arch/arm/mach-omap2/display.c index 1444b4b4bd9f..439e143cad7b 100644 --- a/arch/arm/mach-omap2/display.c +++ b/arch/arm/mach-omap2/display.c @@ -250,8 +250,10 @@ static int __init omapdss_init_of(void) if (!node) return 0; - if (!of_device_is_available(node)) + if (!of_device_is_available(node)) { + of_node_put(node); return 0; + } pdev = of_find_device_by_node(node); -- cgit v1.2.3 From 4f96dc0a3e79ec257a2b082dab3ee694ff88c317 Mon Sep 17 00:00:00 2001 From: Peter Ujfalusi Date: Fri, 15 Mar 2019 12:59:09 +0200 Subject: ARM: dts: am335x-evm: Correct the regulators for the audio codec Correctly map the regulators used by tlv320aic3106. Both 1.8V and 3.3V for the codec is derived from VBAT via fixed regulators. Cc: # v4.14+ Signed-off-by: Peter Ujfalusi Signed-off-by: Tony Lindgren --- arch/arm/boot/dts/am335x-evm.dts | 26 ++++++++++++++++++++++---- 1 file changed, 22 insertions(+), 4 deletions(-) diff --git a/arch/arm/boot/dts/am335x-evm.dts b/arch/arm/boot/dts/am335x-evm.dts index dce5be5df97b..edcff79879e7 100644 --- a/arch/arm/boot/dts/am335x-evm.dts +++ b/arch/arm/boot/dts/am335x-evm.dts @@ -57,6 +57,24 @@ enable-active-high; }; + /* TPS79501 */ + v1_8d_reg: fixedregulator-v1_8d { + compatible = "regulator-fixed"; + regulator-name = "v1_8d"; + vin-supply = <&vbat>; + regulator-min-microvolt = <1800000>; + regulator-max-microvolt = <1800000>; + }; + + /* TPS79501 */ + v3_3d_reg: fixedregulator-v3_3d { + compatible = "regulator-fixed"; + regulator-name = "v3_3d"; + vin-supply = <&vbat>; + regulator-min-microvolt = <3300000>; + regulator-max-microvolt = <3300000>; + }; + matrix_keypad: matrix_keypad0 { compatible = "gpio-matrix-keypad"; debounce-delay-ms = <5>; @@ -499,10 +517,10 @@ status = "okay"; /* Regulators */ - AVDD-supply = <&vaux2_reg>; - IOVDD-supply = <&vaux2_reg>; - DRVDD-supply = <&vaux2_reg>; - DVDD-supply = <&vbat>; + AVDD-supply = <&v3_3d_reg>; + IOVDD-supply = <&v3_3d_reg>; + DRVDD-supply = <&v3_3d_reg>; + DVDD-supply = <&v1_8d_reg>; }; }; -- cgit v1.2.3 From 6691370646e844be98bb6558c024269791d20bd7 Mon Sep 17 00:00:00 2001 From: Peter Ujfalusi Date: Fri, 15 Mar 2019 12:59:17 +0200 Subject: ARM: dts: am335x-evmsk: Correct the regulators for the audio codec Correctly map the regulators used by tlv320aic3106. Both 1.8V and 3.3V for the codec is derived from VBAT via fixed regulators. Cc: # v4.14+ Signed-off-by: Peter Ujfalusi Signed-off-by: Tony Lindgren --- arch/arm/boot/dts/am335x-evmsk.dts | 26 ++++++++++++++++++++++---- 1 file changed, 22 insertions(+), 4 deletions(-) diff --git a/arch/arm/boot/dts/am335x-evmsk.dts b/arch/arm/boot/dts/am335x-evmsk.dts index b128998097ce..2c2d8b5b8cf5 100644 --- a/arch/arm/boot/dts/am335x-evmsk.dts +++ b/arch/arm/boot/dts/am335x-evmsk.dts @@ -73,6 +73,24 @@ enable-active-high; }; + /* TPS79518 */ + v1_8d_reg: fixedregulator-v1_8d { + compatible = "regulator-fixed"; + regulator-name = "v1_8d"; + vin-supply = <&vbat>; + regulator-min-microvolt = <1800000>; + regulator-max-microvolt = <1800000>; + }; + + /* TPS78633 */ + v3_3d_reg: fixedregulator-v3_3d { + compatible = "regulator-fixed"; + regulator-name = "v3_3d"; + vin-supply = <&vbat>; + regulator-min-microvolt = <3300000>; + regulator-max-microvolt = <3300000>; + }; + leds { pinctrl-names = "default"; pinctrl-0 = <&user_leds_s0>; @@ -501,10 +519,10 @@ status = "okay"; /* Regulators */ - AVDD-supply = <&vaux2_reg>; - IOVDD-supply = <&vaux2_reg>; - DRVDD-supply = <&vaux2_reg>; - DVDD-supply = <&vbat>; + AVDD-supply = <&v3_3d_reg>; + IOVDD-supply = <&v3_3d_reg>; + DRVDD-supply = <&v3_3d_reg>; + DVDD-supply = <&v1_8d_reg>; }; }; -- cgit v1.2.3 From 351f339faa308c1c1461314a18c832239a841ca0 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Tue, 12 Mar 2019 12:28:03 -0700 Subject: acpi/nfit: Always dump _DSM output payload The dynamic-debug statements for command payload output only get emitted when the command is not ND_CMD_CALL. Move the output payload dumping ahead of the early return path for ND_CMD_CALL. Fixes: 31eca76ba2fc9 ("...whitelisted dimm command marshaling mechanism") Reported-by: Vishal Verma Signed-off-by: Dan Williams --- drivers/acpi/nfit/core.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/acpi/nfit/core.c b/drivers/acpi/nfit/core.c index 5a389a4f4f65..f1ed0befe303 100644 --- a/drivers/acpi/nfit/core.c +++ b/drivers/acpi/nfit/core.c @@ -567,6 +567,12 @@ int acpi_nfit_ctl(struct nvdimm_bus_descriptor *nd_desc, struct nvdimm *nvdimm, goto out; } + dev_dbg(dev, "%s cmd: %s output length: %d\n", dimm_name, + cmd_name, out_obj->buffer.length); + print_hex_dump_debug(cmd_name, DUMP_PREFIX_OFFSET, 4, 4, + out_obj->buffer.pointer, + min_t(u32, 128, out_obj->buffer.length), true); + if (call_pkg) { call_pkg->nd_fw_size = out_obj->buffer.length; memcpy(call_pkg->nd_payload + call_pkg->nd_size_in, @@ -585,12 +591,6 @@ int acpi_nfit_ctl(struct nvdimm_bus_descriptor *nd_desc, struct nvdimm *nvdimm, return 0; } - dev_dbg(dev, "%s cmd: %s output length: %d\n", dimm_name, - cmd_name, out_obj->buffer.length); - print_hex_dump_debug(cmd_name, DUMP_PREFIX_OFFSET, 4, 4, - out_obj->buffer.pointer, - min_t(u32, 128, out_obj->buffer.length), true); - for (i = 0, offset = 0; i < desc->out_num; i++) { u32 out_size = nd_cmd_out_size(nvdimm, cmd, desc, i, buf, (u32 *) out_obj->buffer.pointer, -- cgit v1.2.3 From 55c1fc0af29a6c1b92f217b7eb7581a882e0c07c Mon Sep 17 00:00:00 2001 From: Kangjie Lu Date: Tue, 12 Mar 2019 03:20:34 -0500 Subject: libnvdimm/namespace: Fix a potential NULL pointer dereference In case kmemdup fails, the fix goes to blk_err to avoid NULL pointer dereference. Signed-off-by: Kangjie Lu Signed-off-by: Dan Williams --- drivers/nvdimm/namespace_devs.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/nvdimm/namespace_devs.c b/drivers/nvdimm/namespace_devs.c index 7849bf1812c4..f293556cbbf6 100644 --- a/drivers/nvdimm/namespace_devs.c +++ b/drivers/nvdimm/namespace_devs.c @@ -2249,9 +2249,12 @@ static struct device *create_namespace_blk(struct nd_region *nd_region, if (!nsblk->uuid) goto blk_err; memcpy(name, nd_label->name, NSLABEL_NAME_LEN); - if (name[0]) + if (name[0]) { nsblk->alt_name = kmemdup(name, NSLABEL_NAME_LEN, GFP_KERNEL); + if (!nsblk->alt_name) + goto blk_err; + } res = nsblk_add_resource(nd_region, ndd, nsblk, __le64_to_cpu(nd_label->dpa)); if (!res) -- cgit v1.2.3 From bfb57a91c2cb497c3780ed2e08f85d038efd0b7b Mon Sep 17 00:00:00 2001 From: Oded Gabbay Date: Sun, 24 Mar 2019 18:07:02 +0200 Subject: habanalabs: remove low credit limit of DMA #0 Because DMA #0 is now used by the user, remove the limitation of credits from this channel. Without this patch, this channel is pretty much unusable due to its very low bandwidth configuration. Signed-off-by: Oded Gabbay --- drivers/misc/habanalabs/goya/goya.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/drivers/misc/habanalabs/goya/goya.c b/drivers/misc/habanalabs/goya/goya.c index ea979ebd62fb..3c509e19d69d 100644 --- a/drivers/misc/habanalabs/goya/goya.c +++ b/drivers/misc/habanalabs/goya/goya.c @@ -1688,12 +1688,11 @@ static void goya_init_golden_registers(struct hl_device *hdev) /* * Workaround for H2 #HW-23 bug - * Set DMA max outstanding read requests to 240 on DMA CH 1. Set it - * to 16 on KMD DMA - * We need to limit only these DMAs because the user can only read + * Set DMA max outstanding read requests to 240 on DMA CH 1. + * This limitation is still large enough to not affect Gen4 bandwidth. + * We need to only limit that DMA channel because the user can only read * from Host using DMA CH 1 */ - WREG32(mmDMA_CH_0_CFG0, 0x0fff0010); WREG32(mmDMA_CH_1_CFG0, 0x0fff00F0); goya->hw_cap_initialized |= HW_CAP_GOLDEN; @@ -3693,7 +3692,7 @@ static int goya_validate_dma_pkt_mmu(struct hl_device *hdev, * WA for HW-23. * We can't allow user to read from Host using QMANs other than 1. */ - if (parser->hw_queue_id > GOYA_QUEUE_ID_DMA_1 && + if (parser->hw_queue_id != GOYA_QUEUE_ID_DMA_1 && hl_mem_area_inside_range(le64_to_cpu(user_dma_pkt->src_addr), le32_to_cpu(user_dma_pkt->tsize), hdev->asic_prop.va_space_host_start_address, -- cgit v1.2.3 From fccfb9ce70ed4ea7a145f77b86de62e38178517f Mon Sep 17 00:00:00 2001 From: Dragos Bogdan Date: Tue, 19 Mar 2019 12:47:00 +0200 Subject: iio: ad_sigma_delta: select channel when reading register The desired channel has to be selected in order to correctly fill the buffer with the corresponding data. The `ad_sd_write_reg()` already does this, but for the `ad_sd_read_reg_raw()` this was omitted. Fixes: af3008485ea03 ("iio:adc: Add common code for ADI Sigma Delta devices") Signed-off-by: Dragos Bogdan Signed-off-by: Alexandru Ardelean Cc: Signed-off-by: Jonathan Cameron --- drivers/iio/adc/ad_sigma_delta.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/iio/adc/ad_sigma_delta.c b/drivers/iio/adc/ad_sigma_delta.c index ff5f2da2e1b1..54d9978b2740 100644 --- a/drivers/iio/adc/ad_sigma_delta.c +++ b/drivers/iio/adc/ad_sigma_delta.c @@ -121,6 +121,7 @@ static int ad_sd_read_reg_raw(struct ad_sigma_delta *sigma_delta, if (sigma_delta->info->has_registers) { data[0] = reg << sigma_delta->info->addr_shift; data[0] |= sigma_delta->info->read_mask; + data[0] |= sigma_delta->comm; spi_message_add_tail(&t[0], &m); } spi_message_add_tail(&t[1], &m); -- cgit v1.2.3 From 4ba104f468bbfc27362c393815d03aa18fb7a20f Mon Sep 17 00:00:00 2001 From: Sven Eckelmann Date: Sat, 23 Feb 2019 14:27:10 +0100 Subject: batman-adv: Reduce claim hash refcnt only for removed entry The batadv_hash_remove is a function which searches the hashtable for an entry using a needle, a hashtable bucket selection function and a compare function. It will lock the bucket list and delete an entry when the compare function matches it with the needle. It returns the pointer to the hlist_node which matches or NULL when no entry matches the needle. The batadv_bla_del_claim is not itself protected in anyway to avoid that any other function is modifying the hashtable between the search for the entry and the call to batadv_hash_remove. It can therefore happen that the entry either doesn't exist anymore or an entry was deleted which is not the same object as the needle. In such an situation, the reference counter (for the reference stored in the hashtable) must not be reduced for the needle. Instead the reference counter of the actually removed entry has to be reduced. Otherwise the reference counter will underflow and the object might be freed before all its references were dropped. The kref helpers reported this problem as: refcount_t: underflow; use-after-free. Fixes: 23721387c409 ("batman-adv: add basic bridge loop avoidance code") Signed-off-by: Sven Eckelmann Signed-off-by: Simon Wunderlich --- net/batman-adv/bridge_loop_avoidance.c | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/net/batman-adv/bridge_loop_avoidance.c b/net/batman-adv/bridge_loop_avoidance.c index ef39aabdb694..4fb01108e5f5 100644 --- a/net/batman-adv/bridge_loop_avoidance.c +++ b/net/batman-adv/bridge_loop_avoidance.c @@ -803,6 +803,8 @@ static void batadv_bla_del_claim(struct batadv_priv *bat_priv, const u8 *mac, const unsigned short vid) { struct batadv_bla_claim search_claim, *claim; + struct batadv_bla_claim *claim_removed_entry; + struct hlist_node *claim_removed_node; ether_addr_copy(search_claim.addr, mac); search_claim.vid = vid; @@ -813,10 +815,18 @@ static void batadv_bla_del_claim(struct batadv_priv *bat_priv, batadv_dbg(BATADV_DBG_BLA, bat_priv, "%s(): %pM, vid %d\n", __func__, mac, batadv_print_vid(vid)); - batadv_hash_remove(bat_priv->bla.claim_hash, batadv_compare_claim, - batadv_choose_claim, claim); - batadv_claim_put(claim); /* reference from the hash is gone */ + claim_removed_node = batadv_hash_remove(bat_priv->bla.claim_hash, + batadv_compare_claim, + batadv_choose_claim, claim); + if (!claim_removed_node) + goto free_claim; + /* reference from the hash is gone */ + claim_removed_entry = hlist_entry(claim_removed_node, + struct batadv_bla_claim, hash_entry); + batadv_claim_put(claim_removed_entry); + +free_claim: /* don't need the reference from hash_find() anymore */ batadv_claim_put(claim); } -- cgit v1.2.3 From 3d65b9accab4a7ed5038f6df403fbd5e298398c7 Mon Sep 17 00:00:00 2001 From: Sven Eckelmann Date: Sat, 23 Feb 2019 14:27:10 +0100 Subject: batman-adv: Reduce tt_local hash refcnt only for removed entry The batadv_hash_remove is a function which searches the hashtable for an entry using a needle, a hashtable bucket selection function and a compare function. It will lock the bucket list and delete an entry when the compare function matches it with the needle. It returns the pointer to the hlist_node which matches or NULL when no entry matches the needle. The batadv_tt_local_remove is not itself protected in anyway to avoid that any other function is modifying the hashtable between the search for the entry and the call to batadv_hash_remove. It can therefore happen that the entry either doesn't exist anymore or an entry was deleted which is not the same object as the needle. In such an situation, the reference counter (for the reference stored in the hashtable) must not be reduced for the needle. Instead the reference counter of the actually removed entry has to be reduced. Otherwise the reference counter will underflow and the object might be freed before all its references were dropped. The kref helpers reported this problem as: refcount_t: underflow; use-after-free. Fixes: ef72706a0543 ("batman-adv: protect tt_local_entry from concurrent delete events") Signed-off-by: Sven Eckelmann Signed-off-by: Simon Wunderlich --- net/batman-adv/translation-table.c | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/net/batman-adv/translation-table.c b/net/batman-adv/translation-table.c index f73d79139ae7..b5cfc5068a90 100644 --- a/net/batman-adv/translation-table.c +++ b/net/batman-adv/translation-table.c @@ -1337,9 +1337,10 @@ u16 batadv_tt_local_remove(struct batadv_priv *bat_priv, const u8 *addr, unsigned short vid, const char *message, bool roaming) { + struct batadv_tt_local_entry *tt_removed_entry; struct batadv_tt_local_entry *tt_local_entry; u16 flags, curr_flags = BATADV_NO_FLAGS; - void *tt_entry_exists; + struct hlist_node *tt_removed_node; tt_local_entry = batadv_tt_local_hash_find(bat_priv, addr, vid); if (!tt_local_entry) @@ -1368,15 +1369,18 @@ u16 batadv_tt_local_remove(struct batadv_priv *bat_priv, const u8 *addr, */ batadv_tt_local_event(bat_priv, tt_local_entry, BATADV_TT_CLIENT_DEL); - tt_entry_exists = batadv_hash_remove(bat_priv->tt.local_hash, + tt_removed_node = batadv_hash_remove(bat_priv->tt.local_hash, batadv_compare_tt, batadv_choose_tt, &tt_local_entry->common); - if (!tt_entry_exists) + if (!tt_removed_node) goto out; - /* extra call to free the local tt entry */ - batadv_tt_local_entry_put(tt_local_entry); + /* drop reference of remove hash entry */ + tt_removed_entry = hlist_entry(tt_removed_node, + struct batadv_tt_local_entry, + common.hash_entry); + batadv_tt_local_entry_put(tt_removed_entry); out: if (tt_local_entry) -- cgit v1.2.3 From f131a56880d10932931e74773fb8702894a94a75 Mon Sep 17 00:00:00 2001 From: Sven Eckelmann Date: Sat, 23 Feb 2019 14:27:10 +0100 Subject: batman-adv: Reduce tt_global hash refcnt only for removed entry The batadv_hash_remove is a function which searches the hashtable for an entry using a needle, a hashtable bucket selection function and a compare function. It will lock the bucket list and delete an entry when the compare function matches it with the needle. It returns the pointer to the hlist_node which matches or NULL when no entry matches the needle. The batadv_tt_global_free is not itself protected in anyway to avoid that any other function is modifying the hashtable between the search for the entry and the call to batadv_hash_remove. It can therefore happen that the entry either doesn't exist anymore or an entry was deleted which is not the same object as the needle. In such an situation, the reference counter (for the reference stored in the hashtable) must not be reduced for the needle. Instead the reference counter of the actually removed entry has to be reduced. Otherwise the reference counter will underflow and the object might be freed before all its references were dropped. The kref helpers reported this problem as: refcount_t: underflow; use-after-free. Fixes: 7683fdc1e886 ("batman-adv: protect the local and the global trans-tables with rcu") Reported-by: Martin Weinelt Signed-off-by: Sven Eckelmann Acked-by: Antonio Quartulli Signed-off-by: Simon Wunderlich --- net/batman-adv/translation-table.c | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/net/batman-adv/translation-table.c b/net/batman-adv/translation-table.c index b5cfc5068a90..26c4e2493ddf 100644 --- a/net/batman-adv/translation-table.c +++ b/net/batman-adv/translation-table.c @@ -616,14 +616,26 @@ static void batadv_tt_global_free(struct batadv_priv *bat_priv, struct batadv_tt_global_entry *tt_global, const char *message) { + struct batadv_tt_global_entry *tt_removed_entry; + struct hlist_node *tt_removed_node; + batadv_dbg(BATADV_DBG_TT, bat_priv, "Deleting global tt entry %pM (vid: %d): %s\n", tt_global->common.addr, batadv_print_vid(tt_global->common.vid), message); - batadv_hash_remove(bat_priv->tt.global_hash, batadv_compare_tt, - batadv_choose_tt, &tt_global->common); - batadv_tt_global_entry_put(tt_global); + tt_removed_node = batadv_hash_remove(bat_priv->tt.global_hash, + batadv_compare_tt, + batadv_choose_tt, + &tt_global->common); + if (!tt_removed_node) + return; + + /* drop reference of remove hash entry */ + tt_removed_entry = hlist_entry(tt_removed_node, + struct batadv_tt_global_entry, + common.hash_entry); + batadv_tt_global_entry_put(tt_removed_entry); } /** -- cgit v1.2.3 From ca8c3b922e7032aff6cc3fd05548f4df1f3df90e Mon Sep 17 00:00:00 2001 From: Anders Roxell Date: Fri, 22 Feb 2019 16:25:54 +0100 Subject: batman-adv: fix warning in function batadv_v_elp_get_throughput MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When CONFIG_CFG80211 isn't enabled the compiler correcly warns about 'sinfo.pertid' may be unused. It can also happen for other error conditions that it not warn about. net/batman-adv/bat_v_elp.c: In function ‘batadv_v_elp_get_throughput.isra.0’: include/net/cfg80211.h:6370:13: warning: ‘sinfo.pertid’ may be used uninitialized in this function [-Wmaybe-uninitialized] kfree(sinfo->pertid); ~~~~~^~~~~~~~ Rework so that we only release '&sinfo' if cfg80211_get_station returns zero. Fixes: 7d652669b61d ("batman-adv: release station info tidstats") Signed-off-by: Anders Roxell Signed-off-by: Sven Eckelmann Signed-off-by: Simon Wunderlich --- net/batman-adv/bat_v_elp.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/net/batman-adv/bat_v_elp.c b/net/batman-adv/bat_v_elp.c index a9b7919c9de5..d5df0114f08a 100644 --- a/net/batman-adv/bat_v_elp.c +++ b/net/batman-adv/bat_v_elp.c @@ -104,8 +104,10 @@ static u32 batadv_v_elp_get_throughput(struct batadv_hardif_neigh_node *neigh) ret = cfg80211_get_station(real_netdev, neigh->addr, &sinfo); - /* free the TID stats immediately */ - cfg80211_sinfo_release_content(&sinfo); + if (!ret) { + /* free the TID stats immediately */ + cfg80211_sinfo_release_content(&sinfo); + } dev_put(real_netdev); if (ret == -ENOENT) { -- cgit v1.2.3 From 438b3d3fae4346a49fe12fa7cc1dc9327f006a91 Mon Sep 17 00:00:00 2001 From: Sven Eckelmann Date: Sun, 3 Mar 2019 19:25:26 +0100 Subject: batman-adv: Fix genl notification for throughput_override The throughput_override sysfs file is not below the meshif but below a hardif. The kobj has therefore not a pointer which can be used to find the batadv_priv data. The pointer stored in the hardif object must be used instead to find the correct meshif private data. Fixes: 7e6f461efe25 ("batman-adv: Trigger genl notification on sysfs config change") Signed-off-by: Sven Eckelmann Signed-off-by: Simon Wunderlich --- net/batman-adv/sysfs.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/net/batman-adv/sysfs.c b/net/batman-adv/sysfs.c index 0b4b3fb778a6..208655cf6717 100644 --- a/net/batman-adv/sysfs.c +++ b/net/batman-adv/sysfs.c @@ -1116,9 +1116,9 @@ static ssize_t batadv_store_throughput_override(struct kobject *kobj, struct attribute *attr, char *buff, size_t count) { - struct batadv_priv *bat_priv = batadv_kobj_to_batpriv(kobj); struct net_device *net_dev = batadv_kobj_to_netdev(kobj); struct batadv_hard_iface *hard_iface; + struct batadv_priv *bat_priv; u32 tp_override; u32 old_tp_override; bool ret; @@ -1147,7 +1147,10 @@ static ssize_t batadv_store_throughput_override(struct kobject *kobj, atomic_set(&hard_iface->bat_v.throughput_override, tp_override); - batadv_netlink_notify_hardif(bat_priv, hard_iface); + if (hard_iface->soft_iface) { + bat_priv = netdev_priv(hard_iface->soft_iface); + batadv_netlink_notify_hardif(bat_priv, hard_iface); + } out: batadv_hardif_put(hard_iface); -- cgit v1.2.3 From 8f71370f4b02730e8c27faf460af7a3586e24e1f Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Fri, 22 Mar 2019 15:39:48 -0700 Subject: ASoC: intel: Fix crash at suspend/resume after failed codec registration If codec registration fails after the ASoC Intel SST driver has been probed, the kernel will Oops and crash at suspend/resume. general protection fault: 0000 [#1] PREEMPT SMP KASAN PTI CPU: 1 PID: 2811 Comm: cat Tainted: G W 4.19.30 #15 Hardware name: GOOGLE Clapper, BIOS Google_Clapper.5216.199.7 08/22/2014 RIP: 0010:snd_soc_suspend+0x5a/0xd21 Code: 03 80 3c 10 00 49 89 d7 74 0b 48 89 df e8 71 72 c4 fe 4c 89 fa 48 8b 03 48 89 45 d0 48 8d 98 a0 01 00 00 48 89 d8 48 c1 e8 03 <8a> 04 10 84 c0 0f 85 85 0c 00 00 80 3b 00 0f 84 6b 0c 00 00 48 8b RSP: 0018:ffff888035407750 EFLAGS: 00010202 RAX: 0000000000000034 RBX: 00000000000001a0 RCX: 0000000000000000 RDX: dffffc0000000000 RSI: 0000000000000008 RDI: ffff88805c417098 RBP: ffff8880354077b0 R08: dffffc0000000000 R09: ffffed100b975718 R10: 0000000000000001 R11: ffffffff949ea4a3 R12: 1ffff1100b975746 R13: dffffc0000000000 R14: ffff88805cba4588 R15: dffffc0000000000 FS: 0000794a78e91b80(0000) GS:ffff888068d00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007bd5283ccf58 CR3: 000000004b7aa000 CR4: 00000000001006e0 Call Trace: ? dpm_complete+0x67b/0x67b ? i915_gem_suspend+0x14d/0x1ad sst_soc_prepare+0x91/0x1dd ? sst_be_hw_params+0x7e/0x7e dpm_prepare+0x39a/0x88b dpm_suspend_start+0x13/0x9d suspend_devices_and_enter+0x18f/0xbd7 ? arch_suspend_enable_irqs+0x11/0x11 ? printk+0xd9/0x12d ? lock_release+0x95f/0x95f ? log_buf_vmcoreinfo_setup+0x131/0x131 ? rcu_read_lock_sched_held+0x140/0x22a ? __bpf_trace_rcu_utilization+0xa/0xa ? __pm_pr_dbg+0x186/0x190 ? pm_notifier_call_chain+0x39/0x39 ? suspend_test+0x9d/0x9d pm_suspend+0x2f4/0x728 ? trace_suspend_resume+0x3da/0x3da ? lock_release+0x95f/0x95f ? kernfs_fop_write+0x19f/0x32d state_store+0xd8/0x147 ? sysfs_kf_read+0x155/0x155 kernfs_fop_write+0x23e/0x32d __vfs_write+0x108/0x608 ? vfs_read+0x2e9/0x2e9 ? rcu_read_lock_sched_held+0x140/0x22a ? __bpf_trace_rcu_utilization+0xa/0xa ? debug_smp_processor_id+0x10/0x10 ? selinux_file_permission+0x1c5/0x3c8 ? rcu_sync_lockdep_assert+0x6a/0xad ? __sb_start_write+0x129/0x2ac vfs_write+0x1aa/0x434 ksys_write+0xfe/0x1be ? __ia32_sys_read+0x82/0x82 do_syscall_64+0xcd/0x120 entry_SYSCALL_64_after_hwframe+0x49/0xbe In the observed situation, the problem is seen because the codec driver failed to probe due to a hardware problem. max98090 i2c-193C9890:00: Failed to read device revision: -1 max98090 i2c-193C9890:00: ASoC: failed to probe component -1 cht-bsw-max98090 cht-bsw-max98090: ASoC: failed to instantiate card -1 cht-bsw-max98090 cht-bsw-max98090: snd_soc_register_card failed -1 cht-bsw-max98090: probe of cht-bsw-max98090 failed with error -1 The problem is similar to the problem solved with commit 2fc995a87f2e ("ASoC: intel: Fix crash at suspend/resume without card registration"), but codec registration fails at a later point. At that time, the pointer checked with the above mentioned commit is already set, but it is not cleared if the device is subsequently removed. Adding a remove function to clear the pointer fixes the problem. Cc: stable@vger.kernel.org Cc: Jarkko Nikula Cc: Curtis Malainey Signed-off-by: Guenter Roeck Acked-by: Pierre-Louis Bossart Signed-off-by: Mark Brown --- sound/soc/intel/atom/sst-mfld-platform-pcm.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/sound/soc/intel/atom/sst-mfld-platform-pcm.c b/sound/soc/intel/atom/sst-mfld-platform-pcm.c index 08cea5b5cda9..0e8b1c5eec88 100644 --- a/sound/soc/intel/atom/sst-mfld-platform-pcm.c +++ b/sound/soc/intel/atom/sst-mfld-platform-pcm.c @@ -706,9 +706,17 @@ static int sst_soc_probe(struct snd_soc_component *component) return sst_dsp_init_v2_dpcm(component); } +static void sst_soc_remove(struct snd_soc_component *component) +{ + struct sst_data *drv = dev_get_drvdata(component->dev); + + drv->soc_card = NULL; +} + static const struct snd_soc_component_driver sst_soc_platform_drv = { .name = DRV_NAME, .probe = sst_soc_probe, + .remove = sst_soc_remove, .ops = &sst_platform_ops, .compr_ops = &sst_platform_compr_ops, .pcm_new = sst_pcm_new, -- cgit v1.2.3 From 836f90f9e2d11263f9c6d7610c82f3bc7335d9a6 Mon Sep 17 00:00:00 2001 From: Neil Armstrong Date: Fri, 15 Mar 2019 10:54:14 +0100 Subject: drm/bridge: dw-hdmi: disable SCDC configuration for invalid setups This patch is an attempt to limit HDMI 2.0 SCDC setup when : - the SoC embeds an HDMI 1.4 only controller - the EDID supports SCDC but not scrambling - the EDID supports SCDC scrambling but not for low TMDS bit rates, while only supporting low TMDS bit rates This to avoid communicating with the SCDC DDC slave uncessary, and setting the DW-HDMI TMDS Scrambler setup when not supported by the underlying hardware. Reported-by: Rob Herring Fixes: 264fce6cc2c1 ("drm/bridge: dw-hdmi: Add SCDC and TMDS Scrambling support") Signed-off-by: Neil Armstrong Tested-by: Rob Herring Reviewed-by: Andrzej Hajda Link: https://patchwork.freedesktop.org/patch/msgid/20190315095414.28520-1-narmstrong@baylibre.com --- drivers/gpu/drm/bridge/synopsys/dw-hdmi.c | 34 +++++++++++++++++++++++++++---- 1 file changed, 30 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/bridge/synopsys/dw-hdmi.c b/drivers/gpu/drm/bridge/synopsys/dw-hdmi.c index a63e5f0dae56..db761329a1e3 100644 --- a/drivers/gpu/drm/bridge/synopsys/dw-hdmi.c +++ b/drivers/gpu/drm/bridge/synopsys/dw-hdmi.c @@ -1037,6 +1037,31 @@ void dw_hdmi_phy_i2c_write(struct dw_hdmi *hdmi, unsigned short data, } EXPORT_SYMBOL_GPL(dw_hdmi_phy_i2c_write); +/* Filter out invalid setups to avoid configuring SCDC and scrambling */ +static bool dw_hdmi_support_scdc(struct dw_hdmi *hdmi) +{ + struct drm_display_info *display = &hdmi->connector.display_info; + + /* Completely disable SCDC support for older controllers */ + if (hdmi->version < 0x200a) + return false; + + /* Disable if SCDC is not supported, or if an HF-VSDB block is absent */ + if (!display->hdmi.scdc.supported || + !display->hdmi.scdc.scrambling.supported) + return false; + + /* + * Disable if display only support low TMDS rates and scrambling + * for low rates is not supported either + */ + if (!display->hdmi.scdc.scrambling.low_rates && + display->max_tmds_clock <= 340000) + return false; + + return true; +} + /* * HDMI2.0 Specifies the following procedure for High TMDS Bit Rates: * - The Source shall suspend transmission of the TMDS clock and data @@ -1055,7 +1080,7 @@ void dw_hdmi_set_high_tmds_clock_ratio(struct dw_hdmi *hdmi) unsigned long mtmdsclock = hdmi->hdmi_data.video_mode.mtmdsclock; /* Control for TMDS Bit Period/TMDS Clock-Period Ratio */ - if (hdmi->connector.display_info.hdmi.scdc.supported) { + if (dw_hdmi_support_scdc(hdmi)) { if (mtmdsclock > HDMI14_MAX_TMDSCLK) drm_scdc_set_high_tmds_clock_ratio(hdmi->ddc, 1); else @@ -1579,8 +1604,9 @@ static void hdmi_av_composer(struct dw_hdmi *hdmi, /* Set up HDMI_FC_INVIDCONF */ inv_val = (hdmi->hdmi_data.hdcp_enable || - vmode->mtmdsclock > HDMI14_MAX_TMDSCLK || - hdmi_info->scdc.scrambling.low_rates ? + (dw_hdmi_support_scdc(hdmi) && + (vmode->mtmdsclock > HDMI14_MAX_TMDSCLK || + hdmi_info->scdc.scrambling.low_rates)) ? HDMI_FC_INVIDCONF_HDCP_KEEPOUT_ACTIVE : HDMI_FC_INVIDCONF_HDCP_KEEPOUT_INACTIVE); @@ -1646,7 +1672,7 @@ static void hdmi_av_composer(struct dw_hdmi *hdmi, } /* Scrambling Control */ - if (hdmi_info->scdc.supported) { + if (dw_hdmi_support_scdc(hdmi)) { if (vmode->mtmdsclock > HDMI14_MAX_TMDSCLK || hdmi_info->scdc.scrambling.low_rates) { /* -- cgit v1.2.3 From d6f987c8462ab97591fbd6ed6bea6df61d2919e5 Mon Sep 17 00:00:00 2001 From: Martin Blumenstingl Date: Sun, 24 Mar 2019 17:43:27 +0100 Subject: clk: meson: pll: fix rounding and setting a rate that matches precisely Make meson_clk_pll_is_better() consider a rate that precisely matches the requested rate to be better than any previous rate (which was smaller than the current). Prior to commit 8eed1db1adec6a ("clk: meson: pll: update driver for the g12a") meson_clk_get_pll_settings() returned early (before calling meson_clk_pll_is_better()) if the rate from the current iteration matches the requested rate precisely. After this commit meson_clk_pll_is_better() is called unconditionally. This requires meson_clk_pll_is_better() to work with the case where "now == rate". This fixes a hang during boot on Meson8b / Odroid-C1 for me. Fixes: 8eed1db1adec6a ("clk: meson: pll: update driver for the g12a") Signed-off-by: Martin Blumenstingl Reviewed-by: Jerome Brunet Signed-off-by: Neil Armstrong Link: https://lkml.kernel.org/r/20190324164327.22590-2-martin.blumenstingl@googlemail.com --- drivers/clk/meson/clk-pll.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/clk/meson/clk-pll.c b/drivers/clk/meson/clk-pll.c index 41e16dd7272a..7a14ac9b2fec 100644 --- a/drivers/clk/meson/clk-pll.c +++ b/drivers/clk/meson/clk-pll.c @@ -120,7 +120,7 @@ static bool meson_clk_pll_is_better(unsigned long rate, return true; } else { /* Round down */ - if (now < rate && best < now) + if (now <= rate && best < now) return true; } -- cgit v1.2.3 From cacea3a90e211f0c111975535508d446a4a928d2 Mon Sep 17 00:00:00 2001 From: Pankaj Bharadiya Date: Fri, 22 Mar 2019 18:00:09 +0530 Subject: ASoC: dapm: Fix NULL pointer dereference in snd_soc_dapm_free_kcontrol w_text_param can be NULL and it is being dereferenced without checking. Add the missing sanity check to prevent NULL pointer dereference. Signed-off-by: Pankaj Bharadiya Acked-by: Pierre-Louis Bossart Signed-off-by: Mark Brown --- sound/soc/soc-dapm.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/sound/soc/soc-dapm.c b/sound/soc/soc-dapm.c index 1ec06ef6d161..67b032ca1601 100644 --- a/sound/soc/soc-dapm.c +++ b/sound/soc/soc-dapm.c @@ -3957,6 +3957,10 @@ snd_soc_dapm_free_kcontrol(struct snd_soc_card *card, int count; devm_kfree(card->dev, (void *)*private_value); + + if (!w_param_text) + return; + for (count = 0 ; count < num_params; count++) devm_kfree(card->dev, (void *)w_param_text[count]); devm_kfree(card->dev, w_param_text); -- cgit v1.2.3 From d040e4e8deeaa8257d6aa260e29ad69832b5d630 Mon Sep 17 00:00:00 2001 From: Douglas Anderson Date: Wed, 20 Mar 2019 13:14:00 -0700 Subject: ARM: dts: rockchip: Fix gpu opp node names for rk3288 The device tree compiler yells like this: Warning (unit_address_vs_reg): /gpu-opp-table/opp@100000000: node has a unit name, but no reg property Let's match the cpu opp node names and use a dash. Signed-off-by: Douglas Anderson Reviewed-by: Matthias Kaehlcke Signed-off-by: Heiko Stuebner --- arch/arm/boot/dts/rk3288.dtsi | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/arch/arm/boot/dts/rk3288.dtsi b/arch/arm/boot/dts/rk3288.dtsi index 09868dcee34b..df0c5456c94f 100644 --- a/arch/arm/boot/dts/rk3288.dtsi +++ b/arch/arm/boot/dts/rk3288.dtsi @@ -1282,27 +1282,27 @@ gpu_opp_table: gpu-opp-table { compatible = "operating-points-v2"; - opp@100000000 { + opp-100000000 { opp-hz = /bits/ 64 <100000000>; opp-microvolt = <950000>; }; - opp@200000000 { + opp-200000000 { opp-hz = /bits/ 64 <200000000>; opp-microvolt = <950000>; }; - opp@300000000 { + opp-300000000 { opp-hz = /bits/ 64 <300000000>; opp-microvolt = <1000000>; }; - opp@400000000 { + opp-400000000 { opp-hz = /bits/ 64 <400000000>; opp-microvolt = <1100000>; }; - opp@500000000 { + opp-500000000 { opp-hz = /bits/ 64 <500000000>; opp-microvolt = <1200000>; }; - opp@600000000 { + opp-600000000 { opp-hz = /bits/ 64 <600000000>; opp-microvolt = <1250000>; }; -- cgit v1.2.3 From 282e2e078ba5338c72150477b743794bc7523917 Mon Sep 17 00:00:00 2001 From: Douglas Anderson Date: Wed, 20 Mar 2019 13:14:01 -0700 Subject: ARM: dts: rockchip: Remove #address/#size-cells from rk3288 mipi_dsi They are pointless. As dtc points out: Warning (avoid_unnecessary_addr_size): /mipi@ff960000: unnecessary #address-cells/#size-cells without "ranges" or child "reg" property Let's remove them. Signed-off-by: Douglas Anderson Reviewed-by: Matthias Kaehlcke Signed-off-by: Heiko Stuebner --- arch/arm/boot/dts/rk3288.dtsi | 2 -- 1 file changed, 2 deletions(-) diff --git a/arch/arm/boot/dts/rk3288.dtsi b/arch/arm/boot/dts/rk3288.dtsi index df0c5456c94f..a024d1e7e74c 100644 --- a/arch/arm/boot/dts/rk3288.dtsi +++ b/arch/arm/boot/dts/rk3288.dtsi @@ -1119,8 +1119,6 @@ clock-names = "ref", "pclk"; power-domains = <&power RK3288_PD_VIO>; rockchip,grf = <&grf>; - #address-cells = <1>; - #size-cells = <0>; status = "disabled"; ports { -- cgit v1.2.3 From 1a96665143c355b1019ed13b927266185d2a1e4f Mon Sep 17 00:00:00 2001 From: Douglas Anderson Date: Wed, 20 Mar 2019 13:14:02 -0700 Subject: ARM: dts: rockchip: Remove #address/#size-cells from rk3288-veyron gpio-keys They are pointless. As dtc points out: Warning (avoid_unnecessary_addr_size): /gpio-keys: unnecessary #address-cells/#size-cells without "ranges" or child "reg" property Let's remove them. Signed-off-by: Douglas Anderson Reviewed-by: Matthias Kaehlcke Signed-off-by: Heiko Stuebner --- arch/arm/boot/dts/rk3288-veyron.dtsi | 2 -- 1 file changed, 2 deletions(-) diff --git a/arch/arm/boot/dts/rk3288-veyron.dtsi b/arch/arm/boot/dts/rk3288-veyron.dtsi index 0bc2409f6903..192dbc089ade 100644 --- a/arch/arm/boot/dts/rk3288-veyron.dtsi +++ b/arch/arm/boot/dts/rk3288-veyron.dtsi @@ -25,8 +25,6 @@ gpio_keys: gpio-keys { compatible = "gpio-keys"; - #address-cells = <1>; - #size-cells = <0>; pinctrl-names = "default"; pinctrl-0 = <&pwr_key_l>; -- cgit v1.2.3 From a6256b3a92cbaf3f5ff034ce09d5665607e2d7a4 Mon Sep 17 00:00:00 2001 From: Neil Armstrong Date: Mon, 4 Mar 2019 11:49:16 +0100 Subject: dt-bindings: reset: meson-g12a: Add missing USB2 PHY resets The G12A Documentation lacked these 2 reset lines, but they are present and used for each USB 2 PHYs. Add them to the dt-bindings for the upcoming USB support. Fixes: dbfc54534dfc ("dt-bindings: reset: meson: add g12a bindings") Signed-off-by: Neil Armstrong Reviewed-by: Martin Blumenstingl Signed-off-by: Philipp Zabel --- include/dt-bindings/reset/amlogic,meson-g12a-reset.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/include/dt-bindings/reset/amlogic,meson-g12a-reset.h b/include/dt-bindings/reset/amlogic,meson-g12a-reset.h index 8063e8314eef..6d487c5eba2c 100644 --- a/include/dt-bindings/reset/amlogic,meson-g12a-reset.h +++ b/include/dt-bindings/reset/amlogic,meson-g12a-reset.h @@ -51,7 +51,10 @@ #define RESET_SD_EMMC_A 44 #define RESET_SD_EMMC_B 45 #define RESET_SD_EMMC_C 46 -/* 47-60 */ +/* 47 */ +#define RESET_USB_PHY20 48 +#define RESET_USB_PHY21 49 +/* 50-60 */ #define RESET_AUDIO_CODEC 61 /* 62-63 */ /* RESET2 */ -- cgit v1.2.3 From 13e8a05b922457761ddef39cfff6231bd4ed9eef Mon Sep 17 00:00:00 2001 From: Axel Lin Date: Mon, 18 Mar 2019 22:03:52 +0800 Subject: reset: meson-audio-arb: Fix missing .owner setting of reset_controller_dev Set .owner to prevent module unloading while being used. Signed-off-by: Axel Lin Fixes: d903779b58be ("reset: meson: add meson audio arb driver") Signed-off-by: Philipp Zabel --- drivers/reset/reset-meson-audio-arb.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/reset/reset-meson-audio-arb.c b/drivers/reset/reset-meson-audio-arb.c index 91751617b37a..c53a2185a039 100644 --- a/drivers/reset/reset-meson-audio-arb.c +++ b/drivers/reset/reset-meson-audio-arb.c @@ -130,6 +130,7 @@ static int meson_audio_arb_probe(struct platform_device *pdev) arb->rstc.nr_resets = ARRAY_SIZE(axg_audio_arb_reset_bits); arb->rstc.ops = &meson_audio_arb_rstc_ops; arb->rstc.of_node = dev->of_node; + arb->rstc.owner = THIS_MODULE; /* * Enable general : -- cgit v1.2.3 From 1da6c4d9140cb7c13e87667dc4e1488d6c8fc10f Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Mon, 25 Mar 2019 15:54:43 +0100 Subject: bpf: fix use after free in bpf_evict_inode syzkaller was able to generate the following UAF in bpf: BUG: KASAN: use-after-free in lookup_last fs/namei.c:2269 [inline] BUG: KASAN: use-after-free in path_lookupat.isra.43+0x9f8/0xc00 fs/namei.c:2318 Read of size 1 at addr ffff8801c4865c47 by task syz-executor2/9423 CPU: 0 PID: 9423 Comm: syz-executor2 Not tainted 4.20.0-rc1-next-20181109+ #110 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Call Trace: __dump_stack lib/dump_stack.c:77 [inline] dump_stack+0x244/0x39d lib/dump_stack.c:113 print_address_description.cold.7+0x9/0x1ff mm/kasan/report.c:256 kasan_report_error mm/kasan/report.c:354 [inline] kasan_report.cold.8+0x242/0x309 mm/kasan/report.c:412 __asan_report_load1_noabort+0x14/0x20 mm/kasan/report.c:430 lookup_last fs/namei.c:2269 [inline] path_lookupat.isra.43+0x9f8/0xc00 fs/namei.c:2318 filename_lookup+0x26a/0x520 fs/namei.c:2348 user_path_at_empty+0x40/0x50 fs/namei.c:2608 user_path include/linux/namei.h:62 [inline] do_mount+0x180/0x1ff0 fs/namespace.c:2980 ksys_mount+0x12d/0x140 fs/namespace.c:3258 __do_sys_mount fs/namespace.c:3272 [inline] __se_sys_mount fs/namespace.c:3269 [inline] __x64_sys_mount+0xbe/0x150 fs/namespace.c:3269 do_syscall_64+0x1b9/0x820 arch/x86/entry/common.c:290 entry_SYSCALL_64_after_hwframe+0x49/0xbe RIP: 0033:0x457569 Code: fd b3 fb ff c3 66 2e 0f 1f 84 00 00 00 00 00 66 90 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 0f 83 cb b3 fb ff c3 66 2e 0f 1f 84 00 00 00 00 RSP: 002b:00007fde6ed96c78 EFLAGS: 00000246 ORIG_RAX: 00000000000000a5 RAX: ffffffffffffffda RBX: 0000000000000005 RCX: 0000000000457569 RDX: 0000000020000040 RSI: 0000000020000000 RDI: 0000000000000000 RBP: 000000000072bf00 R08: 0000000020000340 R09: 0000000000000000 R10: 0000000000200000 R11: 0000000000000246 R12: 00007fde6ed976d4 R13: 00000000004c2c24 R14: 00000000004d4990 R15: 00000000ffffffff Allocated by task 9424: save_stack+0x43/0xd0 mm/kasan/kasan.c:448 set_track mm/kasan/kasan.c:460 [inline] kasan_kmalloc+0xc7/0xe0 mm/kasan/kasan.c:553 __do_kmalloc mm/slab.c:3722 [inline] __kmalloc_track_caller+0x157/0x760 mm/slab.c:3737 kstrdup+0x39/0x70 mm/util.c:49 bpf_symlink+0x26/0x140 kernel/bpf/inode.c:356 vfs_symlink+0x37a/0x5d0 fs/namei.c:4127 do_symlinkat+0x242/0x2d0 fs/namei.c:4154 __do_sys_symlink fs/namei.c:4173 [inline] __se_sys_symlink fs/namei.c:4171 [inline] __x64_sys_symlink+0x59/0x80 fs/namei.c:4171 do_syscall_64+0x1b9/0x820 arch/x86/entry/common.c:290 entry_SYSCALL_64_after_hwframe+0x49/0xbe Freed by task 9425: save_stack+0x43/0xd0 mm/kasan/kasan.c:448 set_track mm/kasan/kasan.c:460 [inline] __kasan_slab_free+0x102/0x150 mm/kasan/kasan.c:521 kasan_slab_free+0xe/0x10 mm/kasan/kasan.c:528 __cache_free mm/slab.c:3498 [inline] kfree+0xcf/0x230 mm/slab.c:3817 bpf_evict_inode+0x11f/0x150 kernel/bpf/inode.c:565 evict+0x4b9/0x980 fs/inode.c:558 iput_final fs/inode.c:1550 [inline] iput+0x674/0xa90 fs/inode.c:1576 do_unlinkat+0x733/0xa30 fs/namei.c:4069 __do_sys_unlink fs/namei.c:4110 [inline] __se_sys_unlink fs/namei.c:4108 [inline] __x64_sys_unlink+0x42/0x50 fs/namei.c:4108 do_syscall_64+0x1b9/0x820 arch/x86/entry/common.c:290 entry_SYSCALL_64_after_hwframe+0x49/0xbe In this scenario path lookup under RCU is racing with the final unlink in case of symlinks. As Linus puts it in his analysis: [...] We actually RCU-delay the inode freeing itself, but when we do the final iput(), the "evict()" function is called synchronously. Now, the simple fix would seem to just RCU-delay the kfree() of the symlink data in bpf_evict_inode(). Maybe that's the right thing to do. [...] Al suggested to piggy-back on the ->destroy_inode() callback in order to implement RCU deferral there which can then kfree() the inode->i_link eventually right before putting inode back into inode cache. By reusing free_inode_nonrcu() from there we can avoid the need for our own inode cache and just reuse generic one as we currently do. And in-fact on top of all this we should just get rid of the bpf_evict_inode() entirely. This means truncate_inode_pages_final() and clear_inode() will then simply be called by the fs core via evict(). Dropping the reference should really only be done when inode is unhashed and nothing reachable anymore, so it's better also moved into the final ->destroy_inode() callback. Fixes: 0f98621bef5d ("bpf, inode: add support for symlinks and fix mtime/ctime") Reported-by: syzbot+fb731ca573367b7f6564@syzkaller.appspotmail.com Reported-by: syzbot+a13e5ead792d6df37818@syzkaller.appspotmail.com Reported-by: syzbot+7a8ba368b47fdefca61e@syzkaller.appspotmail.com Suggested-by: Al Viro Analyzed-by: Linus Torvalds Signed-off-by: Daniel Borkmann Acked-by: Alexei Starovoitov Acked-by: Linus Torvalds Acked-by: Al Viro Link: https://lore.kernel.org/lkml/0000000000006946d2057bbd0eef@google.com/T/ --- kernel/bpf/inode.c | 32 ++++++++++++++++++-------------- 1 file changed, 18 insertions(+), 14 deletions(-) diff --git a/kernel/bpf/inode.c b/kernel/bpf/inode.c index 2ada5e21dfa6..4a8f390a2b82 100644 --- a/kernel/bpf/inode.c +++ b/kernel/bpf/inode.c @@ -554,19 +554,6 @@ struct bpf_prog *bpf_prog_get_type_path(const char *name, enum bpf_prog_type typ } EXPORT_SYMBOL(bpf_prog_get_type_path); -static void bpf_evict_inode(struct inode *inode) -{ - enum bpf_type type; - - truncate_inode_pages_final(&inode->i_data); - clear_inode(inode); - - if (S_ISLNK(inode->i_mode)) - kfree(inode->i_link); - if (!bpf_inode_type(inode, &type)) - bpf_any_put(inode->i_private, type); -} - /* * Display the mount options in /proc/mounts. */ @@ -579,11 +566,28 @@ static int bpf_show_options(struct seq_file *m, struct dentry *root) return 0; } +static void bpf_destroy_inode_deferred(struct rcu_head *head) +{ + struct inode *inode = container_of(head, struct inode, i_rcu); + enum bpf_type type; + + if (S_ISLNK(inode->i_mode)) + kfree(inode->i_link); + if (!bpf_inode_type(inode, &type)) + bpf_any_put(inode->i_private, type); + free_inode_nonrcu(inode); +} + +static void bpf_destroy_inode(struct inode *inode) +{ + call_rcu(&inode->i_rcu, bpf_destroy_inode_deferred); +} + static const struct super_operations bpf_super_ops = { .statfs = simple_statfs, .drop_inode = generic_delete_inode, .show_options = bpf_show_options, - .evict_inode = bpf_evict_inode, + .destroy_inode = bpf_destroy_inode, }; enum { -- cgit v1.2.3 From dbb2483b2a46fbaf833cfb5deb5ed9cace9c7399 Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Fri, 22 Mar 2019 16:26:19 -0700 Subject: xfrm: clean up xfrm protocol checks In commit 6a53b7593233 ("xfrm: check id proto in validate_tmpl()") I introduced a check for xfrm protocol, but according to Herbert IPSEC_PROTO_ANY should only be used as a wildcard for lookup, so it should be removed from validate_tmpl(). And, IPSEC_PROTO_ANY is expected to only match 3 IPSec-specific protocols, this is why xfrm_state_flush() could still miss IPPROTO_ROUTING, which leads that those entries are left in net->xfrm.state_all before exit net. Fix this by replacing IPSEC_PROTO_ANY with zero. This patch also extracts the check from validate_tmpl() to xfrm_id_proto_valid() and uses it in parse_ipsecrequest(). With this, no other protocols should be added into xfrm. Fixes: 6a53b7593233 ("xfrm: check id proto in validate_tmpl()") Reported-by: syzbot+0bf0519d6e0de15914fe@syzkaller.appspotmail.com Cc: Steffen Klassert Cc: Herbert Xu Signed-off-by: Cong Wang Acked-by: Herbert Xu Signed-off-by: Steffen Klassert --- include/net/xfrm.h | 17 +++++++++++++++++ net/ipv6/xfrm6_tunnel.c | 2 +- net/key/af_key.c | 4 +++- net/xfrm/xfrm_state.c | 2 +- net/xfrm/xfrm_user.c | 14 +------------- 5 files changed, 23 insertions(+), 16 deletions(-) diff --git a/include/net/xfrm.h b/include/net/xfrm.h index 85386becbaea..902437dfbce7 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -1404,6 +1404,23 @@ static inline int xfrm_state_kern(const struct xfrm_state *x) return atomic_read(&x->tunnel_users); } +static inline bool xfrm_id_proto_valid(u8 proto) +{ + switch (proto) { + case IPPROTO_AH: + case IPPROTO_ESP: + case IPPROTO_COMP: +#if IS_ENABLED(CONFIG_IPV6) + case IPPROTO_ROUTING: + case IPPROTO_DSTOPTS: +#endif + return true; + default: + return false; + } +} + +/* IPSEC_PROTO_ANY only matches 3 IPsec protocols, 0 could match all. */ static inline int xfrm_id_proto_match(u8 proto, u8 userproto) { return (!userproto || proto == userproto || diff --git a/net/ipv6/xfrm6_tunnel.c b/net/ipv6/xfrm6_tunnel.c index 12cb3aa990af..d9e5f6808811 100644 --- a/net/ipv6/xfrm6_tunnel.c +++ b/net/ipv6/xfrm6_tunnel.c @@ -345,7 +345,7 @@ static void __net_exit xfrm6_tunnel_net_exit(struct net *net) unsigned int i; xfrm_flush_gc(); - xfrm_state_flush(net, IPSEC_PROTO_ANY, false, true); + xfrm_state_flush(net, 0, false, true); for (i = 0; i < XFRM6_TUNNEL_SPI_BYADDR_HSIZE; i++) WARN_ON_ONCE(!hlist_empty(&xfrm6_tn->spi_byaddr[i])); diff --git a/net/key/af_key.c b/net/key/af_key.c index 5651c29cb5bd..4af1e1d60b9f 100644 --- a/net/key/af_key.c +++ b/net/key/af_key.c @@ -1951,8 +1951,10 @@ parse_ipsecrequest(struct xfrm_policy *xp, struct sadb_x_ipsecrequest *rq) if (rq->sadb_x_ipsecrequest_mode == 0) return -EINVAL; + if (!xfrm_id_proto_valid(rq->sadb_x_ipsecrequest_proto)) + return -EINVAL; - t->id.proto = rq->sadb_x_ipsecrequest_proto; /* XXX check proto */ + t->id.proto = rq->sadb_x_ipsecrequest_proto; if ((mode = pfkey_mode_to_xfrm(rq->sadb_x_ipsecrequest_mode)) < 0) return -EINVAL; t->mode = mode; diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index 1bb971f46fc6..178baaa037e5 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -2384,7 +2384,7 @@ void xfrm_state_fini(struct net *net) flush_work(&net->xfrm.state_hash_work); flush_work(&xfrm_state_gc_work); - xfrm_state_flush(net, IPSEC_PROTO_ANY, false, true); + xfrm_state_flush(net, 0, false, true); WARN_ON(!list_empty(&net->xfrm.state_all)); diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index 8d4d52fd457b..6916931b1de1 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -1513,20 +1513,8 @@ static int validate_tmpl(int nr, struct xfrm_user_tmpl *ut, u16 family) return -EINVAL; } - switch (ut[i].id.proto) { - case IPPROTO_AH: - case IPPROTO_ESP: - case IPPROTO_COMP: -#if IS_ENABLED(CONFIG_IPV6) - case IPPROTO_ROUTING: - case IPPROTO_DSTOPTS: -#endif - case IPSEC_PROTO_ANY: - break; - default: + if (!xfrm_id_proto_valid(ut[i].id.proto)) return -EINVAL; - } - } return 0; -- cgit v1.2.3 From 8dfb4eba4100e7cdd161a8baef2d8d61b7a7e62e Mon Sep 17 00:00:00 2001 From: Sabrina Dubroca Date: Mon, 25 Mar 2019 14:30:00 +0100 Subject: esp4: add length check for UDP encapsulation esp_output_udp_encap can produce a length that doesn't fit in the 16 bits of a UDP header's length field. In that case, we'll send a fragmented packet whose length is larger than IP_MAX_MTU (resulting in "Oversized IP packet" warnings on receive) and with a bogus UDP length. To prevent this, add a length check to esp_output_udp_encap and return -EMSGSIZE on failure. This seems to be older than git history. Signed-off-by: Sabrina Dubroca Signed-off-by: Steffen Klassert --- net/ipv4/esp4.c | 20 +++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c index 10e809b296ec..fb065a8937ea 100644 --- a/net/ipv4/esp4.c +++ b/net/ipv4/esp4.c @@ -226,7 +226,7 @@ static void esp_output_fill_trailer(u8 *tail, int tfclen, int plen, __u8 proto) tail[plen - 1] = proto; } -static void esp_output_udp_encap(struct xfrm_state *x, struct sk_buff *skb, struct esp_info *esp) +static int esp_output_udp_encap(struct xfrm_state *x, struct sk_buff *skb, struct esp_info *esp) { int encap_type; struct udphdr *uh; @@ -234,6 +234,7 @@ static void esp_output_udp_encap(struct xfrm_state *x, struct sk_buff *skb, stru __be16 sport, dport; struct xfrm_encap_tmpl *encap = x->encap; struct ip_esp_hdr *esph = esp->esph; + unsigned int len; spin_lock_bh(&x->lock); sport = encap->encap_sport; @@ -241,11 +242,14 @@ static void esp_output_udp_encap(struct xfrm_state *x, struct sk_buff *skb, stru encap_type = encap->encap_type; spin_unlock_bh(&x->lock); + len = skb->len + esp->tailen - skb_transport_offset(skb); + if (len + sizeof(struct iphdr) >= IP_MAX_MTU) + return -EMSGSIZE; + uh = (struct udphdr *)esph; uh->source = sport; uh->dest = dport; - uh->len = htons(skb->len + esp->tailen - - skb_transport_offset(skb)); + uh->len = htons(len); uh->check = 0; switch (encap_type) { @@ -262,6 +266,8 @@ static void esp_output_udp_encap(struct xfrm_state *x, struct sk_buff *skb, stru *skb_mac_header(skb) = IPPROTO_UDP; esp->esph = esph; + + return 0; } int esp_output_head(struct xfrm_state *x, struct sk_buff *skb, struct esp_info *esp) @@ -275,8 +281,12 @@ int esp_output_head(struct xfrm_state *x, struct sk_buff *skb, struct esp_info * int tailen = esp->tailen; /* this is non-NULL only with UDP Encapsulation */ - if (x->encap) - esp_output_udp_encap(x, skb, esp); + if (x->encap) { + int err = esp_output_udp_encap(x, skb, esp); + + if (err < 0) + return err; + } if (!skb_cloned(skb)) { if (tailen <= skb_tailroom(skb)) { -- cgit v1.2.3 From 36e075ce74ec4e261a638bf09d10b3348ca4d883 Mon Sep 17 00:00:00 2001 From: Jenny TC Date: Sat, 23 Mar 2019 18:40:10 +0530 Subject: ASoC: Intel: Skylake: enable S24_LE format support To enable S24_LE format, sample_type in topology fw has to be set to 1. But sample_type defined in topology firmware configuration is not getting reflected in the dsp param. This patch sets sample_type in base config so that the sample type defined in the topology firmware is reflected in the dsp params. This issues was uncovered while debugging the S24_LE format which require the MSB byte in 32 bit word to be skipped. Setting sample_type in topology firmware to 1 helps to skip MSB byte word. Signed-off-by: Jenny TC Acked-by: Pierre-Louis Bossart Signed-off-by: Mark Brown --- sound/soc/intel/skylake/skl-messages.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/soc/intel/skylake/skl-messages.c b/sound/soc/intel/skylake/skl-messages.c index 28c4806b196a..4bf70b4429f0 100644 --- a/sound/soc/intel/skylake/skl-messages.c +++ b/sound/soc/intel/skylake/skl-messages.c @@ -483,6 +483,7 @@ static void skl_set_base_module_format(struct skl_sst *ctx, base_cfg->audio_fmt.bit_depth = format->bit_depth; base_cfg->audio_fmt.valid_bit_depth = format->valid_bit_depth; base_cfg->audio_fmt.ch_cfg = format->ch_cfg; + base_cfg->audio_fmt.sample_type = format->sample_type; dev_dbg(ctx->dev, "bit_depth=%x valid_bd=%x ch_config=%x\n", format->bit_depth, format->valid_bit_depth, -- cgit v1.2.3 From 7d56bedb2730dc2ea8abf0fd7240ee99ecfee3c9 Mon Sep 17 00:00:00 2001 From: Tony Lindgren Date: Tue, 26 Mar 2019 10:32:23 -0700 Subject: ARM: dts: Fix dcan clkctrl clock for am3 We must not use legacy clock defines for dts clckctrl clocks as the offsets will be wrong. Fixes: 87fc89ced3a7 ("ARM: dts: am335x: Move l4 child devices to probe them with ti-sysc") Cc: Tero Kristo Signed-off-by: Tony Lindgren --- arch/arm/boot/dts/am33xx-l4.dtsi | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm/boot/dts/am33xx-l4.dtsi b/arch/arm/boot/dts/am33xx-l4.dtsi index f459ec316a22..ca6d9f02a800 100644 --- a/arch/arm/boot/dts/am33xx-l4.dtsi +++ b/arch/arm/boot/dts/am33xx-l4.dtsi @@ -1762,7 +1762,7 @@ reg = <0xcc000 0x4>; reg-names = "rev"; /* Domains (P, C): per_pwrdm, l4ls_clkdm */ - clocks = <&l4ls_clkctrl AM3_D_CAN0_CLKCTRL 0>; + clocks = <&l4ls_clkctrl AM3_L4LS_D_CAN0_CLKCTRL 0>; clock-names = "fck"; #address-cells = <1>; #size-cells = <1>; @@ -1785,7 +1785,7 @@ reg = <0xd0000 0x4>; reg-names = "rev"; /* Domains (P, C): per_pwrdm, l4ls_clkdm */ - clocks = <&l4ls_clkctrl AM3_D_CAN1_CLKCTRL 0>; + clocks = <&l4ls_clkctrl AM3_L4LS_D_CAN1_CLKCTRL 0>; clock-names = "fck"; #address-cells = <1>; #size-cells = <1>; -- cgit v1.2.3 From 927cb78177ae3773d0d27404566a93cb8e88890c Mon Sep 17 00:00:00 2001 From: Paul Chaignon Date: Wed, 20 Mar 2019 13:58:27 +0100 Subject: bpf: remove incorrect 'verifier bug' warning The BPF verifier checks the maximum number of call stack frames twice, first in the main CFG traversal (do_check) and then in a subsequent traversal (check_max_stack_depth). If the second check fails, it logs a 'verifier bug' warning and errors out, as the number of call stack frames should have been verified already. However, the second check may fail without indicating a verifier bug: if the excessive function calls reside in dead code, the main CFG traversal may not visit them; the subsequent traversal visits all instructions, including dead code. This case raises the question of how invalid dead code should be treated. This patch implements the conservative option and rejects such code. Signed-off-by: Paul Chaignon Tested-by: Xiao Han Signed-off-by: Alexei Starovoitov --- kernel/bpf/verifier.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index fd502c1f71eb..6c5a41f7f338 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -1897,8 +1897,9 @@ continue_func: } frame++; if (frame >= MAX_CALL_FRAMES) { - WARN_ONCE(1, "verifier bug. Call stack is too deep\n"); - return -EFAULT; + verbose(env, "the call stack of %d frames is too deep !\n", + frame); + return -E2BIG; } goto process_func; } -- cgit v1.2.3 From cabacfbbe54ec6730b3c147599763892c6c03525 Mon Sep 17 00:00:00 2001 From: Paul Chaignon Date: Wed, 20 Mar 2019 13:58:50 +0100 Subject: selftests/bpf: test case for invalid call stack in dead code This patch adds a test case with an excessive number of call stack frames in dead code. Signed-off-by: Paul Chaignon Tested-by: Xiao Han Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/verifier/calls.c | 38 ++++++++++++++++++++++++++++ 1 file changed, 38 insertions(+) diff --git a/tools/testing/selftests/bpf/verifier/calls.c b/tools/testing/selftests/bpf/verifier/calls.c index f2ccae39ee66..fb11240b758b 100644 --- a/tools/testing/selftests/bpf/verifier/calls.c +++ b/tools/testing/selftests/bpf/verifier/calls.c @@ -907,6 +907,44 @@ .errstr = "call stack", .result = REJECT, }, +{ + "calls: stack depth check in dead code", + .insns = { + /* main */ + BPF_MOV64_IMM(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP|BPF_CALL, 0, 1, 0, 1), /* call A */ + BPF_EXIT_INSN(), + /* A */ + BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, 1), + BPF_RAW_INSN(BPF_JMP|BPF_CALL, 0, 1, 0, 2), /* call B */ + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + /* B */ + BPF_RAW_INSN(BPF_JMP|BPF_CALL, 0, 1, 0, 1), /* call C */ + BPF_EXIT_INSN(), + /* C */ + BPF_RAW_INSN(BPF_JMP|BPF_CALL, 0, 1, 0, 1), /* call D */ + BPF_EXIT_INSN(), + /* D */ + BPF_RAW_INSN(BPF_JMP|BPF_CALL, 0, 1, 0, 1), /* call E */ + BPF_EXIT_INSN(), + /* E */ + BPF_RAW_INSN(BPF_JMP|BPF_CALL, 0, 1, 0, 1), /* call F */ + BPF_EXIT_INSN(), + /* F */ + BPF_RAW_INSN(BPF_JMP|BPF_CALL, 0, 1, 0, 1), /* call G */ + BPF_EXIT_INSN(), + /* G */ + BPF_RAW_INSN(BPF_JMP|BPF_CALL, 0, 1, 0, 1), /* call H */ + BPF_EXIT_INSN(), + /* H */ + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_XDP, + .errstr = "call stack", + .result = REJECT, +}, { "calls: spill into caller stack frame", .insns = { -- cgit v1.2.3 From f52c97d9df983cc38a8809d0910e5eaba0c180b3 Mon Sep 17 00:00:00 2001 From: Jesper Dangaard Brouer Date: Mon, 25 Mar 2019 15:12:15 +0100 Subject: bpf, doc: fix BTF docs reflow of bullet list Section 2.2.1 BTF_KIND_INT a bullet list was collapsed due to text reflow in commit 9ab5305dbe3f ("docs/btf: reflow text to fill up to 78 characters"). This patch correct the mistake. Also adjust next bullet list, which is used for comparison, to get rendered the same way. Fixes: 9ab5305dbe3f ("docs/btf: reflow text to fill up to 78 characters") Link: https://www.kernel.org/doc/html/latest/bpf/btf.html#btf-kind-int Signed-off-by: Jesper Dangaard Brouer Acked-by: Andrii Nakryiko Signed-off-by: Alexei Starovoitov --- Documentation/bpf/btf.rst | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/Documentation/bpf/btf.rst b/Documentation/bpf/btf.rst index 9a60a5d60e38..7313d354f20e 100644 --- a/Documentation/bpf/btf.rst +++ b/Documentation/bpf/btf.rst @@ -148,16 +148,16 @@ The ``btf_type.size * 8`` must be equal to or greater than ``BTF_INT_BITS()`` for the type. The maximum value of ``BTF_INT_BITS()`` is 128. The ``BTF_INT_OFFSET()`` specifies the starting bit offset to calculate values -for this int. For example, a bitfield struct member has: * btf member bit -offset 100 from the start of the structure, * btf member pointing to an int -type, * the int type has ``BTF_INT_OFFSET() = 2`` and ``BTF_INT_BITS() = 4`` +for this int. For example, a bitfield struct member has: + * btf member bit offset 100 from the start of the structure, + * btf member pointing to an int type, + * the int type has ``BTF_INT_OFFSET() = 2`` and ``BTF_INT_BITS() = 4`` Then in the struct memory layout, this member will occupy ``4`` bits starting from bits ``100 + 2 = 102``. Alternatively, the bitfield struct member can be the following to access the same bits as the above: - * btf member bit offset 102, * btf member pointing to an int type, * the int type has ``BTF_INT_OFFSET() = 0`` and ``BTF_INT_BITS() = 4`` -- cgit v1.2.3 From b3ccbbce1e455b8454d3935eb9ae0a5f18939e24 Mon Sep 17 00:00:00 2001 From: Jacob Keller Date: Mon, 25 Feb 2019 11:20:05 -0800 Subject: i40e: fix i40e_ptp_adjtime when given a negative delta Commit 0ac30ce43323 ("i40e: fix up 32 bit timespec references", 2017-07-26) claims to be cleaning up references to 32-bit timespecs. The actual contents of the commit make no sense, as it converts a call to timespec64_add into timespec64_add_ns. This would seem ok, if (a) the change was documented in the commit message, and (b) timespec64_add_ns supported negative numbers. timespec64_add_ns doesn't work with signed deltas, because the implementation is based around iter_div_u64_rem. This change resulted in a regression where i40e_ptp_adjtime would interpret small negative adjustments as large positive additions, resulting in incorrect behavior. This commit doesn't appear to fix anything, is not well explained, and introduces a bug, so lets just revert it. Reverts: 0ac30ce43323 ("i40e: fix up 32 bit timespec references", 2017-07-26) Signed-off-by: Jacob Keller Reviewed-by: Arnd Bergmann Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/i40e/i40e_ptp.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e_ptp.c b/drivers/net/ethernet/intel/i40e/i40e_ptp.c index 5fb4353c742b..31575c0bb884 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_ptp.c +++ b/drivers/net/ethernet/intel/i40e/i40e_ptp.c @@ -146,12 +146,13 @@ static int i40e_ptp_adjfreq(struct ptp_clock_info *ptp, s32 ppb) static int i40e_ptp_adjtime(struct ptp_clock_info *ptp, s64 delta) { struct i40e_pf *pf = container_of(ptp, struct i40e_pf, ptp_caps); - struct timespec64 now; + struct timespec64 now, then; + then = ns_to_timespec64(delta); mutex_lock(&pf->tmreg_lock); i40e_ptp_read(pf, &now, NULL); - timespec64_add_ns(&now, delta); + now = timespec64_add(now, then); i40e_ptp_write(pf, (const struct timespec64 *)&now); mutex_unlock(&pf->tmreg_lock); -- cgit v1.2.3 From dabb8338be533c18f50255cf39ff4f66d4dabdbe Mon Sep 17 00:00:00 2001 From: Arvind Sankar Date: Sat, 2 Mar 2019 11:01:17 -0500 Subject: igb: Fix WARN_ONCE on runtime suspend The runtime_suspend device callbacks are not supposed to save configuration state or change the power state. Commit fb29f76cc566 ("igb: Fix an issue that PME is not enabled during runtime suspend") changed the driver to not save configuration state during runtime suspend, however the driver callback still put the device into a low-power state. This causes a warning in the pci pm core and results in pci_pm_runtime_suspend not calling pci_save_state or pci_finish_runtime_suspend. Fix this by not changing the power state either, leaving that to pci pm core, and make the same change for suspend callback as well. Also move a couple of defines into the appropriate header file instead of inline in the .c file. Fixes: fb29f76cc566 ("igb: Fix an issue that PME is not enabled during runtime suspend") Signed-off-by: Arvind Sankar Reviewed-by: Kai-Heng Feng Tested-by: Aaron Brown Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/igb/e1000_defines.h | 2 + drivers/net/ethernet/intel/igb/igb_main.c | 57 ++++---------------------- 2 files changed, 10 insertions(+), 49 deletions(-) diff --git a/drivers/net/ethernet/intel/igb/e1000_defines.h b/drivers/net/ethernet/intel/igb/e1000_defines.h index 01fcfc6f3415..d2e2c50ce257 100644 --- a/drivers/net/ethernet/intel/igb/e1000_defines.h +++ b/drivers/net/ethernet/intel/igb/e1000_defines.h @@ -194,6 +194,8 @@ /* enable link status from external LINK_0 and LINK_1 pins */ #define E1000_CTRL_SWDPIN0 0x00040000 /* SWDPIN 0 value */ #define E1000_CTRL_SWDPIN1 0x00080000 /* SWDPIN 1 value */ +#define E1000_CTRL_ADVD3WUC 0x00100000 /* D3 WUC */ +#define E1000_CTRL_EN_PHY_PWR_MGMT 0x00200000 /* PHY PM enable */ #define E1000_CTRL_SDP0_DIR 0x00400000 /* SDP0 Data direction */ #define E1000_CTRL_SDP1_DIR 0x00800000 /* SDP1 Data direction */ #define E1000_CTRL_RST 0x04000000 /* Global reset */ diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c index 69b230c53fed..3269d8e94744 100644 --- a/drivers/net/ethernet/intel/igb/igb_main.c +++ b/drivers/net/ethernet/intel/igb/igb_main.c @@ -8740,9 +8740,7 @@ static int __igb_shutdown(struct pci_dev *pdev, bool *enable_wake, struct e1000_hw *hw = &adapter->hw; u32 ctrl, rctl, status; u32 wufc = runtime ? E1000_WUFC_LNKC : adapter->wol; -#ifdef CONFIG_PM - int retval = 0; -#endif + bool wake; rtnl_lock(); netif_device_detach(netdev); @@ -8755,14 +8753,6 @@ static int __igb_shutdown(struct pci_dev *pdev, bool *enable_wake, igb_clear_interrupt_scheme(adapter); rtnl_unlock(); -#ifdef CONFIG_PM - if (!runtime) { - retval = pci_save_state(pdev); - if (retval) - return retval; - } -#endif - status = rd32(E1000_STATUS); if (status & E1000_STATUS_LU) wufc &= ~E1000_WUFC_LNKC; @@ -8779,10 +8769,6 @@ static int __igb_shutdown(struct pci_dev *pdev, bool *enable_wake, } ctrl = rd32(E1000_CTRL); - /* advertise wake from D3Cold */ - #define E1000_CTRL_ADVD3WUC 0x00100000 - /* phy power management enable */ - #define E1000_CTRL_EN_PHY_PWR_MGMT 0x00200000 ctrl |= E1000_CTRL_ADVD3WUC; wr32(E1000_CTRL, ctrl); @@ -8796,12 +8782,15 @@ static int __igb_shutdown(struct pci_dev *pdev, bool *enable_wake, wr32(E1000_WUFC, 0); } - *enable_wake = wufc || adapter->en_mng_pt; - if (!*enable_wake) + wake = wufc || adapter->en_mng_pt; + if (!wake) igb_power_down_link(adapter); else igb_power_up_link(adapter); + if (enable_wake) + *enable_wake = wake; + /* Release control of h/w to f/w. If f/w is AMT enabled, this * would have already happened in close and is redundant. */ @@ -8844,22 +8833,7 @@ static void igb_deliver_wake_packet(struct net_device *netdev) static int __maybe_unused igb_suspend(struct device *dev) { - int retval; - bool wake; - struct pci_dev *pdev = to_pci_dev(dev); - - retval = __igb_shutdown(pdev, &wake, 0); - if (retval) - return retval; - - if (wake) { - pci_prepare_to_sleep(pdev); - } else { - pci_wake_from_d3(pdev, false); - pci_set_power_state(pdev, PCI_D3hot); - } - - return 0; + return __igb_shutdown(to_pci_dev(dev), NULL, 0); } static int __maybe_unused igb_resume(struct device *dev) @@ -8930,22 +8904,7 @@ static int __maybe_unused igb_runtime_idle(struct device *dev) static int __maybe_unused igb_runtime_suspend(struct device *dev) { - struct pci_dev *pdev = to_pci_dev(dev); - int retval; - bool wake; - - retval = __igb_shutdown(pdev, &wake, 1); - if (retval) - return retval; - - if (wake) { - pci_prepare_to_sleep(pdev); - } else { - pci_wake_from_d3(pdev, false); - pci_set_power_state(pdev, PCI_D3hot); - } - - return 0; + return __igb_shutdown(to_pci_dev(dev), NULL, 1); } static int __maybe_unused igb_runtime_resume(struct device *dev) -- cgit v1.2.3 From 7ec52b9df7d7472240fa96223185894b1897aeb0 Mon Sep 17 00:00:00 2001 From: Ivan Vecera Date: Fri, 15 Mar 2019 09:45:15 +0100 Subject: ixgbe: fix mdio bus registration The ixgbe ignores errors returned from mdiobus_register() and leaves adapter->mii_bus non-NULL and MDIO bus state as MDIOBUS_ALLOCATED. This triggers a BUG from mdiobus_unregister() during ixgbe_remove() call. Fixes: 8fa10ef01260 ("ixgbe: register a mdiobus") Signed-off-by: Ivan Vecera Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/ixgbe/ixgbe_phy.c | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_phy.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_phy.c index cc4907f9ff02..2fb97967961c 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_phy.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_phy.c @@ -905,13 +905,12 @@ s32 ixgbe_mii_bus_init(struct ixgbe_hw *hw) struct pci_dev *pdev = adapter->pdev; struct device *dev = &adapter->netdev->dev; struct mii_bus *bus; + int err = -ENODEV; - adapter->mii_bus = devm_mdiobus_alloc(dev); - if (!adapter->mii_bus) + bus = devm_mdiobus_alloc(dev); + if (!bus) return -ENOMEM; - bus = adapter->mii_bus; - switch (hw->device_id) { /* C3000 SoCs */ case IXGBE_DEV_ID_X550EM_A_KR: @@ -949,12 +948,15 @@ s32 ixgbe_mii_bus_init(struct ixgbe_hw *hw) */ hw->phy.mdio.mode_support = MDIO_SUPPORTS_C45 | MDIO_SUPPORTS_C22; - return mdiobus_register(bus); + err = mdiobus_register(bus); + if (!err) { + adapter->mii_bus = bus; + return 0; + } ixgbe_no_mii_bus: devm_mdiobus_free(dev, bus); - adapter->mii_bus = NULL; - return -ENODEV; + return err; } /** -- cgit v1.2.3 From f669d24f3dd00beab452c0fc9257f6a942ffca9b Mon Sep 17 00:00:00 2001 From: Stefan Assmann Date: Thu, 21 Mar 2019 13:45:35 +0100 Subject: i40e: fix WoL support check The current check for WoL on i40e is broken. Code comment says only magic packet is supported, so only check for that. Fixes: 540a152da762 (i40e/ixgbe/igb: fail on new WoL flag setting WAKE_MAGICSECURE) Signed-off-by: Stefan Assmann Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/i40e/i40e_ethtool.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c index 4c885801fa26..7874d0ec7fb0 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c +++ b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c @@ -2573,8 +2573,7 @@ static int i40e_set_wol(struct net_device *netdev, struct ethtool_wolinfo *wol) return -EOPNOTSUPP; /* only magic packet is supported */ - if (wol->wolopts && (wol->wolopts != WAKE_MAGIC) - | (wol->wolopts != WAKE_FILTER)) + if (wol->wolopts & ~WAKE_MAGIC) return -EOPNOTSUPP; /* is this a new value? */ -- cgit v1.2.3 From 01ca667133d019edc9f0a1f70a272447c84ec41f Mon Sep 17 00:00:00 2001 From: Yue Haibing Date: Thu, 21 Mar 2019 22:42:23 +0800 Subject: fm10k: Fix a potential NULL pointer dereference Syzkaller report this: kasan: GPF could be caused by NULL-ptr deref or user memory access general protection fault: 0000 [#1] SMP KASAN PTI CPU: 0 PID: 4378 Comm: syz-executor.0 Tainted: G C 5.0.0+ #5 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.10.2-1ubuntu1 04/01/2014 RIP: 0010:__lock_acquire+0x95b/0x3200 kernel/locking/lockdep.c:3573 Code: 00 0f 85 28 1e 00 00 48 81 c4 08 01 00 00 5b 5d 41 5c 41 5d 41 5e 41 5f c3 4c 89 ea 48 b8 00 00 00 00 00 fc ff df 48 c1 ea 03 <80> 3c 02 00 0f 85 cc 24 00 00 49 81 7d 00 e0 de 03 a6 41 bc 00 00 RSP: 0018:ffff8881e3c07a40 EFLAGS: 00010002 RAX: dffffc0000000000 RBX: 0000000000000000 RCX: 0000000000000000 RDX: 0000000000000010 RSI: 0000000000000000 RDI: 0000000000000080 RBP: 0000000000000000 R08: 0000000000000001 R09: 0000000000000000 R10: ffff8881e3c07d98 R11: ffff8881c7f21f80 R12: 0000000000000001 R13: 0000000000000080 R14: 0000000000000000 R15: 0000000000000001 FS: 00007fce2252e700(0000) GS:ffff8881f2400000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007fffc7eb0228 CR3: 00000001e5bea002 CR4: 00000000007606f0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 PKRU: 55555554 Call Trace: lock_acquire+0xff/0x2c0 kernel/locking/lockdep.c:4211 __mutex_lock_common kernel/locking/mutex.c:925 [inline] __mutex_lock+0xdf/0x1050 kernel/locking/mutex.c:1072 drain_workqueue+0x24/0x3f0 kernel/workqueue.c:2934 destroy_workqueue+0x23/0x630 kernel/workqueue.c:4319 __do_sys_delete_module kernel/module.c:1018 [inline] __se_sys_delete_module kernel/module.c:961 [inline] __x64_sys_delete_module+0x30c/0x480 kernel/module.c:961 do_syscall_64+0x9f/0x450 arch/x86/entry/common.c:290 entry_SYSCALL_64_after_hwframe+0x49/0xbe RIP: 0033:0x462e99 Code: f7 d8 64 89 02 b8 ff ff ff ff c3 66 0f 1f 44 00 00 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 c7 c1 bc ff ff ff f7 d8 64 89 01 48 RSP: 002b:00007fce2252dc58 EFLAGS: 00000246 ORIG_RAX: 00000000000000b0 RAX: ffffffffffffffda RBX: 000000000073bf00 RCX: 0000000000462e99 RDX: 0000000000000000 RSI: 0000000000000000 RDI: 0000000020000140 RBP: 0000000000000002 R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000246 R12: 00007fce2252e6bc R13: 00000000004bcca9 R14: 00000000006f6b48 R15: 00000000ffffffff If alloc_workqueue fails, it should return -ENOMEM, otherwise may trigger this NULL pointer dereference while unloading drivers. Reported-by: Hulk Robot Fixes: 0a38c17a21a0 ("fm10k: Remove create_workqueue") Signed-off-by: Yue Haibing Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/fm10k/fm10k_main.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_main.c b/drivers/net/ethernet/intel/fm10k/fm10k_main.c index 5a0419421511..ecef949f3baa 100644 --- a/drivers/net/ethernet/intel/fm10k/fm10k_main.c +++ b/drivers/net/ethernet/intel/fm10k/fm10k_main.c @@ -41,6 +41,8 @@ static int __init fm10k_init_module(void) /* create driver workqueue */ fm10k_workqueue = alloc_workqueue("%s", WQ_MEM_RECLAIM, 0, fm10k_driver_name); + if (!fm10k_workqueue) + return -ENOMEM; fm10k_dbg_init(); -- cgit v1.2.3 From 738c06d0e4562e0acf9f2c7438a22b2d5afc67aa Mon Sep 17 00:00:00 2001 From: KT Liao Date: Tue, 26 Mar 2019 17:28:32 -0700 Subject: Input: elan_i2c - add hardware ID for multiple Lenovo laptops There are many Lenovo laptops which need elan_i2c support, this patch adds relevant IDs to the Elan driver so that touchpads are recognized. Signed-off-by: KT Liao Cc: stable@vger.kernel.org Signed-off-by: Dmitry Torokhov --- drivers/input/mouse/elan_i2c_core.c | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/drivers/input/mouse/elan_i2c_core.c b/drivers/input/mouse/elan_i2c_core.c index 628ef617bb2f..f9525d6f0bfe 100644 --- a/drivers/input/mouse/elan_i2c_core.c +++ b/drivers/input/mouse/elan_i2c_core.c @@ -1339,21 +1339,46 @@ static const struct acpi_device_id elan_acpi_id[] = { { "ELAN0600", 0 }, { "ELAN0601", 0 }, { "ELAN0602", 0 }, + { "ELAN0603", 0 }, + { "ELAN0604", 0 }, { "ELAN0605", 0 }, + { "ELAN0606", 0 }, + { "ELAN0607", 0 }, { "ELAN0608", 0 }, { "ELAN0609", 0 }, { "ELAN060B", 0 }, { "ELAN060C", 0 }, + { "ELAN060F", 0 }, + { "ELAN0610", 0 }, { "ELAN0611", 0 }, { "ELAN0612", 0 }, + { "ELAN0615", 0 }, + { "ELAN0616", 0 }, { "ELAN0617", 0 }, { "ELAN0618", 0 }, + { "ELAN0619", 0 }, + { "ELAN061A", 0 }, + { "ELAN061B", 0 }, { "ELAN061C", 0 }, { "ELAN061D", 0 }, { "ELAN061E", 0 }, + { "ELAN061F", 0 }, { "ELAN0620", 0 }, { "ELAN0621", 0 }, { "ELAN0622", 0 }, + { "ELAN0623", 0 }, + { "ELAN0624", 0 }, + { "ELAN0625", 0 }, + { "ELAN0626", 0 }, + { "ELAN0627", 0 }, + { "ELAN0628", 0 }, + { "ELAN0629", 0 }, + { "ELAN062A", 0 }, + { "ELAN062B", 0 }, + { "ELAN062C", 0 }, + { "ELAN062D", 0 }, + { "ELAN0631", 0 }, + { "ELAN0632", 0 }, { "ELAN1000", 0 }, { } }; -- cgit v1.2.3 From 07ba9e7be423423043c5090a2f395c0da26e1b3d Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Fri, 18 Jan 2019 11:18:17 -0800 Subject: Input: document meanings of KEY_SCREEN and KEY_ZOOM It is hard to say what KEY_SCREEN and KEY_ZOOM mean, but historically DVB folks have used them to indicate switch to full screen mode. Later, they converged on using KEY_ZOOM to switch into full screen mode and KEY)SCREEN to control aspect ratio (see Documentation/media/uapi/rc/rc-tables.rst). Let's commit to these uses, and define: - KEY_FULL_SCREEN (and make KEY_ZOOM its alias) - KEY_ASPECT_RATIO (and make KEY_SCREEN its alias) Signed-off-by: Dmitry Torokhov --- include/uapi/linux/input-event-codes.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/include/uapi/linux/input-event-codes.h b/include/uapi/linux/input-event-codes.h index ae366b87426a..bc5054e51bef 100644 --- a/include/uapi/linux/input-event-codes.h +++ b/include/uapi/linux/input-event-codes.h @@ -439,10 +439,12 @@ #define KEY_TITLE 0x171 #define KEY_SUBTITLE 0x172 #define KEY_ANGLE 0x173 -#define KEY_ZOOM 0x174 +#define KEY_FULL_SCREEN 0x174 /* AC View Toggle */ +#define KEY_ZOOM KEY_FULL_SCREEN #define KEY_MODE 0x175 #define KEY_KEYBOARD 0x176 -#define KEY_SCREEN 0x177 +#define KEY_ASPECT_RATIO 0x177 /* HUTRR37: Aspect */ +#define KEY_SCREEN KEY_ASPECT_RATIO #define KEY_PC 0x178 /* Media Select Computer */ #define KEY_TV 0x179 /* Media Select TV */ #define KEY_TV2 0x17a /* Media Select Cable */ -- cgit v1.2.3 From 2291da5b4d305a6506d915d5bef85dedd7c94882 Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Fri, 18 Jan 2019 13:37:40 -0800 Subject: [media] doc-rst: switch to new names for Full Screen/Aspect keys We defined better names for keys to activate full screen mode or change aspect ratio (while keeping the existing keycodes to avoid breaking userspace), so let's use them in the document. Signed-off-by: Dmitry Torokhov --- Documentation/media/uapi/rc/rc-tables.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Documentation/media/uapi/rc/rc-tables.rst b/Documentation/media/uapi/rc/rc-tables.rst index c8ae9479f842..57797e56f45e 100644 --- a/Documentation/media/uapi/rc/rc-tables.rst +++ b/Documentation/media/uapi/rc/rc-tables.rst @@ -616,7 +616,7 @@ the remote via /dev/input/event devices. - .. row 78 - - ``KEY_SCREEN`` + - ``KEY_ASPECT_RATIO`` - Select screen aspect ratio @@ -624,7 +624,7 @@ the remote via /dev/input/event devices. - .. row 79 - - ``KEY_ZOOM`` + - ``KEY_FULL_SCREEN`` - Put device into zoom/full screen mode -- cgit v1.2.3 From f7b3d85aa7a31a90c3ef5b0992604db339a615ab Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Fri, 18 Jan 2019 13:44:23 -0800 Subject: HID: input: fix mapping of aspect ratio key According to HUTRR37 usage 0x6d from the consumer usage page corresponds to action that selects the next available supported aspect ratio option on a device which outputs or displays video. However KEY_ZOOM means activate "Full Screen" mode, KEY_ASPECT_RATIO should be used instead. Signed-off-by: Dmitry Torokhov --- drivers/hid/hid-input.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/hid/hid-input.c b/drivers/hid/hid-input.c index d6fab5798487..def58c6aa835 100644 --- a/drivers/hid/hid-input.c +++ b/drivers/hid/hid-input.c @@ -891,7 +891,7 @@ static void hidinput_configure_usage(struct hid_input *hidinput, struct hid_fiel case 0x06a: map_key_clear(KEY_GREEN); break; case 0x06b: map_key_clear(KEY_BLUE); break; case 0x06c: map_key_clear(KEY_YELLOW); break; - case 0x06d: map_key_clear(KEY_ZOOM); break; + case 0x06d: map_key_clear(KEY_ASPECT_RATIO); break; case 0x06f: map_key_clear(KEY_BRIGHTNESSUP); break; case 0x070: map_key_clear(KEY_BRIGHTNESSDOWN); break; -- cgit v1.2.3 From 96dd86871e1fffbc39e4fa61c9c75ec54ee9af0f Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Fri, 18 Jan 2019 13:59:08 -0800 Subject: HID: input: add mapping for Expose/Overview key MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit According to HUTRR77 usage 0x29f from the consumer page is reserved for the Desktop application to present all running user’s application windows. Linux defines KEY_SCALE to request Compiz Scale (Expose) mode, so let's add the mapping. Signed-off-by: Dmitry Torokhov --- drivers/hid/hid-input.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/hid/hid-input.c b/drivers/hid/hid-input.c index def58c6aa835..5f800e7b04f2 100644 --- a/drivers/hid/hid-input.c +++ b/drivers/hid/hid-input.c @@ -1030,6 +1030,8 @@ static void hidinput_configure_usage(struct hid_input *hidinput, struct hid_fiel case 0x2cb: map_key_clear(KEY_KBDINPUTASSIST_ACCEPT); break; case 0x2cc: map_key_clear(KEY_KBDINPUTASSIST_CANCEL); break; + case 0x29f: map_key_clear(KEY_SCALE); break; + default: map_key_clear(KEY_UNKNOWN); } break; -- cgit v1.2.3 From 7975a1d6a7afeb3eb61c971a153d24dd8fa032f3 Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Fri, 18 Jan 2019 14:05:52 -0800 Subject: HID: input: add mapping for keyboard Brightness Up/Down/Toggle keys According to HUTRR73 usages 0x79, 0x7a and 0x7c from the consumer page correspond to Brightness Up/Down/Toggle keys, so let's add the mappings. Signed-off-by: Dmitry Torokhov --- drivers/hid/hid-input.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/hid/hid-input.c b/drivers/hid/hid-input.c index 5f800e7b04f2..cebe8a8cce2e 100644 --- a/drivers/hid/hid-input.c +++ b/drivers/hid/hid-input.c @@ -900,6 +900,10 @@ static void hidinput_configure_usage(struct hid_input *hidinput, struct hid_fiel case 0x074: map_key_clear(KEY_BRIGHTNESS_MAX); break; case 0x075: map_key_clear(KEY_BRIGHTNESS_AUTO); break; + case 0x079: map_key_clear(KEY_KBDILLUMUP); break; + case 0x07a: map_key_clear(KEY_KBDILLUMDOWN); break; + case 0x07c: map_key_clear(KEY_KBDILLUMTOGGLE); break; + case 0x082: map_key_clear(KEY_VIDEO_NEXT); break; case 0x083: map_key_clear(KEY_LAST); break; case 0x084: map_key_clear(KEY_ENTER); break; -- cgit v1.2.3 From afbbaa1bc0011d28f7604f9a7f0532f997c6f45e Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Fri, 18 Jan 2019 14:20:27 -0800 Subject: HID: input: add mapping for "Full Screen" key According to HUT 1.12 usage 0x232 from the consumer page is reserved for switching application between full screen and windowed mode, so let's add the mapping. Signed-off-by: Dmitry Torokhov --- drivers/hid/hid-input.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/hid/hid-input.c b/drivers/hid/hid-input.c index cebe8a8cce2e..ecb1b6f26853 100644 --- a/drivers/hid/hid-input.c +++ b/drivers/hid/hid-input.c @@ -1014,6 +1014,7 @@ static void hidinput_configure_usage(struct hid_input *hidinput, struct hid_fiel case 0x22d: map_key_clear(KEY_ZOOMIN); break; case 0x22e: map_key_clear(KEY_ZOOMOUT); break; case 0x22f: map_key_clear(KEY_ZOOMRESET); break; + case 0x232: map_key_clear(KEY_FULL_SCREEN); break; case 0x233: map_key_clear(KEY_SCROLLUP); break; case 0x234: map_key_clear(KEY_SCROLLDOWN); break; case 0x238: map_rel(REL_HWHEEL); break; -- cgit v1.2.3 From c01908a14bf735b871170092807c618bb9dae654 Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Fri, 18 Jan 2019 14:35:45 -0800 Subject: HID: input: add mapping for "Toggle Display" key According to HUT 1.12 usage 0xb5 from the generic desktop page is reserved for switching between external and internal display, so let's add the mapping. Signed-off-by: Dmitry Torokhov --- drivers/hid/hid-input.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/hid/hid-input.c b/drivers/hid/hid-input.c index ecb1b6f26853..da76358cde06 100644 --- a/drivers/hid/hid-input.c +++ b/drivers/hid/hid-input.c @@ -677,6 +677,14 @@ static void hidinput_configure_usage(struct hid_input *hidinput, struct hid_fiel break; } + if ((usage->hid & 0xf0) == 0xb0) { /* SC - Display */ + switch (usage->hid & 0xf) { + case 0x05: map_key_clear(KEY_SWITCHVIDEOMODE); break; + default: goto ignore; + } + break; + } + /* * Some lazy vendors declare 255 usages for System Control, * leading to the creation of ABS_X|Y axis and too many others. -- cgit v1.2.3 From dbee9c9c45846f003ec2f819710c2f4835630a6a Mon Sep 17 00:00:00 2001 From: Alan Kao Date: Fri, 22 Mar 2019 14:37:04 +0800 Subject: riscv: fix accessing 8-byte variable from RV32 A memory save operation to 8-byte variable in RV32 is divided into two sw instructions in the put_user macro. The current fixup returns execution flow to the second sw instead of the one after it. This patch fixes this fixup code according to the load access part. Signed-off-by: Alan Kao Cc: Greentime Hu Cc: Vincent Chen Signed-off-by: Palmer Dabbelt --- arch/riscv/include/asm/uaccess.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/riscv/include/asm/uaccess.h b/arch/riscv/include/asm/uaccess.h index a00168b980d2..fb53a8089e76 100644 --- a/arch/riscv/include/asm/uaccess.h +++ b/arch/riscv/include/asm/uaccess.h @@ -300,7 +300,7 @@ do { \ " .balign 4\n" \ "4:\n" \ " li %0, %6\n" \ - " jump 2b, %1\n" \ + " jump 3b, %1\n" \ " .previous\n" \ " .section __ex_table,\"a\"\n" \ " .balign " RISCV_SZPTR "\n" \ -- cgit v1.2.3 From 387181dcdb6c1ee254efab4744846a7a53d4c4cb Mon Sep 17 00:00:00 2001 From: Anup Patel Date: Tue, 26 Mar 2019 08:03:47 +0000 Subject: RISC-V: Always compile mm/init.c with cmodel=medany and notrace The Linux RISC-V 32bit kernel is broken after we moved setup_vm() from kernel/setup.c to mm/init.c because Linux RISC-V 32bit kernel by default uses cmodel=medlow which results in a non-position-independent setup_vm(). This patch fixes Linux RISC-V 32bit kernel booting by: 1. Forcing cmodel=medany for mm/init.c 2. Moving remaing MM-related stuff va_pa_offset, pfn_base and empty_zero_page from kernel/setup.c to mm/init.c Further, the setup_vm() cannot handle GCC instrumentation for FTRACE so we disable it for mm/init.c by not using "-pg" compiler flag. Fixes: 6f1e9e946f0b ("RISC-V: Move setup_vm() to mm/init.c") Suggested-by: Christoph Hellwig Suggested-by: Mike Rapoport Signed-off-by: Anup Patel Reviewed-by: Mike Rapoport Reviewed-by: Christoph Hellwig Signed-off-by: Palmer Dabbelt --- arch/riscv/kernel/Makefile | 3 --- arch/riscv/kernel/setup.c | 8 -------- arch/riscv/mm/Makefile | 6 ++++++ arch/riscv/mm/init.c | 28 ++++++++++++++++++++++++++++ 4 files changed, 34 insertions(+), 11 deletions(-) diff --git a/arch/riscv/kernel/Makefile b/arch/riscv/kernel/Makefile index f13f7f276639..598568168d35 100644 --- a/arch/riscv/kernel/Makefile +++ b/arch/riscv/kernel/Makefile @@ -4,7 +4,6 @@ ifdef CONFIG_FTRACE CFLAGS_REMOVE_ftrace.o = -pg -CFLAGS_REMOVE_setup.o = -pg endif extra-y += head.o @@ -29,8 +28,6 @@ obj-y += vdso.o obj-y += cacheinfo.o obj-y += vdso/ -CFLAGS_setup.o := -mcmodel=medany - obj-$(CONFIG_FPU) += fpu.o obj-$(CONFIG_SMP) += smpboot.o obj-$(CONFIG_SMP) += smp.o diff --git a/arch/riscv/kernel/setup.c b/arch/riscv/kernel/setup.c index ecb654f6a79e..540a331d1376 100644 --- a/arch/riscv/kernel/setup.c +++ b/arch/riscv/kernel/setup.c @@ -48,14 +48,6 @@ struct screen_info screen_info = { }; #endif -unsigned long va_pa_offset; -EXPORT_SYMBOL(va_pa_offset); -unsigned long pfn_base; -EXPORT_SYMBOL(pfn_base); - -unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)] __page_aligned_bss; -EXPORT_SYMBOL(empty_zero_page); - /* The lucky hart to first increment this variable will boot the other cores */ atomic_t hart_lottery; unsigned long boot_cpu_hartid; diff --git a/arch/riscv/mm/Makefile b/arch/riscv/mm/Makefile index eb22ab49b3e0..b68aac701803 100644 --- a/arch/riscv/mm/Makefile +++ b/arch/riscv/mm/Makefile @@ -1,3 +1,9 @@ + +CFLAGS_init.o := -mcmodel=medany +ifdef CONFIG_FTRACE +CFLAGS_REMOVE_init.o = -pg +endif + obj-y += init.o obj-y += fault.o obj-y += extable.o diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c index b379a75ac6a6..5fd8c922e1c2 100644 --- a/arch/riscv/mm/init.c +++ b/arch/riscv/mm/init.c @@ -25,6 +25,10 @@ #include #include +unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)] + __page_aligned_bss; +EXPORT_SYMBOL(empty_zero_page); + static void __init zone_sizes_init(void) { unsigned long max_zone_pfns[MAX_NR_ZONES] = { 0, }; @@ -143,6 +147,11 @@ void __init setup_bootmem(void) } } +unsigned long va_pa_offset; +EXPORT_SYMBOL(va_pa_offset); +unsigned long pfn_base; +EXPORT_SYMBOL(pfn_base); + pgd_t swapper_pg_dir[PTRS_PER_PGD] __page_aligned_bss; pgd_t trampoline_pg_dir[PTRS_PER_PGD] __initdata __aligned(PAGE_SIZE); @@ -172,6 +181,25 @@ void __set_fixmap(enum fixed_addresses idx, phys_addr_t phys, pgprot_t prot) } } +/* + * setup_vm() is called from head.S with MMU-off. + * + * Following requirements should be honoured for setup_vm() to work + * correctly: + * 1) It should use PC-relative addressing for accessing kernel symbols. + * To achieve this we always use GCC cmodel=medany. + * 2) The compiler instrumentation for FTRACE will not work for setup_vm() + * so disable compiler instrumentation when FTRACE is enabled. + * + * Currently, the above requirements are honoured by using custom CFLAGS + * for init.o in mm/Makefile. + */ + +#ifndef __riscv_cmodel_medany +#error "setup_vm() is called from head.S before relocate so it should " + "not use absolute addressing." +#endif + asmlinkage void __init setup_vm(void) { extern char _start; -- cgit v1.2.3 From 14bc29646639650e38f061fca6f644706cc25034 Mon Sep 17 00:00:00 2001 From: Matteo Croce Date: Mon, 7 Jan 2019 14:09:34 +0100 Subject: drm/omap: fix typo Fix spelling mistake: "lenght" -> "length" Signed-off-by: Matteo Croce Signed-off-by: Tomi Valkeinen Link: https://patchwork.freedesktop.org/patch/msgid/20190107130934.9997-1-mcroce@redhat.com --- drivers/gpu/drm/omapdrm/dss/hdmi4_core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/omapdrm/dss/hdmi4_core.c b/drivers/gpu/drm/omapdrm/dss/hdmi4_core.c index 813ba42f2753..e384b95ad857 100644 --- a/drivers/gpu/drm/omapdrm/dss/hdmi4_core.c +++ b/drivers/gpu/drm/omapdrm/dss/hdmi4_core.c @@ -708,7 +708,7 @@ int hdmi4_audio_config(struct hdmi_core_data *core, struct hdmi_wp_data *wp, else acore.i2s_cfg.justification = HDMI_AUDIO_JUSTIFY_RIGHT; /* - * The I2S input word length is twice the lenght given in the IEC-60958 + * The I2S input word length is twice the length given in the IEC-60958 * status word. If the word size is greater than * 20 bits, increment by one. */ -- cgit v1.2.3 From 36a1da15b5df493241b0011d2185fdd724ac1ed1 Mon Sep 17 00:00:00 2001 From: Tony Lindgren Date: Tue, 26 Mar 2019 08:14:37 -0700 Subject: drm/omap: hdmi4_cec: Fix CEC clock handling for PM If CONFIG_OMAP4_DSS_HDMI_CEC is enabled in .config, deeper SoC idle states are blocked because the CEC clock gets always enabled on init. Let's fix the issue by moving the CEC clock handling to happen later in hdmi_cec_adap_enable() as suggested by Hans Verkuil . This way the CEC clock gets only enabled when needed. This can be tested by doing cec-ctl --playback to enable the CEC, and doing cec-ctl --clear to disable it. Let's also fix the typo for "divider" in the comments while at it. Fixes: 8d7f934df8d8 ("omapdrm: hdmi4_cec: add OMAP4 HDMI CEC support") Suggested-by: Hans Verkuil Cc: Hans Verkuil Cc: Jyri Sarha Cc: Laurent Pinchart Signed-off-by: Tony Lindgren Reviewed-by: Hans Verkuil Signed-off-by: Tomi Valkeinen Link: https://patchwork.freedesktop.org/patch/msgid/20190326151438.32414-1-tony@atomide.com --- drivers/gpu/drm/omapdrm/dss/hdmi4_cec.c | 26 +++++++++++++++++++------- 1 file changed, 19 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/omapdrm/dss/hdmi4_cec.c b/drivers/gpu/drm/omapdrm/dss/hdmi4_cec.c index 340383150fb9..ebf9c96d43ee 100644 --- a/drivers/gpu/drm/omapdrm/dss/hdmi4_cec.c +++ b/drivers/gpu/drm/omapdrm/dss/hdmi4_cec.c @@ -175,6 +175,7 @@ static int hdmi_cec_adap_enable(struct cec_adapter *adap, bool enable) REG_FLD_MOD(core->base, HDMI_CORE_SYS_INTR_UNMASK4, 0, 3, 3); hdmi_wp_clear_irqenable(core->wp, HDMI_IRQ_CORE); hdmi_wp_set_irqstatus(core->wp, HDMI_IRQ_CORE); + REG_FLD_MOD(core->wp->base, HDMI_WP_CLK, 0, 5, 0); hdmi4_core_disable(core); return 0; } @@ -182,16 +183,24 @@ static int hdmi_cec_adap_enable(struct cec_adapter *adap, bool enable) if (err) return err; + /* + * Initialize CEC clock divider: CEC needs 2MHz clock hence + * set the divider to 24 to get 48/24=2MHz clock + */ + REG_FLD_MOD(core->wp->base, HDMI_WP_CLK, 0x18, 5, 0); + /* Clear TX FIFO */ if (!hdmi_cec_clear_tx_fifo(adap)) { pr_err("cec-%s: could not clear TX FIFO\n", adap->name); - return -EIO; + err = -EIO; + goto err_disable_clk; } /* Clear RX FIFO */ if (!hdmi_cec_clear_rx_fifo(adap)) { pr_err("cec-%s: could not clear RX FIFO\n", adap->name); - return -EIO; + err = -EIO; + goto err_disable_clk; } /* Clear CEC interrupts */ @@ -236,6 +245,12 @@ static int hdmi_cec_adap_enable(struct cec_adapter *adap, bool enable) hdmi_write_reg(core->base, HDMI_CEC_INT_STATUS_1, temp); } return 0; + +err_disable_clk: + REG_FLD_MOD(core->wp->base, HDMI_WP_CLK, 0, 5, 0); + hdmi4_core_disable(core); + + return err; } static int hdmi_cec_adap_log_addr(struct cec_adapter *adap, u8 log_addr) @@ -333,11 +348,8 @@ int hdmi4_cec_init(struct platform_device *pdev, struct hdmi_core_data *core, return ret; core->wp = wp; - /* - * Initialize CEC clock divider: CEC needs 2MHz clock hence - * set the devider to 24 to get 48/24=2MHz clock - */ - REG_FLD_MOD(core->wp->base, HDMI_WP_CLK, 0x18, 5, 0); + /* Disable clock initially, hdmi_cec_adap_enable() manages it */ + REG_FLD_MOD(core->wp->base, HDMI_WP_CLK, 0, 5, 0); ret = cec_register_adapter(core->adap, &pdev->dev); if (ret < 0) { -- cgit v1.2.3 From 2bafa1e9625400bec4c840a168d70ba52607a58d Mon Sep 17 00:00:00 2001 From: Jeffrey Hugo Date: Tue, 26 Mar 2019 09:55:54 -0700 Subject: HID: quirks: Fix keyboard + touchpad on Lenovo Miix 630 Similar to commit edfc3722cfef ("HID: quirks: Fix keyboard + touchpad on Toshiba Click Mini not working"), the Lenovo Miix 630 has a combo keyboard/touchpad device with vid:pid of 04F3:0400, which is shared with Elan touchpads. The combo on the Miix 630 has an ACPI id of QTEC0001, which is not claimed by the elan_i2c driver, so key on that similar to what was done for the Toshiba Click Mini. Signed-off-by: Jeffrey Hugo Signed-off-by: Jiri Kosina --- drivers/hid/hid-quirks.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/hid/hid-quirks.c b/drivers/hid/hid-quirks.c index 1148d8c0816a..77ffba48cc73 100644 --- a/drivers/hid/hid-quirks.c +++ b/drivers/hid/hid-quirks.c @@ -715,7 +715,6 @@ static const struct hid_device_id hid_ignore_list[] = { { HID_USB_DEVICE(USB_VENDOR_ID_DEALEXTREAME, USB_DEVICE_ID_DEALEXTREAME_RADIO_SI4701) }, { HID_USB_DEVICE(USB_VENDOR_ID_DELORME, USB_DEVICE_ID_DELORME_EARTHMATE) }, { HID_USB_DEVICE(USB_VENDOR_ID_DELORME, USB_DEVICE_ID_DELORME_EM_LT20) }, - { HID_I2C_DEVICE(USB_VENDOR_ID_ELAN, 0x0400) }, { HID_USB_DEVICE(USB_VENDOR_ID_ESSENTIAL_REALITY, USB_DEVICE_ID_ESSENTIAL_REALITY_P5) }, { HID_USB_DEVICE(USB_VENDOR_ID_ETT, USB_DEVICE_ID_TC5UH) }, { HID_USB_DEVICE(USB_VENDOR_ID_ETT, USB_DEVICE_ID_TC4UM) }, @@ -996,6 +995,10 @@ bool hid_ignore(struct hid_device *hdev) if (hdev->product == 0x0401 && strncmp(hdev->name, "ELAN0800", 8) != 0) return true; + /* Same with product id 0x0400 */ + if (hdev->product == 0x0400 && + strncmp(hdev->name, "QTEC0001", 8) != 0) + return true; break; } -- cgit v1.2.3 From 9ec71c1cdbdd6c4ac0150a51d64e06c5d1bd207e Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Tue, 26 Mar 2019 22:00:06 -0700 Subject: libbpf: fix btf_dedup equivalence check handling of different kinds btf_dedup_is_equiv() used to compare btf_type->info fields, before doing kind-specific equivalence check. This comparsion implicitly verified that candidate and canonical types are of the same kind. With enum fwd resolution logic this check couldn't be done generically anymore, as for enums info contains vlen, which differs between enum fwd and fully-defined enum, so this check was subsumed by kind-specific equivalence checks. This change caused btf_dedup_is_equiv() to let through VOID vs other types check to reach switch, which was never meant to be handing VOID kind, as VOID kind is always pre-resolved to itself and is only equivalent to itself, which is checked early in btf_dedup_is_equiv(). This change adds back BTF kind equality check in place of more generic btf_type->info check, still defering further kind-specific checks to a per-kind switch. Fixes: 9768095ba97c ("btf: resolve enum fwds in btf_dedup") Signed-off-by: Andrii Nakryiko Signed-off-by: Alexei Starovoitov --- tools/lib/bpf/btf.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tools/lib/bpf/btf.c b/tools/lib/bpf/btf.c index 87e3020ac1bc..cf119c9b6f27 100644 --- a/tools/lib/bpf/btf.c +++ b/tools/lib/bpf/btf.c @@ -2107,6 +2107,9 @@ static int btf_dedup_is_equiv(struct btf_dedup *d, __u32 cand_id, return fwd_kind == real_kind; } + if (cand_kind != canon_kind) + return 0; + switch (cand_kind) { case BTF_KIND_INT: return btf_equal_int(cand_type, canon_type); -- cgit v1.2.3 From eb76899ce749507e09cad6816f32cede14a9b7ee Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Tue, 26 Mar 2019 22:00:07 -0700 Subject: selftests/bpf: add btf_dedup test for VOID equivalence check This patch adds specific test exposing bug in btf_dedup_is_equiv() when comparing candidate VOID type to a non-VOID canonical type. It's important for canonical type to be anonymous, otherwise name equality check will do the right thing and will exit early. Signed-off-by: Andrii Nakryiko Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/test_btf.c | 47 ++++++++++++++++++++++++++++++++++ 1 file changed, 47 insertions(+) diff --git a/tools/testing/selftests/bpf/test_btf.c b/tools/testing/selftests/bpf/test_btf.c index 23e3b314ca60..ec5794e4205b 100644 --- a/tools/testing/selftests/bpf/test_btf.c +++ b/tools/testing/selftests/bpf/test_btf.c @@ -5776,6 +5776,53 @@ const struct btf_dedup_test dedup_tests[] = { .dedup_table_size = 1, /* force hash collisions */ }, }, +{ + .descr = "dedup: void equiv check", + /* + * // CU 1: + * struct s { + * struct {} *x; + * }; + * // CU 2: + * struct s { + * int *x; + * }; + */ + .input = { + .raw_types = { + /* CU 1 */ + BTF_STRUCT_ENC(0, 0, 1), /* [1] struct {} */ + BTF_PTR_ENC(1), /* [2] ptr -> [1] */ + BTF_STRUCT_ENC(NAME_NTH(1), 1, 8), /* [3] struct s */ + BTF_MEMBER_ENC(NAME_NTH(2), 2, 0), + /* CU 2 */ + BTF_PTR_ENC(0), /* [4] ptr -> void */ + BTF_STRUCT_ENC(NAME_NTH(1), 1, 8), /* [5] struct s */ + BTF_MEMBER_ENC(NAME_NTH(2), 4, 0), + BTF_END_RAW, + }, + BTF_STR_SEC("\0s\0x"), + }, + .expect = { + .raw_types = { + /* CU 1 */ + BTF_STRUCT_ENC(0, 0, 1), /* [1] struct {} */ + BTF_PTR_ENC(1), /* [2] ptr -> [1] */ + BTF_STRUCT_ENC(NAME_NTH(1), 1, 8), /* [3] struct s */ + BTF_MEMBER_ENC(NAME_NTH(2), 2, 0), + /* CU 2 */ + BTF_PTR_ENC(0), /* [4] ptr -> void */ + BTF_STRUCT_ENC(NAME_NTH(1), 1, 8), /* [5] struct s */ + BTF_MEMBER_ENC(NAME_NTH(2), 4, 0), + BTF_END_RAW, + }, + BTF_STR_SEC("\0s\0x"), + }, + .opts = { + .dont_resolve_fwds = false, + .dedup_table_size = 1, /* force hash collisions */ + }, +}, { .descr = "dedup: all possible kinds (no duplicates)", .input = { -- cgit v1.2.3 From 025c65e119bf58b610549ca359c9ecc5dee6a8d2 Mon Sep 17 00:00:00 2001 From: Martin Willi Date: Tue, 26 Mar 2019 13:20:43 +0100 Subject: xfrm: Honor original L3 slave device in xfrmi policy lookup If an xfrmi is associated to a vrf layer 3 master device, xfrm_policy_check() fails after traffic decapsulation. The input interface is replaced by the layer 3 master device, and hence xfrmi_decode_session() can't match the xfrmi anymore to satisfy policy checking. Extend ingress xfrmi lookup to honor the original layer 3 slave device, allowing xfrm interfaces to operate within a vrf domain. Fixes: f203b76d7809 ("xfrm: Add virtual xfrm interfaces") Signed-off-by: Martin Willi Signed-off-by: Steffen Klassert --- include/net/xfrm.h | 3 ++- net/xfrm/xfrm_interface.c | 17 ++++++++++++++--- net/xfrm/xfrm_policy.c | 2 +- 3 files changed, 17 insertions(+), 5 deletions(-) diff --git a/include/net/xfrm.h b/include/net/xfrm.h index 902437dfbce7..c9b0b2b5d672 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -295,7 +295,8 @@ struct xfrm_replay { }; struct xfrm_if_cb { - struct xfrm_if *(*decode_session)(struct sk_buff *skb); + struct xfrm_if *(*decode_session)(struct sk_buff *skb, + unsigned short family); }; void xfrm_if_register_cb(const struct xfrm_if_cb *ifcb); diff --git a/net/xfrm/xfrm_interface.c b/net/xfrm/xfrm_interface.c index dbb3c1945b5c..85fec98676d3 100644 --- a/net/xfrm/xfrm_interface.c +++ b/net/xfrm/xfrm_interface.c @@ -70,17 +70,28 @@ static struct xfrm_if *xfrmi_lookup(struct net *net, struct xfrm_state *x) return NULL; } -static struct xfrm_if *xfrmi_decode_session(struct sk_buff *skb) +static struct xfrm_if *xfrmi_decode_session(struct sk_buff *skb, + unsigned short family) { struct xfrmi_net *xfrmn; - int ifindex; struct xfrm_if *xi; + int ifindex = 0; if (!secpath_exists(skb) || !skb->dev) return NULL; + switch (family) { + case AF_INET6: + ifindex = inet6_sdif(skb); + break; + case AF_INET: + ifindex = inet_sdif(skb); + break; + } + if (!ifindex) + ifindex = skb->dev->ifindex; + xfrmn = net_generic(xs_net(xfrm_input_state(skb)), xfrmi_net_id); - ifindex = skb->dev->ifindex; for_each_xfrmi_rcu(xfrmn->xfrmi[0], xi) { if (ifindex == xi->dev->ifindex && diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index 8d1a898d0ba5..a6b58df7a70f 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -3313,7 +3313,7 @@ int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb, ifcb = xfrm_if_get_cb(); if (ifcb) { - xi = ifcb->decode_session(skb); + xi = ifcb->decode_session(skb, family); if (xi) { if_id = xi->p.if_id; net = xi->net; -- cgit v1.2.3 From 486fa92df4707b5df58d6508728bdb9321a59766 Mon Sep 17 00:00:00 2001 From: Aditya Pakki Date: Mon, 25 Mar 2019 16:55:27 -0500 Subject: libnvdimm/btt: Fix a kmemdup failure check In case kmemdup fails, the fix releases resources and returns to avoid the NULL pointer dereference. Signed-off-by: Aditya Pakki Signed-off-by: Dan Williams --- drivers/nvdimm/btt_devs.c | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) diff --git a/drivers/nvdimm/btt_devs.c b/drivers/nvdimm/btt_devs.c index b72a303176c7..9486acc08402 100644 --- a/drivers/nvdimm/btt_devs.c +++ b/drivers/nvdimm/btt_devs.c @@ -198,14 +198,15 @@ static struct device *__nd_btt_create(struct nd_region *nd_region, return NULL; nd_btt->id = ida_simple_get(&nd_region->btt_ida, 0, 0, GFP_KERNEL); - if (nd_btt->id < 0) { - kfree(nd_btt); - return NULL; - } + if (nd_btt->id < 0) + goto out_nd_btt; nd_btt->lbasize = lbasize; - if (uuid) + if (uuid) { uuid = kmemdup(uuid, 16, GFP_KERNEL); + if (!uuid) + goto out_put_id; + } nd_btt->uuid = uuid; dev = &nd_btt->dev; dev_set_name(dev, "btt%d.%d", nd_region->id, nd_btt->id); @@ -220,6 +221,13 @@ static struct device *__nd_btt_create(struct nd_region *nd_region, return NULL; } return dev; + +out_put_id: + ida_simple_remove(&nd_region->btt_ida, nd_btt->id); + +out_nd_btt: + kfree(nd_btt); + return NULL; } struct device *nd_btt_create(struct nd_region *nd_region) -- cgit v1.2.3 From 662d66466637862ef955f7f6e78a286d8cf0ebef Mon Sep 17 00:00:00 2001 From: Kaike Wan Date: Mon, 18 Mar 2019 09:55:19 -0700 Subject: IB/hfi1: Failed to drain send queue when QP is put into error state When a QP is put into error state, all pending requests in the send work queue should be drained. The following sequence of events could lead to a failure, causing a request to hang: (1) The QP builds a packet and tries to send through SDMA engine. However, PIO engine is still busy. Consequently, this packet is put on the QP's tx list and the QP is put on the PIO waiting list. The field qp->s_flags is set with HFI1_S_WAIT_PIO_DRAIN; (2) The QP is put into error state by the user application and notify_error_qp() is called, which removes the QP from the PIO waiting list and the packet from the QP's tx list. In addition, qp->s_flags is cleared of RVT_S_ANY_WAIT_IO bits, which does not include HFI1_S_WAIT_PIO_DRAIN bit; (3) The hfi1_schdule_send() function is called to drain the QP's send queue. Subsequently, hfi1_do_send() is called. Since the flag bit HFI1_S_WAIT_PIO_DRAIN is set in qp->s_flags, hfi1_send_ok() fails. As a result, hfi1_do_send() bails out without draining any request from the send queue; (4) The PIO engine completes the sending and tries to wake up any QP on its waiting list. But the QP has been removed from the PIO waiting list and therefore is kept in sleep forever. The fix is to clear qp->s_flags of HFI1_S_ANY_WAIT_IO bits in step (2). HFI1_S_ANY_WAIT_IO includes RVT_S_ANY_WAIT_IO and HFI1_S_WAIT_PIO_DRAIN. Fixes: 2e2ba09e48b7 ("IB/rdmavt, IB/hfi1: Create device dependent s_flags") Cc: # 4.19.x+ Reviewed-by: Mike Marciniszyn Reviewed-by: Alex Estrin Signed-off-by: Kaike Wan Signed-off-by: Dennis Dalessandro Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hfi1/qp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/hfi1/qp.c b/drivers/infiniband/hw/hfi1/qp.c index 9b643c2409cf..64889eda1735 100644 --- a/drivers/infiniband/hw/hfi1/qp.c +++ b/drivers/infiniband/hw/hfi1/qp.c @@ -898,7 +898,7 @@ void notify_error_qp(struct rvt_qp *qp) if (!list_empty(&priv->s_iowait.list) && !(qp->s_flags & RVT_S_BUSY) && !(priv->s_flags & RVT_S_BUSY)) { - qp->s_flags &= ~RVT_S_ANY_WAIT_IO; + qp->s_flags &= ~HFI1_S_ANY_WAIT_IO; list_del_init(&priv->s_iowait.list); priv->s_iowait.lock = NULL; rvt_put_qp(qp); -- cgit v1.2.3 From 93b289b9aff66eca7575b09f36f5abbeca8e6167 Mon Sep 17 00:00:00 2001 From: Kaike Wan Date: Mon, 18 Mar 2019 09:55:29 -0700 Subject: IB/hfi1: Clear the IOWAIT pending bits when QP is put into error state When a QP is put into error state, it may be waiting for send engine resources. In this case, the QP will be removed from the send engine's waiting list, but its IOWAIT pending bits are not cleared. This will normally not have any major impact as the QP is being destroyed. However, the QP still needs to wind down its operations, such as draining the send queue by scheduling the send engine. Clearing the pending bits will avoid any potential complications. In addition, if the QP will eventually hang, clearing the pending bits can help debugging by presenting a consistent picture if the user dumps the qp_stats. This patch clears a QP's IOWAIT_PENDING_IB and IO_PENDING_TID bits in priv->s_iowait.flags in this case. Fixes: 5da0fc9dbf89 ("IB/hfi1: Prepare resource waits for dual leg") Reviewed-by: Mike Marciniszyn Reviewed-by: Alex Estrin Signed-off-by: Kaike Wan Signed-off-by: Dennis Dalessandro Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hfi1/qp.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/infiniband/hw/hfi1/qp.c b/drivers/infiniband/hw/hfi1/qp.c index 64889eda1735..eba300330a02 100644 --- a/drivers/infiniband/hw/hfi1/qp.c +++ b/drivers/infiniband/hw/hfi1/qp.c @@ -899,6 +899,8 @@ void notify_error_qp(struct rvt_qp *qp) !(qp->s_flags & RVT_S_BUSY) && !(priv->s_flags & RVT_S_BUSY)) { qp->s_flags &= ~HFI1_S_ANY_WAIT_IO; + iowait_clear_flag(&priv->s_iowait, IOWAIT_PENDING_IB); + iowait_clear_flag(&priv->s_iowait, IOWAIT_PENDING_TID); list_del_init(&priv->s_iowait.list); priv->s_iowait.lock = NULL; rvt_put_qp(qp); -- cgit v1.2.3 From a8639a79e85c18c16c10089edd589c7948f19bbd Mon Sep 17 00:00:00 2001 From: Kaike Wan Date: Mon, 18 Mar 2019 09:55:39 -0700 Subject: IB/hfi1: Eliminate opcode tests on mr deref When an old ack_queue entry is used to store an incoming request, it may need to clean up the old entry if it is still referencing the MR. Originally only RDMA READ request needed to reference MR on the responder side and therefore the opcode was tested when cleaning up the old entry. The introduction of tid rdma specific operations in the ack_queue makes the specific opcode tests wrong. Multiple opcodes (RDMA READ, TID RDMA READ, and TID RDMA WRITE) may need MR ref cleanup. Remove the opcode specific tests associated with the ack_queue. Fixes: f48ad614c100 ("IB/hfi1: Move driver out of staging") Signed-off-by: Mike Marciniszyn Signed-off-by: Kaike Wan Signed-off-by: Dennis Dalessandro Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hfi1/rc.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/hw/hfi1/rc.c b/drivers/infiniband/hw/hfi1/rc.c index e6726c1ab866..5991211d72bd 100644 --- a/drivers/infiniband/hw/hfi1/rc.c +++ b/drivers/infiniband/hw/hfi1/rc.c @@ -3088,7 +3088,7 @@ send_last: update_ack_queue(qp, next); } e = &qp->s_ack_queue[qp->r_head_ack_queue]; - if (e->opcode == OP(RDMA_READ_REQUEST) && e->rdma_sge.mr) { + if (e->rdma_sge.mr) { rvt_put_mr(e->rdma_sge.mr); e->rdma_sge.mr = NULL; } @@ -3166,7 +3166,7 @@ send_last: update_ack_queue(qp, next); } e = &qp->s_ack_queue[qp->r_head_ack_queue]; - if (e->opcode == OP(RDMA_READ_REQUEST) && e->rdma_sge.mr) { + if (e->rdma_sge.mr) { rvt_put_mr(e->rdma_sge.mr); e->rdma_sge.mr = NULL; } -- cgit v1.2.3 From d0294344470e6b52d097aa7369173f32d11f2f52 Mon Sep 17 00:00:00 2001 From: Kaike Wan Date: Mon, 18 Mar 2019 09:55:49 -0700 Subject: IB/hfi1: Fix the allocation of RSM table The receive side mapping (RSM) on hfi1 hardware is a special matching mechanism to direct an incoming packet to a given hardware receive context. It has 4 instances of matching capabilities (RSM0 - RSM3) that share the same RSM table (RMT). The RMT has a total of 256 entries, each of which points to a receive context. Currently, three instances of RSM have been used: 1. RSM0 by QOS; 2. RSM1 by PSM FECN; 3. RSM2 by VNIC. Each RSM instance should reserve enough entries in RMT to function properly. Since both PSM and VNIC could allocate any receive context between dd->first_dyn_alloc_ctxt and dd->num_rcv_contexts, PSM FECN must reserve enough RMT entries to cover the entire receive context index range (dd->num_rcv_contexts - dd->first_dyn_alloc_ctxt) instead of only the user receive contexts allocated for PSM (dd->num_user_contexts). Consequently, the sizing of dd->num_user_contexts in set_up_context_variables is incorrect. Fixes: 2280740f01ae ("IB/hfi1: Virtual Network Interface Controller (VNIC) HW support") Reviewed-by: Mike Marciniszyn Reviewed-by: Michael J. Ruhl Signed-off-by: Kaike Wan Signed-off-by: Dennis Dalessandro Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hfi1/chip.c | 26 +++++++++++++++++++------- 1 file changed, 19 insertions(+), 7 deletions(-) diff --git a/drivers/infiniband/hw/hfi1/chip.c b/drivers/infiniband/hw/hfi1/chip.c index 612f04190ed8..9784c6c0d2ec 100644 --- a/drivers/infiniband/hw/hfi1/chip.c +++ b/drivers/infiniband/hw/hfi1/chip.c @@ -13232,7 +13232,7 @@ static int set_up_context_variables(struct hfi1_devdata *dd) int total_contexts; int ret; unsigned ngroups; - int qos_rmt_count; + int rmt_count; int user_rmt_reduced; u32 n_usr_ctxts; u32 send_contexts = chip_send_contexts(dd); @@ -13294,10 +13294,20 @@ static int set_up_context_variables(struct hfi1_devdata *dd) n_usr_ctxts = rcv_contexts - total_contexts; } - /* each user context requires an entry in the RMT */ - qos_rmt_count = qos_rmt_entries(dd, NULL, NULL); - if (qos_rmt_count + n_usr_ctxts > NUM_MAP_ENTRIES) { - user_rmt_reduced = NUM_MAP_ENTRIES - qos_rmt_count; + /* + * The RMT entries are currently allocated as shown below: + * 1. QOS (0 to 128 entries); + * 2. FECN for PSM (num_user_contexts + num_vnic_contexts); + * 3. VNIC (num_vnic_contexts). + * It should be noted that PSM FECN oversubscribe num_vnic_contexts + * entries of RMT because both VNIC and PSM could allocate any receive + * context between dd->first_dyn_alloc_text and dd->num_rcv_contexts, + * and PSM FECN must reserve an RMT entry for each possible PSM receive + * context. + */ + rmt_count = qos_rmt_entries(dd, NULL, NULL) + (num_vnic_contexts * 2); + if (rmt_count + n_usr_ctxts > NUM_MAP_ENTRIES) { + user_rmt_reduced = NUM_MAP_ENTRIES - rmt_count; dd_dev_err(dd, "RMT size is reducing the number of user receive contexts from %u to %d\n", n_usr_ctxts, @@ -14285,9 +14295,11 @@ static void init_user_fecn_handling(struct hfi1_devdata *dd, u64 reg; int i, idx, regoff, regidx; u8 offset; + u32 total_cnt; /* there needs to be enough room in the map table */ - if (rmt->used + dd->num_user_contexts >= NUM_MAP_ENTRIES) { + total_cnt = dd->num_rcv_contexts - dd->first_dyn_alloc_ctxt; + if (rmt->used + total_cnt >= NUM_MAP_ENTRIES) { dd_dev_err(dd, "User FECN handling disabled - too many user contexts allocated\n"); return; } @@ -14341,7 +14353,7 @@ static void init_user_fecn_handling(struct hfi1_devdata *dd, /* add rule 1 */ add_rsm_rule(dd, RSM_INS_FECN, &rrd); - rmt->used += dd->num_user_contexts; + rmt->used += total_cnt; } /* Initialize RSM for VNIC */ -- cgit v1.2.3 From 1abe186ed8a6593069bc122da55fc684383fdc1c Mon Sep 17 00:00:00 2001 From: Moni Shoua Date: Tue, 19 Mar 2019 11:24:36 +0200 Subject: IB/mlx5: Reset access mask when looping inside page fault handler If page-fault handler spans multiple MRs then the access mask needs to be reset before each MR handling or otherwise write access will be granted to mapped pages instead of read-only. Cc: # 3.19 Fixes: 7bdf65d411c1 ("IB/mlx5: Handle page faults") Reported-by: Jerome Glisse Signed-off-by: Moni Shoua Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/mlx5/odp.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/mlx5/odp.c b/drivers/infiniband/hw/mlx5/odp.c index c20bfc41ecf1..0aa10ebda5d9 100644 --- a/drivers/infiniband/hw/mlx5/odp.c +++ b/drivers/infiniband/hw/mlx5/odp.c @@ -585,7 +585,7 @@ static int pagefault_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr, struct ib_umem_odp *odp_mr = to_ib_umem_odp(mr->umem); bool downgrade = flags & MLX5_PF_FLAGS_DOWNGRADE; bool prefetch = flags & MLX5_PF_FLAGS_PREFETCH; - u64 access_mask = ODP_READ_ALLOWED_BIT; + u64 access_mask; u64 start_idx, page_mask; struct ib_umem_odp *odp; size_t size; @@ -607,6 +607,7 @@ next_mr: page_shift = mr->umem->page_shift; page_mask = ~(BIT(page_shift) - 1); start_idx = (io_virt - (mr->mmkey.iova & page_mask)) >> page_shift; + access_mask = ODP_READ_ALLOWED_BIT; if (prefetch && !downgrade && !mr->umem->writable) { /* prefetch with write-access must -- cgit v1.2.3 From ad51c46eec739c18be24178a30b47801b10e0357 Mon Sep 17 00:00:00 2001 From: Chengming Gui Date: Thu, 21 Mar 2019 13:26:28 +0800 Subject: drm/amd/amdgpu: fix PCIe dpm feature issue (v3) use pcie_bandwidth_available to get real link state to update pcie table. v2: fix incorrect initialized return value v3: expand the fetching method about the link width to all asics. Signed-off-by: Chengming Gui Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 4f8fb4ecde34..ac0d646a7b74 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -3625,6 +3625,7 @@ static void amdgpu_device_get_min_pci_speed_width(struct amdgpu_device *adev, struct pci_dev *pdev = adev->pdev; enum pci_bus_speed cur_speed; enum pcie_link_width cur_width; + u32 ret = 1; *speed = PCI_SPEED_UNKNOWN; *width = PCIE_LNK_WIDTH_UNKNOWN; @@ -3632,6 +3633,10 @@ static void amdgpu_device_get_min_pci_speed_width(struct amdgpu_device *adev, while (pdev) { cur_speed = pcie_get_speed_cap(pdev); cur_width = pcie_get_width_cap(pdev); + ret = pcie_bandwidth_available(adev->pdev, NULL, + NULL, &cur_width); + if (!ret) + cur_width = PCIE_LNK_WIDTH_RESRV; if (cur_speed != PCI_SPEED_UNKNOWN) { if (*speed == PCI_SPEED_UNKNOWN) -- cgit v1.2.3 From 6f5d29ff1a643d52128efb4b6c4f4d4074e32e10 Mon Sep 17 00:00:00 2001 From: Evan Quan Date: Sat, 23 Mar 2019 01:02:44 +0800 Subject: drm/amd/powerplay: add ECC feature bit It's OK to have this feature bit with old SMU firmwares. But the feature should be disabled on them. Signed-off-by: Evan Quan Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c | 10 +++++++++- drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.h | 1 + drivers/gpu/drm/amd/powerplay/inc/smu11_driver_if.h | 3 ++- 3 files changed, 12 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c index 9aa7bec1b5fe..5e3ffcc5b14a 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c @@ -91,6 +91,12 @@ static void vega20_set_default_registry_data(struct pp_hwmgr *hwmgr) * MP0CLK DS */ data->registry_data.disallowed_features = 0xE0041C00; + /* ECC feature should be disabled on old SMUs */ + smum_send_msg_to_smc(hwmgr, PPSMC_MSG_GetSmuVersion); + hwmgr->smu_version = smum_get_argument(hwmgr); + if (hwmgr->smu_version < 0x282100) + data->registry_data.disallowed_features |= FEATURE_ECC_MASK; + data->registry_data.od_state_in_dc_support = 0; data->registry_data.thermal_support = 1; data->registry_data.skip_baco_hardware = 0; @@ -357,6 +363,7 @@ static void vega20_init_dpm_defaults(struct pp_hwmgr *hwmgr) data->smu_features[GNLD_DS_MP1CLK].smu_feature_id = FEATURE_DS_MP1CLK_BIT; data->smu_features[GNLD_DS_MP0CLK].smu_feature_id = FEATURE_DS_MP0CLK_BIT; data->smu_features[GNLD_XGMI].smu_feature_id = FEATURE_XGMI_BIT; + data->smu_features[GNLD_ECC].smu_feature_id = FEATURE_ECC_BIT; for (i = 0; i < GNLD_FEATURES_MAX; i++) { data->smu_features[i].smu_feature_bitmap = @@ -3020,7 +3027,8 @@ static int vega20_get_ppfeature_status(struct pp_hwmgr *hwmgr, char *buf) "FCLK_DS", "MP1CLK_DS", "MP0CLK_DS", - "XGMI"}; + "XGMI", + "ECC"}; static const char *output_title[] = { "FEATURES", "BITMASK", diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.h b/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.h index a5bc758ae097..ac2a3118a0ae 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.h +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.h @@ -80,6 +80,7 @@ enum { GNLD_DS_MP1CLK, GNLD_DS_MP0CLK, GNLD_XGMI, + GNLD_ECC, GNLD_FEATURES_MAX }; diff --git a/drivers/gpu/drm/amd/powerplay/inc/smu11_driver_if.h b/drivers/gpu/drm/amd/powerplay/inc/smu11_driver_if.h index 63d5cf691549..b90089a4fb6a 100644 --- a/drivers/gpu/drm/amd/powerplay/inc/smu11_driver_if.h +++ b/drivers/gpu/drm/amd/powerplay/inc/smu11_driver_if.h @@ -99,7 +99,7 @@ #define FEATURE_DS_MP1CLK_BIT 30 #define FEATURE_DS_MP0CLK_BIT 31 #define FEATURE_XGMI_BIT 32 -#define FEATURE_SPARE_33_BIT 33 +#define FEATURE_ECC_BIT 33 #define FEATURE_SPARE_34_BIT 34 #define FEATURE_SPARE_35_BIT 35 #define FEATURE_SPARE_36_BIT 36 @@ -166,6 +166,7 @@ #define FEATURE_DS_MP1CLK_MASK (1 << FEATURE_DS_MP1CLK_BIT ) #define FEATURE_DS_MP0CLK_MASK (1 << FEATURE_DS_MP0CLK_BIT ) #define FEATURE_XGMI_MASK (1 << FEATURE_XGMI_BIT ) +#define FEATURE_ECC_MASK (1ULL << FEATURE_ECC_BIT ) #define DPM_OVERRIDE_DISABLE_SOCCLK_PID 0x00000001 #define DPM_OVERRIDE_DISABLE_UCLK_PID 0x00000002 -- cgit v1.2.3 From aaaba51bf1618958c91728607c63264655c545ef Mon Sep 17 00:00:00 2001 From: Evan Quan Date: Sat, 23 Mar 2019 02:02:24 +0800 Subject: drm/amd/powerplay: correct data type to avoid overflow Avoid left shift overflow. Signed-off-by: Evan Quan Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/powerplay/inc/smu11_driver_if.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/powerplay/inc/smu11_driver_if.h b/drivers/gpu/drm/amd/powerplay/inc/smu11_driver_if.h index b90089a4fb6a..195c4ae67058 100644 --- a/drivers/gpu/drm/amd/powerplay/inc/smu11_driver_if.h +++ b/drivers/gpu/drm/amd/powerplay/inc/smu11_driver_if.h @@ -165,7 +165,7 @@ #define FEATURE_DS_FCLK_MASK (1 << FEATURE_DS_FCLK_BIT ) #define FEATURE_DS_MP1CLK_MASK (1 << FEATURE_DS_MP1CLK_BIT ) #define FEATURE_DS_MP0CLK_MASK (1 << FEATURE_DS_MP0CLK_BIT ) -#define FEATURE_XGMI_MASK (1 << FEATURE_XGMI_BIT ) +#define FEATURE_XGMI_MASK (1ULL << FEATURE_XGMI_BIT ) #define FEATURE_ECC_MASK (1ULL << FEATURE_ECC_BIT ) #define DPM_OVERRIDE_DISABLE_SOCCLK_PID 0x00000001 -- cgit v1.2.3 From db64a2f43c1bc22c5ff2d22606000b8c3587d0ec Mon Sep 17 00:00:00 2001 From: Evan Quan Date: Tue, 26 Mar 2019 17:57:53 +0800 Subject: drm/amd/powerplay: fix possible hang with 3+ 4K monitors If DAL requires to force MCLK high, the FCLK will be forced to high also. Signed-off-by: Evan Quan Acked-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c index 5e3ffcc5b14a..23b5b94a4939 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c @@ -3470,6 +3470,7 @@ static int vega20_apply_clocks_adjust_rules(struct pp_hwmgr *hwmgr) struct vega20_single_dpm_table *dpm_table; bool vblank_too_short = false; bool disable_mclk_switching; + bool disable_fclk_switching; uint32_t i, latency; disable_mclk_switching = ((1 < hwmgr->display_config->num_display) && @@ -3545,13 +3546,20 @@ static int vega20_apply_clocks_adjust_rules(struct pp_hwmgr *hwmgr) if (hwmgr->display_config->nb_pstate_switch_disable) dpm_table->dpm_state.hard_min_level = dpm_table->dpm_levels[dpm_table->count - 1].value; + if ((disable_mclk_switching && + (dpm_table->dpm_state.hard_min_level == dpm_table->dpm_levels[dpm_table->count - 1].value)) || + hwmgr->display_config->min_mem_set_clock / 100 >= dpm_table->dpm_levels[dpm_table->count - 1].value) + disable_fclk_switching = true; + else + disable_fclk_switching = false; + /* fclk */ dpm_table = &(data->dpm_table.fclk_table); dpm_table->dpm_state.soft_min_level = dpm_table->dpm_levels[0].value; dpm_table->dpm_state.soft_max_level = VG20_CLOCK_MAX_DEFAULT; dpm_table->dpm_state.hard_min_level = dpm_table->dpm_levels[0].value; dpm_table->dpm_state.hard_max_level = VG20_CLOCK_MAX_DEFAULT; - if (hwmgr->display_config->nb_pstate_switch_disable) + if (hwmgr->display_config->nb_pstate_switch_disable || disable_fclk_switching) dpm_table->dpm_state.soft_min_level = dpm_table->dpm_levels[dpm_table->count - 1].value; /* vclk */ -- cgit v1.2.3 From ab0cb022c8fd6f465f40f1cbee7d71c6280b6c74 Mon Sep 17 00:00:00 2001 From: Paul Hsieh Date: Mon, 18 Mar 2019 18:04:05 +0800 Subject: drm/amd/display: VBIOS can't be light up HDMI when restart system [Why] VBIOS will not post pixel rate > 340MHz. If driver set pixel rate > 340MHz and do restart bottom, VBIOS can't post HDMI monitor due to monitor is stay in HDMI2.0 state. [How] Program Scrambling_Enable and TMDS_Bit_Clock_Ratio when disable stream. Signed-off-by: Paul Hsieh Reviewed-by: Charlene Liu Acked-by: Bhawanpreet Lakha Acked-by: Harry Wentland Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/core/dc_link.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link.c b/drivers/gpu/drm/amd/display/dc/core/dc_link.c index 4eba3c4800b6..ea18e9c2d8ce 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_link.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_link.c @@ -2660,12 +2660,18 @@ void core_link_enable_stream( void core_link_disable_stream(struct pipe_ctx *pipe_ctx, int option) { struct dc *core_dc = pipe_ctx->stream->ctx->dc; + struct dc_stream_state *stream = pipe_ctx->stream; core_dc->hwss.blank_stream(pipe_ctx); if (pipe_ctx->stream->signal == SIGNAL_TYPE_DISPLAY_PORT_MST) deallocate_mst_payload(pipe_ctx); + if (dc_is_hdmi_signal(pipe_ctx->stream->signal)) + dal_ddc_service_write_scdc_data( + stream->link->ddc, 0, + stream->timing.flags.LTE_340MCSC_SCRAMBLE); + core_dc->hwss.disable_stream(pipe_ctx, option); disable_link(pipe_ctx->stream->link, pipe_ctx->stream->signal); -- cgit v1.2.3 From 5ceaeb99ffb4dc002d20f6ac243c19a85e2c7a76 Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Sat, 23 Mar 2019 19:41:32 +0100 Subject: net: dsa: mv88e6xxx: fix few issues in mv88e6390x_port_set_cmode This patches fixes few issues in mv88e6390x_port_set_cmode(). 1. When entering the function the old cmode may be 0, in this case mv88e6390x_serdes_get_lane() returns -ENODEV. As result we bail out and have no chance to set a new mode. Therefore deal properly with -ENODEV. 2. Once we have disabled power and irq, let's set the cached cmode to 0. This reflects the actual status and is cleaner if we bail out with an error in the following function calls. 3. The cached cmode is used by mv88e6390x_serdes_get_lane(), mv88e6390_serdes_power_lane() and mv88e6390_serdes_irq_enable(). Currently we set the cached mode to the new one at the very end of the function only, means until then we use the old one what may be wrong. 4. When calling mv88e6390_serdes_irq_enable() we use the lane value belonging to the old cmode. Get the lane belonging to the new cmode before calling this function. It's hard to provide a good "Fixes" tag because quite a few smaller changes have been done to the code in question recently. Fixes: d235c48b40d3 ("net: dsa: mv88e6xxx: power serdes on/off for 10G interfaces on 6390X") Signed-off-by: Heiner Kallweit Reviewed-by: Florian Fainelli Signed-off-by: David S. Miller --- drivers/net/dsa/mv88e6xxx/port.c | 24 ++++++++++++++++-------- 1 file changed, 16 insertions(+), 8 deletions(-) diff --git a/drivers/net/dsa/mv88e6xxx/port.c b/drivers/net/dsa/mv88e6xxx/port.c index dce84a2a65c7..c44b2822e4dd 100644 --- a/drivers/net/dsa/mv88e6xxx/port.c +++ b/drivers/net/dsa/mv88e6xxx/port.c @@ -427,18 +427,22 @@ int mv88e6390x_port_set_cmode(struct mv88e6xxx_chip *chip, int port, return 0; lane = mv88e6390x_serdes_get_lane(chip, port); - if (lane < 0) + if (lane < 0 && lane != -ENODEV) return lane; - if (chip->ports[port].serdes_irq) { - err = mv88e6390_serdes_irq_disable(chip, port, lane); + if (lane >= 0) { + if (chip->ports[port].serdes_irq) { + err = mv88e6390_serdes_irq_disable(chip, port, lane); + if (err) + return err; + } + + err = mv88e6390x_serdes_power(chip, port, false); if (err) return err; } - err = mv88e6390x_serdes_power(chip, port, false); - if (err) - return err; + chip->ports[port].cmode = 0; if (cmode) { err = mv88e6xxx_port_read(chip, port, MV88E6XXX_PORT_STS, ®); @@ -452,6 +456,12 @@ int mv88e6390x_port_set_cmode(struct mv88e6xxx_chip *chip, int port, if (err) return err; + chip->ports[port].cmode = cmode; + + lane = mv88e6390x_serdes_get_lane(chip, port); + if (lane < 0) + return lane; + err = mv88e6390x_serdes_power(chip, port, true); if (err) return err; @@ -463,8 +473,6 @@ int mv88e6390x_port_set_cmode(struct mv88e6xxx_chip *chip, int port, } } - chip->ports[port].cmode = cmode; - return 0; } -- cgit v1.2.3 From 0b91bce1ebfc797ff3de60c8f4a1e6219a8a3187 Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Mon, 25 Mar 2019 14:18:06 +0100 Subject: net: datagram: fix unbounded loop in __skb_try_recv_datagram() Christoph reported a stall while peeking datagram with an offset when busy polling is enabled. __skb_try_recv_datagram() uses as the loop termination condition 'queue empty'. When peeking, the socket queue can be not empty, even when no additional packets are received. Address the issue explicitly checking for receive queue changes, as currently done by __skb_wait_for_more_packets(). Fixes: 2b5cd0dfa384 ("net: Change return type of sk_busy_loop from bool to void") Reported-and-tested-by: Christoph Paasch Signed-off-by: Paolo Abeni Signed-off-by: David S. Miller --- net/core/datagram.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/core/datagram.c b/net/core/datagram.c index b2651bb6d2a3..e657289db4ac 100644 --- a/net/core/datagram.c +++ b/net/core/datagram.c @@ -279,7 +279,7 @@ struct sk_buff *__skb_try_recv_datagram(struct sock *sk, unsigned int flags, break; sk_busy_loop(sk, flags & MSG_DONTWAIT); - } while (!skb_queue_empty(&sk->sk_receive_queue)); + } while (sk->sk_receive_queue.prev != *last); error = -EAGAIN; -- cgit v1.2.3 From 79706ced7a982ebc60c2663a07ff4003847b8be6 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Mon, 25 Mar 2019 14:35:30 -0700 Subject: MAINTAINERS: Fix documentation file name for PHY Library MAINTAINERS still pointed to phy.txt after moving this file into the rst format, fix this. Reported-by: Joe Perches Fixes: 25fe02d00a1e ("Documentation: net: phy: switch documentation to rst format") Signed-off-by: Florian Fainelli Signed-off-by: David S. Miller --- MAINTAINERS | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index 3e5a5d263f29..1fdc8970e816 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -5833,7 +5833,7 @@ L: netdev@vger.kernel.org S: Maintained F: Documentation/ABI/testing/sysfs-bus-mdio F: Documentation/devicetree/bindings/net/mdio* -F: Documentation/networking/phy.txt +F: Documentation/networking/phy.rst F: drivers/net/phy/ F: drivers/of/of_mdio.c F: drivers/of/of_net.c -- cgit v1.2.3 From b5f9bd15b88563b55a99ed588416881367a0ce5f Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Tue, 26 Mar 2019 13:50:14 +0800 Subject: ila: Fix rhashtable walker list corruption ila_xlat_nl_cmd_flush uses rhashtable walkers allocated from the stack but it never frees them. This corrupts the walker list of the hash table. This patch fixes it. Reported-by: syzbot+dae72a112334aa65a159@syzkaller.appspotmail.com Fixes: b6e71bdebb12 ("ila: Flush netlink command to clear xlat...") Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- net/ipv6/ila/ila_xlat.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/ipv6/ila/ila_xlat.c b/net/ipv6/ila/ila_xlat.c index 79d2e43c05c5..5fc1f4e0c0cf 100644 --- a/net/ipv6/ila/ila_xlat.c +++ b/net/ipv6/ila/ila_xlat.c @@ -417,6 +417,7 @@ int ila_xlat_nl_cmd_flush(struct sk_buff *skb, struct genl_info *info) done: rhashtable_walk_stop(&iter); + rhashtable_walk_exit(&iter); return ret; } -- cgit v1.2.3 From 669efc76b317b3aa550ffbf0b79d064cb00a5f96 Mon Sep 17 00:00:00 2001 From: Xi Wang Date: Tue, 26 Mar 2019 14:53:49 +0800 Subject: net: hns3: fix compile error Currently, the rules for configuring search paths in Kbuild have changed, this will lead some erros when compiling hns3 with the following command: make O=DIR M=drivers/net/ethernet/hisilicon/hns3 drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.c:11:10: fatal error: hnae3.h: No such file or directory This patch fix it by adding $(srctree)/ prefix to the serach paths. Signed-off-by: Xi Wang Signed-off-by: David S. Miller --- drivers/net/ethernet/hisilicon/hns3/hns3pf/Makefile | 2 +- drivers/net/ethernet/hisilicon/hns3/hns3vf/Makefile | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/Makefile b/drivers/net/ethernet/hisilicon/hns3/hns3pf/Makefile index fffe8c1c45d3..0fb61d440d3b 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/Makefile +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/Makefile @@ -3,7 +3,7 @@ # Makefile for the HISILICON network device drivers. # -ccflags-y := -Idrivers/net/ethernet/hisilicon/hns3 +ccflags-y := -I $(srctree)/drivers/net/ethernet/hisilicon/hns3 obj-$(CONFIG_HNS3_HCLGE) += hclge.o hclge-objs = hclge_main.o hclge_cmd.o hclge_mdio.o hclge_tm.o hclge_mbx.o hclge_err.o hclge_debugfs.o diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3vf/Makefile b/drivers/net/ethernet/hisilicon/hns3/hns3vf/Makefile index fb93bbd35845..6193f8fa7cf3 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3vf/Makefile +++ b/drivers/net/ethernet/hisilicon/hns3/hns3vf/Makefile @@ -3,7 +3,7 @@ # Makefile for the HISILICON network device drivers. # -ccflags-y := -Idrivers/net/ethernet/hisilicon/hns3 +ccflags-y := -I $(srctree)/drivers/net/ethernet/hisilicon/hns3 obj-$(CONFIG_HNS3_HCLGEVF) += hclgevf.o hclgevf-objs = hclgevf_main.o hclgevf_cmd.o hclgevf_mbx.o \ No newline at end of file -- cgit v1.2.3 From 7f07e5f1f778605e98cf2156d4db1ff3a3a1a74a Mon Sep 17 00:00:00 2001 From: Claudiu Manoil Date: Tue, 26 Mar 2019 11:48:57 +0200 Subject: net: mii: Fix PAUSE cap advertisement from linkmode_adv_to_lcl_adv_t() helper With a recent link mode advertisement code update this helper providing local pause capability translation used for flow control link mode negotiation got broken. For eth drivers using this helper, the issue is apparent only if either PAUSE or ASYM_PAUSE is being advertised. Fixes: 3c1bcc8614db ("net: ethernet: Convert phydev advertize and supported from u32 to link mode") Signed-off-by: Claudiu Manoil Signed-off-by: David S. Miller --- include/linux/mii.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/mii.h b/include/linux/mii.h index 6fee8b1a4400..5cd824c1c0ca 100644 --- a/include/linux/mii.h +++ b/include/linux/mii.h @@ -469,7 +469,7 @@ static inline u32 linkmode_adv_to_lcl_adv_t(unsigned long *advertising) if (linkmode_test_bit(ETHTOOL_LINK_MODE_Pause_BIT, advertising)) lcl_adv |= ADVERTISE_PAUSE_CAP; - if (linkmode_test_bit(ETHTOOL_LINK_MODE_Pause_BIT, + if (linkmode_test_bit(ETHTOOL_LINK_MODE_Asym_Pause_BIT, advertising)) lcl_adv |= ADVERTISE_PAUSE_ASYM; -- cgit v1.2.3 From b3e208069477588c06f4d5d986164b435bb06e6d Mon Sep 17 00:00:00 2001 From: Dean Nelson Date: Tue, 26 Mar 2019 11:53:19 -0400 Subject: thunderx: enable page recycling for non-XDP case Commit 773225388dae15e72790 ("net: thunderx: Optimize page recycling for XDP") added code to nicvf_alloc_page() that inadvertently disables receive buffer page recycling for the non-XDP case by always NULL'ng the page pointer. This patch corrects two if-conditionals to allow for the recycling of non-XDP mode pages by only setting the page pointer to NULL when the page is not ready for recycling. Fixes: 773225388dae ("net: thunderx: Optimize page recycling for XDP") Signed-off-by: Dean Nelson Signed-off-by: David S. Miller --- drivers/net/ethernet/cavium/thunder/nicvf_queues.c | 23 +++++++++++----------- 1 file changed, 11 insertions(+), 12 deletions(-) diff --git a/drivers/net/ethernet/cavium/thunder/nicvf_queues.c b/drivers/net/ethernet/cavium/thunder/nicvf_queues.c index 5b4d3badcb73..55dbf02c42af 100644 --- a/drivers/net/ethernet/cavium/thunder/nicvf_queues.c +++ b/drivers/net/ethernet/cavium/thunder/nicvf_queues.c @@ -105,20 +105,19 @@ static inline struct pgcache *nicvf_alloc_page(struct nicvf *nic, /* Check if page can be recycled */ if (page) { ref_count = page_ref_count(page); - /* Check if this page has been used once i.e 'put_page' - * called after packet transmission i.e internal ref_count - * and page's ref_count are equal i.e page can be recycled. + /* This page can be recycled if internal ref_count and page's + * ref_count are equal, indicating that the page has been used + * once for packet transmission. For non-XDP mode, internal + * ref_count is always '1'. */ - if (rbdr->is_xdp && (ref_count == pgcache->ref_count)) - pgcache->ref_count--; - else - page = NULL; - - /* In non-XDP mode, page's ref_count needs to be '1' for it - * to be recycled. - */ - if (!rbdr->is_xdp && (ref_count != 1)) + if (rbdr->is_xdp) { + if (ref_count == pgcache->ref_count) + pgcache->ref_count--; + else + page = NULL; + } else if (ref_count != 1) { page = NULL; + } } if (!page) { -- cgit v1.2.3 From cd35ef91490ad8049dd180bb060aff7ee192eda9 Mon Sep 17 00:00:00 2001 From: Dean Nelson Date: Tue, 26 Mar 2019 11:53:26 -0400 Subject: thunderx: eliminate extra calls to put_page() for pages held for recycling For the non-XDP case, commit 773225388dae15e72790 ("net: thunderx: Optimize page recycling for XDP") added code to nicvf_free_rbdr() that, when releasing the additional receive buffer page reference held for recycling, repeatedly calls put_page() until the page's _refcount goes to zero. Which results in the page being freed. This is not okay if the page's _refcount was greater than 1 (in the non-XDP case), because nicvf_free_rbdr() should not be subtracting more than what nicvf_alloc_page() had previously added to the page's _refcount, which was only 1 (in the non-XDP case). This can arise if a received packet is still being processed and the receive buffer (i.e., skb->head) has not yet been freed via skb_free_head() when nicvf_free_rbdr() is spinning through the aforementioned put_page() loop. If this should occur, when the received packet finishes processing and skb_free_head() is called, various problems can ensue. Exactly what, depends on whether the page has already been reallocated or not, anything from "BUG: Bad page state ... ", to "Unable to handle kernel NULL pointer dereference ..." or "Unable to handle kernel paging request...". So this patch changes nicvf_free_rbdr() to only call put_page() once for pages held for recycling (in the non-XDP case). Fixes: 773225388dae ("net: thunderx: Optimize page recycling for XDP") Signed-off-by: Dean Nelson Signed-off-by: David S. Miller --- drivers/net/ethernet/cavium/thunder/nicvf_queues.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/cavium/thunder/nicvf_queues.c b/drivers/net/ethernet/cavium/thunder/nicvf_queues.c index 55dbf02c42af..e246f9733bb8 100644 --- a/drivers/net/ethernet/cavium/thunder/nicvf_queues.c +++ b/drivers/net/ethernet/cavium/thunder/nicvf_queues.c @@ -364,11 +364,10 @@ static void nicvf_free_rbdr(struct nicvf *nic, struct rbdr *rbdr) while (head < rbdr->pgcnt) { pgcache = &rbdr->pgcache[head]; if (pgcache->page && page_ref_count(pgcache->page) != 0) { - if (!rbdr->is_xdp) { - put_page(pgcache->page); - continue; + if (rbdr->is_xdp) { + page_ref_sub(pgcache->page, + pgcache->ref_count - 1); } - page_ref_sub(pgcache->page, pgcache->ref_count - 1); put_page(pgcache->page); } head++; -- cgit v1.2.3 From b4e9e931e9bb2f5b302ce66640832f5a3e57e8c4 Mon Sep 17 00:00:00 2001 From: Iuliana Prodan Date: Fri, 22 Mar 2019 14:12:30 +0200 Subject: crypto: caam - fix copy of next buffer for xcbc and cmac MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fix a side effect of adding xcbc support, when the next_buffer is not copied. The issue occurs, when there is stored from previous state a blocksize buffer and received, a less than blocksize, from user. In this case, the nents for req->src is 0, and the next_buffer is not copied. An example is: { .tap = { 17, 15, 8 }, .psize = 40, .np = 3, .ksize = 16, } Fixes: 12b8567f6fa4 ("crypto: caam - add support for xcbc(aes)") Signed-off-by: Iuliana Prodan Reviewed-by: Horia Geantă Signed-off-by: Herbert Xu --- drivers/crypto/caam/caamhash.c | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/drivers/crypto/caam/caamhash.c b/drivers/crypto/caam/caamhash.c index b1eadc6652b5..7205d9f4029e 100644 --- a/drivers/crypto/caam/caamhash.c +++ b/drivers/crypto/caam/caamhash.c @@ -865,19 +865,18 @@ static int ahash_update_ctx(struct ahash_request *req) if (ret) goto unmap_ctx; - if (mapped_nents) { + if (mapped_nents) sg_to_sec4_sg_last(req->src, mapped_nents, edesc->sec4_sg + sec4_sg_src_index, 0); - if (*next_buflen) - scatterwalk_map_and_copy(next_buf, req->src, - to_hash - *buflen, - *next_buflen, 0); - } else { + else sg_to_sec4_set_last(edesc->sec4_sg + sec4_sg_src_index - 1); - } + if (*next_buflen) + scatterwalk_map_and_copy(next_buf, req->src, + to_hash - *buflen, + *next_buflen, 0); desc = edesc->hw_desc; edesc->sec4_sg_dma = dma_map_single(jrdev, edesc->sec4_sg, -- cgit v1.2.3 From 1017e0987117c32783ba7c10fe2e7ff1456ba1dc Mon Sep 17 00:00:00 2001 From: Sabrina Dubroca Date: Tue, 26 Mar 2019 18:22:16 +0100 Subject: vrf: prevent adding upper devices VRF devices don't work with upper devices. Currently, it's possible to add a VRF device to a bridge or team, and to create macvlan, macsec, or ipvlan devices on top of a VRF (bond and vlan are prevented respectively by the lack of an ndo_set_mac_address op and the NETIF_F_VLAN_CHALLENGED feature flag). Fix this by setting the IFF_NO_RX_HANDLER flag (introduced in commit f5426250a6ec ("net: introduce IFF_NO_RX_HANDLER")). Cc: David Ahern Fixes: 193125dbd8eb ("net: Introduce VRF device driver") Signed-off-by: Sabrina Dubroca Acked-by: David Ahern Signed-off-by: David S. Miller --- drivers/net/vrf.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/vrf.c b/drivers/net/vrf.c index 7c1430ed0244..6d1a1abbed27 100644 --- a/drivers/net/vrf.c +++ b/drivers/net/vrf.c @@ -1273,6 +1273,7 @@ static void vrf_setup(struct net_device *dev) /* default to no qdisc; user can add if desired */ dev->priv_flags |= IFF_NO_QUEUE; + dev->priv_flags |= IFF_NO_RX_HANDLER; dev->min_mtu = 0; dev->max_mtu = 0; -- cgit v1.2.3 From c8b1917c8987a6fa3695d479b4d60fbbbc3e537b Mon Sep 17 00:00:00 2001 From: Furquan Shaikh Date: Wed, 20 Mar 2019 15:28:44 -0700 Subject: ACPICA: Clear status of GPEs before enabling them Commit 18996f2db918 ("ACPICA: Events: Stop unconditionally clearing ACPI IRQs during suspend/resume") was added to stop clearing event status bits unconditionally in the system-wide suspend and resume paths. This was done because of an issue with a laptop lid appaering to be closed even when it was used to wake up the system from suspend (see https://bugzilla.kernel.org/show_bug.cgi?id=196249), which happened because event status bits were cleared unconditionally on system resume. Though this change fixed the issue in the resume path, it introduced regressions in a few suspend paths. First regression was reported and fixed in the S5 entry path by commit fa85015c0d95 ("ACPICA: Clear status of all events when entering S5"). Next regression was reported and fixed for all legacy sleep paths by commit f317c7dc12b7 ("ACPICA: Clear status of all events when entering sleep states"). However, there still is a suspend-to-idle regression, since suspend-to-idle does not follow the legacy sleep paths. In the suspend-to-idle case, wakeup is enabled as part of device suspend. If the status bits of wakeup GPEs are set when they are enabled, it causes a premature system wakeup to occur. To address that problem, partially revert commit 18996f2db918 to restore GPE status bits clearing before the GPE is enabled in acpi_ev_enable_gpe(). Fixes: 18996f2db918 ("ACPICA: Events: Stop unconditionally clearing ACPI IRQs during suspend/resume") Signed-off-by: Furquan Shaikh Cc: 4.17+ # 4.17+ [ rjw: Subject & changelog ] Signed-off-by: Rafael J. Wysocki --- drivers/acpi/acpica/evgpe.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/acpi/acpica/evgpe.c b/drivers/acpi/acpica/evgpe.c index 62d3aa74277b..5e9d7348c16f 100644 --- a/drivers/acpi/acpica/evgpe.c +++ b/drivers/acpi/acpica/evgpe.c @@ -81,8 +81,12 @@ acpi_status acpi_ev_enable_gpe(struct acpi_gpe_event_info *gpe_event_info) ACPI_FUNCTION_TRACE(ev_enable_gpe); - /* Enable the requested GPE */ + /* Clear the GPE status */ + status = acpi_hw_clear_gpe(gpe_event_info); + if (ACPI_FAILURE(status)) + return_ACPI_STATUS(status); + /* Enable the requested GPE */ status = acpi_hw_low_set_gpe(gpe_event_info, ACPI_GPE_ENABLE); return_ACPI_STATUS(status); } -- cgit v1.2.3 From e7dfb6d04e4715be1f3eb2c60d97b753fd2e4516 Mon Sep 17 00:00:00 2001 From: David Engraf Date: Mon, 11 Mar 2019 08:57:42 +0100 Subject: ARM: dts: at91: Fix typo in ISC_D0 on PC9 The function argument for the ISC_D0 on PC9 was incorrect. According to the documentation it should be 'C' aka 3. Signed-off-by: David Engraf Reviewed-by: Nicolas Ferre Signed-off-by: Ludovic Desroches Fixes: 7f16cb676c00 ("ARM: at91/dt: add sama5d2 pinmux") Cc: # v4.4+ --- arch/arm/boot/dts/sama5d2-pinfunc.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/boot/dts/sama5d2-pinfunc.h b/arch/arm/boot/dts/sama5d2-pinfunc.h index 1c01a6f843d8..28a2e45752fe 100644 --- a/arch/arm/boot/dts/sama5d2-pinfunc.h +++ b/arch/arm/boot/dts/sama5d2-pinfunc.h @@ -518,7 +518,7 @@ #define PIN_PC9__GPIO PINMUX_PIN(PIN_PC9, 0, 0) #define PIN_PC9__FIQ PINMUX_PIN(PIN_PC9, 1, 3) #define PIN_PC9__GTSUCOMP PINMUX_PIN(PIN_PC9, 2, 1) -#define PIN_PC9__ISC_D0 PINMUX_PIN(PIN_PC9, 2, 1) +#define PIN_PC9__ISC_D0 PINMUX_PIN(PIN_PC9, 3, 1) #define PIN_PC9__TIOA4 PINMUX_PIN(PIN_PC9, 4, 2) #define PIN_PC10 74 #define PIN_PC10__GPIO PINMUX_PIN(PIN_PC10, 0, 0) -- cgit v1.2.3 From 157c99c5a2956a9ab1ae12de0136a2d8a1b1a307 Mon Sep 17 00:00:00 2001 From: Daniel Drake Date: Tue, 26 Mar 2019 15:04:14 +0800 Subject: mmc: alcor: don't write data before command has completed The alcor driver is setting up data transfer and submitting the associated MMC command at the same time. While this works most of the time, it occasionally causes problems upon write. In the working case, after setting up the data transfer and submitting the MMC command, an interrupt comes in a moment later with CMD_END and WRITE_BUF_RDY bits set. The data transfer then happens without problem. However, on occasion, the interrupt that arrives at that point only has WRITE_BUF_RDY set. The hardware notifies that it's ready to write data, but the associated MMC command is still running. Regardless, the driver was proceeding to write data immediately, and that would then cause another interrupt indicating data CRC error, and the write would fail. Additionally, the transfer setup function alcor_trigger_data_transfer() was being called 3 times for each write operation, which was confusing and may be contributing to this issue. Solve this by tweaking the driver behaviour to follow the sequence observed in the original ampe_stor vendor driver: 1. When starting request handling, write 0 to DATA_XFER_CTRL 2. Submit the command 3. Wait for CMD_END interrupt and then trigger data transfer 4. For the PIO case, trigger the next step of the data transfer only upon the following DATA_END interrupt, which occurs after the block has been written. I confirmed that the read path still works (DMA & PIO) and also now presents more consistency with the operations performed by ampe_stor. Signed-off-by: Daniel Drake Fixes: c5413ad815a6 ("mmc: add new Alcor Micro Cardreader SD/MMC driver") Cc: stable@vger.kernel.org Signed-off-by: Ulf Hansson --- drivers/mmc/host/alcor.c | 34 +++++++++++++--------------------- 1 file changed, 13 insertions(+), 21 deletions(-) diff --git a/drivers/mmc/host/alcor.c b/drivers/mmc/host/alcor.c index 82a97866e0cf..7c8f203f9a24 100644 --- a/drivers/mmc/host/alcor.c +++ b/drivers/mmc/host/alcor.c @@ -48,7 +48,6 @@ struct alcor_sdmmc_host { struct mmc_command *cmd; struct mmc_data *data; unsigned int dma_on:1; - unsigned int early_data:1; struct mutex cmd_mutex; @@ -144,8 +143,7 @@ static void alcor_data_set_dma(struct alcor_sdmmc_host *host) host->sg_count--; } -static void alcor_trigger_data_transfer(struct alcor_sdmmc_host *host, - bool early) +static void alcor_trigger_data_transfer(struct alcor_sdmmc_host *host) { struct alcor_pci_priv *priv = host->alcor_pci; struct mmc_data *data = host->data; @@ -155,13 +153,6 @@ static void alcor_trigger_data_transfer(struct alcor_sdmmc_host *host, ctrl |= AU6601_DATA_WRITE; if (data->host_cookie == COOKIE_MAPPED) { - if (host->early_data) { - host->early_data = false; - return; - } - - host->early_data = early; - alcor_data_set_dma(host); ctrl |= AU6601_DATA_DMA_MODE; host->dma_on = 1; @@ -231,6 +222,7 @@ static void alcor_prepare_sg_miter(struct alcor_sdmmc_host *host) static void alcor_prepare_data(struct alcor_sdmmc_host *host, struct mmc_command *cmd) { + struct alcor_pci_priv *priv = host->alcor_pci; struct mmc_data *data = cmd->data; if (!data) @@ -248,7 +240,7 @@ static void alcor_prepare_data(struct alcor_sdmmc_host *host, if (data->host_cookie != COOKIE_MAPPED) alcor_prepare_sg_miter(host); - alcor_trigger_data_transfer(host, true); + alcor_write8(priv, 0, AU6601_DATA_XFER_CTRL); } static void alcor_send_cmd(struct alcor_sdmmc_host *host, @@ -435,7 +427,7 @@ static int alcor_cmd_irq_done(struct alcor_sdmmc_host *host, u32 intmask) if (!host->data) return false; - alcor_trigger_data_transfer(host, false); + alcor_trigger_data_transfer(host); host->cmd = NULL; return true; } @@ -456,7 +448,7 @@ static void alcor_cmd_irq_thread(struct alcor_sdmmc_host *host, u32 intmask) if (!host->data) alcor_request_complete(host, 1); else - alcor_trigger_data_transfer(host, false); + alcor_trigger_data_transfer(host); host->cmd = NULL; } @@ -487,15 +479,9 @@ static int alcor_data_irq_done(struct alcor_sdmmc_host *host, u32 intmask) break; case AU6601_INT_READ_BUF_RDY: alcor_trf_block_pio(host, true); - if (!host->blocks) - break; - alcor_trigger_data_transfer(host, false); return 1; case AU6601_INT_WRITE_BUF_RDY: alcor_trf_block_pio(host, false); - if (!host->blocks) - break; - alcor_trigger_data_transfer(host, false); return 1; case AU6601_INT_DMA_END: if (!host->sg_count) @@ -508,8 +494,14 @@ static int alcor_data_irq_done(struct alcor_sdmmc_host *host, u32 intmask) break; } - if (intmask & AU6601_INT_DATA_END) - return 0; + if (intmask & AU6601_INT_DATA_END) { + if (!host->dma_on && host->blocks) { + alcor_trigger_data_transfer(host); + return 1; + } else { + return 0; + } + } return 1; } -- cgit v1.2.3 From 379e2014c95b7a454713da822b8ef4ec51ab8a75 Mon Sep 17 00:00:00 2001 From: Björn Töpel Date: Wed, 27 Mar 2019 14:51:13 +0100 Subject: libbpf: add xsk.h to install_headers target MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The xsk.h header file was missing from the install_headers target in the Makefile. This patch simply adds xsk.h to the set of installed headers. Fixes: 1cad07884239 ("libbpf: add support for using AF_XDP sockets") Reported-by: Bruce Richardson Signed-off-by: Björn Töpel Signed-off-by: Daniel Borkmann --- tools/lib/bpf/Makefile | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/lib/bpf/Makefile b/tools/lib/bpf/Makefile index 5bf8e52c41fc..279589c29983 100644 --- a/tools/lib/bpf/Makefile +++ b/tools/lib/bpf/Makefile @@ -222,6 +222,7 @@ install_headers: $(call do_install,bpf.h,$(prefix)/include/bpf,644); \ $(call do_install,libbpf.h,$(prefix)/include/bpf,644); $(call do_install,btf.h,$(prefix)/include/bpf,644); + $(call do_install,xsk.h,$(prefix)/include/bpf,644); install: install_lib -- cgit v1.2.3 From 89dedaef49d36adc2bb5e7e4c38b52fa3013c7c8 Mon Sep 17 00:00:00 2001 From: Björn Töpel Date: Wed, 27 Mar 2019 14:51:14 +0100 Subject: libbpf: add libelf dependency to shared library build MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The DPDK project is moving forward with its AF_XDP PMD, and during that process some libbpf issues surfaced [1]: When libbpf was built as a shared library, libelf was not included in the linking phase. Since libelf is an internal depedency to libbpf, libelf should be included. This patch adds '-lelf' to resolve that. [1] https://patches.dpdk.org/patch/50704/#93571 Fixes: 1b76c13e4b36 ("bpf tools: Introduce 'bpf' library and add bpf feature check") Suggested-by: Luca Boccassi Signed-off-by: Björn Töpel Signed-off-by: Daniel Borkmann --- tools/lib/bpf/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/lib/bpf/Makefile b/tools/lib/bpf/Makefile index 279589c29983..7beec4d5b522 100644 --- a/tools/lib/bpf/Makefile +++ b/tools/lib/bpf/Makefile @@ -177,7 +177,7 @@ $(OUTPUT)libbpf.so: $(OUTPUT)libbpf.so.$(LIBBPF_VERSION) $(OUTPUT)libbpf.so.$(LIBBPF_VERSION): $(BPF_IN) $(QUIET_LINK)$(CC) --shared -Wl,-soname,libbpf.so.$(VERSION) \ - -Wl,--version-script=$(VERSION_SCRIPT) $^ -o $@ + -Wl,--version-script=$(VERSION_SCRIPT) $^ -lelf -o $@ @ln -sf $(@F) $(OUTPUT)libbpf.so @ln -sf $(@F) $(OUTPUT)libbpf.so.$(VERSION) -- cgit v1.2.3 From 8543e437807970166c2b66b79935c9f4b0e6d1f9 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Thu, 28 Mar 2019 16:44:28 +0100 Subject: bpf, libbpf: fix quiet install_headers Both btf.h and xsk.h headers are not installed quietly due to missing '\' for the call to QUIET_INSTALL. Lets fix it. Before: # make install_headers INSTALL headers if [ ! -d '''/usr/local/include/bpf' ]; then install -d -m 755 '''/usr/local/include/bpf'; fi; install btf.h -m 644 '''/usr/local/include/bpf'; if [ ! -d '''/usr/local/include/bpf' ]; then install -d -m 755 '''/usr/local/include/bpf'; fi; install xsk.h -m 644 '''/usr/local/include/bpf'; # ls /usr/local/include/bpf/ bpf.h btf.h libbpf.h xsk.h After: # make install_headers INSTALL headers # ls /usr/local/include/bpf/ bpf.h btf.h libbpf.h xsk.h Fixes: a493f5f9d8c2 ("libbpf: Install btf.h with libbpf") Fixes: 379e2014c95b ("libbpf: add xsk.h to install_headers target") Signed-off-by: Daniel Borkmann Acked-by: Alexei Starovoitov Acked-by: Andrii Nakryiko --- tools/lib/bpf/Makefile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/lib/bpf/Makefile b/tools/lib/bpf/Makefile index 7beec4d5b522..8e7c56e9590f 100644 --- a/tools/lib/bpf/Makefile +++ b/tools/lib/bpf/Makefile @@ -220,8 +220,8 @@ install_lib: all_cmd install_headers: $(call QUIET_INSTALL, headers) \ $(call do_install,bpf.h,$(prefix)/include/bpf,644); \ - $(call do_install,libbpf.h,$(prefix)/include/bpf,644); - $(call do_install,btf.h,$(prefix)/include/bpf,644); + $(call do_install,libbpf.h,$(prefix)/include/bpf,644); \ + $(call do_install,btf.h,$(prefix)/include/bpf,644); \ $(call do_install,xsk.h,$(prefix)/include/bpf,644); install: install_lib -- cgit v1.2.3 From f35f06c35560a86e841631f0243b83a984dc11a9 Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Tue, 26 Mar 2019 10:49:56 +0000 Subject: Btrfs: do not allow trimming when a fs is mounted with the nologreplay option Whan a filesystem is mounted with the nologreplay mount option, which requires it to be mounted in RO mode as well, we can not allow discard on free space inside block groups, because log trees refer to extents that are not pinned in a block group's free space cache (pinning the extents is precisely the first phase of replaying a log tree). So do not allow the fitrim ioctl to do anything when the filesystem is mounted with the nologreplay option, because later it can be mounted RW without that option, which causes log replay to happen and result in either a failure to replay the log trees (leading to a mount failure), a crash or some silent corruption. Reported-by: Darrick J. Wong Fixes: 96da09192cda ("btrfs: Introduce new mount option to disable tree log replay") CC: stable@vger.kernel.org # 4.9+ Reviewed-by: Nikolay Borisov Signed-off-by: Filipe Manana Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/ioctl.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index ec2d8919e7fb..cd4e693406a0 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -501,6 +501,16 @@ static noinline int btrfs_ioctl_fitrim(struct file *file, void __user *arg) if (!capable(CAP_SYS_ADMIN)) return -EPERM; + /* + * If the fs is mounted with nologreplay, which requires it to be + * mounted in RO mode as well, we can not allow discard on free space + * inside block groups, because log trees refer to extents that are not + * pinned in a block group's free space cache (pinning the extents is + * precisely the first phase of replaying a log tree). + */ + if (btrfs_test_opt(fs_info, NOLOGREPLAY)) + return -EROFS; + rcu_read_lock(); list_for_each_entry_rcu(device, &fs_info->fs_devices->devices, dev_list) { -- cgit v1.2.3 From ab8a6d821179ab9bea1a9179f535ccba6330c1ed Mon Sep 17 00:00:00 2001 From: Chong Qiao Date: Thu, 28 Mar 2019 07:08:01 +0800 Subject: MIPS: KGDB: fix kgdb support for SMP platforms. KGDB_call_nmi_hook is called by other cpu through smp call. MIPS smp call is processed in ipi irq handler and regs is saved in handle_int. So kgdb_call_nmi_hook get regs by get_irq_regs and regs will be passed to kgdb_cpu_enter. Signed-off-by: Chong Qiao Reviewed-by: Douglas Anderson Acked-by: Daniel Thompson Signed-off-by: Paul Burton Cc: Ralf Baechle Cc: James Hogan Cc: Will Deacon Cc: Christophe Leroy Cc: linux-mips@vger.kernel.org Cc: linux-kernel@vger.kernel.org Cc: QiaoChong --- arch/mips/kernel/kgdb.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/mips/kernel/kgdb.c b/arch/mips/kernel/kgdb.c index 6e574c02e4c3..ea781b29f7f1 100644 --- a/arch/mips/kernel/kgdb.c +++ b/arch/mips/kernel/kgdb.c @@ -33,6 +33,7 @@ #include #include #include +#include static struct hard_trap_info { unsigned char tt; /* Trap type code for MIPS R3xxx and R4xxx */ @@ -214,7 +215,7 @@ void kgdb_call_nmi_hook(void *ignored) old_fs = get_fs(); set_fs(KERNEL_DS); - kgdb_nmicallback(raw_smp_processor_id(), NULL); + kgdb_nmicallback(raw_smp_processor_id(), get_irq_regs()); set_fs(old_fs); } -- cgit v1.2.3 From e4952b0c2c0309bdbfc4c6cc0dd81e37450d74d0 Mon Sep 17 00:00:00 2001 From: Thomas Bogendoerfer Date: Thu, 28 Mar 2019 14:37:45 +0100 Subject: MIPS: SGI-IP27: Fix use of unchecked pointer in shutdown_bridge_irq smatch complaint: arch/mips/sgi-ip27/ip27-irq.c:123 shutdown_bridge_irq() warn: variable dereferenced before check 'hd' (see line 121) Fix it by removing local variable and use hd->pin directly. Fixes: 69a07a41d908 ("MIPS: SGI-IP27: rework HUB interrupts") Reported-by: Dan Carpenter Signed-off-by: Thomas Bogendoerfer Reviewed-by: Mukesh Ojha Signed-off-by: Paul Burton Cc: Ralf Baechle Cc: James Hogan Cc: linux-mips@vger.kernel.org Cc: linux-kernel@vger.kernel.org --- arch/mips/sgi-ip27/ip27-irq.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/arch/mips/sgi-ip27/ip27-irq.c b/arch/mips/sgi-ip27/ip27-irq.c index 710a59764b01..a32f843cdbe0 100644 --- a/arch/mips/sgi-ip27/ip27-irq.c +++ b/arch/mips/sgi-ip27/ip27-irq.c @@ -118,7 +118,6 @@ static void shutdown_bridge_irq(struct irq_data *d) { struct hub_irq_data *hd = irq_data_get_irq_chip_data(d); struct bridge_controller *bc; - int pin = hd->pin; if (!hd) return; @@ -126,7 +125,7 @@ static void shutdown_bridge_irq(struct irq_data *d) disable_hub_irq(d); bc = hd->bc; - bridge_clr(bc, b_int_enable, (1 << pin)); + bridge_clr(bc, b_int_enable, (1 << hd->pin)); bridge_read(bc, b_wid_tflush); } -- cgit v1.2.3 From 6e57d72a84db9f2e565992f76b6a6bc907f13b77 Mon Sep 17 00:00:00 2001 From: xiaofeis Date: Wed, 27 Mar 2019 11:59:06 +0800 Subject: net: dsa: Implement flow_dissect callback for tag_qca Add flow_dissect for qca tagged packet to get the right hash. Signed-off-by: Xiaofei Shen Reviewed-by: Andrew Lunn Reviewed-by: Vinod Koul Reviewed-by: Florian Fainelli Signed-off-by: David S. Miller --- net/dsa/tag_qca.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/net/dsa/tag_qca.c b/net/dsa/tag_qca.c index ed4f6dc26365..85c22ada4744 100644 --- a/net/dsa/tag_qca.c +++ b/net/dsa/tag_qca.c @@ -98,8 +98,18 @@ static struct sk_buff *qca_tag_rcv(struct sk_buff *skb, struct net_device *dev, return skb; } +static int qca_tag_flow_dissect(const struct sk_buff *skb, __be16 *proto, + int *offset) +{ + *offset = QCA_HDR_LEN; + *proto = ((__be16 *)skb->data)[0]; + + return 0; +} + const struct dsa_device_ops qca_netdev_ops = { .xmit = qca_tag_xmit, .rcv = qca_tag_rcv, + .flow_dissect = qca_tag_flow_dissect, .overhead = QCA_HDR_LEN, }; -- cgit v1.2.3 From 6289d0facd9ebce4cc83e5da39e15643ee998dc5 Mon Sep 17 00:00:00 2001 From: Bjørn Mork Date: Wed, 27 Mar 2019 15:26:01 +0100 Subject: qmi_wwan: add Olicard 600 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This is a Qualcomm based device with a QMI function on interface 4. It is mode switched from 2020:2030 using a standard eject message. T: Bus=01 Lev=01 Prnt=01 Port=00 Cnt=01 Dev#= 6 Spd=480 MxCh= 0 D: Ver= 2.00 Cls=00(>ifc ) Sub=00 Prot=00 MxPS=64 #Cfgs= 1 P: Vendor=2020 ProdID=2031 Rev= 2.32 S: Manufacturer=Mobile Connect S: Product=Mobile Connect S: SerialNumber=0123456789ABCDEF C:* #Ifs= 6 Cfg#= 1 Atr=80 MxPwr=500mA I:* If#= 0 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=ff Prot=ff Driver=(none) E: Ad=81(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=01(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms I:* If#= 1 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=00 Prot=00 Driver=(none) E: Ad=83(I) Atr=03(Int.) MxPS= 10 Ivl=32ms E: Ad=82(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=02(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms I:* If#= 2 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=00 Prot=00 Driver=(none) E: Ad=85(I) Atr=03(Int.) MxPS= 10 Ivl=32ms E: Ad=84(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=03(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms I:* If#= 3 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=00 Prot=00 Driver=(none) E: Ad=87(I) Atr=03(Int.) MxPS= 10 Ivl=32ms E: Ad=86(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=04(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms I:* If#= 4 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=ff Prot=ff Driver=(none) E: Ad=89(I) Atr=03(Int.) MxPS= 8 Ivl=32ms E: Ad=88(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=05(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms I:* If#= 5 Alt= 0 #EPs= 2 Cls=08(stor.) Sub=06 Prot=50 Driver=(none) E: Ad=8a(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=06(O) Atr=02(Bulk) MxPS= 512 Ivl=125us Signed-off-by: Bjørn Mork Signed-off-by: David S. Miller --- drivers/net/usb/qmi_wwan.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/usb/qmi_wwan.c b/drivers/net/usb/qmi_wwan.c index 74bebbdb4b15..9195f3476b1d 100644 --- a/drivers/net/usb/qmi_wwan.c +++ b/drivers/net/usb/qmi_wwan.c @@ -1203,6 +1203,7 @@ static const struct usb_device_id products[] = { {QMI_FIXED_INTF(0x19d2, 0x2002, 4)}, /* ZTE (Vodafone) K3765-Z */ {QMI_FIXED_INTF(0x2001, 0x7e19, 4)}, /* D-Link DWM-221 B1 */ {QMI_FIXED_INTF(0x2001, 0x7e35, 4)}, /* D-Link DWM-222 */ + {QMI_FIXED_INTF(0x2020, 0x2031, 4)}, /* Olicard 600 */ {QMI_FIXED_INTF(0x2020, 0x2033, 4)}, /* BroadMobi BM806U */ {QMI_FIXED_INTF(0x0f3d, 0x68a2, 8)}, /* Sierra Wireless MC7700 */ {QMI_FIXED_INTF(0x114f, 0x68a2, 8)}, /* Sierra Wireless MC7750 */ -- cgit v1.2.3 From 355b98553789b646ed97ad801a619ff898471b92 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 27 Mar 2019 08:21:30 -0700 Subject: netns: provide pure entropy for net_hash_mix() net_hash_mix() currently uses kernel address of a struct net, and is used in many places that could be used to reveal this address to a patient attacker, thus defeating KASLR, for the typical case (initial net namespace, &init_net is not dynamically allocated) I believe the original implementation tried to avoid spending too many cycles in this function, but security comes first. Also provide entropy regardless of CONFIG_NET_NS. Fixes: 0b4419162aa6 ("netns: introduce the net_hash_mix "salt" for hashes") Signed-off-by: Eric Dumazet Reported-by: Amit Klein Reported-by: Benny Pinkas Cc: Pavel Emelyanov Signed-off-by: David S. Miller --- include/net/net_namespace.h | 1 + include/net/netns/hash.h | 10 ++-------- net/core/net_namespace.c | 1 + 3 files changed, 4 insertions(+), 8 deletions(-) diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h index a68ced28d8f4..12689ddfc24c 100644 --- a/include/net/net_namespace.h +++ b/include/net/net_namespace.h @@ -59,6 +59,7 @@ struct net { */ spinlock_t rules_mod_lock; + u32 hash_mix; atomic64_t cookie_gen; struct list_head list; /* list of network namespaces */ diff --git a/include/net/netns/hash.h b/include/net/netns/hash.h index 16a842456189..d9b665151f3d 100644 --- a/include/net/netns/hash.h +++ b/include/net/netns/hash.h @@ -2,16 +2,10 @@ #ifndef __NET_NS_HASH_H__ #define __NET_NS_HASH_H__ -#include - -struct net; +#include static inline u32 net_hash_mix(const struct net *net) { -#ifdef CONFIG_NET_NS - return (u32)(((unsigned long)net) >> ilog2(sizeof(*net))); -#else - return 0; -#endif + return net->hash_mix; } #endif diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c index 17f36317363d..7e6dcc625701 100644 --- a/net/core/net_namespace.c +++ b/net/core/net_namespace.c @@ -304,6 +304,7 @@ static __net_init int setup_net(struct net *net, struct user_namespace *user_ns) refcount_set(&net->count, 1); refcount_set(&net->passive, 1); + get_random_bytes(&net->hash_mix, sizeof(u32)); net->dev_base_seq = 1; net->user_ns = user_ns; idr_init(&net->netns_ids); -- cgit v1.2.3 From c8ba5b91a04e3e2643e48501c114108802f21cda Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 27 Mar 2019 11:38:38 -0700 Subject: nfp: validate the return code from dev_queue_xmit() dev_queue_xmit() may return error codes as well as netdev_tx_t, and it always consumes the skb. Make sure we always return a correct netdev_tx_t value. Fixes: eadfa4c3be99 ("nfp: add stats and xmit helpers for representors") Signed-off-by: Jakub Kicinski Reviewed-by: John Hurley Reviewed-by: Simon Horman Signed-off-by: David S. Miller --- drivers/net/ethernet/netronome/nfp/nfp_net_repr.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_repr.c b/drivers/net/ethernet/netronome/nfp/nfp_net_repr.c index d2c803bb4e56..7b46fce2e81e 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_net_repr.c +++ b/drivers/net/ethernet/netronome/nfp/nfp_net_repr.c @@ -195,7 +195,7 @@ static netdev_tx_t nfp_repr_xmit(struct sk_buff *skb, struct net_device *netdev) ret = dev_queue_xmit(skb); nfp_repr_inc_tx_stats(netdev, len, ret); - return ret; + return NETDEV_TX_OK; } static int nfp_repr_stop(struct net_device *netdev) -- cgit v1.2.3 From c3e1f7fff69c78169c8ac40cc74ac4307f74e36d Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 27 Mar 2019 11:38:39 -0700 Subject: nfp: disable netpoll on representors NFP reprs are software device on top of the PF's vNIC. The comment above __dev_queue_xmit() sayeth: When calling this method, interrupts MUST be enabled. This is because the BH enable code must have IRQs enabled so that it will not deadlock. For netconsole we can't guarantee IRQ state, let's just disable netpoll on representors to be on the safe side. When the initial implementation of NFP reprs was added by the commit 5de73ee46704 ("nfp: general representor implementation") .ndo_poll_controller was required for netpoll to be enabled. Fixes: ac3d9dd034e5 ("netpoll: make ndo_poll_controller() optional") Signed-off-by: Jakub Kicinski Reviewed-by: John Hurley Reviewed-by: Simon Horman Signed-off-by: David S. Miller --- drivers/net/ethernet/netronome/nfp/nfp_net_repr.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_repr.c b/drivers/net/ethernet/netronome/nfp/nfp_net_repr.c index 7b46fce2e81e..94d228c04496 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_net_repr.c +++ b/drivers/net/ethernet/netronome/nfp/nfp_net_repr.c @@ -383,7 +383,7 @@ int nfp_repr_init(struct nfp_app *app, struct net_device *netdev, netdev->features &= ~(NETIF_F_TSO | NETIF_F_TSO6); netdev->gso_max_segs = NFP_NET_LSO_MAX_SEGS; - netdev->priv_flags |= IFF_NO_QUEUE; + netdev->priv_flags |= IFF_NO_QUEUE | IFF_DISABLE_NETPOLL; netdev->features |= NETIF_F_LLTX; if (nfp_app_has_tc(app)) { -- cgit v1.2.3 From f28cd2af22a0c134e4aa1c64a70f70d815d473fb Mon Sep 17 00:00:00 2001 From: Andrea Righi Date: Thu, 28 Mar 2019 07:36:00 +0100 Subject: openvswitch: fix flow actions reallocation The flow action buffer can be resized if it's not big enough to contain all the requested flow actions. However, this resize doesn't take into account the new requested size, the buffer is only increased by a factor of 2x. This might be not enough to contain the new data, causing a buffer overflow, for example: [ 42.044472] ============================================================================= [ 42.045608] BUG kmalloc-96 (Not tainted): Redzone overwritten [ 42.046415] ----------------------------------------------------------------------------- [ 42.047715] Disabling lock debugging due to kernel taint [ 42.047716] INFO: 0x8bf2c4a5-0x720c0928. First byte 0x0 instead of 0xcc [ 42.048677] INFO: Slab 0xbc6d2040 objects=29 used=18 fp=0xdc07dec4 flags=0x2808101 [ 42.049743] INFO: Object 0xd53a3464 @offset=2528 fp=0xccdcdebb [ 42.050747] Redzone 76f1b237: cc cc cc cc cc cc cc cc ........ [ 42.051839] Object d53a3464: 6b 6b 6b 6b 6b 6b 6b 6b 0c 00 00 00 6c 00 00 00 kkkkkkkk....l... [ 42.053015] Object f49a30cc: 6c 00 0c 00 00 00 00 00 00 00 00 03 78 a3 15 f6 l...........x... [ 42.054203] Object acfe4220: 20 00 02 00 ff ff ff ff 00 00 00 00 00 00 00 00 ............... [ 42.055370] Object 21024e91: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ [ 42.056541] Object 070e04c3: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ [ 42.057797] Object 948a777a: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ [ 42.059061] Redzone 8bf2c4a5: 00 00 00 00 .... [ 42.060189] Padding a681b46e: 5a 5a 5a 5a 5a 5a 5a 5a ZZZZZZZZ Fix by making sure the new buffer is properly resized to contain all the requested data. BugLink: https://bugs.launchpad.net/bugs/1813244 Signed-off-by: Andrea Righi Acked-by: Pravin B Shelar Signed-off-by: David S. Miller --- net/openvswitch/flow_netlink.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c index 691da853bef5..4bdf5e3ac208 100644 --- a/net/openvswitch/flow_netlink.c +++ b/net/openvswitch/flow_netlink.c @@ -2306,14 +2306,14 @@ static struct nlattr *reserve_sfa_size(struct sw_flow_actions **sfa, struct sw_flow_actions *acts; int new_acts_size; - int req_size = NLA_ALIGN(attr_len); + size_t req_size = NLA_ALIGN(attr_len); int next_offset = offsetof(struct sw_flow_actions, actions) + (*sfa)->actions_len; if (req_size <= (ksize(*sfa) - next_offset)) goto out; - new_acts_size = ksize(*sfa) * 2; + new_acts_size = max(next_offset + req_size, ksize(*sfa) * 2); if (new_acts_size > MAX_ACTIONS_BUFSIZE) { if ((MAX_ACTIONS_BUFSIZE - next_offset) < req_size) { -- cgit v1.2.3 From cb66ddd156203daefb8d71158036b27b0e2caf63 Mon Sep 17 00:00:00 2001 From: Mao Wenan Date: Thu, 28 Mar 2019 17:10:56 +0800 Subject: net: rds: force to destroy connection if t_sock is NULL in rds_tcp_kill_sock(). When it is to cleanup net namespace, rds_tcp_exit_net() will call rds_tcp_kill_sock(), if t_sock is NULL, it will not call rds_conn_destroy(), rds_conn_path_destroy() and rds_tcp_conn_free() to free connection, and the worker cp_conn_w is not stopped, afterwards the net is freed in net_drop_ns(); While cp_conn_w rds_connect_worker() will call rds_tcp_conn_path_connect() and reference 'net' which has already been freed. In rds_tcp_conn_path_connect(), rds_tcp_set_callbacks() will set t_sock = sock before sock->ops->connect, but if connect() is failed, it will call rds_tcp_restore_callbacks() and set t_sock = NULL, if connect is always failed, rds_connect_worker() will try to reconnect all the time, so rds_tcp_kill_sock() will never to cancel worker cp_conn_w and free the connections. Therefore, the condition !tc->t_sock is not needed if it is going to do cleanup_net->rds_tcp_exit_net->rds_tcp_kill_sock, because tc->t_sock is always NULL, and there is on other path to cancel cp_conn_w and free connection. So this patch is to fix this. rds_tcp_kill_sock(): ... if (net != c_net || !tc->t_sock) ... Acked-by: Santosh Shilimkar ================================================================== BUG: KASAN: use-after-free in inet_create+0xbcc/0xd28 net/ipv4/af_inet.c:340 Read of size 4 at addr ffff8003496a4684 by task kworker/u8:4/3721 CPU: 3 PID: 3721 Comm: kworker/u8:4 Not tainted 5.1.0 #11 Hardware name: linux,dummy-virt (DT) Workqueue: krdsd rds_connect_worker Call trace: dump_backtrace+0x0/0x3c0 arch/arm64/kernel/time.c:53 show_stack+0x28/0x38 arch/arm64/kernel/traps.c:152 __dump_stack lib/dump_stack.c:77 [inline] dump_stack+0x120/0x188 lib/dump_stack.c:113 print_address_description+0x68/0x278 mm/kasan/report.c:253 kasan_report_error mm/kasan/report.c:351 [inline] kasan_report+0x21c/0x348 mm/kasan/report.c:409 __asan_report_load4_noabort+0x30/0x40 mm/kasan/report.c:429 inet_create+0xbcc/0xd28 net/ipv4/af_inet.c:340 __sock_create+0x4f8/0x770 net/socket.c:1276 sock_create_kern+0x50/0x68 net/socket.c:1322 rds_tcp_conn_path_connect+0x2b4/0x690 net/rds/tcp_connect.c:114 rds_connect_worker+0x108/0x1d0 net/rds/threads.c:175 process_one_work+0x6e8/0x1700 kernel/workqueue.c:2153 worker_thread+0x3b0/0xdd0 kernel/workqueue.c:2296 kthread+0x2f0/0x378 kernel/kthread.c:255 ret_from_fork+0x10/0x18 arch/arm64/kernel/entry.S:1117 Allocated by task 687: save_stack mm/kasan/kasan.c:448 [inline] set_track mm/kasan/kasan.c:460 [inline] kasan_kmalloc+0xd4/0x180 mm/kasan/kasan.c:553 kasan_slab_alloc+0x14/0x20 mm/kasan/kasan.c:490 slab_post_alloc_hook mm/slab.h:444 [inline] slab_alloc_node mm/slub.c:2705 [inline] slab_alloc mm/slub.c:2713 [inline] kmem_cache_alloc+0x14c/0x388 mm/slub.c:2718 kmem_cache_zalloc include/linux/slab.h:697 [inline] net_alloc net/core/net_namespace.c:384 [inline] copy_net_ns+0xc4/0x2d0 net/core/net_namespace.c:424 create_new_namespaces+0x300/0x658 kernel/nsproxy.c:107 unshare_nsproxy_namespaces+0xa0/0x198 kernel/nsproxy.c:206 ksys_unshare+0x340/0x628 kernel/fork.c:2577 __do_sys_unshare kernel/fork.c:2645 [inline] __se_sys_unshare kernel/fork.c:2643 [inline] __arm64_sys_unshare+0x38/0x58 kernel/fork.c:2643 __invoke_syscall arch/arm64/kernel/syscall.c:35 [inline] invoke_syscall arch/arm64/kernel/syscall.c:47 [inline] el0_svc_common+0x168/0x390 arch/arm64/kernel/syscall.c:83 el0_svc_handler+0x60/0xd0 arch/arm64/kernel/syscall.c:129 el0_svc+0x8/0xc arch/arm64/kernel/entry.S:960 Freed by task 264: save_stack mm/kasan/kasan.c:448 [inline] set_track mm/kasan/kasan.c:460 [inline] __kasan_slab_free+0x114/0x220 mm/kasan/kasan.c:521 kasan_slab_free+0x10/0x18 mm/kasan/kasan.c:528 slab_free_hook mm/slub.c:1370 [inline] slab_free_freelist_hook mm/slub.c:1397 [inline] slab_free mm/slub.c:2952 [inline] kmem_cache_free+0xb8/0x3a8 mm/slub.c:2968 net_free net/core/net_namespace.c:400 [inline] net_drop_ns.part.6+0x78/0x90 net/core/net_namespace.c:407 net_drop_ns net/core/net_namespace.c:406 [inline] cleanup_net+0x53c/0x6d8 net/core/net_namespace.c:569 process_one_work+0x6e8/0x1700 kernel/workqueue.c:2153 worker_thread+0x3b0/0xdd0 kernel/workqueue.c:2296 kthread+0x2f0/0x378 kernel/kthread.c:255 ret_from_fork+0x10/0x18 arch/arm64/kernel/entry.S:1117 The buggy address belongs to the object at ffff8003496a3f80 which belongs to the cache net_namespace of size 7872 The buggy address is located 1796 bytes inside of 7872-byte region [ffff8003496a3f80, ffff8003496a5e40) The buggy address belongs to the page: page:ffff7e000d25a800 count:1 mapcount:0 mapping:ffff80036ce4b000 index:0x0 compound_mapcount: 0 flags: 0xffffe0000008100(slab|head) raw: 0ffffe0000008100 dead000000000100 dead000000000200 ffff80036ce4b000 raw: 0000000000000000 0000000080040004 00000001ffffffff 0000000000000000 page dumped because: kasan: bad access detected Memory state around the buggy address: ffff8003496a4580: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb ffff8003496a4600: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb >ffff8003496a4680: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb ^ ffff8003496a4700: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb ffff8003496a4780: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb ================================================================== Fixes: 467fa15356ac("RDS-TCP: Support multiple RDS-TCP listen endpoints, one per netns.") Reported-by: Hulk Robot Signed-off-by: Mao Wenan Signed-off-by: David S. Miller --- net/rds/tcp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/rds/tcp.c b/net/rds/tcp.c index fd2694174607..faf726e00e27 100644 --- a/net/rds/tcp.c +++ b/net/rds/tcp.c @@ -608,7 +608,7 @@ static void rds_tcp_kill_sock(struct net *net) list_for_each_entry_safe(tc, _tc, &rds_tcp_conn_list, t_tcp_node) { struct net *c_net = read_pnet(&tc->t_cpath->cp_conn->c_net); - if (net != c_net || !tc->t_sock) + if (net != c_net) continue; if (!list_has_conn(&tmp_list, tc->t_cpath->cp_conn)) { list_move_tail(&tc->t_tcp_node, &tmp_list); -- cgit v1.2.3 From 9a5a90d167b0e5fe3d47af16b68fd09ce64085cd Mon Sep 17 00:00:00 2001 From: Alexander Lobakin Date: Thu, 28 Mar 2019 18:23:04 +0300 Subject: net: core: netif_receive_skb_list: unlist skb before passing to pt->func __netif_receive_skb_list_ptype() leaves skb->next poisoned before passing it to pt_prev->func handler, what may produce (in certain cases, e.g. DSA setup) crashes like: [ 88.606777] CPU 0 Unable to handle kernel paging request at virtual address 0000000e, epc == 80687078, ra == 8052cc7c [ 88.618666] Oops[#1]: [ 88.621196] CPU: 0 PID: 0 Comm: swapper Not tainted 5.1.0-rc2-dlink-00206-g4192a172-dirty #1473 [ 88.630885] $ 0 : 00000000 10000400 00000002 864d7850 [ 88.636709] $ 4 : 87c0ddf0 864d7800 87c0ddf0 00000000 [ 88.642526] $ 8 : 00000000 49600000 00000001 00000001 [ 88.648342] $12 : 00000000 c288617b dadbee27 25d17c41 [ 88.654159] $16 : 87c0ddf0 85cff080 80790000 fffffffd [ 88.659975] $20 : 80797b20 ffffffff 00000001 864d7800 [ 88.665793] $24 : 00000000 8011e658 [ 88.671609] $28 : 80790000 87c0dbc0 87cabf00 8052cc7c [ 88.677427] Hi : 00000003 [ 88.680622] Lo : 7b5b4220 [ 88.683840] epc : 80687078 vlan_dev_hard_start_xmit+0x1c/0x1a0 [ 88.690532] ra : 8052cc7c dev_hard_start_xmit+0xac/0x188 [ 88.696734] Status: 10000404 IEp [ 88.700422] Cause : 50000008 (ExcCode 02) [ 88.704874] BadVA : 0000000e [ 88.708069] PrId : 0001a120 (MIPS interAptiv (multi)) [ 88.713005] Modules linked in: [ 88.716407] Process swapper (pid: 0, threadinfo=(ptrval), task=(ptrval), tls=00000000) [ 88.725219] Stack : 85f61c28 00000000 0000000e 80780000 87c0ddf0 85cff080 80790000 8052cc7c [ 88.734529] 87cabf00 00000000 00000001 85f5fb40 807b0000 864d7850 87cabf00 807d0000 [ 88.743839] 864d7800 8655f600 00000000 85cff080 87c1c000 0000006a 00000000 8052d96c [ 88.753149] 807a0000 8057adb8 87c0dcc8 87c0dc50 85cfff08 00000558 87cabf00 85f58c50 [ 88.762460] 00000002 85f58c00 864d7800 80543308 fffffff4 00000001 85f58c00 864d7800 [ 88.771770] ... [ 88.774483] Call Trace: [ 88.777199] [<80687078>] vlan_dev_hard_start_xmit+0x1c/0x1a0 [ 88.783504] [<8052cc7c>] dev_hard_start_xmit+0xac/0x188 [ 88.789326] [<8052d96c>] __dev_queue_xmit+0x6e8/0x7d4 [ 88.794955] [<805a8640>] ip_finish_output2+0x238/0x4d0 [ 88.800677] [<805ab6a0>] ip_output+0xc8/0x140 [ 88.805526] [<805a68f4>] ip_forward+0x364/0x560 [ 88.810567] [<805a4ff8>] ip_rcv+0x48/0xe4 [ 88.815030] [<80528d44>] __netif_receive_skb_one_core+0x44/0x58 [ 88.821635] [<8067f220>] dsa_switch_rcv+0x108/0x1ac [ 88.827067] [<80528f80>] __netif_receive_skb_list_core+0x228/0x26c [ 88.833951] [<8052ed84>] netif_receive_skb_list+0x1d4/0x394 [ 88.840160] [<80355a88>] lunar_rx_poll+0x38c/0x828 [ 88.845496] [<8052fa78>] net_rx_action+0x14c/0x3cc [ 88.850835] [<806ad300>] __do_softirq+0x178/0x338 [ 88.856077] [<8012a2d4>] irq_exit+0xbc/0x100 [ 88.860846] [<802f8b70>] plat_irq_dispatch+0xc0/0x144 [ 88.866477] [<80105974>] handle_int+0x14c/0x158 [ 88.871516] [<806acfb0>] r4k_wait+0x30/0x40 [ 88.876462] Code: afb10014 8c8200a0 00803025 <9443000c> 94a20468 00000000 10620042 00a08025 9605046a [ 88.887332] [ 88.888982] ---[ end trace eb863d007da11cf1 ]--- [ 88.894122] Kernel panic - not syncing: Fatal exception in interrupt [ 88.901202] ---[ end Kernel panic - not syncing: Fatal exception in interrupt ]--- Fix this by pulling skb off the sublist and zeroing skb->next pointer before calling ptype callback. Fixes: 88eb1944e18c ("net: core: propagate SKB lists through packet_type lookup") Reviewed-by: Edward Cree Signed-off-by: Alexander Lobakin Signed-off-by: David S. Miller --- net/core/dev.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/net/core/dev.c b/net/core/dev.c index 2b67f2aa59dd..fdcff29df915 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -5014,8 +5014,10 @@ static inline void __netif_receive_skb_list_ptype(struct list_head *head, if (pt_prev->list_func != NULL) pt_prev->list_func(head, pt_prev, orig_dev); else - list_for_each_entry_safe(skb, next, head, list) + list_for_each_entry_safe(skb, next, head, list) { + skb_list_del_init(skb); pt_prev->func(skb, skb->dev, pt_prev, orig_dev); + } } static void __netif_receive_skb_list_core(struct list_head *head, bool pfmemalloc) -- cgit v1.2.3 From dade58ed5af6365ac50ff4259c2a0bf31219e285 Mon Sep 17 00:00:00 2001 From: Yan Zhao Date: Wed, 27 Mar 2019 00:54:51 -0400 Subject: drm/i915/gvt: do not deliver a workload if its creation fails in workload creation routine, if any failure occurs, do not queue this workload for delivery. if this failure is fatal, enter into failsafe mode. Fixes: 6d76303553ba ("drm/i915/gvt: Move common vGPU workload creation into scheduler.c") Cc: stable@vger.kernel.org #4.19+ Cc: zhenyuw@linux.intel.com Signed-off-by: Yan Zhao Signed-off-by: Zhenyu Wang --- drivers/gpu/drm/i915/gvt/scheduler.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/gvt/scheduler.c b/drivers/gpu/drm/i915/gvt/scheduler.c index 159192c097cc..05b953793316 100644 --- a/drivers/gpu/drm/i915/gvt/scheduler.c +++ b/drivers/gpu/drm/i915/gvt/scheduler.c @@ -1486,8 +1486,9 @@ intel_vgpu_create_workload(struct intel_vgpu *vgpu, int ring_id, intel_runtime_pm_put_unchecked(dev_priv); } - if (ret && (vgpu_is_vm_unhealthy(ret))) { - enter_failsafe_mode(vgpu, GVT_FAILSAFE_GUEST_ERR); + if (ret) { + if (vgpu_is_vm_unhealthy(ret)) + enter_failsafe_mode(vgpu, GVT_FAILSAFE_GUEST_ERR); intel_vgpu_destroy_workload(workload); return ERR_PTR(ret); } -- cgit v1.2.3 From 663a50ceac75c2208d2ad95365bc8382fd42f44d Mon Sep 17 00:00:00 2001 From: Yan Zhao Date: Wed, 27 Mar 2019 00:55:45 -0400 Subject: drm/i915/gvt: do not let pin count of shadow mm go negative shadow mm's pin count got increased in workload preparation phase, which is after workload scanning. it will get decreased in complete_current_workload() anyway after workload completion. Sometimes, if a workload meets a scanning error, its shadow mm pin count will not get increased but will get decreased in the end. This patch lets shadow mm's pin count not go below 0. Fixes: 2707e4446688 ("drm/i915/gvt: vGPU graphics memory virtualization") Cc: zhenyuw@linux.intel.com Cc: stable@vger.kernel.org #4.14+ Signed-off-by: Yan Zhao Signed-off-by: Zhenyu Wang --- drivers/gpu/drm/i915/gvt/gtt.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/gvt/gtt.c b/drivers/gpu/drm/i915/gvt/gtt.c index d7052ab7908c..cf133ef03873 100644 --- a/drivers/gpu/drm/i915/gvt/gtt.c +++ b/drivers/gpu/drm/i915/gvt/gtt.c @@ -1946,7 +1946,7 @@ void _intel_vgpu_mm_release(struct kref *mm_ref) */ void intel_vgpu_unpin_mm(struct intel_vgpu_mm *mm) { - atomic_dec(&mm->pincount); + atomic_dec_if_positive(&mm->pincount); } /** -- cgit v1.2.3 From ff0e2a7bd13f7c332d7f09ff45d08df4bf512ce0 Mon Sep 17 00:00:00 2001 From: Anup Patel Date: Fri, 22 Mar 2019 10:04:44 +0000 Subject: RISC-V: Fix FIXMAP_TOP to avoid overlap with VMALLOC area The FIXMAP area overlaps with VMALLOC area in Linux-5.1-rc1 hence we get below warning in Linux RISC-V 32bit kernel. This warning does not show-up in Linux RISC-V 64bit kernel due to large VMALLOC area. WARNING: CPU: 0 PID: 22 at mm/vmalloc.c:150 vmap_page_range_noflush+0x134/0x15c Modules linked in: CPU: 0 PID: 22 Comm: kworker/0:1 Not tainted 5.1.0-rc1-00005-gebc2f658040e #1 Workqueue: events pcpu_balance_workfn Call Trace: [] walk_stackframe+0x0/0xa0 [] show_stack+0x28/0x32 [] dump_stack+0x62/0x7e [] __warn+0x98/0xce [] warn_slowpath_null+0x2e/0x3c [] vmap_page_range_noflush+0x134/0x15c [] map_kernel_range_noflush+0xc/0x14 [] pcpu_populate_chunk+0x19e/0x236 [] pcpu_balance_workfn+0x448/0x464 [] process_one_work+0x16c/0x2ea [] worker_thread+0xf2/0x3b2 [] kthread+0xce/0xdc [] ret_from_exception+0x0/0xc This patch fixes above warning by placing FIXMAP area below VMALLOC area. Fixes: f2c17aabc917 ("RISC-V: Implement compile-time fixed mappings") Signed-off-by: Anup Patel Reviewed-by: Christoph Hellwig Signed-off-by: Palmer Dabbelt --- arch/riscv/include/asm/fixmap.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/riscv/include/asm/fixmap.h b/arch/riscv/include/asm/fixmap.h index 57afe604b495..c207f6634b91 100644 --- a/arch/riscv/include/asm/fixmap.h +++ b/arch/riscv/include/asm/fixmap.h @@ -26,7 +26,7 @@ enum fixed_addresses { }; #define FIXADDR_SIZE (__end_of_fixed_addresses * PAGE_SIZE) -#define FIXADDR_TOP (PAGE_OFFSET) +#define FIXADDR_TOP (VMALLOC_START) #define FIXADDR_START (FIXADDR_TOP - FIXADDR_SIZE) #define FIXMAP_PAGE_IO PAGE_KERNEL -- cgit v1.2.3 From da4ed37873918eeb4e8db7f0cf55e0a7e18788c3 Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Thu, 7 Mar 2019 15:56:34 -0800 Subject: RISC-V: Use IS_ENABLED(CONFIG_CMODEL_MEDLOW) IS_ENABLED should generally use CONFIG_ prefaced symbols and it doesn't appear as if there is a CMODEL_MEDLOW define. Signed-off-by: Joe Perches Reviewed-by: Christoph Hellwig Signed-off-by: Palmer Dabbelt --- arch/riscv/kernel/module.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/riscv/kernel/module.c b/arch/riscv/kernel/module.c index 7dd308129b40..2872edce894d 100644 --- a/arch/riscv/kernel/module.c +++ b/arch/riscv/kernel/module.c @@ -141,7 +141,7 @@ static int apply_r_riscv_hi20_rela(struct module *me, u32 *location, { s32 hi20; - if (IS_ENABLED(CMODEL_MEDLOW)) { + if (IS_ENABLED(CONFIG_CMODEL_MEDLOW)) { pr_err( "%s: target %016llx can not be addressed by the 32-bit offset from PC = %p\n", me->name, (long long)v, location); -- cgit v1.2.3 From 2d4ea4b95cae3133de6b18ec5d5a42ee824fa0ef Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Thu, 7 Mar 2019 15:51:45 -0800 Subject: s390/mem_detect: Use IS_ENABLED(CONFIG_BLK_DEV_INITRD) IS_ENABLED should generally use CONFIG_ prefaced symbols and it doesn't appear as if there is a BLK_DEV_INITRD define. Cc: # 4.20 Signed-off-by: Joe Perches Signed-off-by: Martin Schwidefsky --- arch/s390/boot/mem_detect.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/s390/boot/mem_detect.c b/arch/s390/boot/mem_detect.c index 4cb771ba13fa..5d316fe40480 100644 --- a/arch/s390/boot/mem_detect.c +++ b/arch/s390/boot/mem_detect.c @@ -25,7 +25,7 @@ static void *mem_detect_alloc_extended(void) { unsigned long offset = ALIGN(mem_safe_offset(), sizeof(u64)); - if (IS_ENABLED(BLK_DEV_INITRD) && INITRD_START && INITRD_SIZE && + if (IS_ENABLED(CONFIG_BLK_DEV_INITRD) && INITRD_START && INITRD_SIZE && INITRD_START < offset + ENTRIES_EXTENDED_MAX) offset = ALIGN(INITRD_START + INITRD_SIZE, sizeof(u64)); -- cgit v1.2.3 From 2cc9637ce825f3a9f51f8f78af7474e9e85bfa5f Mon Sep 17 00:00:00 2001 From: Peter Oberparleiter Date: Fri, 22 Mar 2019 16:01:17 +0100 Subject: s390/dasd: Fix capacity calculation for large volumes The DASD driver incorrectly limits the maximum number of blocks of ECKD DASD volumes to 32 bit numbers. Volumes with a capacity greater than 2^32-1 blocks are incorrectly recognized as smaller volumes. This results in the following volume capacity limits depending on the formatted block size: BLKSIZE MAX_GB MAX_CYL 512 2047 5843492 1024 4095 8676701 2048 8191 13634816 4096 16383 23860929 The same problem occurs when a volume with more than 17895697 cylinders is accessed in raw-track-access mode. Fix this problem by adding an explicit type cast when calculating the maximum number of blocks. Signed-off-by: Peter Oberparleiter Reviewed-by: Stefan Haberland Signed-off-by: Martin Schwidefsky --- drivers/s390/block/dasd_eckd.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/s390/block/dasd_eckd.c b/drivers/s390/block/dasd_eckd.c index 6e294b4d3635..f89f9d02e788 100644 --- a/drivers/s390/block/dasd_eckd.c +++ b/drivers/s390/block/dasd_eckd.c @@ -2004,14 +2004,14 @@ static int dasd_eckd_end_analysis(struct dasd_block *block) blk_per_trk = recs_per_track(&private->rdc_data, 0, block->bp_block); raw: - block->blocks = (private->real_cyl * + block->blocks = ((unsigned long) private->real_cyl * private->rdc_data.trk_per_cyl * blk_per_trk); dev_info(&device->cdev->dev, - "DASD with %d KB/block, %d KB total size, %d KB/track, " + "DASD with %u KB/block, %lu KB total size, %u KB/track, " "%s\n", (block->bp_block >> 10), - ((private->real_cyl * + (((unsigned long) private->real_cyl * private->rdc_data.trk_per_cyl * blk_per_trk * (block->bp_block >> 9)) >> 1), ((blk_per_trk * block->bp_block) >> 10), -- cgit v1.2.3 From 6620f45ff8519549a6877663f965c10002918dc2 Mon Sep 17 00:00:00 2001 From: Neil Armstrong Date: Wed, 27 Mar 2019 16:13:48 +0100 Subject: clk: meson: vid-pll-div: remove warning and return 0 on invalid config The vid_pll_div is a programmable fractional divider, but vendor gives a limited of known configuration value and it's corresponding fraction. Thus when at reset value (0) or unknown value, we cannot determine the result rate. The initial behaviour was to print a warning, but the warning triggers at each boot and when the clock tree is refreshed. This patch moves the print to debug and returns 0 instead of the parent rate. Fixes: 72dbb8c94d0d ("clk: meson: Add vid_pll divider driver") Signed-off-by: Neil Armstrong Reviewed-by: Jerome Brunet Link: https://lkml.kernel.org/r/20190327151348.27402-1-narmstrong@baylibre.com --- drivers/clk/meson/vid-pll-div.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/clk/meson/vid-pll-div.c b/drivers/clk/meson/vid-pll-div.c index 08bcc01c0923..daff235bc763 100644 --- a/drivers/clk/meson/vid-pll-div.c +++ b/drivers/clk/meson/vid-pll-div.c @@ -82,8 +82,8 @@ static unsigned long meson_vid_pll_div_recalc_rate(struct clk_hw *hw, div = _get_table_val(meson_parm_read(clk->map, &pll_div->val), meson_parm_read(clk->map, &pll_div->sel)); if (!div || !div->divider) { - pr_info("%s: Invalid config value for vid_pll_div\n", __func__); - return parent_rate; + pr_debug("%s: Invalid config value for vid_pll_div\n", __func__); + return 0; } return DIV_ROUND_UP_ULL(parent_rate * div->multiplier, div->divider); -- cgit v1.2.3 From b49c15e1211cc962cb73bbaaa5175ae068144893 Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Tue, 19 Mar 2019 12:00:13 +0100 Subject: mac80211: un-schedule TXQs on powersave start MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Once a station enters powersave, its queues should not be returned by ieee80211_next_txq() anymore. They will be re-scheduled again after the station has woken up again Fixes: 1866760096bf4 ("mac80211: Add TXQ scheduling API") Signed-off-by: Felix Fietkau Acked-by: Toke Høiland-Jørgensen Signed-off-by: Johannes Berg --- net/mac80211/rx.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index 7f8d93401ce0..bf0b187f994e 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -1568,7 +1568,15 @@ static void sta_ps_start(struct sta_info *sta) return; for (tid = 0; tid < IEEE80211_NUM_TIDS; tid++) { - if (txq_has_queue(sta->sta.txq[tid])) + struct ieee80211_txq *txq = sta->sta.txq[tid]; + struct txq_info *txqi = to_txq_info(txq); + + spin_lock(&local->active_txq_lock[txq->ac]); + if (!list_empty(&txqi->schedule_order)) + list_del_init(&txqi->schedule_order); + spin_unlock(&local->active_txq_lock[txq->ac]); + + if (txq_has_queue(txq)) set_bit(tid, &sta->txq_buffered_tids); else clear_bit(tid, &sta->txq_buffered_tids); -- cgit v1.2.3 From 40586e3fc400c00c11151804dcdc93f8c831c808 Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Wed, 13 Mar 2019 18:54:27 +0100 Subject: mac80211: fix unaligned access in mesh table hash function The pointer to the last four bytes of the address is not guaranteed to be aligned, so we need to use __get_unaligned_cpu32 here Signed-off-by: Felix Fietkau Signed-off-by: Johannes Berg --- net/mac80211/mesh_pathtbl.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/mac80211/mesh_pathtbl.c b/net/mac80211/mesh_pathtbl.c index 95eb5064fa91..b76a2aefa9ec 100644 --- a/net/mac80211/mesh_pathtbl.c +++ b/net/mac80211/mesh_pathtbl.c @@ -23,7 +23,7 @@ static void mesh_path_free_rcu(struct mesh_table *tbl, struct mesh_path *mpath); static u32 mesh_table_hash(const void *addr, u32 len, u32 seed) { /* Use last four bytes of hw addr as hash index */ - return jhash_1word(*(u32 *)(addr+2), seed); + return jhash_1word(__get_unaligned_cpu32((u8 *)addr + 2), seed); } static const struct rhashtable_params mesh_rht_params = { -- cgit v1.2.3 From 78be2d21cc1cd3069c6138dcfecec62583130171 Mon Sep 17 00:00:00 2001 From: Andrei Otcheretianski Date: Fri, 15 Mar 2019 17:38:57 +0200 Subject: mac80211: Increase MAX_MSG_LEN Looks that 100 chars isn't enough for messages, as we keep getting warnings popping from different places due to message shortening. Instead of trying to shorten the prints, just increase the buffer size. Signed-off-by: Andrei Otcheretianski Signed-off-by: Luca Coelho Signed-off-by: Johannes Berg --- net/mac80211/trace_msg.h | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/net/mac80211/trace_msg.h b/net/mac80211/trace_msg.h index 366b9e6f043e..40141df09f25 100644 --- a/net/mac80211/trace_msg.h +++ b/net/mac80211/trace_msg.h @@ -1,4 +1,9 @@ /* SPDX-License-Identifier: GPL-2.0 */ +/* + * Portions of this file + * Copyright (C) 2019 Intel Corporation + */ + #ifdef CONFIG_MAC80211_MESSAGE_TRACING #if !defined(__MAC80211_MSG_DRIVER_TRACE) || defined(TRACE_HEADER_MULTI_READ) @@ -11,7 +16,7 @@ #undef TRACE_SYSTEM #define TRACE_SYSTEM mac80211_msg -#define MAX_MSG_LEN 100 +#define MAX_MSG_LEN 120 DECLARE_EVENT_CLASS(mac80211_msg_event, TP_PROTO(struct va_format *vaf), -- cgit v1.2.3 From 08a75a887ee46828b54600f4bb7068d872a5edd5 Mon Sep 17 00:00:00 2001 From: Ilan Peer Date: Fri, 15 Mar 2019 17:39:00 +0200 Subject: cfg80211: Handle WMM rules in regulatory domain intersection The support added for regulatory WMM rules did not handle the case of regulatory domain intersections. Fix it. Signed-off-by: Ilan Peer Fixes: 230ebaa189af ("cfg80211: read wmm rules from regulatory database") Signed-off-by: Luca Coelho Signed-off-by: Johannes Berg --- net/wireless/reg.c | 39 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 39 insertions(+) diff --git a/net/wireless/reg.c b/net/wireless/reg.c index 2f1bf91eb226..0ba778f371cb 100644 --- a/net/wireless/reg.c +++ b/net/wireless/reg.c @@ -1309,6 +1309,16 @@ reg_intersect_dfs_region(const enum nl80211_dfs_regions dfs_region1, return dfs_region1; } +static void reg_wmm_rules_intersect(const struct ieee80211_wmm_ac *wmm_ac1, + const struct ieee80211_wmm_ac *wmm_ac2, + struct ieee80211_wmm_ac *intersect) +{ + intersect->cw_min = max_t(u16, wmm_ac1->cw_min, wmm_ac2->cw_min); + intersect->cw_max = max_t(u16, wmm_ac1->cw_max, wmm_ac2->cw_max); + intersect->cot = min_t(u16, wmm_ac1->cot, wmm_ac2->cot); + intersect->aifsn = max_t(u8, wmm_ac1->aifsn, wmm_ac2->aifsn); +} + /* * Helper for regdom_intersect(), this does the real * mathematical intersection fun @@ -1323,6 +1333,8 @@ static int reg_rules_intersect(const struct ieee80211_regdomain *rd1, struct ieee80211_freq_range *freq_range; const struct ieee80211_power_rule *power_rule1, *power_rule2; struct ieee80211_power_rule *power_rule; + const struct ieee80211_wmm_rule *wmm_rule1, *wmm_rule2; + struct ieee80211_wmm_rule *wmm_rule; u32 freq_diff, max_bandwidth1, max_bandwidth2; freq_range1 = &rule1->freq_range; @@ -1333,6 +1345,10 @@ static int reg_rules_intersect(const struct ieee80211_regdomain *rd1, power_rule2 = &rule2->power_rule; power_rule = &intersected_rule->power_rule; + wmm_rule1 = &rule1->wmm_rule; + wmm_rule2 = &rule2->wmm_rule; + wmm_rule = &intersected_rule->wmm_rule; + freq_range->start_freq_khz = max(freq_range1->start_freq_khz, freq_range2->start_freq_khz); freq_range->end_freq_khz = min(freq_range1->end_freq_khz, @@ -1376,6 +1392,29 @@ static int reg_rules_intersect(const struct ieee80211_regdomain *rd1, intersected_rule->dfs_cac_ms = max(rule1->dfs_cac_ms, rule2->dfs_cac_ms); + if (rule1->has_wmm && rule2->has_wmm) { + u8 ac; + + for (ac = 0; ac < IEEE80211_NUM_ACS; ac++) { + reg_wmm_rules_intersect(&wmm_rule1->client[ac], + &wmm_rule2->client[ac], + &wmm_rule->client[ac]); + reg_wmm_rules_intersect(&wmm_rule1->ap[ac], + &wmm_rule2->ap[ac], + &wmm_rule->ap[ac]); + } + + intersected_rule->has_wmm = true; + } else if (rule1->has_wmm) { + *wmm_rule = *wmm_rule1; + intersected_rule->has_wmm = true; + } else if (rule2->has_wmm) { + *wmm_rule = *wmm_rule2; + intersected_rule->has_wmm = true; + } else { + intersected_rule->has_wmm = false; + } + if (!is_valid_reg_rule(intersected_rule)) return -EINVAL; -- cgit v1.2.3 From eb9b64e3a9f8483e6e54f4e03b2ae14ae5db2690 Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Sat, 16 Mar 2019 18:06:31 +0100 Subject: mac80211: fix memory accounting with A-MSDU aggregation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit skb->truesize can change due to memory reallocation or when adding extra fragments. Adjust fq->memory_usage accordingly Signed-off-by: Felix Fietkau Acked-by: Toke Høiland-Jørgensen Signed-off-by: Johannes Berg --- net/mac80211/tx.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index 8a49a74c0a37..5f546de10d96 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -3221,6 +3221,7 @@ static bool ieee80211_amsdu_aggregate(struct ieee80211_sub_if_data *sdata, u8 max_subframes = sta->sta.max_amsdu_subframes; int max_frags = local->hw.max_tx_fragments; int max_amsdu_len = sta->sta.max_amsdu_len; + int orig_truesize; __be16 len; void *data; bool ret = false; @@ -3261,6 +3262,7 @@ static bool ieee80211_amsdu_aggregate(struct ieee80211_sub_if_data *sdata, if (!head || skb_is_gso(head)) goto out; + orig_truesize = head->truesize; orig_len = head->len; if (skb->len + head->len > max_amsdu_len) @@ -3318,6 +3320,7 @@ static bool ieee80211_amsdu_aggregate(struct ieee80211_sub_if_data *sdata, *frag_tail = skb; out_recalc: + fq->memory_usage += head->truesize - orig_truesize; if (head->len != orig_len) { flow->backlog += head->len - orig_len; tin->backlog_bytes += head->len - orig_len; -- cgit v1.2.3 From 344c9719c508bb3ef4e9c134066c83ff00ab6206 Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Thu, 7 Mar 2019 16:57:35 -0700 Subject: cfg80211: Change an 'else if' into an 'else' in cfg80211_calculate_bitrate_he When building with -Wsometimes-uninitialized, Clang warns: net/wireless/util.c:1223:11: warning: variable 'result' is used uninitialized whenever 'if' condition is false [-Wsometimes-uninitialized] Clang can't evaluate at this point that WARN(1, ...) always returns true because __ret_warn_on is defined as !!(condition), which isn't immediately evaluated as 1. Change this branch to else so that it's clear to Clang that we intend to bail out here. Link: https://github.com/ClangBuiltLinux/linux/issues/382 Suggested-by: Nick Desaulniers Signed-off-by: Nathan Chancellor Reviewed-by: Nick Desaulniers Reviewed-by: Arnd Bergmann Signed-off-by: Johannes Berg --- net/wireless/util.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/net/wireless/util.c b/net/wireless/util.c index e4b8db5e81ec..75899b62bdc9 100644 --- a/net/wireless/util.c +++ b/net/wireless/util.c @@ -1220,9 +1220,11 @@ static u32 cfg80211_calculate_bitrate_he(struct rate_info *rate) else if (rate->bw == RATE_INFO_BW_HE_RU && rate->he_ru_alloc == NL80211_RATE_INFO_HE_RU_ALLOC_26) result = rates_26[rate->he_gi]; - else if (WARN(1, "invalid HE MCS: bw:%d, ru:%d\n", - rate->bw, rate->he_ru_alloc)) + else { + WARN(1, "invalid HE MCS: bw:%d, ru:%d\n", + rate->bw, rate->he_ru_alloc); return 0; + } /* now scale to the appropriate MCS */ tmp = result; -- cgit v1.2.3 From 4856bfd230985e43e84c26473c91028ff0a533bd Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Fri, 1 Mar 2019 14:48:37 +0100 Subject: mac80211: do not call driver wake_tx_queue op during reconfig There are several scenarios in which mac80211 can call drv_wake_tx_queue after ieee80211_restart_hw has been called and has not yet completed. Driver private structs are considered uninitialized until mac80211 has uploaded the vifs, stations and keys again, so using private tx queue data during that time is not safe. The driver can also not rely on drv_reconfig_complete to figure out when it is safe to accept drv_wake_tx_queue calls again, because it is only called after all tx queues are woken again. To fix this, bail out early in drv_wake_tx_queue if local->in_reconfig is set. Cc: stable@vger.kernel.org Signed-off-by: Felix Fietkau Signed-off-by: Johannes Berg --- net/mac80211/driver-ops.h | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/mac80211/driver-ops.h b/net/mac80211/driver-ops.h index 28d022a3eee3..ae4f0be3b393 100644 --- a/net/mac80211/driver-ops.h +++ b/net/mac80211/driver-ops.h @@ -1195,6 +1195,9 @@ static inline void drv_wake_tx_queue(struct ieee80211_local *local, { struct ieee80211_sub_if_data *sdata = vif_to_sdata(txq->txq.vif); + if (local->in_reconfig) + return; + if (!check_sdata_in_driver(sdata)) return; -- cgit v1.2.3 From 90abf96abd9bb00f36c8d3640255e6bfa73f7495 Mon Sep 17 00:00:00 2001 From: YueHaibing Date: Mon, 25 Feb 2019 12:38:49 +0000 Subject: cfg80211: Use kmemdup in cfg80211_gen_new_ie() Use kmemdup rather than duplicating its implementation Signed-off-by: YueHaibing Signed-off-by: Johannes Berg --- net/wireless/scan.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/net/wireless/scan.c b/net/wireless/scan.c index 287518c6caa4..04d888628f29 100644 --- a/net/wireless/scan.c +++ b/net/wireless/scan.c @@ -190,10 +190,9 @@ static size_t cfg80211_gen_new_ie(const u8 *ie, size_t ielen, /* copy subelement as we need to change its content to * mark an ie after it is processed. */ - sub_copy = kmalloc(subie_len, gfp); + sub_copy = kmemdup(subelement, subie_len, gfp); if (!sub_copy) return 0; - memcpy(sub_copy, subelement, subie_len); pos = &new_ie[0]; -- cgit v1.2.3 From d6db02a88a4aaa1cd7105137c67ddec7f3bdbc05 Mon Sep 17 00:00:00 2001 From: Sunil Dutt Date: Mon, 25 Feb 2019 15:37:20 +0530 Subject: nl80211: Add NL80211_FLAG_CLEAR_SKB flag for other NL commands This commit adds NL80211_FLAG_CLEAR_SKB flag to other NL commands that carry key data to ensure they do not stick around on heap after the SKB is freed. Also introduced this flag for NL80211_CMD_VENDOR as there are sub commands which configure the keys. Signed-off-by: Sunil Dutt Signed-off-by: Johannes Berg --- net/wireless/nl80211.c | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 25a9e3b5c154..47e30a58566c 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -13650,7 +13650,8 @@ static const struct genl_ops nl80211_ops[] = { .policy = nl80211_policy, .flags = GENL_UNS_ADMIN_PERM, .internal_flags = NL80211_FLAG_NEED_NETDEV_UP | - NL80211_FLAG_NEED_RTNL, + NL80211_FLAG_NEED_RTNL | + NL80211_FLAG_CLEAR_SKB, }, { .cmd = NL80211_CMD_DEAUTHENTICATE, @@ -13701,7 +13702,8 @@ static const struct genl_ops nl80211_ops[] = { .policy = nl80211_policy, .flags = GENL_UNS_ADMIN_PERM, .internal_flags = NL80211_FLAG_NEED_NETDEV_UP | - NL80211_FLAG_NEED_RTNL, + NL80211_FLAG_NEED_RTNL | + NL80211_FLAG_CLEAR_SKB, }, { .cmd = NL80211_CMD_UPDATE_CONNECT_PARAMS, @@ -13709,7 +13711,8 @@ static const struct genl_ops nl80211_ops[] = { .policy = nl80211_policy, .flags = GENL_ADMIN_PERM, .internal_flags = NL80211_FLAG_NEED_NETDEV_UP | - NL80211_FLAG_NEED_RTNL, + NL80211_FLAG_NEED_RTNL | + NL80211_FLAG_CLEAR_SKB, }, { .cmd = NL80211_CMD_DISCONNECT, @@ -13738,7 +13741,8 @@ static const struct genl_ops nl80211_ops[] = { .policy = nl80211_policy, .flags = GENL_UNS_ADMIN_PERM, .internal_flags = NL80211_FLAG_NEED_NETDEV_UP | - NL80211_FLAG_NEED_RTNL, + NL80211_FLAG_NEED_RTNL | + NL80211_FLAG_CLEAR_SKB, }, { .cmd = NL80211_CMD_DEL_PMKSA, @@ -14090,7 +14094,8 @@ static const struct genl_ops nl80211_ops[] = { .policy = nl80211_policy, .flags = GENL_UNS_ADMIN_PERM, .internal_flags = NL80211_FLAG_NEED_WIPHY | - NL80211_FLAG_NEED_RTNL, + NL80211_FLAG_NEED_RTNL | + NL80211_FLAG_CLEAR_SKB, }, { .cmd = NL80211_CMD_SET_QOS_MAP, @@ -14145,7 +14150,8 @@ static const struct genl_ops nl80211_ops[] = { .doit = nl80211_set_pmk, .policy = nl80211_policy, .internal_flags = NL80211_FLAG_NEED_NETDEV_UP | - NL80211_FLAG_NEED_RTNL, + NL80211_FLAG_NEED_RTNL | + NL80211_FLAG_CLEAR_SKB, }, { .cmd = NL80211_CMD_DEL_PMK, -- cgit v1.2.3 From 5b989c18dab2e82bac8a5564a174794bf84b20e6 Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Fri, 15 Mar 2019 11:03:35 +0100 Subject: mac80211: rework locking for txq scheduling / airtime fairness MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Holding the lock around the entire duration of tx scheduling can create some nasty lock contention, especially when processing airtime information from the tx status or the rx path. Improve locking by only holding the active_txq_lock for lookups / scheduling list modifications. Signed-off-by: Felix Fietkau Acked-by: Toke Høiland-Jørgensen Signed-off-by: Johannes Berg --- include/net/mac80211.h | 49 +++++++++++++++++++------------------------------ net/mac80211/tx.c | 44 ++++++++++++++++---------------------------- 2 files changed, 35 insertions(+), 58 deletions(-) diff --git a/include/net/mac80211.h b/include/net/mac80211.h index ac2ed8ec662b..616998252dc7 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -6231,8 +6231,6 @@ struct sk_buff *ieee80211_tx_dequeue(struct ieee80211_hw *hw, * @hw: pointer as obtained from ieee80211_alloc_hw() * @ac: AC number to return packets from. * - * Should only be called between calls to ieee80211_txq_schedule_start() - * and ieee80211_txq_schedule_end(). * Returns the next txq if successful, %NULL if no queue is eligible. If a txq * is returned, it should be returned with ieee80211_return_txq() after the * driver has finished scheduling it. @@ -6240,51 +6238,42 @@ struct sk_buff *ieee80211_tx_dequeue(struct ieee80211_hw *hw, struct ieee80211_txq *ieee80211_next_txq(struct ieee80211_hw *hw, u8 ac); /** - * ieee80211_return_txq - return a TXQ previously acquired by ieee80211_next_txq() - * - * @hw: pointer as obtained from ieee80211_alloc_hw() - * @txq: pointer obtained from station or virtual interface - * - * Should only be called between calls to ieee80211_txq_schedule_start() - * and ieee80211_txq_schedule_end(). - */ -void ieee80211_return_txq(struct ieee80211_hw *hw, struct ieee80211_txq *txq); - -/** - * ieee80211_txq_schedule_start - acquire locks for safe scheduling of an AC + * ieee80211_txq_schedule_start - start new scheduling round for TXQs * * @hw: pointer as obtained from ieee80211_alloc_hw() * @ac: AC number to acquire locks for * - * Acquire locks needed to schedule TXQs from the given AC. Should be called - * before ieee80211_next_txq() or ieee80211_return_txq(). + * Should be called before ieee80211_next_txq() or ieee80211_return_txq(). + * The driver must not call multiple TXQ scheduling rounds concurrently. */ -void ieee80211_txq_schedule_start(struct ieee80211_hw *hw, u8 ac) - __acquires(txq_lock); +void ieee80211_txq_schedule_start(struct ieee80211_hw *hw, u8 ac); + +/* (deprecated) */ +static inline void ieee80211_txq_schedule_end(struct ieee80211_hw *hw, u8 ac) +{ +} /** - * ieee80211_txq_schedule_end - release locks for safe scheduling of an AC + * ieee80211_schedule_txq - schedule a TXQ for transmission * * @hw: pointer as obtained from ieee80211_alloc_hw() - * @ac: AC number to acquire locks for + * @txq: pointer obtained from station or virtual interface * - * Release locks previously acquired by ieee80211_txq_schedule_end(). + * Schedules a TXQ for transmission if it is not already scheduled. */ -void ieee80211_txq_schedule_end(struct ieee80211_hw *hw, u8 ac) - __releases(txq_lock); +void ieee80211_schedule_txq(struct ieee80211_hw *hw, struct ieee80211_txq *txq); /** - * ieee80211_schedule_txq - schedule a TXQ for transmission + * ieee80211_return_txq - return a TXQ previously acquired by ieee80211_next_txq() * * @hw: pointer as obtained from ieee80211_alloc_hw() * @txq: pointer obtained from station or virtual interface - * - * Schedules a TXQ for transmission if it is not already scheduled. Takes a - * lock, which means it must *not* be called between - * ieee80211_txq_schedule_start() and ieee80211_txq_schedule_end() */ -void ieee80211_schedule_txq(struct ieee80211_hw *hw, struct ieee80211_txq *txq) - __acquires(txq_lock) __releases(txq_lock); +static inline void +ieee80211_return_txq(struct ieee80211_hw *hw, struct ieee80211_txq *txq) +{ + ieee80211_schedule_txq(hw, txq); +} /** * ieee80211_txq_may_transmit - check whether TXQ is allowed to transmit diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index 5f546de10d96..134a3da147c6 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -3649,16 +3649,17 @@ EXPORT_SYMBOL(ieee80211_tx_dequeue); struct ieee80211_txq *ieee80211_next_txq(struct ieee80211_hw *hw, u8 ac) { struct ieee80211_local *local = hw_to_local(hw); + struct ieee80211_txq *ret = NULL; struct txq_info *txqi = NULL; - lockdep_assert_held(&local->active_txq_lock[ac]); + spin_lock_bh(&local->active_txq_lock[ac]); begin: txqi = list_first_entry_or_null(&local->active_txqs[ac], struct txq_info, schedule_order); if (!txqi) - return NULL; + goto out; if (txqi->txq.sta) { struct sta_info *sta = container_of(txqi->txq.sta, @@ -3675,21 +3676,25 @@ struct ieee80211_txq *ieee80211_next_txq(struct ieee80211_hw *hw, u8 ac) if (txqi->schedule_round == local->schedule_round[ac]) - return NULL; + goto out; list_del_init(&txqi->schedule_order); txqi->schedule_round = local->schedule_round[ac]; - return &txqi->txq; + ret = &txqi->txq; + +out: + spin_unlock_bh(&local->active_txq_lock[ac]); + return ret; } EXPORT_SYMBOL(ieee80211_next_txq); -void ieee80211_return_txq(struct ieee80211_hw *hw, - struct ieee80211_txq *txq) +void ieee80211_schedule_txq(struct ieee80211_hw *hw, + struct ieee80211_txq *txq) { struct ieee80211_local *local = hw_to_local(hw); struct txq_info *txqi = to_txq_info(txq); - lockdep_assert_held(&local->active_txq_lock[txq->ac]); + spin_lock_bh(&local->active_txq_lock[txq->ac]); if (list_empty(&txqi->schedule_order) && (!skb_queue_empty(&txqi->frags) || txqi->tin.backlog_packets)) { @@ -3709,17 +3714,7 @@ void ieee80211_return_txq(struct ieee80211_hw *hw, list_add_tail(&txqi->schedule_order, &local->active_txqs[txq->ac]); } -} -EXPORT_SYMBOL(ieee80211_return_txq); -void ieee80211_schedule_txq(struct ieee80211_hw *hw, - struct ieee80211_txq *txq) - __acquires(txq_lock) __releases(txq_lock) -{ - struct ieee80211_local *local = hw_to_local(hw); - - spin_lock_bh(&local->active_txq_lock[txq->ac]); - ieee80211_return_txq(hw, txq); spin_unlock_bh(&local->active_txq_lock[txq->ac]); } EXPORT_SYMBOL(ieee80211_schedule_txq); @@ -3732,7 +3727,7 @@ bool ieee80211_txq_may_transmit(struct ieee80211_hw *hw, struct sta_info *sta; u8 ac = txq->ac; - lockdep_assert_held(&local->active_txq_lock[ac]); + spin_lock_bh(&local->active_txq_lock[ac]); if (!txqi->txq.sta) goto out; @@ -3762,34 +3757,27 @@ bool ieee80211_txq_may_transmit(struct ieee80211_hw *hw, sta->airtime[ac].deficit += sta->airtime_weight; list_move_tail(&txqi->schedule_order, &local->active_txqs[ac]); + spin_unlock_bh(&local->active_txq_lock[ac]); return false; out: if (!list_empty(&txqi->schedule_order)) list_del_init(&txqi->schedule_order); + spin_unlock_bh(&local->active_txq_lock[ac]); return true; } EXPORT_SYMBOL(ieee80211_txq_may_transmit); void ieee80211_txq_schedule_start(struct ieee80211_hw *hw, u8 ac) - __acquires(txq_lock) { struct ieee80211_local *local = hw_to_local(hw); spin_lock_bh(&local->active_txq_lock[ac]); local->schedule_round[ac]++; -} -EXPORT_SYMBOL(ieee80211_txq_schedule_start); - -void ieee80211_txq_schedule_end(struct ieee80211_hw *hw, u8 ac) - __releases(txq_lock) -{ - struct ieee80211_local *local = hw_to_local(hw); - spin_unlock_bh(&local->active_txq_lock[ac]); } -EXPORT_SYMBOL(ieee80211_txq_schedule_end); +EXPORT_SYMBOL(ieee80211_txq_schedule_start); void __ieee80211_subif_start_xmit(struct sk_buff *skb, struct net_device *dev, -- cgit v1.2.3 From fe61692886669bbcc260f980903eacb4ddebaf59 Mon Sep 17 00:00:00 2001 From: Rodrigo Siqueira Date: Thu, 14 Mar 2019 15:48:45 -0300 Subject: drm/atomic-helper: Make atomic_enable/disable crtc callbacks optional Allow atomic_enable and atomic_disable operations from drm_crtc_helper_funcs struct optional. With this, the target display drivers don't need to define a dummy function if they don't need one. Changes since v2: * Don't make funcs optional * Update kerneldoc for atomic_enable/disable * Replace "if (funcs->atomic_enable)" by "if (funcs->commit)" * Improve commit message Signed-off-by: Rodrigo Siqueira Reviewed-by: Daniel Vetter Link: http://patchwork.freedesktop.org/patch/msgid/20190314184845.gjmvkamobj4dilyp@smtp.gmail.com Signed-off-by: Gerd Hoffmann --- drivers/gpu/drm/drm_atomic_helper.c | 5 ++--- include/drm/drm_modeset_helper_vtables.h | 4 ++++ 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/drm_atomic_helper.c b/drivers/gpu/drm/drm_atomic_helper.c index 40ac19848034..fbb76332cc9f 100644 --- a/drivers/gpu/drm/drm_atomic_helper.c +++ b/drivers/gpu/drm/drm_atomic_helper.c @@ -1034,7 +1034,7 @@ disable_outputs(struct drm_device *dev, struct drm_atomic_state *old_state) funcs->atomic_disable(crtc, old_crtc_state); else if (funcs->disable) funcs->disable(crtc); - else + else if (funcs->dpms) funcs->dpms(crtc, DRM_MODE_DPMS_OFF); if (!(dev->irq_enabled && dev->num_crtcs)) @@ -1277,10 +1277,9 @@ void drm_atomic_helper_commit_modeset_enables(struct drm_device *dev, if (new_crtc_state->enable) { DRM_DEBUG_ATOMIC("enabling [CRTC:%d:%s]\n", crtc->base.id, crtc->name); - if (funcs->atomic_enable) funcs->atomic_enable(crtc, old_crtc_state); - else + else if (funcs->commit) funcs->commit(crtc); } } diff --git a/include/drm/drm_modeset_helper_vtables.h b/include/drm/drm_modeset_helper_vtables.h index cfb7be40bed7..ce4de6b1e444 100644 --- a/include/drm/drm_modeset_helper_vtables.h +++ b/include/drm/drm_modeset_helper_vtables.h @@ -418,6 +418,8 @@ struct drm_crtc_helper_funcs { * Drivers can use the @old_crtc_state input parameter if the operations * needed to enable the CRTC don't depend solely on the new state but * also on the transition between the old state and the new state. + * + * This function is optional. */ void (*atomic_enable)(struct drm_crtc *crtc, struct drm_crtc_state *old_crtc_state); @@ -441,6 +443,8 @@ struct drm_crtc_helper_funcs { * parameter @old_crtc_state which could be used to access the old * state. Atomic drivers should consider to use this one instead * of @disable. + * + * This function is optional. */ void (*atomic_disable)(struct drm_crtc *crtc, struct drm_crtc_state *old_crtc_state); -- cgit v1.2.3 From 872e192fab643887f143106eb56443d87e5e87c1 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Wed, 27 Mar 2019 18:11:03 +0000 Subject: scsi: qedi: remove declaration of nvm_image from stack The nvm_image is a large struct qedi_nvm_iscsi_image object of over 24K so don't declare it on the stack just for a sizeof requirement; use sizeof on struct qedi_nvm_iscsi_image instead. Fixes: c77a2fa3ff8f ("scsi: qedi: Add the CRC size within iSCSI NVM image") Signed-off-by: Colin Ian King Acked-by: Manish Rangankar Signed-off-by: Martin K. Petersen --- drivers/scsi/qedi/qedi_main.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/drivers/scsi/qedi/qedi_main.c b/drivers/scsi/qedi/qedi_main.c index e74a62448ba4..e5db9a9954dc 100644 --- a/drivers/scsi/qedi/qedi_main.c +++ b/drivers/scsi/qedi/qedi_main.c @@ -1392,10 +1392,8 @@ static void qedi_free_nvm_iscsi_cfg(struct qedi_ctx *qedi) static int qedi_alloc_nvm_iscsi_cfg(struct qedi_ctx *qedi) { - struct qedi_nvm_iscsi_image nvm_image; - qedi->iscsi_image = dma_alloc_coherent(&qedi->pdev->dev, - sizeof(nvm_image), + sizeof(struct qedi_nvm_iscsi_image), &qedi->nvm_buf_dma, GFP_KERNEL); if (!qedi->iscsi_image) { QEDI_ERR(&qedi->dbg_ctx, "Could not allocate NVM BUF.\n"); @@ -2236,14 +2234,13 @@ static void qedi_boot_release(void *data) static int qedi_get_boot_info(struct qedi_ctx *qedi) { int ret = 1; - struct qedi_nvm_iscsi_image nvm_image; QEDI_INFO(&qedi->dbg_ctx, QEDI_LOG_INFO, "Get NVM iSCSI CFG image\n"); ret = qedi_ops->common->nvm_get_image(qedi->cdev, QED_NVM_IMAGE_ISCSI_CFG, (char *)qedi->iscsi_image, - sizeof(nvm_image)); + sizeof(struct qedi_nvm_iscsi_image)); if (ret) QEDI_ERR(&qedi->dbg_ctx, "Could not get NVM image. ret = %d\n", ret); -- cgit v1.2.3 From a165dcc923ada2ffdee1d4f41f12f81b66d04c55 Mon Sep 17 00:00:00 2001 From: Axel Lin Date: Mon, 11 Mar 2019 17:57:30 +0800 Subject: hwmon: (w83773g) Select REGMAP_I2C to fix build error Select REGMAP_I2C to avoid below build error: ERROR: "__devm_regmap_init_i2c" [drivers/hwmon/w83773g.ko] undefined! Fixes: ee249f271524 ("hwmon: Add W83773G driver") Cc: stable@vger.kernel.org Signed-off-by: Axel Lin Signed-off-by: Guenter Roeck --- drivers/hwmon/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/hwmon/Kconfig b/drivers/hwmon/Kconfig index 6f929bfa9fcd..d0f1dfe2bcbb 100644 --- a/drivers/hwmon/Kconfig +++ b/drivers/hwmon/Kconfig @@ -1759,6 +1759,7 @@ config SENSORS_VT8231 config SENSORS_W83773G tristate "Nuvoton W83773G" depends on I2C + select REGMAP_I2C help If you say yes here you get support for the Nuvoton W83773G hardware monitoring chip. -- cgit v1.2.3 From 8e6af454117a51dbf6c8a47c00180a0c235052fe Mon Sep 17 00:00:00 2001 From: Eddie James Date: Tue, 19 Mar 2019 16:01:58 -0500 Subject: hwmon: (occ) Fix power sensor indexing In the case of power sensor version 0xA0, the sensor indexing overlapped with the "caps" power sensors, resulting in probe failure and kernel warnings. Fix this by specifying the next index for each power sensor version. Fixes: 54076cb3b5ff ("hwmon (occ): Add sensor attributes and register ...") Cc: stable@vger.kernel.org Signed-off-by: Eddie James Tested-by: Joel Stanley Signed-off-by: Guenter Roeck --- drivers/hwmon/occ/common.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/hwmon/occ/common.c b/drivers/hwmon/occ/common.c index b91a80abf724..4679acb4918e 100644 --- a/drivers/hwmon/occ/common.c +++ b/drivers/hwmon/occ/common.c @@ -890,6 +890,8 @@ static int occ_setup_sensor_attrs(struct occ *occ) s++; } } + + s = (sensors->power.num_sensors * 4) + 1; } else { for (i = 0; i < sensors->power.num_sensors; ++i) { s = i + 1; @@ -918,11 +920,11 @@ static int occ_setup_sensor_attrs(struct occ *occ) show_power, NULL, 3, i); attr++; } - } - if (sensors->caps.num_sensors >= 1) { s = sensors->power.num_sensors + 1; + } + if (sensors->caps.num_sensors >= 1) { snprintf(attr->name, sizeof(attr->name), "power%d_label", s); attr->sensor = OCC_INIT_ATTR(attr->name, 0444, show_caps, NULL, 0, 0); -- cgit v1.2.3 From 5fd43ddbec7623441239d247155a30b69e51bea1 Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Wed, 20 Mar 2019 10:32:58 -0700 Subject: hwmon: (ntc_thermistor) Fix temperature type reporting Commit 7cc7de93fad4 ("hwmon: (ntc_thermistor) Convert to new hwmon API") converted the driver to use the new hwmon API, but introduced a subtle error: The temperature type is no longer reported as temp1_type, but as temp2_type. Fixes: 7cc7de93fad4 ("hwmon: (ntc_thermistor) Convert to new hwmon API") Signed-off-by: Guenter Roeck --- drivers/hwmon/ntc_thermistor.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/hwmon/ntc_thermistor.c b/drivers/hwmon/ntc_thermistor.c index e4f9f7ce92fa..f9abeeeead9e 100644 --- a/drivers/hwmon/ntc_thermistor.c +++ b/drivers/hwmon/ntc_thermistor.c @@ -640,7 +640,7 @@ static const struct hwmon_channel_info ntc_chip = { }; static const u32 ntc_temp_config[] = { - HWMON_T_INPUT, HWMON_T_TYPE, + HWMON_T_INPUT | HWMON_T_TYPE, 0 }; -- cgit v1.2.3 From d3b018f757560ab5c2bce0e7f46e0c1510d7afd4 Mon Sep 17 00:00:00 2001 From: Carlos Menin Date: Wed, 13 Mar 2019 11:11:26 -0300 Subject: dt-bindings: hwmon: (adc128d818) Specify ti,mode property size By default, cells in DT are 32-bit in size. The driver reads "ti,mode" using the function of_property_read_u8() which causes the value to be read incorrectly in little-endian architectures if the size is not specified. Make it explicit in the binding documentation that this prorperty must be set as a 8-bit value. Signed-off-by: Carlos Menin Reviewed-by: Rob Herring Signed-off-by: Guenter Roeck --- Documentation/devicetree/bindings/hwmon/adc128d818.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Documentation/devicetree/bindings/hwmon/adc128d818.txt b/Documentation/devicetree/bindings/hwmon/adc128d818.txt index 08bab0e94d25..d0ae46d7bac3 100644 --- a/Documentation/devicetree/bindings/hwmon/adc128d818.txt +++ b/Documentation/devicetree/bindings/hwmon/adc128d818.txt @@ -26,7 +26,7 @@ Required node properties: Optional node properties: - - ti,mode: Operation mode (see above). + - ti,mode: Operation mode (u8) (see above). Example (operation mode 2): @@ -34,5 +34,5 @@ Example (operation mode 2): adc128d818@1d { compatible = "ti,adc128d818"; reg = <0x1d>; - ti,mode = <2>; + ti,mode = /bits/ 8 <2>; }; -- cgit v1.2.3 From 676e4a6fe703f2dae699ee9d56f14516f9ada4ea Mon Sep 17 00:00:00 2001 From: Jesper Dangaard Brouer Date: Fri, 29 Mar 2019 10:18:00 +0100 Subject: xdp: fix cpumap redirect SKB creation bug We want to avoid leaking pointer info from xdp_frame (that is placed in top of frame) like commit 6dfb970d3dbd ("xdp: avoid leaking info stored in frame data on page reuse"), and followup commit 97e19cce05e5 ("bpf: reserve xdp_frame size in xdp headroom") that reserve this headroom. These changes also affected how cpumap constructed SKBs, as xdpf->headroom size changed, the skb data starting point were in-effect shifted with 32 bytes (sizeof xdp_frame). This was still okay, as the cpumap frame_size calculation also included xdpf->headroom which were reduced by same amount. A bug was introduced in commit 77ea5f4cbe20 ("bpf/cpumap: make sure frame_size for build_skb is aligned if headroom isn't"), where the xdpf->headroom became part of the SKB_DATA_ALIGN rounding up. This round-up to find the frame_size is in principle still correct as it does not exceed the 2048 bytes frame_size (which is max for ixgbe and i40e), but the 32 bytes offset of pkt_data_start puts this over the 2048 bytes limit. This cause skb_shared_info to spill into next frame. It is a little hard to trigger, as the SKB need to use above 15 skb_shinfo->frags[] as far as I calculate. This does happen in practise for TCP streams when skb_try_coalesce() kicks in. KASAN can be used to detect these wrong memory accesses, I've seen: BUG: KASAN: use-after-free in skb_try_coalesce+0x3cb/0x760 BUG: KASAN: wild-memory-access in skb_release_data+0xe2/0x250 Driver veth also construct a SKB from xdp_frame in this way, but is not affected, as it doesn't reserve/deduct the room (used by xdp_frame) from the SKB headroom. Instead is clears the pointers via xdp_scrub_frame(), and allows SKB to use this area. The fix in this patch is to do like veth and instead allow SKB to (re)use the area occupied by xdp_frame, by clearing via xdp_scrub_frame(). (This does kill the idea of the SKB being able to access (mem) info from this area, but I guess it was a bad idea anyhow, and it was already killed by the veth changes.) Fixes: 77ea5f4cbe20 ("bpf/cpumap: make sure frame_size for build_skb is aligned if headroom isn't") Signed-off-by: Jesper Dangaard Brouer Signed-off-by: Alexei Starovoitov --- kernel/bpf/cpumap.c | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/kernel/bpf/cpumap.c b/kernel/bpf/cpumap.c index 8974b3755670..3c18260403dd 100644 --- a/kernel/bpf/cpumap.c +++ b/kernel/bpf/cpumap.c @@ -162,10 +162,14 @@ static void cpu_map_kthread_stop(struct work_struct *work) static struct sk_buff *cpu_map_build_skb(struct bpf_cpu_map_entry *rcpu, struct xdp_frame *xdpf) { + unsigned int hard_start_headroom; unsigned int frame_size; void *pkt_data_start; struct sk_buff *skb; + /* Part of headroom was reserved to xdpf */ + hard_start_headroom = sizeof(struct xdp_frame) + xdpf->headroom; + /* build_skb need to place skb_shared_info after SKB end, and * also want to know the memory "truesize". Thus, need to * know the memory frame size backing xdp_buff. @@ -183,15 +187,15 @@ static struct sk_buff *cpu_map_build_skb(struct bpf_cpu_map_entry *rcpu, * is not at a fixed memory location, with mixed length * packets, which is bad for cache-line hotness. */ - frame_size = SKB_DATA_ALIGN(xdpf->len + xdpf->headroom) + + frame_size = SKB_DATA_ALIGN(xdpf->len + hard_start_headroom) + SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); - pkt_data_start = xdpf->data - xdpf->headroom; + pkt_data_start = xdpf->data - hard_start_headroom; skb = build_skb(pkt_data_start, frame_size); if (!skb) return NULL; - skb_reserve(skb, xdpf->headroom); + skb_reserve(skb, hard_start_headroom); __skb_put(skb, xdpf->len); if (xdpf->metasize) skb_metadata_set(skb, xdpf->metasize); @@ -205,6 +209,9 @@ static struct sk_buff *cpu_map_build_skb(struct bpf_cpu_map_entry *rcpu, * - RX ring dev queue index (skb_record_rx_queue) */ + /* Allow SKB to reuse area used by xdp_frame */ + xdp_scrub_frame(xdpf); + return skb; } -- cgit v1.2.3 From e8b26b2135dedc0284490bfeac06dfc4418d0105 Mon Sep 17 00:00:00 2001 From: Artemy Kovalyov Date: Tue, 19 Mar 2019 11:24:38 +0200 Subject: net/mlx5: Decrease default mr cache size Delete initialization of high order entries in mr cache to decrease initial memory footprint. When required, the administrator can populate the entries with memory keys via the /sys interface. This approach is very helpful to significantly reduce the per HW function memory footprint in virtualization environments such as SRIOV. Fixes: 9603b61de1ee ("mlx5: Move pci device handling from mlx5_ib to mlx5_core") Signed-off-by: Artemy Kovalyov Signed-off-by: Moni Shoua Signed-off-by: Leon Romanovsky Reported-by: Shalom Toledo Acked-by: Or Gerlitz Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/main.c | 20 -------------------- 1 file changed, 20 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c index 70cc906a102b..76716419370d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -164,26 +164,6 @@ static struct mlx5_profile profile[] = { .size = 8, .limit = 4 }, - .mr_cache[16] = { - .size = 8, - .limit = 4 - }, - .mr_cache[17] = { - .size = 8, - .limit = 4 - }, - .mr_cache[18] = { - .size = 8, - .limit = 4 - }, - .mr_cache[19] = { - .size = 4, - .limit = 2 - }, - .mr_cache[20] = { - .size = 4, - .limit = 2 - }, }, }; -- cgit v1.2.3 From bc87a0036826a37b43489b029af8143bd07c6cca Mon Sep 17 00:00:00 2001 From: Gavi Teitz Date: Mon, 11 Mar 2019 11:56:34 +0200 Subject: net/mlx5e: Fix error handling when refreshing TIRs Previously, a false positive would be caught if the TIRs list is empty, since the err value was initialized to -ENOMEM, and was only updated if a TIR is refreshed. This is resolved by initializing the err value to zero. Fixes: b676f653896a ("net/mlx5e: Refactor refresh TIRs") Signed-off-by: Gavi Teitz Reviewed-by: Roi Dayan Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en_common.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_common.c b/drivers/net/ethernet/mellanox/mlx5/core/en_common.c index 3078491cc0d0..8100786f6fb5 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_common.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_common.c @@ -141,15 +141,17 @@ int mlx5e_refresh_tirs(struct mlx5e_priv *priv, bool enable_uc_lb) { struct mlx5_core_dev *mdev = priv->mdev; struct mlx5e_tir *tir; - int err = -ENOMEM; + int err = 0; u32 tirn = 0; int inlen; void *in; inlen = MLX5_ST_SZ_BYTES(modify_tir_in); in = kvzalloc(inlen, GFP_KERNEL); - if (!in) + if (!in) { + err = -ENOMEM; goto out; + } if (enable_uc_lb) MLX5_SET(modify_tir_in, in, ctx.self_lb_block, -- cgit v1.2.3 From 8998576bd9c695ef1297540a50d7b3abbc69286b Mon Sep 17 00:00:00 2001 From: Dmytro Linkin Date: Mon, 4 Feb 2019 09:45:47 +0000 Subject: net/mlx5e: Allow IPv4 ttl & IPv6 hop_limit rewrite for all L4 protocols For some protocols we are not allowing IP header rewrite offload, since the HW is not capable to properly adjust the l4 checksum. However, TTL & HOPLIMIT modification can be done for all IP protocols, because they are not part of the pseudo header taken into account for checksum. Fixes: 738678817573 ("drivers: net: use flow action infrastructure") Signed-off-by: Dmytro Linkin Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en_tc.c | 52 +++++++++++++++++++++++-- 1 file changed, 48 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index b4967a0ff8c7..2b85f93a1c52 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -2158,6 +2158,52 @@ static bool csum_offload_supported(struct mlx5e_priv *priv, return true; } +struct ip_ttl_word { + __u8 ttl; + __u8 protocol; + __sum16 check; +}; + +struct ipv6_hoplimit_word { + __be16 payload_len; + __u8 nexthdr; + __u8 hop_limit; +}; + +static bool is_action_keys_supported(const struct flow_action_entry *act) +{ + u32 mask, offset; + u8 htype; + + htype = act->mangle.htype; + offset = act->mangle.offset; + mask = ~act->mangle.mask; + /* For IPv4 & IPv6 header check 4 byte word, + * to determine that modified fields + * are NOT ttl & hop_limit only. + */ + if (htype == FLOW_ACT_MANGLE_HDR_TYPE_IP4) { + struct ip_ttl_word *ttl_word = + (struct ip_ttl_word *)&mask; + + if (offset != offsetof(struct iphdr, ttl) || + ttl_word->protocol || + ttl_word->check) { + return true; + } + } else if (htype == FLOW_ACT_MANGLE_HDR_TYPE_IP6) { + struct ipv6_hoplimit_word *hoplimit_word = + (struct ipv6_hoplimit_word *)&mask; + + if (offset != offsetof(struct ipv6hdr, payload_len) || + hoplimit_word->payload_len || + hoplimit_word->nexthdr) { + return true; + } + } + return false; +} + static bool modify_header_match_supported(struct mlx5_flow_spec *spec, struct flow_action *flow_action, u32 actions, @@ -2165,9 +2211,9 @@ static bool modify_header_match_supported(struct mlx5_flow_spec *spec, { const struct flow_action_entry *act; bool modify_ip_header; - u8 htype, ip_proto; void *headers_v; u16 ethertype; + u8 ip_proto; int i; if (actions & MLX5_FLOW_CONTEXT_ACTION_DECAP) @@ -2187,9 +2233,7 @@ static bool modify_header_match_supported(struct mlx5_flow_spec *spec, act->id != FLOW_ACTION_ADD) continue; - htype = act->mangle.htype; - if (htype == FLOW_ACT_MANGLE_HDR_TYPE_IP4 || - htype == FLOW_ACT_MANGLE_HDR_TYPE_IP6) { + if (is_action_keys_supported(act)) { modify_ip_header = true; break; } -- cgit v1.2.3 From 8e949363f017e2011464812a714fb29710fb95b4 Mon Sep 17 00:00:00 2001 From: Aditya Pakki Date: Tue, 19 Mar 2019 16:42:40 -0500 Subject: net: mlx5: Add a missing check on idr_find, free buf idr_find() can return a NULL value to 'flow' which is used without a check. The patch adds a check to avoid potential NULL pointer dereference. In case of mlx5_fpga_sbu_conn_sendmsg() failure, free buf allocated using kzalloc. Fixes: ab412e1dd7db ("net/mlx5: Accel, add TLS rx offload routines") Signed-off-by: Aditya Pakki Reviewed-by: Yuval Shaia Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/fpga/tls.c | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fpga/tls.c b/drivers/net/ethernet/mellanox/mlx5/core/fpga/tls.c index 5cf5f2a9d51f..8de64e88c670 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fpga/tls.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fpga/tls.c @@ -217,15 +217,21 @@ int mlx5_fpga_tls_resync_rx(struct mlx5_core_dev *mdev, u32 handle, u32 seq, void *cmd; int ret; + rcu_read_lock(); + flow = idr_find(&mdev->fpga->tls->rx_idr, ntohl(handle)); + rcu_read_unlock(); + + if (!flow) { + WARN_ONCE(1, "Received NULL pointer for handle\n"); + return -EINVAL; + } + buf = kzalloc(size, GFP_ATOMIC); if (!buf) return -ENOMEM; cmd = (buf + 1); - rcu_read_lock(); - flow = idr_find(&mdev->fpga->tls->rx_idr, ntohl(handle)); - rcu_read_unlock(); mlx5_fpga_tls_flow_to_cmd(flow, cmd); MLX5_SET(tls_cmd, cmd, swid, ntohl(handle)); @@ -238,6 +244,8 @@ int mlx5_fpga_tls_resync_rx(struct mlx5_core_dev *mdev, u32 handle, u32 seq, buf->complete = mlx_tls_kfree_complete; ret = mlx5_fpga_sbu_conn_sendmsg(mdev->fpga->tls->conn, buf); + if (ret < 0) + kfree(buf); return ret; } -- cgit v1.2.3 From 80a2a9026b24c6bd34b8d58256973e22270bedec Mon Sep 17 00:00:00 2001 From: Yuval Avnery Date: Mon, 11 Mar 2019 06:18:24 +0200 Subject: net/mlx5e: Add a lock on tir list Refresh tirs is looping over a global list of tirs while netdevs are adding and removing tirs from that list. That is why a lock is required. Fixes: 724b2aa15126 ("net/mlx5e: TIRs management refactoring") Signed-off-by: Yuval Avnery Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en_common.c | 7 +++++++ include/linux/mlx5/driver.h | 2 ++ 2 files changed, 9 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_common.c b/drivers/net/ethernet/mellanox/mlx5/core/en_common.c index 8100786f6fb5..1539cf3de5dc 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_common.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_common.c @@ -45,7 +45,9 @@ int mlx5e_create_tir(struct mlx5_core_dev *mdev, if (err) return err; + mutex_lock(&mdev->mlx5e_res.td.list_lock); list_add(&tir->list, &mdev->mlx5e_res.td.tirs_list); + mutex_unlock(&mdev->mlx5e_res.td.list_lock); return 0; } @@ -53,8 +55,10 @@ int mlx5e_create_tir(struct mlx5_core_dev *mdev, void mlx5e_destroy_tir(struct mlx5_core_dev *mdev, struct mlx5e_tir *tir) { + mutex_lock(&mdev->mlx5e_res.td.list_lock); mlx5_core_destroy_tir(mdev, tir->tirn); list_del(&tir->list); + mutex_unlock(&mdev->mlx5e_res.td.list_lock); } static int mlx5e_create_mkey(struct mlx5_core_dev *mdev, u32 pdn, @@ -114,6 +118,7 @@ int mlx5e_create_mdev_resources(struct mlx5_core_dev *mdev) } INIT_LIST_HEAD(&mdev->mlx5e_res.td.tirs_list); + mutex_init(&mdev->mlx5e_res.td.list_lock); return 0; @@ -159,6 +164,7 @@ int mlx5e_refresh_tirs(struct mlx5e_priv *priv, bool enable_uc_lb) MLX5_SET(modify_tir_in, in, bitmask.self_lb_en, 1); + mutex_lock(&mdev->mlx5e_res.td.list_lock); list_for_each_entry(tir, &mdev->mlx5e_res.td.tirs_list, list) { tirn = tir->tirn; err = mlx5_core_modify_tir(mdev, tirn, in, inlen); @@ -170,6 +176,7 @@ out: kvfree(in); if (err) netdev_err(priv->netdev, "refresh tir(0x%x) failed, %d\n", tirn, err); + mutex_unlock(&mdev->mlx5e_res.td.list_lock); return err; } diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 022541dc5dbf..0d0729648844 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -594,6 +594,8 @@ enum mlx5_pagefault_type_flags { }; struct mlx5_td { + /* protects tirs list changes while tirs refresh */ + struct mutex list_lock; struct list_head tirs_list; u32 tdn; }; -- cgit v1.2.3 From 8d047bf56a2cc13d90e6a5074015d65045fd43e7 Mon Sep 17 00:00:00 2001 From: Aya Levin Date: Thu, 28 Feb 2019 09:27:33 +0200 Subject: net/mlx5: ethtool, Fix type analysis of advertised link-mode Ethtool option set_link_ksettings allows setting of legacy link-modes or extended link-modes. Refine the decision of which type of link-modes is set. Fixes: 6a897372417e ("net/mlx5: ethtool, Add ethtool support for 50Gbps per lane link modes") Signed-off-by: Aya Levin Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c index a0987cc5fe4a..561e36af8e77 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c @@ -997,8 +997,9 @@ int mlx5e_ethtool_set_link_ksettings(struct mlx5e_priv *priv, #define MLX5E_PTYS_EXT ((1ULL << ETHTOOL_LINK_MODE_50000baseKR_Full_BIT) - 1) - ext_requested = (link_ksettings->link_modes.advertising[0] > - MLX5E_PTYS_EXT); + ext_requested = !!(link_ksettings->link_modes.advertising[0] > + MLX5E_PTYS_EXT || + link_ksettings->link_modes.advertising[1]); ext_supported = MLX5_CAP_PCAM_FEATURE(mdev, ptys_extended_ethernet); /*when ptys_extended_ethernet is set legacy link modes are deprecated */ -- cgit v1.2.3 From dd1b9e09c12b4231148f446c2eefd886ef6e3ddd Mon Sep 17 00:00:00 2001 From: Aya Levin Date: Thu, 28 Feb 2019 09:39:02 +0200 Subject: net/mlx5: ethtool, Allow legacy link-modes configuration via non-extended ptys Allow configuration of legacy link-modes even when extended link-modes are supported. This requires reading of legacy advertisement even when extended link-modes are supported. Since legacy and extended advertisement are mutually excluded, wait for empty reply from extended advertisement before reading legacy advertisement. Fixes: 6a897372417e ("net/mlx5: ethtool, Add ethtool support for 50Gbps per lane link modes") Signed-off-by: Aya Levin Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en/port.c | 3 -- .../net/ethernet/mellanox/mlx5/core/en_ethtool.c | 47 ++++++++++++++-------- 2 files changed, 31 insertions(+), 19 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/port.c b/drivers/net/ethernet/mellanox/mlx5/core/en/port.c index 122927f3a600..d5e5afbdca6d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/port.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/port.c @@ -96,9 +96,6 @@ int mlx5_port_query_eth_proto(struct mlx5_core_dev *dev, u8 port, bool ext, if (!eproto) return -EINVAL; - if (ext != MLX5_CAP_PCAM_FEATURE(dev, ptys_extended_ethernet)) - return -EOPNOTSUPP; - err = mlx5_query_port_ptys(dev, out, sizeof(out), MLX5_PTYS_EN, port); if (err) return err; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c index 561e36af8e77..5efce4a3ff79 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c @@ -603,16 +603,18 @@ static void ptys2ethtool_supported_link(struct mlx5_core_dev *mdev, __ETHTOOL_LINK_MODE_MASK_NBITS); } -static void ptys2ethtool_adver_link(struct mlx5_core_dev *mdev, - unsigned long *advertising_modes, - u32 eth_proto_cap) +static void ptys2ethtool_adver_link(unsigned long *advertising_modes, + u32 eth_proto_cap, bool ext) { unsigned long proto_cap = eth_proto_cap; struct ptys2ethtool_config *table; u32 max_size; int proto; - mlx5e_ethtool_get_speed_arr(mdev, &table, &max_size); + table = ext ? ptys2ext_ethtool_table : ptys2legacy_ethtool_table; + max_size = ext ? ARRAY_SIZE(ptys2ext_ethtool_table) : + ARRAY_SIZE(ptys2legacy_ethtool_table); + for_each_set_bit(proto, &proto_cap, max_size) bitmap_or(advertising_modes, advertising_modes, table[proto].advertised, @@ -794,12 +796,12 @@ static void get_supported(struct mlx5_core_dev *mdev, u32 eth_proto_cap, ethtool_link_ksettings_add_link_mode(link_ksettings, supported, Pause); } -static void get_advertising(struct mlx5_core_dev *mdev, u32 eth_proto_cap, - u8 tx_pause, u8 rx_pause, - struct ethtool_link_ksettings *link_ksettings) +static void get_advertising(u32 eth_proto_cap, u8 tx_pause, u8 rx_pause, + struct ethtool_link_ksettings *link_ksettings, + bool ext) { unsigned long *advertising = link_ksettings->link_modes.advertising; - ptys2ethtool_adver_link(mdev, advertising, eth_proto_cap); + ptys2ethtool_adver_link(advertising, eth_proto_cap, ext); if (rx_pause) ethtool_link_ksettings_add_link_mode(link_ksettings, advertising, Pause); @@ -854,8 +856,9 @@ static void get_lp_advertising(struct mlx5_core_dev *mdev, u32 eth_proto_lp, struct ethtool_link_ksettings *link_ksettings) { unsigned long *lp_advertising = link_ksettings->link_modes.lp_advertising; + bool ext = MLX5_CAP_PCAM_FEATURE(mdev, ptys_extended_ethernet); - ptys2ethtool_adver_link(mdev, lp_advertising, eth_proto_lp); + ptys2ethtool_adver_link(lp_advertising, eth_proto_lp, ext); } int mlx5e_ethtool_get_link_ksettings(struct mlx5e_priv *priv, @@ -872,6 +875,7 @@ int mlx5e_ethtool_get_link_ksettings(struct mlx5e_priv *priv, u8 an_disable_admin; u8 an_status; u8 connector_type; + bool admin_ext; bool ext; int err; @@ -886,6 +890,19 @@ int mlx5e_ethtool_get_link_ksettings(struct mlx5e_priv *priv, eth_proto_capability); eth_proto_admin = MLX5_GET_ETH_PROTO(ptys_reg, out, ext, eth_proto_admin); + /* Fields: eth_proto_admin and ext_eth_proto_admin are + * mutually exclusive. Hence try reading legacy advertising + * when extended advertising is zero. + * admin_ext indicates how eth_proto_admin should be + * interpreted + */ + admin_ext = ext; + if (ext && !eth_proto_admin) { + eth_proto_admin = MLX5_GET_ETH_PROTO(ptys_reg, out, false, + eth_proto_admin); + admin_ext = false; + } + eth_proto_oper = MLX5_GET_ETH_PROTO(ptys_reg, out, ext, eth_proto_oper); eth_proto_lp = MLX5_GET(ptys_reg, out, eth_proto_lp_advertise); @@ -899,7 +916,8 @@ int mlx5e_ethtool_get_link_ksettings(struct mlx5e_priv *priv, ethtool_link_ksettings_zero_link_mode(link_ksettings, advertising); get_supported(mdev, eth_proto_cap, link_ksettings); - get_advertising(mdev, eth_proto_admin, tx_pause, rx_pause, link_ksettings); + get_advertising(eth_proto_admin, tx_pause, rx_pause, link_ksettings, + admin_ext); get_speed_duplex(priv->netdev, eth_proto_oper, link_ksettings); eth_proto_oper = eth_proto_oper ? eth_proto_oper : eth_proto_cap; @@ -1001,16 +1019,13 @@ int mlx5e_ethtool_set_link_ksettings(struct mlx5e_priv *priv, MLX5E_PTYS_EXT || link_ksettings->link_modes.advertising[1]); ext_supported = MLX5_CAP_PCAM_FEATURE(mdev, ptys_extended_ethernet); - - /*when ptys_extended_ethernet is set legacy link modes are deprecated */ - if (ext_requested != ext_supported) - return -EPROTONOSUPPORT; + ext_requested &= ext_supported; speed = link_ksettings->base.speed; ethtool2ptys_adver_func = ext_requested ? mlx5e_ethtool2ptys_ext_adver_link : mlx5e_ethtool2ptys_adver_link; - err = mlx5_port_query_eth_proto(mdev, 1, ext_supported, &eproto); + err = mlx5_port_query_eth_proto(mdev, 1, ext_requested, &eproto); if (err) { netdev_err(priv->netdev, "%s: query port eth proto failed: %d\n", __func__, err); @@ -1038,7 +1053,7 @@ int mlx5e_ethtool_set_link_ksettings(struct mlx5e_priv *priv, if (!an_changes && link_modes == eproto.admin) goto out; - mlx5_port_set_eth_ptys(mdev, an_disable, link_modes, ext_supported); + mlx5_port_set_eth_ptys(mdev, an_disable, link_modes, ext_requested); mlx5_toggle_port_link(mdev); out: -- cgit v1.2.3 From 8a91ad9355c66c5026d3d911b434a25408ab876c Mon Sep 17 00:00:00 2001 From: Roi Dayan Date: Thu, 7 Mar 2019 09:27:18 +0200 Subject: net/mlx5: E-Switch, Fix access to invalid memory when toggling esw modes The esw fdb table has a union of legacy and offloads members. So if we were in a certain esw mode we could set some memebers and not set null which is fine as on destroy path and don't care. But then moving from legacy to switchdev a second time, the cleanup flow of legacy mode checks if a struct member was in use if it's not null so we need to make sure to reset the code to null when we init legacy mode. Fixes: 8da202b24913 ("net/mlx5: E-Switch, Add support for VEPA in legacy mode.") Signed-off-by: Roi Dayan Reviewed-by: Huy Nguyen Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/eswitch.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c index ecd2c747f726..1a3b4d5d77fb 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c @@ -431,6 +431,8 @@ static int esw_create_legacy_table(struct mlx5_eswitch *esw) { int err; + memset(&esw->fdb_table.legacy, 0, sizeof(struct legacy_fdb)); + err = esw_create_legacy_vepa_table(esw); if (err) return err; -- cgit v1.2.3 From 84be899f6fd233ff2aeaf14cc43e6457425122b2 Mon Sep 17 00:00:00 2001 From: Tonghao Zhang Date: Tue, 26 Feb 2019 04:28:32 -0800 Subject: net/mlx5e: Correctly use the namespace type when allocating pedit action The capacity of FDB offloading and NIC offloading table are different, and when allocating the pedit actions, we should use the correct namespace type. Fixes: c500c86b0c75d ("net/mlx5e: support for two independent packet edit actions") Cc: Pablo Neira Ayuso Signed-off-by: Tonghao Zhang Reviewed-by: Roi Dayan Acked-by: Pablo Neira Ayuso Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en_tc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index 2b85f93a1c52..5fb5cab36bf6 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -2701,7 +2701,7 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv, if (hdrs[TCA_PEDIT_KEY_EX_CMD_SET].pedits || hdrs[TCA_PEDIT_KEY_EX_CMD_ADD].pedits) { - err = alloc_tc_pedit_action(priv, MLX5_FLOW_NAMESPACE_KERNEL, + err = alloc_tc_pedit_action(priv, MLX5_FLOW_NAMESPACE_FDB, parse_attr, hdrs, extack); if (err) return err; -- cgit v1.2.3 From 5c1d260ed10cf08dd7a0299c103ad0a3f9a9f7a1 Mon Sep 17 00:00:00 2001 From: Roi Dayan Date: Thu, 21 Mar 2019 15:51:35 -0700 Subject: net/mlx5: E-Switch, Protect from invalid memory access in offload fdb table The esw offloads structures share a union with the legacy mode structs. Reset the offloads struct to zero in init to protect from null assumptions made by the legacy mode code. Signed-off-by: Roi Dayan Reviewed-by: Or Gerlitz Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index f2260391be5b..9b2d78ee22b8 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@ -1611,6 +1611,7 @@ static int esw_offloads_steering_init(struct mlx5_eswitch *esw, int nvports) { int err; + memset(&esw->fdb_table.offloads, 0, sizeof(struct offloads_fdb)); mutex_init(&esw->fdb_table.offloads.fdb_prio_lock); err = esw_create_offloads_fdb_tables(esw, nvports); -- cgit v1.2.3 From eca4a928585ac08147e5cc8e2111ecbc6279ee31 Mon Sep 17 00:00:00 2001 From: Omri Kahalon Date: Sun, 24 Feb 2019 16:31:08 +0200 Subject: net/mlx5: E-Switch, Fix esw manager vport indication for more vport commands Traditionally, the PF (Physical Function) which resides on vport 0 was the E-switch manager. Since the ECPF (Embedded CPU Physical Function), which resides on vport 0xfffe, was introduced as the E-Switch manager, the assumption that the E-switch manager is on vport 0 is incorrect. Since the eswitch code already uses the actual vport value, all we need is to always set other_vport=1. Signed-off-by: Omri Kahalon Reviewed-by: Max Gurtovoy Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/eswitch.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c index 1a3b4d5d77fb..c938954599c9 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c @@ -105,8 +105,7 @@ static int arm_vport_context_events_cmd(struct mlx5_core_dev *dev, u16 vport, opcode, MLX5_CMD_OP_MODIFY_NIC_VPORT_CONTEXT); MLX5_SET(modify_nic_vport_context_in, in, field_select.change_event, 1); MLX5_SET(modify_nic_vport_context_in, in, vport_number, vport); - if (vport) - MLX5_SET(modify_nic_vport_context_in, in, other_vport, 1); + MLX5_SET(modify_nic_vport_context_in, in, other_vport, 1); nic_vport_ctx = MLX5_ADDR_OF(modify_nic_vport_context_in, in, nic_vport_context); @@ -134,8 +133,7 @@ static int modify_esw_vport_context_cmd(struct mlx5_core_dev *dev, u16 vport, MLX5_SET(modify_esw_vport_context_in, in, opcode, MLX5_CMD_OP_MODIFY_ESW_VPORT_CONTEXT); MLX5_SET(modify_esw_vport_context_in, in, vport_number, vport); - if (vport) - MLX5_SET(modify_esw_vport_context_in, in, other_vport, 1); + MLX5_SET(modify_esw_vport_context_in, in, other_vport, 1); return mlx5_cmd_exec(dev, in, inlen, out, sizeof(out)); } -- cgit v1.2.3 From 36acf63a066f7e095ef2322118c2742a177daa65 Mon Sep 17 00:00:00 2001 From: Huy Nguyen Date: Fri, 22 Mar 2019 09:42:08 -0500 Subject: net/mlx5: E-Switch, fix syndrome (0x678139) when turn on vepa Make sure the struct mlx5_flow_destination is zero before filling in the field. Fixes: 8da202b24913 ("net/mlx5: E-Switch, Add support for VEPA in legacy mode.") Signed-off-by: Huy Nguyen Reviewed-by: Daniel Jurgens Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/eswitch.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c index c938954599c9..8a67fd197b79 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c @@ -2157,6 +2157,7 @@ static int _mlx5_eswitch_set_vepa_locked(struct mlx5_eswitch *esw, /* Star rule to forward all traffic to uplink vport */ memset(spec, 0, sizeof(*spec)); + memset(&dest, 0, sizeof(dest)); dest.type = MLX5_FLOW_DESTINATION_TYPE_VPORT; dest.vport.num = MLX5_VPORT_UPLINK; flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; -- cgit v1.2.3 From 5ec983e924c7978aaec3cf8679ece9436508bb20 Mon Sep 17 00:00:00 2001 From: Huy Nguyen Date: Thu, 7 Mar 2019 14:49:50 -0600 Subject: net/mlx5e: Update xoff formula Set minimum speed in xoff threshold formula to 40Gbps Fixes: 0696d60853d5 ("net/mlx5e: Receive buffer configuration") Signed-off-by: Huy Nguyen Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en/port_buffer.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/port_buffer.c b/drivers/net/ethernet/mellanox/mlx5/core/en/port_buffer.c index eac245a93f91..f00de0c987cd 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/port_buffer.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/port_buffer.c @@ -122,7 +122,9 @@ out: return err; } -/* xoff = ((301+2.16 * len [m]) * speed [Gbps] + 2.72 MTU [B]) */ +/* xoff = ((301+2.16 * len [m]) * speed [Gbps] + 2.72 MTU [B]) + * minimum speed value is 40Gbps + */ static u32 calculate_xoff(struct mlx5e_priv *priv, unsigned int mtu) { u32 speed; @@ -130,10 +132,9 @@ static u32 calculate_xoff(struct mlx5e_priv *priv, unsigned int mtu) int err; err = mlx5e_port_linkspeed(priv->mdev, &speed); - if (err) { - mlx5_core_warn(priv->mdev, "cannot get port speed\n"); - return 0; - } + if (err) + speed = SPEED_40000; + speed = max_t(u32, speed, SPEED_40000); xoff = (301 + 216 * priv->dcbx.cable_len / 100) * speed / 1000 + 272 * mtu / 100; -- cgit v1.2.3 From e28408e98bced123038857b6e3c81fa12a2e3e68 Mon Sep 17 00:00:00 2001 From: Huy Nguyen Date: Thu, 7 Mar 2019 14:07:32 -0600 Subject: net/mlx5e: Update xon formula Set xon = xoff - netdev's max_mtu. netdev's max_mtu will give enough time for the pause frame to arrive at the sender. Fixes: 0696d60853d5 ("net/mlx5e: Receive buffer configuration") Signed-off-by: Huy Nguyen Signed-off-by: Saeed Mahameed --- .../ethernet/mellanox/mlx5/core/en/port_buffer.c | 28 ++++++++++++---------- 1 file changed, 16 insertions(+), 12 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/port_buffer.c b/drivers/net/ethernet/mellanox/mlx5/core/en/port_buffer.c index f00de0c987cd..4ab0d030b544 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/port_buffer.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/port_buffer.c @@ -143,7 +143,7 @@ static u32 calculate_xoff(struct mlx5e_priv *priv, unsigned int mtu) } static int update_xoff_threshold(struct mlx5e_port_buffer *port_buffer, - u32 xoff, unsigned int mtu) + u32 xoff, unsigned int max_mtu) { int i; @@ -155,11 +155,12 @@ static int update_xoff_threshold(struct mlx5e_port_buffer *port_buffer, } if (port_buffer->buffer[i].size < - (xoff + mtu + (1 << MLX5E_BUFFER_CELL_SHIFT))) + (xoff + max_mtu + (1 << MLX5E_BUFFER_CELL_SHIFT))) return -ENOMEM; port_buffer->buffer[i].xoff = port_buffer->buffer[i].size - xoff; - port_buffer->buffer[i].xon = port_buffer->buffer[i].xoff - mtu; + port_buffer->buffer[i].xon = + port_buffer->buffer[i].xoff - max_mtu; } return 0; @@ -167,7 +168,7 @@ static int update_xoff_threshold(struct mlx5e_port_buffer *port_buffer, /** * update_buffer_lossy() - * mtu: device's MTU + * max_mtu: netdev's max_mtu * pfc_en: current pfc configuration * buffer: current prio to buffer mapping * xoff: xoff value @@ -184,7 +185,7 @@ static int update_xoff_threshold(struct mlx5e_port_buffer *port_buffer, * Return 0 if no error. * Set change to true if buffer configuration is modified. */ -static int update_buffer_lossy(unsigned int mtu, +static int update_buffer_lossy(unsigned int max_mtu, u8 pfc_en, u8 *buffer, u32 xoff, struct mlx5e_port_buffer *port_buffer, bool *change) @@ -221,7 +222,7 @@ static int update_buffer_lossy(unsigned int mtu, } if (changed) { - err = update_xoff_threshold(port_buffer, xoff, mtu); + err = update_xoff_threshold(port_buffer, xoff, max_mtu); if (err) return err; @@ -231,6 +232,7 @@ static int update_buffer_lossy(unsigned int mtu, return 0; } +#define MINIMUM_MAX_MTU 9216 int mlx5e_port_manual_buffer_config(struct mlx5e_priv *priv, u32 change, unsigned int mtu, struct ieee_pfc *pfc, @@ -242,12 +244,14 @@ int mlx5e_port_manual_buffer_config(struct mlx5e_priv *priv, bool update_prio2buffer = false; u8 buffer[MLX5E_MAX_PRIORITY]; bool update_buffer = false; + unsigned int max_mtu; u32 total_used = 0; u8 curr_pfc_en; int err; int i; mlx5e_dbg(HW, priv, "%s: change=%x\n", __func__, change); + max_mtu = max_t(unsigned int, priv->netdev->max_mtu, MINIMUM_MAX_MTU); err = mlx5e_port_query_buffer(priv, &port_buffer); if (err) @@ -255,7 +259,7 @@ int mlx5e_port_manual_buffer_config(struct mlx5e_priv *priv, if (change & MLX5E_PORT_BUFFER_CABLE_LEN) { update_buffer = true; - err = update_xoff_threshold(&port_buffer, xoff, mtu); + err = update_xoff_threshold(&port_buffer, xoff, max_mtu); if (err) return err; } @@ -265,7 +269,7 @@ int mlx5e_port_manual_buffer_config(struct mlx5e_priv *priv, if (err) return err; - err = update_buffer_lossy(mtu, pfc->pfc_en, buffer, xoff, + err = update_buffer_lossy(max_mtu, pfc->pfc_en, buffer, xoff, &port_buffer, &update_buffer); if (err) return err; @@ -277,8 +281,8 @@ int mlx5e_port_manual_buffer_config(struct mlx5e_priv *priv, if (err) return err; - err = update_buffer_lossy(mtu, curr_pfc_en, prio2buffer, xoff, - &port_buffer, &update_buffer); + err = update_buffer_lossy(max_mtu, curr_pfc_en, prio2buffer, + xoff, &port_buffer, &update_buffer); if (err) return err; } @@ -302,7 +306,7 @@ int mlx5e_port_manual_buffer_config(struct mlx5e_priv *priv, return -EINVAL; update_buffer = true; - err = update_xoff_threshold(&port_buffer, xoff, mtu); + err = update_xoff_threshold(&port_buffer, xoff, max_mtu); if (err) return err; } @@ -310,7 +314,7 @@ int mlx5e_port_manual_buffer_config(struct mlx5e_priv *priv, /* Need to update buffer configuration if xoff value is changed */ if (!update_buffer && xoff != priv->dcbx.xoff) { update_buffer = true; - err = update_xoff_threshold(&port_buffer, xoff, mtu); + err = update_xoff_threshold(&port_buffer, xoff, max_mtu); if (err) return err; } -- cgit v1.2.3 From 7f1a546e322287ae948e0f5eb8d12b7b638d93a6 Mon Sep 17 00:00:00 2001 From: Eli Britstein Date: Mon, 18 Mar 2019 09:25:59 +0000 Subject: net/mlx5e: Consider tunnel type for encap contexts The driver allocates an encap context based on the tunnel properties, and reuse that context for all flows using the same tunnel properties. Commit df2ef3bff193 ("net/mlx5e: Add GRE protocol offloading") introduced another tunnel protocol other than the single VXLAN previously supported. A flow that uses a tunnel with the same tunnel properties but with a different tunnel type (GRE vs VXLAN for example) would mistakenly reuse the previous alocated context, causing the traffic to be sent with the wrong encapsulation. Fix that by considering the tunnel type for encap contexts. Fixes: df2ef3bff193 ("net/mlx5e: Add GRE protocol offloading") Signed-off-by: Eli Britstein Reviewed-by: Roi Dayan Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en_tc.c | 28 +++++++++++++++++-------- 1 file changed, 19 insertions(+), 9 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index 5fb5cab36bf6..d75dc44eb2ff 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -2384,15 +2384,22 @@ static int parse_tc_nic_actions(struct mlx5e_priv *priv, return 0; } -static inline int cmp_encap_info(struct ip_tunnel_key *a, - struct ip_tunnel_key *b) +struct encap_key { + struct ip_tunnel_key *ip_tun_key; + int tunnel_type; +}; + +static inline int cmp_encap_info(struct encap_key *a, + struct encap_key *b) { - return memcmp(a, b, sizeof(*a)); + return memcmp(a->ip_tun_key, b->ip_tun_key, sizeof(*a->ip_tun_key)) || + a->tunnel_type != b->tunnel_type; } -static inline int hash_encap_info(struct ip_tunnel_key *key) +static inline int hash_encap_info(struct encap_key *key) { - return jhash(key, sizeof(*key), 0); + return jhash(key->ip_tun_key, sizeof(*key->ip_tun_key), + key->tunnel_type); } @@ -2423,7 +2430,7 @@ static int mlx5e_attach_encap(struct mlx5e_priv *priv, struct mlx5_esw_flow_attr *attr = flow->esw_attr; struct mlx5e_tc_flow_parse_attr *parse_attr; struct ip_tunnel_info *tun_info; - struct ip_tunnel_key *key; + struct encap_key key, e_key; struct mlx5e_encap_entry *e; unsigned short family; uintptr_t hash_key; @@ -2433,13 +2440,16 @@ static int mlx5e_attach_encap(struct mlx5e_priv *priv, parse_attr = attr->parse_attr; tun_info = &parse_attr->tun_info[out_index]; family = ip_tunnel_info_af(tun_info); - key = &tun_info->key; + key.ip_tun_key = &tun_info->key; + key.tunnel_type = mlx5e_tc_tun_get_type(mirred_dev); - hash_key = hash_encap_info(key); + hash_key = hash_encap_info(&key); hash_for_each_possible_rcu(esw->offloads.encap_tbl, e, encap_hlist, hash_key) { - if (!cmp_encap_info(&e->tun_info.key, key)) { + e_key.ip_tun_key = &e->tun_info.key; + e_key.tunnel_type = e->tunnel_type; + if (!cmp_encap_info(&e_key, &key)) { found = true; break; } -- cgit v1.2.3 From 18bebc6dd3281955240062655a4df35eef2c46b3 Mon Sep 17 00:00:00 2001 From: Konstantin Khorenko Date: Thu, 28 Mar 2019 13:29:21 +0300 Subject: bonding: show full hw address in sysfs for slave entries Bond expects ethernet hwaddr for its slave, but it can be longer than 6 bytes - infiniband interface for example. # cat /sys/devices//net/ib0/address 80:00:02:08:fe:80:00:00:00:00:00:00:7c:fe:90:03:00:be:5d:e1 # cat /sys/devices//net/ib0/bonding_slave/perm_hwaddr 80:00:02:08:fe:80 So print full hwaddr in sysfs "bonding_slave/perm_hwaddr" as well. Signed-off-by: Konstantin Khorenko Signed-off-by: David S. Miller --- drivers/net/bonding/bond_sysfs_slave.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/bonding/bond_sysfs_slave.c b/drivers/net/bonding/bond_sysfs_slave.c index 2f120b2ffef0..4985268e2273 100644 --- a/drivers/net/bonding/bond_sysfs_slave.c +++ b/drivers/net/bonding/bond_sysfs_slave.c @@ -55,7 +55,9 @@ static SLAVE_ATTR_RO(link_failure_count); static ssize_t perm_hwaddr_show(struct slave *slave, char *buf) { - return sprintf(buf, "%pM\n", slave->perm_hwaddr); + return sprintf(buf, "%*phC\n", + slave->dev->addr_len, + slave->perm_hwaddr); } static SLAVE_ATTR_RO(perm_hwaddr); -- cgit v1.2.3 From 1b704c4a1ba95574832e730f23817b651db2aa59 Mon Sep 17 00:00:00 2001 From: Haiyang Zhang Date: Thu, 28 Mar 2019 19:40:36 +0000 Subject: hv_netvsc: Fix unwanted wakeup after tx_disable After queue stopped, the wakeup mechanism may wake it up again when ring buffer usage is lower than a threshold. This may cause send path panic on NULL pointer when we stopped all tx queues in netvsc_detach and start removing the netvsc device. This patch fix it by adding a tx_disable flag to prevent unwanted queue wakeup. Fixes: 7b2ee50c0cd5 ("hv_netvsc: common detach logic") Reported-by: Mohammed Gamal Signed-off-by: Haiyang Zhang Signed-off-by: David S. Miller --- drivers/net/hyperv/hyperv_net.h | 1 + drivers/net/hyperv/netvsc.c | 6 ++++-- drivers/net/hyperv/netvsc_drv.c | 32 ++++++++++++++++++++++++++------ 3 files changed, 31 insertions(+), 8 deletions(-) diff --git a/drivers/net/hyperv/hyperv_net.h b/drivers/net/hyperv/hyperv_net.h index e859ae2e42d5..49f41b64077b 100644 --- a/drivers/net/hyperv/hyperv_net.h +++ b/drivers/net/hyperv/hyperv_net.h @@ -987,6 +987,7 @@ struct netvsc_device { wait_queue_head_t wait_drain; bool destroy; + bool tx_disable; /* if true, do not wake up queue again */ /* Receive buffer allocated by us but manages by NetVSP */ void *recv_buf; diff --git a/drivers/net/hyperv/netvsc.c b/drivers/net/hyperv/netvsc.c index 813d195bbd57..e0dce373cdd9 100644 --- a/drivers/net/hyperv/netvsc.c +++ b/drivers/net/hyperv/netvsc.c @@ -110,6 +110,7 @@ static struct netvsc_device *alloc_net_device(void) init_waitqueue_head(&net_device->wait_drain); net_device->destroy = false; + net_device->tx_disable = false; net_device->max_pkt = RNDIS_MAX_PKT_DEFAULT; net_device->pkt_align = RNDIS_PKT_ALIGN_DEFAULT; @@ -719,7 +720,7 @@ static void netvsc_send_tx_complete(struct net_device *ndev, } else { struct netdev_queue *txq = netdev_get_tx_queue(ndev, q_idx); - if (netif_tx_queue_stopped(txq) && + if (netif_tx_queue_stopped(txq) && !net_device->tx_disable && (hv_get_avail_to_write_percent(&channel->outbound) > RING_AVAIL_PERCENT_HIWATER || queue_sends < 1)) { netif_tx_wake_queue(txq); @@ -874,7 +875,8 @@ static inline int netvsc_send_pkt( } else if (ret == -EAGAIN) { netif_tx_stop_queue(txq); ndev_ctx->eth_stats.stop_queue++; - if (atomic_read(&nvchan->queue_sends) < 1) { + if (atomic_read(&nvchan->queue_sends) < 1 && + !net_device->tx_disable) { netif_tx_wake_queue(txq); ndev_ctx->eth_stats.wake_queue++; ret = -ENOSPC; diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c index cf4897043e83..b20fb0fb595b 100644 --- a/drivers/net/hyperv/netvsc_drv.c +++ b/drivers/net/hyperv/netvsc_drv.c @@ -109,6 +109,15 @@ static void netvsc_set_rx_mode(struct net_device *net) rcu_read_unlock(); } +static void netvsc_tx_enable(struct netvsc_device *nvscdev, + struct net_device *ndev) +{ + nvscdev->tx_disable = false; + virt_wmb(); /* ensure queue wake up mechanism is on */ + + netif_tx_wake_all_queues(ndev); +} + static int netvsc_open(struct net_device *net) { struct net_device_context *ndev_ctx = netdev_priv(net); @@ -129,7 +138,7 @@ static int netvsc_open(struct net_device *net) rdev = nvdev->extension; if (!rdev->link_state) { netif_carrier_on(net); - netif_tx_wake_all_queues(net); + netvsc_tx_enable(nvdev, net); } if (vf_netdev) { @@ -184,6 +193,17 @@ static int netvsc_wait_until_empty(struct netvsc_device *nvdev) } } +static void netvsc_tx_disable(struct netvsc_device *nvscdev, + struct net_device *ndev) +{ + if (nvscdev) { + nvscdev->tx_disable = true; + virt_wmb(); /* ensure txq will not wake up after stop */ + } + + netif_tx_disable(ndev); +} + static int netvsc_close(struct net_device *net) { struct net_device_context *net_device_ctx = netdev_priv(net); @@ -192,7 +212,7 @@ static int netvsc_close(struct net_device *net) struct netvsc_device *nvdev = rtnl_dereference(net_device_ctx->nvdev); int ret; - netif_tx_disable(net); + netvsc_tx_disable(nvdev, net); /* No need to close rndis filter if it is removed already */ if (!nvdev) @@ -920,7 +940,7 @@ static int netvsc_detach(struct net_device *ndev, /* If device was up (receiving) then shutdown */ if (netif_running(ndev)) { - netif_tx_disable(ndev); + netvsc_tx_disable(nvdev, ndev); ret = rndis_filter_close(nvdev); if (ret) { @@ -1908,7 +1928,7 @@ static void netvsc_link_change(struct work_struct *w) if (rdev->link_state) { rdev->link_state = false; netif_carrier_on(net); - netif_tx_wake_all_queues(net); + netvsc_tx_enable(net_device, net); } else { notify = true; } @@ -1918,7 +1938,7 @@ static void netvsc_link_change(struct work_struct *w) if (!rdev->link_state) { rdev->link_state = true; netif_carrier_off(net); - netif_tx_stop_all_queues(net); + netvsc_tx_disable(net_device, net); } kfree(event); break; @@ -1927,7 +1947,7 @@ static void netvsc_link_change(struct work_struct *w) if (!rdev->link_state) { rdev->link_state = true; netif_carrier_off(net); - netif_tx_stop_all_queues(net); + netvsc_tx_disable(net_device, net); event->event = RNDIS_STATUS_MEDIA_CONNECT; spin_lock_irqsave(&ndev_ctx->lock, flags); list_add(&event->list, &ndev_ctx->reconfig_events); -- cgit v1.2.3 From c43ac97bac987e56c179598ce3398a95d55067bc Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Thu, 28 Mar 2019 14:54:43 -0700 Subject: net: tls: prevent false connection termination with offload Only decrypt_internal() performs zero copy on rx, all paths which don't hit decrypt_internal() must set zc to false, otherwise tls_sw_recvmsg() may return 0 causing the application to believe that that connection got closed. Currently this happens with device offload when new record is first read from. Fixes: d069b780e367 ("tls: Fix tls_device receive") Signed-off-by: Jakub Kicinski Reviewed-by: Simon Horman Reported-by: David Beckett Signed-off-by: David S. Miller --- net/tls/tls_sw.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c index 425351ac2a9b..20b191227969 100644 --- a/net/tls/tls_sw.c +++ b/net/tls/tls_sw.c @@ -1484,6 +1484,8 @@ static int decrypt_skb_update(struct sock *sk, struct sk_buff *skb, return err; } + } else { + *zc = false; } rxm->full_len -= padding_length(ctx, tls_ctx, skb); -- cgit v1.2.3 From 3d8830266ffc28c16032b859e38a0252e014b631 Mon Sep 17 00:00:00 2001 From: Li RongQing Date: Fri, 29 Mar 2019 09:18:02 +0800 Subject: net: ethtool: not call vzalloc for zero sized memory request NULL or ZERO_SIZE_PTR will be returned for zero sized memory request, and derefencing them will lead to a segfault so it is unnecessory to call vzalloc for zero sized memory request and not call functions which maybe derefence the NULL allocated memory this also fixes a possible memory leak if phy_ethtool_get_stats returns error, memory should be freed before exit Signed-off-by: Li RongQing Reviewed-by: Wang Li Reviewed-by: Michal Kubecek Signed-off-by: David S. Miller --- net/core/ethtool.c | 46 ++++++++++++++++++++++++++++++---------------- 1 file changed, 30 insertions(+), 16 deletions(-) diff --git a/net/core/ethtool.c b/net/core/ethtool.c index b1eb32419732..36ed619faf36 100644 --- a/net/core/ethtool.c +++ b/net/core/ethtool.c @@ -1797,11 +1797,16 @@ static int ethtool_get_strings(struct net_device *dev, void __user *useraddr) WARN_ON_ONCE(!ret); gstrings.len = ret; - data = vzalloc(array_size(gstrings.len, ETH_GSTRING_LEN)); - if (gstrings.len && !data) - return -ENOMEM; - __ethtool_get_strings(dev, gstrings.string_set, data); + if (gstrings.len) { + data = vzalloc(array_size(gstrings.len, ETH_GSTRING_LEN)); + if (!data) + return -ENOMEM; + + __ethtool_get_strings(dev, gstrings.string_set, data); + } else { + data = NULL; + } ret = -EFAULT; if (copy_to_user(useraddr, &gstrings, sizeof(gstrings))) @@ -1897,11 +1902,15 @@ static int ethtool_get_stats(struct net_device *dev, void __user *useraddr) return -EFAULT; stats.n_stats = n_stats; - data = vzalloc(array_size(n_stats, sizeof(u64))); - if (n_stats && !data) - return -ENOMEM; - ops->get_ethtool_stats(dev, &stats, data); + if (n_stats) { + data = vzalloc(array_size(n_stats, sizeof(u64))); + if (!data) + return -ENOMEM; + ops->get_ethtool_stats(dev, &stats, data); + } else { + data = NULL; + } ret = -EFAULT; if (copy_to_user(useraddr, &stats, sizeof(stats))) @@ -1941,16 +1950,21 @@ static int ethtool_get_phy_stats(struct net_device *dev, void __user *useraddr) return -EFAULT; stats.n_stats = n_stats; - data = vzalloc(array_size(n_stats, sizeof(u64))); - if (n_stats && !data) - return -ENOMEM; - if (dev->phydev && !ops->get_ethtool_phy_stats) { - ret = phy_ethtool_get_stats(dev->phydev, &stats, data); - if (ret < 0) - return ret; + if (n_stats) { + data = vzalloc(array_size(n_stats, sizeof(u64))); + if (!data) + return -ENOMEM; + + if (dev->phydev && !ops->get_ethtool_phy_stats) { + ret = phy_ethtool_get_stats(dev->phydev, &stats, data); + if (ret < 0) + goto out; + } else { + ops->get_ethtool_phy_stats(dev, &stats, data); + } } else { - ops->get_ethtool_phy_stats(dev, &stats, data); + data = NULL; } ret = -EFAULT; -- cgit v1.2.3 From 4d31c4fa3f9ef7b7e2e79fd57d21290f64c938f5 Mon Sep 17 00:00:00 2001 From: Vishal Kulkarni Date: Fri, 29 Mar 2019 16:56:09 +0530 Subject: cxgb4: Update 1.23.3.0 as the latest firmware supported. Change t4fw_version.h to update latest firmware version number to 1.23.3.0. Signed-off-by: Vishal Kulkarni Signed-off-by: David S. Miller --- drivers/net/ethernet/chelsio/cxgb4/t4fw_version.h | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4fw_version.h b/drivers/net/ethernet/chelsio/cxgb4/t4fw_version.h index 9125ddd89dd1..a02b1dff403e 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/t4fw_version.h +++ b/drivers/net/ethernet/chelsio/cxgb4/t4fw_version.h @@ -36,8 +36,8 @@ #define __T4FW_VERSION_H__ #define T4FW_VERSION_MAJOR 0x01 -#define T4FW_VERSION_MINOR 0x16 -#define T4FW_VERSION_MICRO 0x09 +#define T4FW_VERSION_MINOR 0x17 +#define T4FW_VERSION_MICRO 0x03 #define T4FW_VERSION_BUILD 0x00 #define T4FW_MIN_VERSION_MAJOR 0x01 @@ -45,8 +45,8 @@ #define T4FW_MIN_VERSION_MICRO 0x00 #define T5FW_VERSION_MAJOR 0x01 -#define T5FW_VERSION_MINOR 0x16 -#define T5FW_VERSION_MICRO 0x09 +#define T5FW_VERSION_MINOR 0x17 +#define T5FW_VERSION_MICRO 0x03 #define T5FW_VERSION_BUILD 0x00 #define T5FW_MIN_VERSION_MAJOR 0x00 @@ -54,8 +54,8 @@ #define T5FW_MIN_VERSION_MICRO 0x00 #define T6FW_VERSION_MAJOR 0x01 -#define T6FW_VERSION_MINOR 0x16 -#define T6FW_VERSION_MICRO 0x09 +#define T6FW_VERSION_MINOR 0x17 +#define T6FW_VERSION_MICRO 0x03 #define T6FW_VERSION_BUILD 0x00 #define T6FW_MIN_VERSION_MAJOR 0x00 -- cgit v1.2.3 From ec915f4744a0a556090874a4a78e85afea77471a Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Fri, 29 Mar 2019 13:47:14 -0700 Subject: Revert "cxgb4: Update 1.23.3.0 as the latest firmware supported." This reverts commit 4d31c4fa3f9ef7b7e2e79fd57d21290f64c938f5. Accidently applied this to the wrong tree. Signed-off-by: David S. Miller --- drivers/net/ethernet/chelsio/cxgb4/t4fw_version.h | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4fw_version.h b/drivers/net/ethernet/chelsio/cxgb4/t4fw_version.h index a02b1dff403e..9125ddd89dd1 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/t4fw_version.h +++ b/drivers/net/ethernet/chelsio/cxgb4/t4fw_version.h @@ -36,8 +36,8 @@ #define __T4FW_VERSION_H__ #define T4FW_VERSION_MAJOR 0x01 -#define T4FW_VERSION_MINOR 0x17 -#define T4FW_VERSION_MICRO 0x03 +#define T4FW_VERSION_MINOR 0x16 +#define T4FW_VERSION_MICRO 0x09 #define T4FW_VERSION_BUILD 0x00 #define T4FW_MIN_VERSION_MAJOR 0x01 @@ -45,8 +45,8 @@ #define T4FW_MIN_VERSION_MICRO 0x00 #define T5FW_VERSION_MAJOR 0x01 -#define T5FW_VERSION_MINOR 0x17 -#define T5FW_VERSION_MICRO 0x03 +#define T5FW_VERSION_MINOR 0x16 +#define T5FW_VERSION_MICRO 0x09 #define T5FW_VERSION_BUILD 0x00 #define T5FW_MIN_VERSION_MAJOR 0x00 @@ -54,8 +54,8 @@ #define T5FW_MIN_VERSION_MICRO 0x00 #define T6FW_VERSION_MAJOR 0x01 -#define T6FW_VERSION_MINOR 0x17 -#define T6FW_VERSION_MICRO 0x03 +#define T6FW_VERSION_MINOR 0x16 +#define T6FW_VERSION_MICRO 0x09 #define T6FW_VERSION_BUILD 0x00 #define T6FW_MIN_VERSION_MAJOR 0x00 -- cgit v1.2.3 From 037c8489ade669e0f09ad40d5b91e5e1159a14b1 Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Wed, 27 Mar 2019 11:10:44 -0700 Subject: libnvdimm/security: provide fix for secure-erase to use zero-key Add a zero key in order to standardize hardware that want a key of 0's to be passed. Some platforms defaults to a zero-key with security enabled rather than allow the OS to enable the security. The zero key would allow us to manage those platform as well. This also adds a fix to secure erase so it can use the zero key to do crypto erase. Some other security commands already use zero keys. This introduces a standard zero-key to allow unification of semantics cross nvdimm security commands. Signed-off-by: Dave Jiang Signed-off-by: Dan Williams --- drivers/nvdimm/security.c | 17 ++++++++++++----- tools/testing/nvdimm/test/nfit.c | 11 +++++++++-- 2 files changed, 21 insertions(+), 7 deletions(-) diff --git a/drivers/nvdimm/security.c b/drivers/nvdimm/security.c index f8bb746a549f..6bea6852bf27 100644 --- a/drivers/nvdimm/security.c +++ b/drivers/nvdimm/security.c @@ -22,6 +22,8 @@ static bool key_revalidate = true; module_param(key_revalidate, bool, 0444); MODULE_PARM_DESC(key_revalidate, "Require key validation at init."); +static const char zero_key[NVDIMM_PASSPHRASE_LEN]; + static void *key_data(struct key *key) { struct encrypted_key_payload *epayload = dereference_key_locked(key); @@ -286,8 +288,9 @@ int nvdimm_security_erase(struct nvdimm *nvdimm, unsigned int keyid, { struct device *dev = &nvdimm->dev; struct nvdimm_bus *nvdimm_bus = walk_to_nvdimm_bus(dev); - struct key *key; + struct key *key = NULL; int rc; + const void *data; /* The bus lock should be held at the top level of the call stack */ lockdep_assert_held(&nvdimm_bus->reconfig_mutex); @@ -319,11 +322,15 @@ int nvdimm_security_erase(struct nvdimm *nvdimm, unsigned int keyid, return -EOPNOTSUPP; } - key = nvdimm_lookup_user_key(nvdimm, keyid, NVDIMM_BASE_KEY); - if (!key) - return -ENOKEY; + if (keyid != 0) { + key = nvdimm_lookup_user_key(nvdimm, keyid, NVDIMM_BASE_KEY); + if (!key) + return -ENOKEY; + data = key_data(key); + } else + data = zero_key; - rc = nvdimm->sec.ops->erase(nvdimm, key_data(key), pass_type); + rc = nvdimm->sec.ops->erase(nvdimm, data, pass_type); dev_dbg(dev, "key: %d erase%s: %s\n", key_serial(key), pass_type == NVDIMM_MASTER ? "(master)" : "(user)", rc == 0 ? "success" : "fail"); diff --git a/tools/testing/nvdimm/test/nfit.c b/tools/testing/nvdimm/test/nfit.c index b579f962451d..cad719876ef4 100644 --- a/tools/testing/nvdimm/test/nfit.c +++ b/tools/testing/nvdimm/test/nfit.c @@ -225,6 +225,8 @@ static struct workqueue_struct *nfit_wq; static struct gen_pool *nfit_pool; +static const char zero_key[NVDIMM_PASSPHRASE_LEN]; + static struct nfit_test *to_nfit_test(struct device *dev) { struct platform_device *pdev = to_platform_device(dev); @@ -1059,8 +1061,7 @@ static int nd_intel_test_cmd_secure_erase(struct nfit_test *t, struct device *dev = &t->pdev.dev; struct nfit_test_sec *sec = &dimm_sec_info[dimm]; - if (!(sec->state & ND_INTEL_SEC_STATE_ENABLED) || - (sec->state & ND_INTEL_SEC_STATE_FROZEN)) { + if (sec->state & ND_INTEL_SEC_STATE_FROZEN) { nd_cmd->status = ND_INTEL_STATUS_INVALID_STATE; dev_dbg(dev, "secure erase: wrong security state\n"); } else if (memcmp(nd_cmd->passphrase, sec->passphrase, @@ -1068,6 +1069,12 @@ static int nd_intel_test_cmd_secure_erase(struct nfit_test *t, nd_cmd->status = ND_INTEL_STATUS_INVALID_PASS; dev_dbg(dev, "secure erase: wrong passphrase\n"); } else { + if (!(sec->state & ND_INTEL_SEC_STATE_ENABLED) + && (memcmp(nd_cmd->passphrase, zero_key, + ND_INTEL_PASSPHRASE_SIZE) != 0)) { + dev_dbg(dev, "invalid zero key\n"); + return 0; + } memset(sec->passphrase, 0, ND_INTEL_PASSPHRASE_SIZE); memset(sec->master_passphrase, 0, ND_INTEL_PASSPHRASE_SIZE); sec->state = 0; -- cgit v1.2.3 From d2e5b6436c28e7ee4988497d31122e06217876fb Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Wed, 27 Mar 2019 11:12:45 -0700 Subject: libnvdimm/security, acpi/nfit: unify zero-key for all security commands With zero-key defined, we can remove previous detection of key id 0 or null key in order to deal with a zero-key situation. Syncing all security commands to use the zero-key. Helper functions are introduced to return the data that points to the actual key payload or the zero_key. This helps uniformly handle the key material even with zero_key. Signed-off-by: Dave Jiang Signed-off-by: Dan Williams --- drivers/acpi/nfit/intel.c | 10 ++-- drivers/nvdimm/security.c | 117 +++++++++++++++++++++++++++------------------- 2 files changed, 73 insertions(+), 54 deletions(-) diff --git a/drivers/acpi/nfit/intel.c b/drivers/acpi/nfit/intel.c index f70de71f79d6..cddd0fcf622c 100644 --- a/drivers/acpi/nfit/intel.c +++ b/drivers/acpi/nfit/intel.c @@ -122,9 +122,8 @@ static int intel_security_change_key(struct nvdimm *nvdimm, if (!test_bit(cmd, &nfit_mem->dsm_mask)) return -ENOTTY; - if (old_data) - memcpy(nd_cmd.cmd.old_pass, old_data->data, - sizeof(nd_cmd.cmd.old_pass)); + memcpy(nd_cmd.cmd.old_pass, old_data->data, + sizeof(nd_cmd.cmd.old_pass)); memcpy(nd_cmd.cmd.new_pass, new_data->data, sizeof(nd_cmd.cmd.new_pass)); rc = nvdimm_ctl(nvdimm, ND_CMD_CALL, &nd_cmd, sizeof(nd_cmd), NULL); @@ -336,9 +335,8 @@ static int __maybe_unused intel_security_overwrite(struct nvdimm *nvdimm, /* flush all cache before we erase DIMM */ nvdimm_invalidate_cache(); - if (nkey) - memcpy(nd_cmd.cmd.passphrase, nkey->data, - sizeof(nd_cmd.cmd.passphrase)); + memcpy(nd_cmd.cmd.passphrase, nkey->data, + sizeof(nd_cmd.cmd.passphrase)); rc = nvdimm_ctl(nvdimm, ND_CMD_CALL, &nd_cmd, sizeof(nd_cmd), NULL); if (rc < 0) return rc; diff --git a/drivers/nvdimm/security.c b/drivers/nvdimm/security.c index 6bea6852bf27..a570f2263a42 100644 --- a/drivers/nvdimm/security.c +++ b/drivers/nvdimm/security.c @@ -77,6 +77,16 @@ static struct key *nvdimm_request_key(struct nvdimm *nvdimm) return key; } +static const void *nvdimm_get_key_payload(struct nvdimm *nvdimm, + struct key **key) +{ + *key = nvdimm_request_key(nvdimm); + if (!*key) + return zero_key; + + return key_data(*key); +} + static struct key *nvdimm_lookup_user_key(struct nvdimm *nvdimm, key_serial_t id, int subclass) { @@ -107,36 +117,57 @@ static struct key *nvdimm_lookup_user_key(struct nvdimm *nvdimm, return key; } -static struct key *nvdimm_key_revalidate(struct nvdimm *nvdimm) +static const void *nvdimm_get_user_key_payload(struct nvdimm *nvdimm, + key_serial_t id, int subclass, struct key **key) +{ + *key = NULL; + if (id == 0) { + if (subclass == NVDIMM_BASE_KEY) + return zero_key; + else + return NULL; + } + + *key = nvdimm_lookup_user_key(nvdimm, id, subclass); + if (!*key) + return NULL; + + return key_data(*key); +} + + +static int nvdimm_key_revalidate(struct nvdimm *nvdimm) { struct key *key; int rc; + const void *data; if (!nvdimm->sec.ops->change_key) - return NULL; + return -EOPNOTSUPP; - key = nvdimm_request_key(nvdimm); - if (!key) - return NULL; + data = nvdimm_get_key_payload(nvdimm, &key); /* * Send the same key to the hardware as new and old key to * verify that the key is good. */ - rc = nvdimm->sec.ops->change_key(nvdimm, key_data(key), - key_data(key), NVDIMM_USER); + rc = nvdimm->sec.ops->change_key(nvdimm, data, data, NVDIMM_USER); if (rc < 0) { nvdimm_put_key(key); - key = NULL; + return rc; } - return key; + + nvdimm_put_key(key); + nvdimm->sec.state = nvdimm_security_state(nvdimm, NVDIMM_USER); + return 0; } static int __nvdimm_security_unlock(struct nvdimm *nvdimm) { struct device *dev = &nvdimm->dev; struct nvdimm_bus *nvdimm_bus = walk_to_nvdimm_bus(dev); - struct key *key = NULL; + struct key *key; + const void *data; int rc; /* The bus lock should be held at the top level of the call stack */ @@ -162,16 +193,11 @@ static int __nvdimm_security_unlock(struct nvdimm *nvdimm) if (!key_revalidate) return 0; - key = nvdimm_key_revalidate(nvdimm); - if (!key) - return nvdimm_security_freeze(nvdimm); + return nvdimm_key_revalidate(nvdimm); } else - key = nvdimm_request_key(nvdimm); + data = nvdimm_get_key_payload(nvdimm, &key); - if (!key) - return -ENOKEY; - - rc = nvdimm->sec.ops->unlock(nvdimm, key_data(key)); + rc = nvdimm->sec.ops->unlock(nvdimm, data); dev_dbg(dev, "key: %d unlock: %s\n", key_serial(key), rc == 0 ? "success" : "fail"); @@ -197,6 +223,7 @@ int nvdimm_security_disable(struct nvdimm *nvdimm, unsigned int keyid) struct nvdimm_bus *nvdimm_bus = walk_to_nvdimm_bus(dev); struct key *key; int rc; + const void *data; /* The bus lock should be held at the top level of the call stack */ lockdep_assert_held(&nvdimm_bus->reconfig_mutex); @@ -216,11 +243,12 @@ int nvdimm_security_disable(struct nvdimm *nvdimm, unsigned int keyid) return -EBUSY; } - key = nvdimm_lookup_user_key(nvdimm, keyid, NVDIMM_BASE_KEY); - if (!key) + data = nvdimm_get_user_key_payload(nvdimm, keyid, + NVDIMM_BASE_KEY, &key); + if (!data) return -ENOKEY; - rc = nvdimm->sec.ops->disable(nvdimm, key_data(key)); + rc = nvdimm->sec.ops->disable(nvdimm, data); dev_dbg(dev, "key: %d disable: %s\n", key_serial(key), rc == 0 ? "success" : "fail"); @@ -237,6 +265,7 @@ int nvdimm_security_update(struct nvdimm *nvdimm, unsigned int keyid, struct nvdimm_bus *nvdimm_bus = walk_to_nvdimm_bus(dev); struct key *key, *newkey; int rc; + const void *data, *newdata; /* The bus lock should be held at the top level of the call stack */ lockdep_assert_held(&nvdimm_bus->reconfig_mutex); @@ -251,22 +280,19 @@ int nvdimm_security_update(struct nvdimm *nvdimm, unsigned int keyid, return -EIO; } - if (keyid == 0) - key = NULL; - else { - key = nvdimm_lookup_user_key(nvdimm, keyid, NVDIMM_BASE_KEY); - if (!key) - return -ENOKEY; - } + data = nvdimm_get_user_key_payload(nvdimm, keyid, + NVDIMM_BASE_KEY, &key); + if (!data) + return -ENOKEY; - newkey = nvdimm_lookup_user_key(nvdimm, new_keyid, NVDIMM_NEW_KEY); - if (!newkey) { + newdata = nvdimm_get_user_key_payload(nvdimm, new_keyid, + NVDIMM_NEW_KEY, &newkey); + if (!newdata) { nvdimm_put_key(key); return -ENOKEY; } - rc = nvdimm->sec.ops->change_key(nvdimm, key ? key_data(key) : NULL, - key_data(newkey), pass_type); + rc = nvdimm->sec.ops->change_key(nvdimm, data, newdata, pass_type); dev_dbg(dev, "key: %d %d update%s: %s\n", key_serial(key), key_serial(newkey), pass_type == NVDIMM_MASTER ? "(master)" : "(user)", @@ -322,13 +348,10 @@ int nvdimm_security_erase(struct nvdimm *nvdimm, unsigned int keyid, return -EOPNOTSUPP; } - if (keyid != 0) { - key = nvdimm_lookup_user_key(nvdimm, keyid, NVDIMM_BASE_KEY); - if (!key) - return -ENOKEY; - data = key_data(key); - } else - data = zero_key; + data = nvdimm_get_user_key_payload(nvdimm, keyid, + NVDIMM_BASE_KEY, &key); + if (!data) + return -ENOKEY; rc = nvdimm->sec.ops->erase(nvdimm, data, pass_type); dev_dbg(dev, "key: %d erase%s: %s\n", key_serial(key), @@ -344,8 +367,9 @@ int nvdimm_security_overwrite(struct nvdimm *nvdimm, unsigned int keyid) { struct device *dev = &nvdimm->dev; struct nvdimm_bus *nvdimm_bus = walk_to_nvdimm_bus(dev); - struct key *key; + struct key *key = NULL; int rc; + const void *data; /* The bus lock should be held at the top level of the call stack */ lockdep_assert_held(&nvdimm_bus->reconfig_mutex); @@ -375,15 +399,12 @@ int nvdimm_security_overwrite(struct nvdimm *nvdimm, unsigned int keyid) return -EBUSY; } - if (keyid == 0) - key = NULL; - else { - key = nvdimm_lookup_user_key(nvdimm, keyid, NVDIMM_BASE_KEY); - if (!key) - return -ENOKEY; - } + data = nvdimm_get_user_key_payload(nvdimm, keyid, + NVDIMM_BASE_KEY, &key); + if (!data) + return -ENOKEY; - rc = nvdimm->sec.ops->overwrite(nvdimm, key ? key_data(key) : NULL); + rc = nvdimm->sec.ops->overwrite(nvdimm, data); dev_dbg(dev, "key: %d overwrite submission: %s\n", key_serial(key), rc == 0 ? "success" : "fail"); -- cgit v1.2.3 From 288ac524cf70a8e7ed851a61ed2a9744039dae8d Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Sat, 30 Mar 2019 17:13:24 +0100 Subject: r8169: disable default rx interrupt coalescing on RTL8168 It was reported that re-introducing ASPM, in combination with RX interrupt coalescing, results in significantly increased packet latency, see [0]. Disabling ASPM or RX interrupt coalescing fixes the issue. Therefore change the driver's default to disable RX interrupt coalescing. Users still have the option to enable RX coalescing via ethtool. [0] https://bugs.debian.org/cgi-bin/bugreport.cgi?bug=925496 Fixes: a99790bf5c7f ("r8169: Reinstate ASPM Support") Reported-by: Mike Crowe Signed-off-by: Heiner Kallweit Signed-off-by: David S. Miller --- drivers/net/ethernet/realtek/r8169.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/realtek/r8169.c b/drivers/net/ethernet/realtek/r8169.c index 7562ccbbb39a..19efa88f3f02 100644 --- a/drivers/net/ethernet/realtek/r8169.c +++ b/drivers/net/ethernet/realtek/r8169.c @@ -5460,7 +5460,7 @@ static void rtl_hw_start_8168(struct rtl8169_private *tp) tp->cp_cmd |= PktCntrDisable | INTT_1; RTL_W16(tp, CPlusCmd, tp->cp_cmd); - RTL_W16(tp, IntrMitigate, 0x5151); + RTL_W16(tp, IntrMitigate, 0x5100); /* Work around for RxFIFO overflow. */ if (tp->mac_version == RTL_GIGA_MAC_VER_11) { -- cgit v1.2.3 From 7f75591fc5a123929a29636834d1bcb8b5c9fee3 Mon Sep 17 00:00:00 2001 From: Fabrice Gasnier Date: Mon, 25 Mar 2019 14:01:23 +0100 Subject: iio: core: fix a possible circular locking dependency This fixes a possible circular locking dependency detected warning seen with: - CONFIG_PROVE_LOCKING=y - consumer/provider IIO devices (ex: "voltage-divider" consumer of "adc") When using the IIO consumer interface, e.g. iio_channel_get(), the consumer device will likely call iio_read_channel_raw() or similar that rely on 'info_exist_lock' mutex. typically: ... mutex_lock(&chan->indio_dev->info_exist_lock); if (chan->indio_dev->info == NULL) { ret = -ENODEV; goto err_unlock; } ret = do_some_ops() err_unlock: mutex_unlock(&chan->indio_dev->info_exist_lock); return ret; ... Same mutex is also hold in iio_device_unregister(). The following deadlock warning happens when: - the consumer device has called an API like iio_read_channel_raw() at least once. - the consumer driver is unregistered, removed (unbind from sysfs) ====================================================== WARNING: possible circular locking dependency detected 4.19.24 #577 Not tainted ------------------------------------------------------ sh/372 is trying to acquire lock: (kn->count#30){++++}, at: kernfs_remove_by_name_ns+0x3c/0x84 but task is already holding lock: (&dev->info_exist_lock){+.+.}, at: iio_device_unregister+0x18/0x60 which lock already depends on the new lock. the existing dependency chain (in reverse order) is: -> #1 (&dev->info_exist_lock){+.+.}: __mutex_lock+0x70/0xa3c mutex_lock_nested+0x1c/0x24 iio_read_channel_raw+0x1c/0x60 iio_read_channel_info+0xa8/0xb0 dev_attr_show+0x1c/0x48 sysfs_kf_seq_show+0x84/0xec seq_read+0x154/0x528 __vfs_read+0x2c/0x15c vfs_read+0x8c/0x110 ksys_read+0x4c/0xac ret_fast_syscall+0x0/0x28 0xbedefb60 -> #0 (kn->count#30){++++}: lock_acquire+0xd8/0x268 __kernfs_remove+0x288/0x374 kernfs_remove_by_name_ns+0x3c/0x84 remove_files+0x34/0x78 sysfs_remove_group+0x40/0x9c sysfs_remove_groups+0x24/0x34 device_remove_attrs+0x38/0x64 device_del+0x11c/0x360 cdev_device_del+0x14/0x2c iio_device_unregister+0x24/0x60 release_nodes+0x1bc/0x200 device_release_driver_internal+0x1a0/0x230 unbind_store+0x80/0x130 kernfs_fop_write+0x100/0x1e4 __vfs_write+0x2c/0x160 vfs_write+0xa4/0x17c ksys_write+0x4c/0xac ret_fast_syscall+0x0/0x28 0xbe906840 other info that might help us debug this: Possible unsafe locking scenario: CPU0 CPU1 ---- ---- lock(&dev->info_exist_lock); lock(kn->count#30); lock(&dev->info_exist_lock); lock(kn->count#30); *** DEADLOCK *** ... cdev_device_del() can be called without holding the lock. It should be safe as info_exist_lock prevents kernelspace consumers to use the exported routines during/after provider removal. cdev_device_del() is for userspace. Help to reproduce: See example: Documentation/devicetree/bindings/iio/afe/voltage-divider.txt sysv { compatible = "voltage-divider"; io-channels = <&adc 0>; output-ohms = <22>; full-ohms = <222>; }; First, go to iio:deviceX for the "voltage-divider", do one read: $ cd /sys/bus/iio/devices/iio:deviceX $ cat in_voltage0_raw Then, unbind the consumer driver. It triggers above deadlock warning. $ cd /sys/bus/platform/drivers/iio-rescale/ $ echo sysv > unbind Note I don't actually expect stable will pick this up all the way back into IIO being in staging, but if's probably valid that far back. Signed-off-by: Fabrice Gasnier Fixes: ac917a81117c ("staging:iio:core set the iio_dev.info pointer to null on unregister") Cc: Signed-off-by: Jonathan Cameron --- drivers/iio/industrialio-core.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/iio/industrialio-core.c b/drivers/iio/industrialio-core.c index 4700fd5d8c90..9c4d92115504 100644 --- a/drivers/iio/industrialio-core.c +++ b/drivers/iio/industrialio-core.c @@ -1743,10 +1743,10 @@ EXPORT_SYMBOL(__iio_device_register); **/ void iio_device_unregister(struct iio_dev *indio_dev) { - mutex_lock(&indio_dev->info_exist_lock); - cdev_device_del(&indio_dev->chrdev, &indio_dev->dev); + mutex_lock(&indio_dev->info_exist_lock); + iio_device_unregister_debugfs(indio_dev); iio_disable_all_buffers(indio_dev); -- cgit v1.2.3 From 583e6361414903c5206258a30e5bd88cb03c0254 Mon Sep 17 00:00:00 2001 From: Aaro Koskinen Date: Wed, 27 Mar 2019 22:35:35 +0200 Subject: net: stmmac: use correct DMA buffer size in the RX descriptor We always program the maximum DMA buffer size into the receive descriptor, although the allocated size may be less. E.g. with the default MTU size we allocate only 1536 bytes. If somebody sends us a bigger frame, then memory may get corrupted. Fix by using exact buffer sizes. Signed-off-by: Aaro Koskinen Signed-off-by: David S. Miller --- drivers/net/ethernet/stmicro/stmmac/descs_com.h | 22 ++++++++++++++-------- drivers/net/ethernet/stmicro/stmmac/dwmac4_descs.c | 2 +- .../net/ethernet/stmicro/stmmac/dwxgmac2_descs.c | 2 +- drivers/net/ethernet/stmicro/stmmac/enh_desc.c | 10 +++++++--- drivers/net/ethernet/stmicro/stmmac/hwif.h | 2 +- drivers/net/ethernet/stmicro/stmmac/norm_desc.c | 10 +++++++--- drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 6 ++++-- 7 files changed, 35 insertions(+), 19 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/descs_com.h b/drivers/net/ethernet/stmicro/stmmac/descs_com.h index 40d6356a7e73..3dfb07a78952 100644 --- a/drivers/net/ethernet/stmicro/stmmac/descs_com.h +++ b/drivers/net/ethernet/stmicro/stmmac/descs_com.h @@ -29,11 +29,13 @@ /* Specific functions used for Ring mode */ /* Enhanced descriptors */ -static inline void ehn_desc_rx_set_on_ring(struct dma_desc *p, int end) +static inline void ehn_desc_rx_set_on_ring(struct dma_desc *p, int end, + int bfsize) { - p->des1 |= cpu_to_le32((BUF_SIZE_8KiB - << ERDES1_BUFFER2_SIZE_SHIFT) - & ERDES1_BUFFER2_SIZE_MASK); + if (bfsize == BUF_SIZE_16KiB) + p->des1 |= cpu_to_le32((BUF_SIZE_8KiB + << ERDES1_BUFFER2_SIZE_SHIFT) + & ERDES1_BUFFER2_SIZE_MASK); if (end) p->des1 |= cpu_to_le32(ERDES1_END_RING); @@ -59,11 +61,15 @@ static inline void enh_set_tx_desc_len_on_ring(struct dma_desc *p, int len) } /* Normal descriptors */ -static inline void ndesc_rx_set_on_ring(struct dma_desc *p, int end) +static inline void ndesc_rx_set_on_ring(struct dma_desc *p, int end, int bfsize) { - p->des1 |= cpu_to_le32(((BUF_SIZE_2KiB - 1) - << RDES1_BUFFER2_SIZE_SHIFT) - & RDES1_BUFFER2_SIZE_MASK); + if (bfsize >= BUF_SIZE_2KiB) { + int bfsize2; + + bfsize2 = min(bfsize - BUF_SIZE_2KiB + 1, BUF_SIZE_2KiB - 1); + p->des1 |= cpu_to_le32((bfsize2 << RDES1_BUFFER2_SIZE_SHIFT) + & RDES1_BUFFER2_SIZE_MASK); + } if (end) p->des1 |= cpu_to_le32(RDES1_END_RING); diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac4_descs.c b/drivers/net/ethernet/stmicro/stmmac/dwmac4_descs.c index 7fbb6a4dbf51..e061e9f5fad7 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac4_descs.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac4_descs.c @@ -296,7 +296,7 @@ exit: } static void dwmac4_rd_init_rx_desc(struct dma_desc *p, int disable_rx_ic, - int mode, int end) + int mode, int end, int bfsize) { dwmac4_set_rx_owner(p, disable_rx_ic); } diff --git a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_descs.c b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_descs.c index 1d858fdec997..98fa471da7c0 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_descs.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_descs.c @@ -123,7 +123,7 @@ static int dwxgmac2_get_rx_timestamp_status(void *desc, void *next_desc, } static void dwxgmac2_init_rx_desc(struct dma_desc *p, int disable_rx_ic, - int mode, int end) + int mode, int end, int bfsize) { dwxgmac2_set_rx_owner(p, disable_rx_ic); } diff --git a/drivers/net/ethernet/stmicro/stmmac/enh_desc.c b/drivers/net/ethernet/stmicro/stmmac/enh_desc.c index 5ef91a790f9d..e8855e6adb48 100644 --- a/drivers/net/ethernet/stmicro/stmmac/enh_desc.c +++ b/drivers/net/ethernet/stmicro/stmmac/enh_desc.c @@ -259,15 +259,19 @@ static int enh_desc_get_rx_status(void *data, struct stmmac_extra_stats *x, } static void enh_desc_init_rx_desc(struct dma_desc *p, int disable_rx_ic, - int mode, int end) + int mode, int end, int bfsize) { + int bfsize1; + p->des0 |= cpu_to_le32(RDES0_OWN); - p->des1 |= cpu_to_le32(BUF_SIZE_8KiB & ERDES1_BUFFER1_SIZE_MASK); + + bfsize1 = min(bfsize, BUF_SIZE_8KiB); + p->des1 |= cpu_to_le32(bfsize1 & ERDES1_BUFFER1_SIZE_MASK); if (mode == STMMAC_CHAIN_MODE) ehn_desc_rx_set_on_chain(p); else - ehn_desc_rx_set_on_ring(p, end); + ehn_desc_rx_set_on_ring(p, end, bfsize); if (disable_rx_ic) p->des1 |= cpu_to_le32(ERDES1_DISABLE_IC); diff --git a/drivers/net/ethernet/stmicro/stmmac/hwif.h b/drivers/net/ethernet/stmicro/stmmac/hwif.h index 92b8944f26e3..5bb00234d961 100644 --- a/drivers/net/ethernet/stmicro/stmmac/hwif.h +++ b/drivers/net/ethernet/stmicro/stmmac/hwif.h @@ -33,7 +33,7 @@ struct dma_extended_desc; struct stmmac_desc_ops { /* DMA RX descriptor ring initialization */ void (*init_rx_desc)(struct dma_desc *p, int disable_rx_ic, int mode, - int end); + int end, int bfsize); /* DMA TX descriptor ring initialization */ void (*init_tx_desc)(struct dma_desc *p, int mode, int end); /* Invoked by the xmit function to prepare the tx descriptor */ diff --git a/drivers/net/ethernet/stmicro/stmmac/norm_desc.c b/drivers/net/ethernet/stmicro/stmmac/norm_desc.c index de65bb29feba..c55a9815b394 100644 --- a/drivers/net/ethernet/stmicro/stmmac/norm_desc.c +++ b/drivers/net/ethernet/stmicro/stmmac/norm_desc.c @@ -135,15 +135,19 @@ static int ndesc_get_rx_status(void *data, struct stmmac_extra_stats *x, } static void ndesc_init_rx_desc(struct dma_desc *p, int disable_rx_ic, int mode, - int end) + int end, int bfsize) { + int bfsize1; + p->des0 |= cpu_to_le32(RDES0_OWN); - p->des1 |= cpu_to_le32((BUF_SIZE_2KiB - 1) & RDES1_BUFFER1_SIZE_MASK); + + bfsize1 = min(bfsize, BUF_SIZE_2KiB - 1); + p->des1 |= cpu_to_le32(bfsize & RDES1_BUFFER1_SIZE_MASK); if (mode == STMMAC_CHAIN_MODE) ndesc_rx_set_on_chain(p, end); else - ndesc_rx_set_on_ring(p, end); + ndesc_rx_set_on_ring(p, end, bfsize); if (disable_rx_ic) p->des1 |= cpu_to_le32(RDES1_DISABLE_IC); diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index 6a2e1031a62a..4e496cf655f2 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -1136,11 +1136,13 @@ static void stmmac_clear_rx_descriptors(struct stmmac_priv *priv, u32 queue) if (priv->extend_desc) stmmac_init_rx_desc(priv, &rx_q->dma_erx[i].basic, priv->use_riwt, priv->mode, - (i == DMA_RX_SIZE - 1)); + (i == DMA_RX_SIZE - 1), + priv->dma_buf_sz); else stmmac_init_rx_desc(priv, &rx_q->dma_rx[i], priv->use_riwt, priv->mode, - (i == DMA_RX_SIZE - 1)); + (i == DMA_RX_SIZE - 1), + priv->dma_buf_sz); } /** -- cgit v1.2.3 From 972c9be784e077bc56472c78243e0326e525b689 Mon Sep 17 00:00:00 2001 From: Aaro Koskinen Date: Wed, 27 Mar 2019 22:35:36 +0200 Subject: net: stmmac: ratelimit RX error logs Ratelimit RX error logs. Signed-off-by: Aaro Koskinen Signed-off-by: David S. Miller --- drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index 4e496cf655f2..392d94cede17 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -3433,9 +3433,10 @@ static int stmmac_rx(struct stmmac_priv *priv, int limit, u32 queue) * ignored */ if (frame_len > priv->dma_buf_sz) { - netdev_err(priv->dev, - "len %d larger than size (%d)\n", - frame_len, priv->dma_buf_sz); + if (net_ratelimit()) + netdev_err(priv->dev, + "len %d larger than size (%d)\n", + frame_len, priv->dma_buf_sz); priv->dev->stats.rx_length_errors++; break; } @@ -3492,9 +3493,10 @@ static int stmmac_rx(struct stmmac_priv *priv, int limit, u32 queue) } else { skb = rx_q->rx_skbuff[entry]; if (unlikely(!skb)) { - netdev_err(priv->dev, - "%s: Inconsistent Rx chain\n", - priv->dev->name); + if (net_ratelimit()) + netdev_err(priv->dev, + "%s: Inconsistent Rx chain\n", + priv->dev->name); priv->dev->stats.rx_dropped++; break; } -- cgit v1.2.3 From 07b3975352374c3f5ebb4a42ef0b253fe370542d Mon Sep 17 00:00:00 2001 From: Aaro Koskinen Date: Wed, 27 Mar 2019 22:35:37 +0200 Subject: net: stmmac: don't stop NAPI processing when dropping a packet Currently, if we drop a packet, we exit from NAPI loop before the budget is consumed. In some situations this will make the RX processing stall e.g. when flood pinging the system with oversized packets, as the errorneous packets are not dropped efficiently. If we drop a packet, we should just continue to the next one as long as the budget allows. Signed-off-by: Aaro Koskinen Signed-off-by: David S. Miller --- drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index 392d94cede17..a26e36dbb5df 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -3354,9 +3354,8 @@ static int stmmac_rx(struct stmmac_priv *priv, int limit, u32 queue) { struct stmmac_rx_queue *rx_q = &priv->rx_queue[queue]; struct stmmac_channel *ch = &priv->channel[queue]; - unsigned int entry = rx_q->cur_rx; + unsigned int next_entry = rx_q->cur_rx; int coe = priv->hw->rx_csum; - unsigned int next_entry; unsigned int count = 0; bool xmac; @@ -3374,10 +3373,12 @@ static int stmmac_rx(struct stmmac_priv *priv, int limit, u32 queue) stmmac_display_ring(priv, rx_head, DMA_RX_SIZE, true); } while (count < limit) { - int status; + int entry, status; struct dma_desc *p; struct dma_desc *np; + entry = next_entry; + if (priv->extend_desc) p = (struct dma_desc *)(rx_q->dma_erx + entry); else @@ -3438,7 +3439,7 @@ static int stmmac_rx(struct stmmac_priv *priv, int limit, u32 queue) "len %d larger than size (%d)\n", frame_len, priv->dma_buf_sz); priv->dev->stats.rx_length_errors++; - break; + continue; } /* ACS is set; GMAC core strips PAD/FCS for IEEE 802.3 @@ -3473,7 +3474,7 @@ static int stmmac_rx(struct stmmac_priv *priv, int limit, u32 queue) dev_warn(priv->device, "packet dropped\n"); priv->dev->stats.rx_dropped++; - break; + continue; } dma_sync_single_for_cpu(priv->device, @@ -3498,7 +3499,7 @@ static int stmmac_rx(struct stmmac_priv *priv, int limit, u32 queue) "%s: Inconsistent Rx chain\n", priv->dev->name); priv->dev->stats.rx_dropped++; - break; + continue; } prefetch(skb->data - NET_IP_ALIGN); rx_q->rx_skbuff[entry] = NULL; @@ -3533,7 +3534,6 @@ static int stmmac_rx(struct stmmac_priv *priv, int limit, u32 queue) priv->dev->stats.rx_packets++; priv->dev->stats.rx_bytes += frame_len; } - entry = next_entry; } stmmac_rx_refill(priv, queue); -- cgit v1.2.3 From 1b746ce8b397e58f9e40ce5c63b7198de6930482 Mon Sep 17 00:00:00 2001 From: Aaro Koskinen Date: Wed, 27 Mar 2019 22:35:38 +0200 Subject: net: stmmac: don't overwrite discard_frame status If we have error bits set, the discard_frame status will get overwritten by checksum bit checks, which might set the status back to good one. Fix by checking the COE status only if the frame is good. Signed-off-by: Aaro Koskinen Signed-off-by: David S. Miller --- drivers/net/ethernet/stmicro/stmmac/enh_desc.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/enh_desc.c b/drivers/net/ethernet/stmicro/stmmac/enh_desc.c index e8855e6adb48..c42ef6c729c0 100644 --- a/drivers/net/ethernet/stmicro/stmmac/enh_desc.c +++ b/drivers/net/ethernet/stmicro/stmmac/enh_desc.c @@ -231,9 +231,10 @@ static int enh_desc_get_rx_status(void *data, struct stmmac_extra_stats *x, * It doesn't match with the information reported into the databook. * At any rate, we need to understand if the CSUM hw computation is ok * and report this info to the upper layers. */ - ret = enh_desc_coe_rdes0(!!(rdes0 & RDES0_IPC_CSUM_ERROR), - !!(rdes0 & RDES0_FRAME_TYPE), - !!(rdes0 & ERDES0_RX_MAC_ADDR)); + if (likely(ret == good_frame)) + ret = enh_desc_coe_rdes0(!!(rdes0 & RDES0_IPC_CSUM_ERROR), + !!(rdes0 & RDES0_FRAME_TYPE), + !!(rdes0 & ERDES0_RX_MAC_ADDR)); if (unlikely(rdes0 & RDES0_DRIBBLING)) x->dribbling_bit++; -- cgit v1.2.3 From 8ac0c24fe1c256af6644caf3d311029440ec2fbd Mon Sep 17 00:00:00 2001 From: Aaro Koskinen Date: Wed, 27 Mar 2019 22:35:39 +0200 Subject: net: stmmac: fix dropping of multi-descriptor RX frames Packets without the last descriptor set should be dropped early. If we receive a frame larger than the DMA buffer, the HW will continue using the next descriptor. Driver mistakes these as individual frames, and sometimes a truncated frame (without the LD set) may look like a valid packet. This fixes a strange issue where the system replies to 4098-byte ping although the MTU/DMA buffer size is set to 4096, and yet at the same time it's logging an oversized packet. Signed-off-by: Aaro Koskinen Signed-off-by: David S. Miller --- drivers/net/ethernet/stmicro/stmmac/enh_desc.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/net/ethernet/stmicro/stmmac/enh_desc.c b/drivers/net/ethernet/stmicro/stmmac/enh_desc.c index c42ef6c729c0..5202d6ad7919 100644 --- a/drivers/net/ethernet/stmicro/stmmac/enh_desc.c +++ b/drivers/net/ethernet/stmicro/stmmac/enh_desc.c @@ -201,6 +201,11 @@ static int enh_desc_get_rx_status(void *data, struct stmmac_extra_stats *x, if (unlikely(rdes0 & RDES0_OWN)) return dma_own; + if (unlikely(!(rdes0 & RDES0_LAST_DESCRIPTOR))) { + stats->rx_length_errors++; + return discard_frame; + } + if (unlikely(rdes0 & RDES0_ERROR_SUMMARY)) { if (unlikely(rdes0 & RDES0_DESCRIPTOR_ERROR)) { x->rx_desc++; -- cgit v1.2.3 From 057a0c5642a2ff2db7c421cdcde34294a23bf37b Mon Sep 17 00:00:00 2001 From: Aaro Koskinen Date: Wed, 27 Mar 2019 22:35:40 +0200 Subject: net: stmmac: don't log oversized frames This is log is harmful as it can trigger multiple times per packet. Delete it. Signed-off-by: Aaro Koskinen Signed-off-by: David S. Miller --- drivers/net/ethernet/stmicro/stmmac/norm_desc.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/norm_desc.c b/drivers/net/ethernet/stmicro/stmmac/norm_desc.c index c55a9815b394..b7dd4e3c760d 100644 --- a/drivers/net/ethernet/stmicro/stmmac/norm_desc.c +++ b/drivers/net/ethernet/stmicro/stmmac/norm_desc.c @@ -91,8 +91,6 @@ static int ndesc_get_rx_status(void *data, struct stmmac_extra_stats *x, return dma_own; if (unlikely(!(rdes0 & RDES0_LAST_DESCRIPTOR))) { - pr_warn("%s: Oversized frame spanned multiple buffers\n", - __func__); stats->rx_length_errors++; return discard_frame; } -- cgit v1.2.3 From 6f07e5f06c8712acc423485f657799fc8e11e56c Mon Sep 17 00:00:00 2001 From: Xin Long Date: Sun, 31 Mar 2019 22:50:08 +0800 Subject: tipc: check bearer name with right length in tipc_nl_compat_bearer_enable Syzbot reported the following crash: BUG: KMSAN: uninit-value in memchr+0xce/0x110 lib/string.c:961 memchr+0xce/0x110 lib/string.c:961 string_is_valid net/tipc/netlink_compat.c:176 [inline] tipc_nl_compat_bearer_enable+0x2c4/0x910 net/tipc/netlink_compat.c:401 __tipc_nl_compat_doit net/tipc/netlink_compat.c:321 [inline] tipc_nl_compat_doit+0x3aa/0xaf0 net/tipc/netlink_compat.c:354 tipc_nl_compat_handle net/tipc/netlink_compat.c:1162 [inline] tipc_nl_compat_recv+0x1ae7/0x2750 net/tipc/netlink_compat.c:1265 genl_family_rcv_msg net/netlink/genetlink.c:601 [inline] genl_rcv_msg+0x185f/0x1a60 net/netlink/genetlink.c:626 netlink_rcv_skb+0x431/0x620 net/netlink/af_netlink.c:2477 genl_rcv+0x63/0x80 net/netlink/genetlink.c:637 netlink_unicast_kernel net/netlink/af_netlink.c:1310 [inline] netlink_unicast+0xf3e/0x1020 net/netlink/af_netlink.c:1336 netlink_sendmsg+0x127f/0x1300 net/netlink/af_netlink.c:1917 sock_sendmsg_nosec net/socket.c:622 [inline] sock_sendmsg net/socket.c:632 [inline] Uninit was created at: __alloc_skb+0x309/0xa20 net/core/skbuff.c:208 alloc_skb include/linux/skbuff.h:1012 [inline] netlink_alloc_large_skb net/netlink/af_netlink.c:1182 [inline] netlink_sendmsg+0xb82/0x1300 net/netlink/af_netlink.c:1892 sock_sendmsg_nosec net/socket.c:622 [inline] sock_sendmsg net/socket.c:632 [inline] It was triggered when the bearer name size < TIPC_MAX_BEARER_NAME, it would check with a wrong len/TLV_GET_DATA_LEN(msg->req), which also includes priority and disc_domain length. This patch is to fix it by checking it with a right length: 'TLV_GET_DATA_LEN(msg->req) - offsetof(struct tipc_bearer_config, name)'. Reported-by: syzbot+8b707430713eb46e1e45@syzkaller.appspotmail.com Signed-off-by: Xin Long Signed-off-by: David S. Miller --- net/tipc/netlink_compat.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/net/tipc/netlink_compat.c b/net/tipc/netlink_compat.c index 4ad3586da8f0..5f8e53cca222 100644 --- a/net/tipc/netlink_compat.c +++ b/net/tipc/netlink_compat.c @@ -397,7 +397,12 @@ static int tipc_nl_compat_bearer_enable(struct tipc_nl_compat_cmd_doit *cmd, if (!bearer) return -EMSGSIZE; - len = min_t(int, TLV_GET_DATA_LEN(msg->req), TIPC_MAX_BEARER_NAME); + len = TLV_GET_DATA_LEN(msg->req); + len -= offsetof(struct tipc_bearer_config, name); + if (len <= 0) + return -EINVAL; + + len = min_t(int, len, TIPC_MAX_BEARER_NAME); if (!string_is_valid(b->name, len)) return -EINVAL; -- cgit v1.2.3 From 8c63bf9ab4be8b83bd8c34aacfd2f1d2c8901c8a Mon Sep 17 00:00:00 2001 From: Xin Long Date: Sun, 31 Mar 2019 22:50:09 +0800 Subject: tipc: check link name with right length in tipc_nl_compat_link_set A similar issue as fixed by Patch "tipc: check bearer name with right length in tipc_nl_compat_bearer_enable" was also found by syzbot in tipc_nl_compat_link_set(). The length to check with should be 'TLV_GET_DATA_LEN(msg->req) - offsetof(struct tipc_link_config, name)'. Reported-by: syzbot+de00a87b8644a582ae79@syzkaller.appspotmail.com Signed-off-by: Xin Long Signed-off-by: David S. Miller --- net/tipc/netlink_compat.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/net/tipc/netlink_compat.c b/net/tipc/netlink_compat.c index 5f8e53cca222..0bfd03d67fdd 100644 --- a/net/tipc/netlink_compat.c +++ b/net/tipc/netlink_compat.c @@ -771,7 +771,12 @@ static int tipc_nl_compat_link_set(struct tipc_nl_compat_cmd_doit *cmd, lc = (struct tipc_link_config *)TLV_DATA(msg->req); - len = min_t(int, TLV_GET_DATA_LEN(msg->req), TIPC_MAX_LINK_NAME); + len = TLV_GET_DATA_LEN(msg->req); + len -= offsetof(struct tipc_link_config, name); + if (len <= 0) + return -EINVAL; + + len = min_t(int, len, TIPC_MAX_LINK_NAME); if (!string_is_valid(lc->name, len)) return -EINVAL; -- cgit v1.2.3 From 2ac695d1d602ce00b12170242f58c3d3a8e36d04 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Sun, 31 Mar 2019 22:50:10 +0800 Subject: tipc: handle the err returned from cmd header function Syzbot found a crash: BUG: KMSAN: uninit-value in tipc_nl_compat_name_table_dump+0x54f/0xcd0 net/tipc/netlink_compat.c:872 Call Trace: tipc_nl_compat_name_table_dump+0x54f/0xcd0 net/tipc/netlink_compat.c:872 __tipc_nl_compat_dumpit+0x59e/0xda0 net/tipc/netlink_compat.c:215 tipc_nl_compat_dumpit+0x63a/0x820 net/tipc/netlink_compat.c:280 tipc_nl_compat_handle net/tipc/netlink_compat.c:1226 [inline] tipc_nl_compat_recv+0x1b5f/0x2750 net/tipc/netlink_compat.c:1265 genl_family_rcv_msg net/netlink/genetlink.c:601 [inline] genl_rcv_msg+0x185f/0x1a60 net/netlink/genetlink.c:626 netlink_rcv_skb+0x431/0x620 net/netlink/af_netlink.c:2477 genl_rcv+0x63/0x80 net/netlink/genetlink.c:637 netlink_unicast_kernel net/netlink/af_netlink.c:1310 [inline] netlink_unicast+0xf3e/0x1020 net/netlink/af_netlink.c:1336 netlink_sendmsg+0x127f/0x1300 net/netlink/af_netlink.c:1917 sock_sendmsg_nosec net/socket.c:622 [inline] sock_sendmsg net/socket.c:632 [inline] Uninit was created at: __alloc_skb+0x309/0xa20 net/core/skbuff.c:208 alloc_skb include/linux/skbuff.h:1012 [inline] netlink_alloc_large_skb net/netlink/af_netlink.c:1182 [inline] netlink_sendmsg+0xb82/0x1300 net/netlink/af_netlink.c:1892 sock_sendmsg_nosec net/socket.c:622 [inline] sock_sendmsg net/socket.c:632 [inline] It was supposed to be fixed on commit 974cb0e3e7c9 ("tipc: fix uninit-value in tipc_nl_compat_name_table_dump") by checking TLV_GET_DATA_LEN(msg->req) in cmd->header()/tipc_nl_compat_name_table_dump_header(), which is called ahead of tipc_nl_compat_name_table_dump(). However, tipc_nl_compat_dumpit() doesn't handle the error returned from cmd header function. It means even when the check added in that fix fails, it won't stop calling tipc_nl_compat_name_table_dump(), and the issue will be triggered again. So this patch is to add the process for the err returned from cmd header function in tipc_nl_compat_dumpit(). Reported-by: syzbot+3ce8520484b0d4e260a5@syzkaller.appspotmail.com Signed-off-by: Xin Long Signed-off-by: David S. Miller --- net/tipc/netlink_compat.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/net/tipc/netlink_compat.c b/net/tipc/netlink_compat.c index 0bfd03d67fdd..340a6e7c43a7 100644 --- a/net/tipc/netlink_compat.c +++ b/net/tipc/netlink_compat.c @@ -267,8 +267,14 @@ static int tipc_nl_compat_dumpit(struct tipc_nl_compat_cmd_dump *cmd, if (msg->rep_type) tipc_tlv_init(msg->rep, msg->rep_type); - if (cmd->header) - (*cmd->header)(msg); + if (cmd->header) { + err = (*cmd->header)(msg); + if (err) { + kfree_skb(msg->rep); + msg->rep = NULL; + return err; + } + } arg = nlmsg_new(0, GFP_KERNEL); if (!arg) { -- cgit v1.2.3 From c63adb28f6d913310430f14c69f0a2ea55eed0cc Mon Sep 17 00:00:00 2001 From: Annaliese McDermond Date: Sat, 30 Mar 2019 09:02:02 -0700 Subject: ASoC: tlv320aic32x4: Fix Common Pins The common pins were mistakenly not added to the DAPM graph. Adding these pins will allow valid graphs to be created. Signed-off-by: Annaliese McDermond Signed-off-by: Mark Brown --- sound/soc/codecs/tlv320aic32x4.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sound/soc/codecs/tlv320aic32x4.c b/sound/soc/codecs/tlv320aic32x4.c index 96f1526cb258..5520044929f4 100644 --- a/sound/soc/codecs/tlv320aic32x4.c +++ b/sound/soc/codecs/tlv320aic32x4.c @@ -490,6 +490,8 @@ static const struct snd_soc_dapm_widget aic32x4_dapm_widgets[] = { SND_SOC_DAPM_INPUT("IN2_R"), SND_SOC_DAPM_INPUT("IN3_L"), SND_SOC_DAPM_INPUT("IN3_R"), + SND_SOC_DAPM_INPUT("CM_L"), + SND_SOC_DAPM_INPUT("CM_R"), }; static const struct snd_soc_dapm_route aic32x4_dapm_routes[] = { -- cgit v1.2.3 From fd427103e8dfcb4b438269afd710b63e7af61463 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Mon, 25 Mar 2019 08:43:33 +0000 Subject: powerpc/32: Fix early boot failure with RTAS built-in Commit 0df977eafc79 ("powerpc/6xx: Don't use SPRN_SPRG2 for storing stack pointer while in RTAS") changes the code to use a field in thread struct to store the stack pointer while in RTAS instead of using SPRN_SPRG2. It therefore converts all places which were manipulating SPRN_SPRG2 to use that field. During early startup, the zeroing of SPRN_SPRG2 has been replaced by a zeroing of that field in thread struct. But at least in start_here, that's done wrongly because it used the physical address of the fields while MMU is on at that time. So the virtual address of the field should be used instead, but in the meantime, thread struct has already been zeroed and initialised so we can just drop this initialisation. Reported-by: Larry Finger Fixes: 0df977eafc79 ("powerpc/6xx: Don't use SPRN_SPRG2 for storing stack pointer while in RTAS") Signed-off-by: Christophe Leroy Tested-by: Larry Finger Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/head_32.S | 8 -------- 1 file changed, 8 deletions(-) diff --git a/arch/powerpc/kernel/head_32.S b/arch/powerpc/kernel/head_32.S index 48051c8977c5..e25b615e9f9e 100644 --- a/arch/powerpc/kernel/head_32.S +++ b/arch/powerpc/kernel/head_32.S @@ -851,10 +851,6 @@ __secondary_start: tophys(r4,r2) addi r4,r4,THREAD /* phys address of our thread_struct */ mtspr SPRN_SPRG_THREAD,r4 -#ifdef CONFIG_PPC_RTAS - li r3,0 - stw r3, RTAS_SP(r4) /* 0 => not in RTAS */ -#endif lis r4, (swapper_pg_dir - PAGE_OFFSET)@h ori r4, r4, (swapper_pg_dir - PAGE_OFFSET)@l mtspr SPRN_SPRG_PGDIR, r4 @@ -941,10 +937,6 @@ start_here: tophys(r4,r2) addi r4,r4,THREAD /* init task's THREAD */ mtspr SPRN_SPRG_THREAD,r4 -#ifdef CONFIG_PPC_RTAS - li r3,0 - stw r3, RTAS_SP(r4) /* 0 => not in RTAS */ -#endif lis r4, (swapper_pg_dir - PAGE_OFFSET)@h ori r4, r4, (swapper_pg_dir - PAGE_OFFSET)@l mtspr SPRN_SPRG_PGDIR, r4 -- cgit v1.2.3 From 42b1bd33dcdef4ffd98f695e188bab82f9fa46d8 Mon Sep 17 00:00:00 2001 From: Konstantin Khlebnikov Date: Fri, 29 Mar 2019 17:01:18 +0300 Subject: block/bfq: fix ifdef for CONFIG_BFQ_GROUP_IOSCHED=y MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Replace BFQ_GROUP_IOSCHED_ENABLED with CONFIG_BFQ_GROUP_IOSCHED. Code under these ifdefs never worked, something might be broken. Fixes: 0471559c2fbd ("block, bfq: add/remove entity weights correctly") Fixes: 73d58118498b ("block, bfq: consider also ioprio classes in symmetry detection") Reviewed-by: Holger Hoffstätte Signed-off-by: Konstantin Khlebnikov Signed-off-by: Jens Axboe --- block/bfq-iosched.c | 2 +- block/bfq-wf2q.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c index 4c592496a16a..fac188dd78fa 100644 --- a/block/bfq-iosched.c +++ b/block/bfq-iosched.c @@ -674,7 +674,7 @@ static bool bfq_symmetric_scenario(struct bfq_data *bfqd) * at least two nodes. */ return !(varied_queue_weights || multiple_classes_busy -#ifdef BFQ_GROUP_IOSCHED_ENABLED +#ifdef CONFIG_BFQ_GROUP_IOSCHED || bfqd->num_groups_with_pending_reqs > 0 #endif ); diff --git a/block/bfq-wf2q.c b/block/bfq-wf2q.c index 63311d1ff1ed..a11bef75483d 100644 --- a/block/bfq-wf2q.c +++ b/block/bfq-wf2q.c @@ -1012,7 +1012,7 @@ static void __bfq_activate_entity(struct bfq_entity *entity, entity->on_st = true; } -#ifdef BFQ_GROUP_IOSCHED_ENABLED +#ifdef CONFIG_BFQ_GROUP_IOSCHED if (!bfq_entity_to_bfqq(entity)) { /* bfq_group */ struct bfq_group *bfqg = container_of(entity, struct bfq_group, entity); -- cgit v1.2.3 From 74e7c6c877f620d65a8269692d089bbd066f626c Mon Sep 17 00:00:00 2001 From: Hui Wang Date: Fri, 29 Mar 2019 14:13:23 +0800 Subject: HID: i2c-hid: Disable runtime PM on Synaptics touchpad We have a new Dell laptop which has the synaptics I2C touchpad (06cb:7e7e) on it. After booting up the Linux, the touchpad doesn't work, there is no interrupt when touching the touchpad, after disable the runtime PM, everything works well. I also tried the quirk of I2C_HID_QUIRK_DELAY_AFTER_SLEEP, it is better after applied this quirk, there are interrupts but data it reports is invalid. Signed-off-by: Hui Wang Signed-off-by: Jiri Kosina --- drivers/hid/hid-ids.h | 1 + drivers/hid/i2c-hid/i2c-hid-core.c | 2 ++ 2 files changed, 3 insertions(+) diff --git a/drivers/hid/hid-ids.h b/drivers/hid/hid-ids.h index b6d93f4ad037..adce58f24f76 100644 --- a/drivers/hid/hid-ids.h +++ b/drivers/hid/hid-ids.h @@ -1083,6 +1083,7 @@ #define USB_DEVICE_ID_SYNAPTICS_HD 0x0ac3 #define USB_DEVICE_ID_SYNAPTICS_QUAD_HD 0x1ac3 #define USB_DEVICE_ID_SYNAPTICS_TP_V103 0x5710 +#define I2C_DEVICE_ID_SYNAPTICS_7E7E 0x7e7e #define USB_VENDOR_ID_TEXAS_INSTRUMENTS 0x2047 #define USB_DEVICE_ID_TEXAS_INSTRUMENTS_LENOVO_YOGA 0x0855 diff --git a/drivers/hid/i2c-hid/i2c-hid-core.c b/drivers/hid/i2c-hid/i2c-hid-core.c index 90164fed08d3..4d1f24ee249c 100644 --- a/drivers/hid/i2c-hid/i2c-hid-core.c +++ b/drivers/hid/i2c-hid/i2c-hid-core.c @@ -184,6 +184,8 @@ static const struct i2c_hid_quirks { I2C_HID_QUIRK_NO_RUNTIME_PM }, { USB_VENDOR_ID_ELAN, HID_ANY_ID, I2C_HID_QUIRK_BOGUS_IRQ }, + { USB_VENDOR_ID_SYNAPTICS, I2C_DEVICE_ID_SYNAPTICS_7E7E, + I2C_HID_QUIRK_NO_RUNTIME_PM }, { 0, 0 } }; -- cgit v1.2.3 From faa3604eda325588451c9c1eb4c8a8d04c1cd633 Mon Sep 17 00:00:00 2001 From: Xiaochen Shen Date: Sat, 30 Mar 2019 05:50:38 +0800 Subject: x86/resctrl: Fix typos in the mba_sc mount option The user can control the MBA memory bandwidth in MBps (Mega Bytes per second) units of the MBA Software Controller (mba_sc) by using the "mba_MBps" mount option. For details, see Documentation/x86/resctrl_ui.txt. However, commit 23bf1b6be9c2 ("kernfs, sysfs, cgroup, intel_rdt: Support fs_context") changed the mount option name from "mba_MBps" to "mba_mpbs" by mistake. Change it back from to "mba_MBps" because it is user-visible, and correct "Opt_mba_mpbs" spelling to "Opt_mba_mbps". [ bp: massage commit message. ] Fixes: 23bf1b6be9c2 ("kernfs, sysfs, cgroup, intel_rdt: Support fs_context") Signed-off-by: Xiaochen Shen Signed-off-by: Borislav Petkov Cc: dhowells@redhat.com Cc: Fenghua Yu Cc: "H. Peter Anvin" Cc: Ingo Molnar Cc: pei.p.jia@intel.com Cc: Reinette Chatre Cc: Thomas Gleixner Cc: Tony Luck Cc: x86-ml Link: https://lkml.kernel.org/r/1553896238-22130-1-git-send-email-xiaochen.shen@intel.com --- arch/x86/kernel/cpu/resctrl/rdtgroup.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/x86/kernel/cpu/resctrl/rdtgroup.c b/arch/x86/kernel/cpu/resctrl/rdtgroup.c index 399601eda8e4..54b9eef3eea9 100644 --- a/arch/x86/kernel/cpu/resctrl/rdtgroup.c +++ b/arch/x86/kernel/cpu/resctrl/rdtgroup.c @@ -2039,14 +2039,14 @@ out: enum rdt_param { Opt_cdp, Opt_cdpl2, - Opt_mba_mpbs, + Opt_mba_mbps, nr__rdt_params }; static const struct fs_parameter_spec rdt_param_specs[] = { fsparam_flag("cdp", Opt_cdp), fsparam_flag("cdpl2", Opt_cdpl2), - fsparam_flag("mba_mpbs", Opt_mba_mpbs), + fsparam_flag("mba_MBps", Opt_mba_mbps), {} }; @@ -2072,7 +2072,7 @@ static int rdt_parse_param(struct fs_context *fc, struct fs_parameter *param) case Opt_cdpl2: ctx->enable_cdpl2 = true; return 0; - case Opt_mba_mpbs: + case Opt_mba_mbps: if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL) return -EINVAL; ctx->enable_mba_mbps = true; -- cgit v1.2.3 From b506bc975f60f06e13e74adb35e708a23dc4e87c Mon Sep 17 00:00:00 2001 From: Dust Li Date: Mon, 1 Apr 2019 16:04:53 +0800 Subject: tcp: fix a potential NULL pointer dereference in tcp_sk_exit When tcp_sk_init() failed in inet_ctl_sock_create(), 'net->ipv4.tcp_congestion_control' will be left uninitialized, but tcp_sk_exit() hasn't check for that. This patch add checking on 'net->ipv4.tcp_congestion_control' in tcp_sk_exit() to prevent NULL-ptr dereference. Fixes: 6670e1524477 ("tcp: Namespace-ify sysctl_tcp_default_congestion_control") Signed-off-by: Dust Li Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/tcp_ipv4.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 277d71239d75..2f8039a26b08 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -2578,7 +2578,8 @@ static void __net_exit tcp_sk_exit(struct net *net) { int cpu; - module_put(net->ipv4.tcp_congestion_control->owner); + if (net->ipv4.tcp_congestion_control) + module_put(net->ipv4.tcp_congestion_control->owner); for_each_possible_cpu(cpu) inet_ctl_sock_destroy(*per_cpu_ptr(net->ipv4.tcp_sk, cpu)); -- cgit v1.2.3 From a145b5b0e48783d0cd3ee605ed00b133d5c8ffed Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 29 Mar 2019 16:51:52 +0000 Subject: drm/i915: Always backoff after a drm_modeset_lock() deadlock If drm_modeset_lock() reports a deadlock it sets the ctx->contexted field and insists that the caller calls drm_modeset_backoff() or else it generates a WARN on cleanup. <4> [1601.870376] WARNING: CPU: 3 PID: 8445 at drivers/gpu/drm/drm_modeset_lock.c:228 drm_modeset_drop_locks+0x35/0x40 <4> [1601.870395] Modules linked in: vgem snd_hda_codec_hdmi snd_hda_codec_realtek snd_hda_codec_generic x86_pkg_temp_thermal i915 coretemp crct10dif_pclmul <6> [1601.870403] Console: switching <4> [1601.870403] snd_hda_intel <4> [1601.870406] to colour frame buffer device 320x90 <4> [1601.870406] crc32_pclmul snd_hda_codec snd_hwdep ghash_clmulni_intel e1000e snd_hda_core cdc_ether ptp usbnet mii pps_core snd_pcm i2c_i801 mei_me mei prime_numbers <4> [1601.870422] CPU: 3 PID: 8445 Comm: cat Tainted: G U 5.0.0-rc7-CI-CI_DRM_5650+ #1 <4> [1601.870424] Hardware name: Intel Corporation Ice Lake Client Platform/IceLake U DDR4 SODIMM PD RVP TLC, BIOS ICLSFWR1.R00.2402.AD3.1810170014 10/17/2018 <4> [1601.870427] RIP: 0010:drm_modeset_drop_locks+0x35/0x40 <4> [1601.870430] Code: 29 48 8b 43 60 48 8d 6b 60 48 39 c5 74 19 48 8b 43 60 48 8d b8 70 ff ff ff e8 87 ff ff ff 48 8b 43 60 48 39 c5 75 e7 5b 5d c3 <0f> 0b eb d3 0f 1f 80 00 00 00 00 41 56 41 55 41 54 55 53 48 8b 6f <4> [1601.870432] RSP: 0018:ffffc90000d67ce8 EFLAGS: 00010282 <4> [1601.870435] RAX: 00000000ffffffdd RBX: ffffc90000d67d00 RCX: 5dbbe23d00000000 <4> [1601.870437] RDX: 0000000000000000 RSI: 0000000093e6194a RDI: ffffc90000d67d00 <4> [1601.870439] RBP: ffff88849e62e678 R08: 0000000003b7329a R09: 0000000000000001 <4> [1601.870441] R10: 0000000000000000 R11: 0000000000000000 R12: ffff888492100410 <4> [1601.870442] R13: ffff88849ea50958 R14: ffff8884a67eb028 R15: ffff8884a67eb028 <4> [1601.870445] FS: 00007fa7a27745c0(0000) GS:ffff8884aff80000(0000) knlGS:0000000000000000 <4> [1601.870447] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 <4> [1601.870449] CR2: 000055af07e66000 CR3: 00000004a8cc2006 CR4: 0000000000760ee0 <4> [1601.870451] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 <4> [1601.870453] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 <4> [1601.870454] PKRU: 55555554 <4> [1601.870456] Call Trace: <4> [1601.870505] i915_dsc_fec_support_show+0x91/0x190 [i915] <4> [1601.870522] seq_read+0xdb/0x3c0 <4> [1601.870531] full_proxy_read+0x51/0x80 <4> [1601.870538] __vfs_read+0x31/0x190 <4> [1601.870546] ? __se_sys_newfstat+0x3c/0x60 <4> [1601.870552] vfs_read+0x9e/0x150 <4> [1601.870557] ksys_read+0x50/0xc0 <4> [1601.870564] do_syscall_64+0x55/0x190 <4> [1601.870569] entry_SYSCALL_64_after_hwframe+0x49/0xbe <4> [1601.870572] RIP: 0033:0x7fa7a226d081 <4> [1601.870574] Code: fe ff ff 48 8d 3d 67 9c 0a 00 48 83 ec 08 e8 a6 4c 02 00 66 0f 1f 44 00 00 48 8d 05 81 08 2e 00 8b 00 85 c0 75 13 31 c0 0f 05 <48> 3d 00 f0 ff ff 77 57 f3 c3 0f 1f 44 00 00 41 54 55 49 89 d4 53 <4> [1601.870576] RSP: 002b:00007ffcc05140c8 EFLAGS: 00000246 ORIG_RAX: 0000000000000000 <4> [1601.870579] RAX: ffffffffffffffda RBX: 0000000000020000 RCX: 00007fa7a226d081 <4> [1601.870581] RDX: 0000000000020000 RSI: 000055af07e63000 RDI: 0000000000000007 <4> [1601.870583] RBP: 0000000000020000 R08: 000000000000007b R09: 0000000000000000 <4> [1601.870585] R10: 000055af07e60010 R11: 0000000000000246 R12: 000055af07e63000 <4> [1601.870587] R13: 0000000000000007 R14: 000055af07e634bf R15: 0000000000020000 Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=109745 Fixes: e845f099f1c6 ("drm/i915/dsc: Add Per connector debugfs node for DSC support/enable") Signed-off-by: Chris Wilson Cc: Rodrigo Vivi Cc: Ville Syrjala Cc: Anusha Srivatsa Cc: Lyude Paul Cc: Manasi Navare Reviewed-by: Manasi Navare Link: https://patchwork.freedesktop.org/patch/msgid/20190329165152.29259-1-chris@chris-wilson.co.uk (cherry picked from commit ee6df5694a9a2e30566ae05e9c145a0f6d5e087f) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/i915/i915_debugfs.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index 0bd890c04fe4..f6f6e5b78e97 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -4830,7 +4830,10 @@ static int i915_dsc_fec_support_show(struct seq_file *m, void *data) ret = drm_modeset_lock(&dev->mode_config.connection_mutex, &ctx); if (ret) { - ret = -EINTR; + if (ret == -EDEADLK && !drm_modeset_backoff(&ctx)) { + try_again = true; + continue; + } break; } crtc = connector->state->crtc; -- cgit v1.2.3 From 8c1074f690bca6c6c8d79fc4a5752635cd0bfbe0 Mon Sep 17 00:00:00 2001 From: Bert Kenward Date: Mon, 1 Apr 2019 13:24:00 +0100 Subject: MAINTAINERS: net: update Solarflare maintainers Cc: Martin Habets Signed-off-by: Bert Kenward Acked-by: Martin Habets Signed-off-by: David S. Miller --- MAINTAINERS | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index 1fdc8970e816..ade9e18488d2 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -13982,7 +13982,7 @@ F: drivers/media/rc/serial_ir.c SFC NETWORK DRIVER M: Solarflare linux maintainers M: Edward Cree -M: Bert Kenward +M: Martin Habets L: netdev@vger.kernel.org S: Supported F: drivers/net/ethernet/sfc/ -- cgit v1.2.3 From 8c83f2df9c6578ea4c5b940d8238ad8a41b87e9e Mon Sep 17 00:00:00 2001 From: Stephen Suryaputra Date: Mon, 1 Apr 2019 09:17:32 -0400 Subject: vrf: check accept_source_route on the original netdevice Configuration check to accept source route IP options should be made on the incoming netdevice when the skb->dev is an l3mdev master. The route lookup for the source route next hop also needs the incoming netdev. v2->v3: - Simplify by passing the original netdevice down the stack (per David Ahern). Signed-off-by: Stephen Suryaputra Reviewed-by: David Ahern Signed-off-by: David S. Miller --- include/net/ip.h | 2 +- net/ipv4/ip_input.c | 7 +++---- net/ipv4/ip_options.c | 4 ++-- 3 files changed, 6 insertions(+), 7 deletions(-) diff --git a/include/net/ip.h b/include/net/ip.h index be3cad9c2e4c..583526aad1d0 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -677,7 +677,7 @@ int ip_options_get_from_user(struct net *net, struct ip_options_rcu **optp, unsigned char __user *data, int optlen); void ip_options_undo(struct ip_options *opt); void ip_forward_options(struct sk_buff *skb); -int ip_options_rcv_srr(struct sk_buff *skb); +int ip_options_rcv_srr(struct sk_buff *skb, struct net_device *dev); /* * Functions provided by ip_sockglue.c diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c index ecce2dc78f17..1132d6d1796a 100644 --- a/net/ipv4/ip_input.c +++ b/net/ipv4/ip_input.c @@ -257,11 +257,10 @@ int ip_local_deliver(struct sk_buff *skb) ip_local_deliver_finish); } -static inline bool ip_rcv_options(struct sk_buff *skb) +static inline bool ip_rcv_options(struct sk_buff *skb, struct net_device *dev) { struct ip_options *opt; const struct iphdr *iph; - struct net_device *dev = skb->dev; /* It looks as overkill, because not all IP options require packet mangling. @@ -297,7 +296,7 @@ static inline bool ip_rcv_options(struct sk_buff *skb) } } - if (ip_options_rcv_srr(skb)) + if (ip_options_rcv_srr(skb, dev)) goto drop; } @@ -353,7 +352,7 @@ static int ip_rcv_finish_core(struct net *net, struct sock *sk, } #endif - if (iph->ihl > 5 && ip_rcv_options(skb)) + if (iph->ihl > 5 && ip_rcv_options(skb, dev)) goto drop; rt = skb_rtable(skb); diff --git a/net/ipv4/ip_options.c b/net/ipv4/ip_options.c index 32a35043c9f5..3db31bb9df50 100644 --- a/net/ipv4/ip_options.c +++ b/net/ipv4/ip_options.c @@ -612,7 +612,7 @@ void ip_forward_options(struct sk_buff *skb) } } -int ip_options_rcv_srr(struct sk_buff *skb) +int ip_options_rcv_srr(struct sk_buff *skb, struct net_device *dev) { struct ip_options *opt = &(IPCB(skb)->opt); int srrspace, srrptr; @@ -647,7 +647,7 @@ int ip_options_rcv_srr(struct sk_buff *skb) orefdst = skb->_skb_refdst; skb_dst_set(skb, NULL); - err = ip_route_input(skb, nexthop, iph->saddr, iph->tos, skb->dev); + err = ip_route_input(skb, nexthop, iph->saddr, iph->tos, dev); rt2 = skb_rtable(skb); if (err || (rt2->rt_type != RTN_UNICAST && rt2->rt_type != RTN_LOCAL)) { skb_dst_drop(skb); -- cgit v1.2.3 From b83f28e1e38a8324eaa5e55f2c7ee2f75e748f08 Mon Sep 17 00:00:00 2001 From: Björn Töpel Date: Tue, 12 Feb 2019 09:52:04 +0100 Subject: i40e: move i40e_xsk_umem function MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The i40e_xsk_umem function was explicitly inlined in i40e.h. There is no reason for that, so move it to i40e_main.c instead. Signed-off-by: Björn Töpel Tested-by: Aaron Brown Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/i40e/i40e.h | 14 -------------- drivers/net/ethernet/intel/i40e/i40e_main.c | 20 ++++++++++++++++++++ 2 files changed, 20 insertions(+), 14 deletions(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e.h b/drivers/net/ethernet/intel/i40e/i40e.h index d684998ba2b0..cc583ad5236b 100644 --- a/drivers/net/ethernet/intel/i40e/i40e.h +++ b/drivers/net/ethernet/intel/i40e/i40e.h @@ -1096,20 +1096,6 @@ static inline bool i40e_enabled_xdp_vsi(struct i40e_vsi *vsi) return !!vsi->xdp_prog; } -static inline struct xdp_umem *i40e_xsk_umem(struct i40e_ring *ring) -{ - bool xdp_on = i40e_enabled_xdp_vsi(ring->vsi); - int qid = ring->queue_index; - - if (ring_is_xdp(ring)) - qid -= ring->vsi->alloc_queue_pairs; - - if (!xdp_on) - return NULL; - - return xdp_get_umem_from_qid(ring->vsi->netdev, qid); -} - int i40e_create_queue_channel(struct i40e_vsi *vsi, struct i40e_channel *ch); int i40e_set_bw_limit(struct i40e_vsi *vsi, u16 seid, u64 max_tx_rate); int i40e_add_del_cloud_filter(struct i40e_vsi *vsi, diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index da62218eb70a..dd77793a08a0 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -3063,6 +3063,26 @@ static void i40e_config_xps_tx_ring(struct i40e_ring *ring) ring->queue_index); } +/** + * i40e_xsk_umem - Retrieve the AF_XDP ZC if XDP and ZC is enabled + * @ring: The Tx or Rx ring + * + * Returns the UMEM or NULL. + **/ +static struct xdp_umem *i40e_xsk_umem(struct i40e_ring *ring) +{ + bool xdp_on = i40e_enabled_xdp_vsi(ring->vsi); + int qid = ring->queue_index; + + if (ring_is_xdp(ring)) + qid -= ring->vsi->alloc_queue_pairs; + + if (!xdp_on) + return NULL; + + return xdp_get_umem_from_qid(ring->vsi->netdev, qid); +} + /** * i40e_configure_tx_ring - Configure a transmit ring context and rest * @ring: The Tx ring to configure -- cgit v1.2.3 From 44ddd4f1709249dd1779dda7c907668a0b9ef833 Mon Sep 17 00:00:00 2001 From: Björn Töpel Date: Tue, 12 Feb 2019 09:52:05 +0100 Subject: i40e: add tracking of AF_XDP ZC state for each queue pair MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In commit f3fef2b6e1cc ("i40e: Remove umem from VSI") a regression was introduced; When the VSI was reset, the setup code would try to enable AF_XDP ZC unconditionally (as long as there was a umem placed in the netdev._rx struct). Here, we add a bitmap to the VSI that tracks if a certain queue pair has been "zero-copy enabled" via the ndo_bpf. The bitmap is used in i40e_xsk_umem, and enables zero-copy if and only if XDP is enabled, the corresponding qid in the bitmap is set and the umem is non-NULL. Fixes: f3fef2b6e1cc ("i40e: Remove umem from VSI") Signed-off-by: Björn Töpel Tested-by: Aaron Brown Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/i40e/i40e.h | 2 ++ drivers/net/ethernet/intel/i40e/i40e_main.c | 10 +++++++++- drivers/net/ethernet/intel/i40e/i40e_xsk.c | 3 +++ 3 files changed, 14 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e.h b/drivers/net/ethernet/intel/i40e/i40e.h index cc583ad5236b..d3cc3427caad 100644 --- a/drivers/net/ethernet/intel/i40e/i40e.h +++ b/drivers/net/ethernet/intel/i40e/i40e.h @@ -790,6 +790,8 @@ struct i40e_vsi { /* VSI specific handlers */ irqreturn_t (*irq_handler)(int irq, void *data); + + unsigned long *af_xdp_zc_qps; /* tracks AF_XDP ZC enabled qps */ } ____cacheline_internodealigned_in_smp; struct i40e_netdev_priv { diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index dd77793a08a0..b1c265012c8a 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -3077,7 +3077,7 @@ static struct xdp_umem *i40e_xsk_umem(struct i40e_ring *ring) if (ring_is_xdp(ring)) qid -= ring->vsi->alloc_queue_pairs; - if (!xdp_on) + if (!xdp_on || !test_bit(qid, ring->vsi->af_xdp_zc_qps)) return NULL; return xdp_get_umem_from_qid(ring->vsi->netdev, qid); @@ -10084,6 +10084,12 @@ static int i40e_vsi_mem_alloc(struct i40e_pf *pf, enum i40e_vsi_type type) hash_init(vsi->mac_filter_hash); vsi->irqs_ready = false; + if (type == I40E_VSI_MAIN) { + vsi->af_xdp_zc_qps = bitmap_zalloc(pf->num_lan_qps, GFP_KERNEL); + if (!vsi->af_xdp_zc_qps) + goto err_rings; + } + ret = i40e_set_num_rings_in_vsi(vsi); if (ret) goto err_rings; @@ -10102,6 +10108,7 @@ static int i40e_vsi_mem_alloc(struct i40e_pf *pf, enum i40e_vsi_type type) goto unlock_pf; err_rings: + bitmap_free(vsi->af_xdp_zc_qps); pf->next_vsi = i - 1; kfree(vsi); unlock_pf: @@ -10182,6 +10189,7 @@ static int i40e_vsi_clear(struct i40e_vsi *vsi) i40e_put_lump(pf->qp_pile, vsi->base_queue, vsi->idx); i40e_put_lump(pf->irq_pile, vsi->base_vector, vsi->idx); + bitmap_free(vsi->af_xdp_zc_qps); i40e_vsi_free_arrays(vsi, true); i40e_clear_rss_config_user(vsi); diff --git a/drivers/net/ethernet/intel/i40e/i40e_xsk.c b/drivers/net/ethernet/intel/i40e/i40e_xsk.c index b5c182e688e3..1b17486543ac 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_xsk.c +++ b/drivers/net/ethernet/intel/i40e/i40e_xsk.c @@ -102,6 +102,8 @@ static int i40e_xsk_umem_enable(struct i40e_vsi *vsi, struct xdp_umem *umem, if (err) return err; + set_bit(qid, vsi->af_xdp_zc_qps); + if_running = netif_running(vsi->netdev) && i40e_enabled_xdp_vsi(vsi); if (if_running) { @@ -148,6 +150,7 @@ static int i40e_xsk_umem_disable(struct i40e_vsi *vsi, u16 qid) return err; } + clear_bit(qid, vsi->af_xdp_zc_qps); i40e_xsk_umem_dma_unmap(vsi, umem); if (if_running) { -- cgit v1.2.3 From 2f94a3125b8742b05a011d62b16f52eb8f9ebe1c Mon Sep 17 00:00:00 2001 From: Ronnie Sahlberg Date: Thu, 28 Mar 2019 11:20:02 +1000 Subject: cifs: fix kref underflow in close_shroot() Fix a bug where we used to not initialize the cached fid structure at all in open_shroot() if the open was successful but we did not get a lease. This would leave the structure uninitialized and later when we close the handle we would in close_shroot() try to kref_put() an uninitialized refcount. Fix this by always initializing this structure if the open was successful but only do the extra get() if we got a lease. This extra get() is only used to hold the structure until we get a lease break from the server at which point we will kref_put() it during lease processing. Signed-off-by: Ronnie Sahlberg Signed-off-by: Steve French CC: Stable --- fs/cifs/smb2ops.c | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c index 1022a3771e14..7cfafac255aa 100644 --- a/fs/cifs/smb2ops.c +++ b/fs/cifs/smb2ops.c @@ -717,20 +717,18 @@ int open_shroot(unsigned int xid, struct cifs_tcon *tcon, struct cifs_fid *pfid) oparms.fid->mid = le64_to_cpu(o_rsp->sync_hdr.MessageId); #endif /* CIFS_DEBUG2 */ - if (o_rsp->OplockLevel == SMB2_OPLOCK_LEVEL_LEASE) - oplock = smb2_parse_lease_state(server, o_rsp, - &oparms.fid->epoch, - oparms.fid->lease_key); - else - goto oshr_exit; - - memcpy(tcon->crfid.fid, pfid, sizeof(struct cifs_fid)); tcon->crfid.tcon = tcon; tcon->crfid.is_valid = true; kref_init(&tcon->crfid.refcount); - kref_get(&tcon->crfid.refcount); + if (o_rsp->OplockLevel == SMB2_OPLOCK_LEVEL_LEASE) { + kref_get(&tcon->crfid.refcount); + oplock = smb2_parse_lease_state(server, o_rsp, + &oparms.fid->epoch, + oparms.fid->lease_key); + } else + goto oshr_exit; qi_rsp = (struct smb2_query_info_rsp *)rsp_iov[1].iov_base; if (le32_to_cpu(qi_rsp->OutputBufferLength) < sizeof(struct smb2_file_all_info)) -- cgit v1.2.3 From 153322f7536a181e4d1b288aa6f01c0ce65f5c7c Mon Sep 17 00:00:00 2001 From: Steve French Date: Thu, 28 Mar 2019 22:32:49 -0500 Subject: smb3: Fix enumerating snapshots to Azure Some servers (see MS-SMB2 protocol specification section 3.3.5.15.1) expect that the FSCTL enumerate snapshots is done twice, with the first query having EXACTLY the minimum size response buffer requested (16 bytes) which refreshes the snapshot list (otherwise that and subsequent queries get an empty list returned). So had to add code to set the maximum response size differently for the first snapshot query (which gets the size needed for the second query which contains the actual list of snapshots). Signed-off-by: Steve French Reviewed-by: Ronnie Sahlberg Reviewed-by: Pavel Shilovsky CC: Stable # 4.19+ --- fs/cifs/smb2file.c | 2 +- fs/cifs/smb2ops.c | 44 ++++++++++++++++++++++++++++++++------------ fs/cifs/smb2pdu.c | 35 ++++++++++++++++++++++------------- fs/cifs/smb2proto.h | 5 +++-- 4 files changed, 58 insertions(+), 28 deletions(-) diff --git a/fs/cifs/smb2file.c b/fs/cifs/smb2file.c index b204e84b87fb..ac97e0c01d4e 100644 --- a/fs/cifs/smb2file.c +++ b/fs/cifs/smb2file.c @@ -74,7 +74,7 @@ smb2_open_file(const unsigned int xid, struct cifs_open_parms *oparms, fid->volatile_fid, FSCTL_LMR_REQUEST_RESILIENCY, true /* is_fsctl */, (char *)&nr_ioctl_req, sizeof(nr_ioctl_req), - NULL, NULL /* no return info */); + CIFSMaxBufSize, NULL, NULL /* no return info */); if (rc == -EOPNOTSUPP) { cifs_dbg(VFS, "resiliency not supported by server, disabling\n"); diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c index 7cfafac255aa..b22be10ee980 100644 --- a/fs/cifs/smb2ops.c +++ b/fs/cifs/smb2ops.c @@ -581,7 +581,7 @@ SMB3_request_interfaces(const unsigned int xid, struct cifs_tcon *tcon) rc = SMB2_ioctl(xid, tcon, NO_FILE_ID, NO_FILE_ID, FSCTL_QUERY_NETWORK_INTERFACE_INFO, true /* is_fsctl */, NULL /* no data input */, 0 /* no data input */, - (char **)&out_buf, &ret_data_len); + CIFSMaxBufSize, (char **)&out_buf, &ret_data_len); if (rc == -EOPNOTSUPP) { cifs_dbg(FYI, "server does not support query network interfaces\n"); @@ -1297,7 +1297,7 @@ SMB2_request_res_key(const unsigned int xid, struct cifs_tcon *tcon, rc = SMB2_ioctl(xid, tcon, persistent_fid, volatile_fid, FSCTL_SRV_REQUEST_RESUME_KEY, true /* is_fsctl */, - NULL, 0 /* no input */, + NULL, 0 /* no input */, CIFSMaxBufSize, (char **)&res_key, &ret_data_len); if (rc) { @@ -1402,7 +1402,7 @@ smb2_ioctl_query_info(const unsigned int xid, rc = SMB2_ioctl_init(tcon, &rqst[1], COMPOUND_FID, COMPOUND_FID, qi.info_type, true, NULL, - 0); + 0, CIFSMaxBufSize); } } else if (qi.flags == PASSTHRU_QUERY_INFO) { memset(&qi_iov, 0, sizeof(qi_iov)); @@ -1530,8 +1530,8 @@ smb2_copychunk_range(const unsigned int xid, rc = SMB2_ioctl(xid, tcon, trgtfile->fid.persistent_fid, trgtfile->fid.volatile_fid, FSCTL_SRV_COPYCHUNK_WRITE, true /* is_fsctl */, (char *)pcchunk, - sizeof(struct copychunk_ioctl), (char **)&retbuf, - &ret_data_len); + sizeof(struct copychunk_ioctl), CIFSMaxBufSize, + (char **)&retbuf, &ret_data_len); if (rc == 0) { if (ret_data_len != sizeof(struct copychunk_ioctl_rsp)) { @@ -1691,7 +1691,7 @@ static bool smb2_set_sparse(const unsigned int xid, struct cifs_tcon *tcon, rc = SMB2_ioctl(xid, tcon, cfile->fid.persistent_fid, cfile->fid.volatile_fid, FSCTL_SET_SPARSE, true /* is_fctl */, - &setsparse, 1, NULL, NULL); + &setsparse, 1, CIFSMaxBufSize, NULL, NULL); if (rc) { tcon->broken_sparse_sup = true; cifs_dbg(FYI, "set sparse rc = %d\n", rc); @@ -1764,7 +1764,7 @@ smb2_duplicate_extents(const unsigned int xid, true /* is_fsctl */, (char *)&dup_ext_buf, sizeof(struct duplicate_extents_to_file), - NULL, + CIFSMaxBufSize, NULL, &ret_data_len); if (ret_data_len > 0) @@ -1799,7 +1799,7 @@ smb3_set_integrity(const unsigned int xid, struct cifs_tcon *tcon, true /* is_fsctl */, (char *)&integr_info, sizeof(struct fsctl_set_integrity_information_req), - NULL, + CIFSMaxBufSize, NULL, &ret_data_len); } @@ -1807,6 +1807,8 @@ smb3_set_integrity(const unsigned int xid, struct cifs_tcon *tcon, /* GMT Token is @GMT-YYYY.MM.DD-HH.MM.SS Unicode which is 48 bytes + null */ #define GMT_TOKEN_SIZE 50 +#define MIN_SNAPSHOT_ARRAY_SIZE 16 /* See MS-SMB2 section 3.3.5.15.1 */ + /* * Input buffer contains (empty) struct smb_snapshot array with size filled in * For output see struct SRV_SNAPSHOT_ARRAY in MS-SMB2 section 2.2.32.2 @@ -1818,13 +1820,29 @@ smb3_enum_snapshots(const unsigned int xid, struct cifs_tcon *tcon, char *retbuf = NULL; unsigned int ret_data_len = 0; int rc; + u32 max_response_size; struct smb_snapshot_array snapshot_in; + if (get_user(ret_data_len, (unsigned int __user *)ioc_buf)) + return -EFAULT; + + /* + * Note that for snapshot queries that servers like Azure expect that + * the first query be minimal size (and just used to get the number/size + * of previous versions) so response size must be specified as EXACTLY + * sizeof(struct snapshot_array) which is 16 when rounded up to multiple + * of eight bytes. + */ + if (ret_data_len == 0) + max_response_size = MIN_SNAPSHOT_ARRAY_SIZE; + else + max_response_size = CIFSMaxBufSize; + rc = SMB2_ioctl(xid, tcon, cfile->fid.persistent_fid, cfile->fid.volatile_fid, FSCTL_SRV_ENUMERATE_SNAPSHOTS, true /* is_fsctl */, - NULL, 0 /* no input data */, + NULL, 0 /* no input data */, max_response_size, (char **)&retbuf, &ret_data_len); cifs_dbg(FYI, "enum snaphots ioctl returned %d and ret buflen is %d\n", @@ -2302,7 +2320,7 @@ smb2_get_dfs_refer(const unsigned int xid, struct cifs_ses *ses, rc = SMB2_ioctl(xid, tcon, NO_FILE_ID, NO_FILE_ID, FSCTL_DFS_GET_REFERRALS, true /* is_fsctl */, - (char *)dfs_req, dfs_req_size, + (char *)dfs_req, dfs_req_size, CIFSMaxBufSize, (char **)&dfs_rsp, &dfs_rsp_size); } while (rc == -EAGAIN); @@ -2656,7 +2674,8 @@ static long smb3_zero_range(struct file *file, struct cifs_tcon *tcon, rc = SMB2_ioctl_init(tcon, &rqst[num++], cfile->fid.persistent_fid, cfile->fid.volatile_fid, FSCTL_SET_ZERO_DATA, true /* is_fctl */, (char *)&fsctl_buf, - sizeof(struct file_zero_data_information)); + sizeof(struct file_zero_data_information), + CIFSMaxBufSize); if (rc) goto zero_range_exit; @@ -2733,7 +2752,8 @@ static long smb3_punch_hole(struct file *file, struct cifs_tcon *tcon, rc = SMB2_ioctl(xid, tcon, cfile->fid.persistent_fid, cfile->fid.volatile_fid, FSCTL_SET_ZERO_DATA, true /* is_fctl */, (char *)&fsctl_buf, - sizeof(struct file_zero_data_information), NULL, NULL); + sizeof(struct file_zero_data_information), + CIFSMaxBufSize, NULL, NULL); free_xid(xid); return rc; } diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c index 21ac19ff19cb..0dc66f36ff5b 100644 --- a/fs/cifs/smb2pdu.c +++ b/fs/cifs/smb2pdu.c @@ -1002,7 +1002,8 @@ int smb3_validate_negotiate(const unsigned int xid, struct cifs_tcon *tcon) rc = SMB2_ioctl(xid, tcon, NO_FILE_ID, NO_FILE_ID, FSCTL_VALIDATE_NEGOTIATE_INFO, true /* is_fsctl */, - (char *)pneg_inbuf, inbuflen, (char **)&pneg_rsp, &rsplen); + (char *)pneg_inbuf, inbuflen, CIFSMaxBufSize, + (char **)&pneg_rsp, &rsplen); if (rc == -EOPNOTSUPP) { /* * Old Windows versions or Netapp SMB server can return @@ -2478,7 +2479,8 @@ creat_exit: int SMB2_ioctl_init(struct cifs_tcon *tcon, struct smb_rqst *rqst, u64 persistent_fid, u64 volatile_fid, u32 opcode, - bool is_fsctl, char *in_data, u32 indatalen) + bool is_fsctl, char *in_data, u32 indatalen, + __u32 max_response_size) { struct smb2_ioctl_req *req; struct kvec *iov = rqst->rq_iov; @@ -2520,16 +2522,21 @@ SMB2_ioctl_init(struct cifs_tcon *tcon, struct smb_rqst *rqst, req->OutputCount = 0; /* MBZ */ /* - * Could increase MaxOutputResponse, but that would require more - * than one credit. Windows typically sets this smaller, but for some + * In most cases max_response_size is set to 16K (CIFSMaxBufSize) + * We Could increase default MaxOutputResponse, but that could require + * more credits. Windows typically sets this smaller, but for some * ioctls it may be useful to allow server to send more. No point * limiting what the server can send as long as fits in one credit - * Unfortunately - we can not handle more than CIFS_MAX_MSG_SIZE - * (by default, note that it can be overridden to make max larger) - * in responses (except for read responses which can be bigger. - * We may want to bump this limit up + * We can not handle more than CIFS_MAX_BUF_SIZE yet but may want + * to increase this limit up in the future. + * Note that for snapshot queries that servers like Azure expect that + * the first query be minimal size (and just used to get the number/size + * of previous versions) so response size must be specified as EXACTLY + * sizeof(struct snapshot_array) which is 16 when rounded up to multiple + * of eight bytes. Currently that is the only case where we set max + * response size smaller. */ - req->MaxOutputResponse = cpu_to_le32(CIFSMaxBufSize); + req->MaxOutputResponse = cpu_to_le32(max_response_size); if (is_fsctl) req->Flags = cpu_to_le32(SMB2_0_IOCTL_IS_FSCTL); @@ -2550,13 +2557,14 @@ SMB2_ioctl_free(struct smb_rqst *rqst) cifs_small_buf_release(rqst->rq_iov[0].iov_base); /* request */ } + /* * SMB2 IOCTL is used for both IOCTLs and FSCTLs */ int SMB2_ioctl(const unsigned int xid, struct cifs_tcon *tcon, u64 persistent_fid, u64 volatile_fid, u32 opcode, bool is_fsctl, - char *in_data, u32 indatalen, + char *in_data, u32 indatalen, u32 max_out_data_len, char **out_data, u32 *plen /* returned data len */) { struct smb_rqst rqst; @@ -2593,8 +2601,8 @@ SMB2_ioctl(const unsigned int xid, struct cifs_tcon *tcon, u64 persistent_fid, rqst.rq_iov = iov; rqst.rq_nvec = SMB2_IOCTL_IOV_SIZE; - rc = SMB2_ioctl_init(tcon, &rqst, persistent_fid, volatile_fid, - opcode, is_fsctl, in_data, indatalen); + rc = SMB2_ioctl_init(tcon, &rqst, persistent_fid, volatile_fid, opcode, + is_fsctl, in_data, indatalen, max_out_data_len); if (rc) goto ioctl_exit; @@ -2672,7 +2680,8 @@ SMB2_set_compression(const unsigned int xid, struct cifs_tcon *tcon, rc = SMB2_ioctl(xid, tcon, persistent_fid, volatile_fid, FSCTL_SET_COMPRESSION, true /* is_fsctl */, (char *)&fsctl_input /* data input */, - 2 /* in data len */, &ret_data /* out data */, NULL); + 2 /* in data len */, CIFSMaxBufSize /* max out data */, + &ret_data /* out data */, NULL); cifs_dbg(FYI, "set compression rc %d\n", rc); diff --git a/fs/cifs/smb2proto.h b/fs/cifs/smb2proto.h index 3c32d0cfea69..52df125e9189 100644 --- a/fs/cifs/smb2proto.h +++ b/fs/cifs/smb2proto.h @@ -142,11 +142,12 @@ extern int SMB2_open_init(struct cifs_tcon *tcon, struct smb_rqst *rqst, extern void SMB2_open_free(struct smb_rqst *rqst); extern int SMB2_ioctl(const unsigned int xid, struct cifs_tcon *tcon, u64 persistent_fid, u64 volatile_fid, u32 opcode, - bool is_fsctl, char *in_data, u32 indatalen, + bool is_fsctl, char *in_data, u32 indatalen, u32 maxoutlen, char **out_data, u32 *plen /* returned data len */); extern int SMB2_ioctl_init(struct cifs_tcon *tcon, struct smb_rqst *rqst, u64 persistent_fid, u64 volatile_fid, u32 opcode, - bool is_fsctl, char *in_data, u32 indatalen); + bool is_fsctl, char *in_data, u32 indatalen, + __u32 max_response_size); extern void SMB2_ioctl_free(struct smb_rqst *rqst); extern int SMB2_close(const unsigned int xid, struct cifs_tcon *tcon, u64 persistent_file_id, u64 volatile_file_id); -- cgit v1.2.3 From ca567eb2b3f014d5be0f44c6f68b01a522f15ca4 Mon Sep 17 00:00:00 2001 From: Steve French Date: Fri, 29 Mar 2019 16:31:07 -0500 Subject: SMB3: Allow persistent handle timeout to be configurable on mount Reconnecting after server or network failure can be improved (to maintain availability and protect data integrity) by allowing the client to choose the default persistent (or resilient) handle timeout in some use cases. Today we default to 0 which lets the server pick the default timeout (usually 120 seconds) but this can be problematic for some workloads. Add the new mount parameter to cifs.ko for SMB3 mounts "handletimeout" which enables the user to override the default handle timeout for persistent (mount option "persistenthandles") or resilient handles (mount option "resilienthandles"). Maximum allowed is 16 minutes (960000 ms). Units for the timeout are expressed in milliseconds. See section 2.2.14.2.12 and 2.2.31.3 of the MS-SMB2 protocol specification for more information. Signed-off-by: Steve French Reviewed-by: Pavel Shilovsky Reviewed-by: Ronnie Sahlberg CC: Stable --- fs/cifs/cifsfs.c | 2 ++ fs/cifs/cifsglob.h | 8 ++++++++ fs/cifs/connect.c | 30 +++++++++++++++++++++++++++++- fs/cifs/smb2file.c | 4 +++- fs/cifs/smb2pdu.c | 14 +++++++++++--- 5 files changed, 53 insertions(+), 5 deletions(-) diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c index f9b71c12cc9f..a05bf1d6e1d0 100644 --- a/fs/cifs/cifsfs.c +++ b/fs/cifs/cifsfs.c @@ -559,6 +559,8 @@ cifs_show_options(struct seq_file *s, struct dentry *root) tcon->ses->server->echo_interval / HZ); if (tcon->snapshot_time) seq_printf(s, ",snapshot=%llu", tcon->snapshot_time); + if (tcon->handle_timeout) + seq_printf(s, ",handletimeout=%u", tcon->handle_timeout); /* convert actimeo and display it in seconds */ seq_printf(s, ",actimeo=%lu", cifs_sb->actimeo / HZ); diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h index 38feae812b47..5b18d4585740 100644 --- a/fs/cifs/cifsglob.h +++ b/fs/cifs/cifsglob.h @@ -59,6 +59,12 @@ */ #define CIFS_MAX_ACTIMEO (1 << 30) +/* + * Max persistent and resilient handle timeout (milliseconds). + * Windows durable max was 960000 (16 minutes) + */ +#define SMB3_MAX_HANDLE_TIMEOUT 960000 + /* * MAX_REQ is the maximum number of requests that WE will send * on one socket concurrently. @@ -586,6 +592,7 @@ struct smb_vol { struct nls_table *local_nls; unsigned int echo_interval; /* echo interval in secs */ __u64 snapshot_time; /* needed for timewarp tokens */ + __u32 handle_timeout; /* persistent and durable handle timeout in ms */ unsigned int max_credits; /* smb3 max_credits 10 < credits < 60000 */ }; @@ -1058,6 +1065,7 @@ struct cifs_tcon { __u32 vol_serial_number; __le64 vol_create_time; __u64 snapshot_time; /* for timewarp tokens - timestamp of snapshot */ + __u32 handle_timeout; /* persistent and durable handle timeout in ms */ __u32 ss_flags; /* sector size flags */ __u32 perf_sector_size; /* best sector size for perf */ __u32 max_chunks; diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index a8e9738db691..4c0e44489f21 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -103,7 +103,7 @@ enum { Opt_cruid, Opt_gid, Opt_file_mode, Opt_dirmode, Opt_port, Opt_blocksize, Opt_rsize, Opt_wsize, Opt_actimeo, - Opt_echo_interval, Opt_max_credits, + Opt_echo_interval, Opt_max_credits, Opt_handletimeout, Opt_snapshot, /* Mount options which take string value */ @@ -208,6 +208,7 @@ static const match_table_t cifs_mount_option_tokens = { { Opt_rsize, "rsize=%s" }, { Opt_wsize, "wsize=%s" }, { Opt_actimeo, "actimeo=%s" }, + { Opt_handletimeout, "handletimeout=%s" }, { Opt_echo_interval, "echo_interval=%s" }, { Opt_max_credits, "max_credits=%s" }, { Opt_snapshot, "snapshot=%s" }, @@ -1619,6 +1620,9 @@ cifs_parse_mount_options(const char *mountdata, const char *devname, vol->actimeo = CIFS_DEF_ACTIMEO; + /* Most clients set timeout to 0, allows server to use its default */ + vol->handle_timeout = 0; /* See MS-SMB2 spec section 2.2.14.2.12 */ + /* offer SMB2.1 and later (SMB3 etc). Secure and widely accepted */ vol->ops = &smb30_operations; vol->vals = &smbdefault_values; @@ -2017,6 +2021,18 @@ cifs_parse_mount_options(const char *mountdata, const char *devname, goto cifs_parse_mount_err; } break; + case Opt_handletimeout: + if (get_option_ul(args, &option)) { + cifs_dbg(VFS, "%s: Invalid handletimeout value\n", + __func__); + goto cifs_parse_mount_err; + } + vol->handle_timeout = option; + if (vol->handle_timeout > SMB3_MAX_HANDLE_TIMEOUT) { + cifs_dbg(VFS, "Invalid handle cache timeout, longer than 16 minutes\n"); + goto cifs_parse_mount_err; + } + break; case Opt_echo_interval: if (get_option_ul(args, &option)) { cifs_dbg(VFS, "%s: Invalid echo interval value\n", @@ -3183,6 +3199,8 @@ static int match_tcon(struct cifs_tcon *tcon, struct smb_vol *volume_info) return 0; if (tcon->snapshot_time != volume_info->snapshot_time) return 0; + if (tcon->handle_timeout != volume_info->handle_timeout) + return 0; return 1; } @@ -3297,6 +3315,16 @@ cifs_get_tcon(struct cifs_ses *ses, struct smb_vol *volume_info) tcon->snapshot_time = volume_info->snapshot_time; } + if (volume_info->handle_timeout) { + if (ses->server->vals->protocol_id == 0) { + cifs_dbg(VFS, + "Use SMB2.1 or later for handle timeout option\n"); + rc = -EOPNOTSUPP; + goto out_fail; + } else + tcon->handle_timeout = volume_info->handle_timeout; + } + tcon->ses = ses; if (volume_info->password) { tcon->password = kstrdup(volume_info->password, GFP_KERNEL); diff --git a/fs/cifs/smb2file.c b/fs/cifs/smb2file.c index ac97e0c01d4e..54bffb2a1786 100644 --- a/fs/cifs/smb2file.c +++ b/fs/cifs/smb2file.c @@ -68,7 +68,9 @@ smb2_open_file(const unsigned int xid, struct cifs_open_parms *oparms, if (oparms->tcon->use_resilient) { - nr_ioctl_req.Timeout = 0; /* use server default (120 seconds) */ + /* default timeout is 0, servers pick default (120 seconds) */ + nr_ioctl_req.Timeout = + cpu_to_le32(oparms->tcon->handle_timeout); nr_ioctl_req.Reserved = 0; rc = SMB2_ioctl(xid, oparms->tcon, fid->persistent_fid, fid->volatile_fid, FSCTL_LMR_REQUEST_RESILIENCY, diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c index 0dc66f36ff5b..21ad01d55ab2 100644 --- a/fs/cifs/smb2pdu.c +++ b/fs/cifs/smb2pdu.c @@ -1859,8 +1859,9 @@ add_lease_context(struct TCP_Server_Info *server, struct kvec *iov, } static struct create_durable_v2 * -create_durable_v2_buf(struct cifs_fid *pfid) +create_durable_v2_buf(struct cifs_open_parms *oparms) { + struct cifs_fid *pfid = oparms->fid; struct create_durable_v2 *buf; buf = kzalloc(sizeof(struct create_durable_v2), GFP_KERNEL); @@ -1874,7 +1875,14 @@ create_durable_v2_buf(struct cifs_fid *pfid) (struct create_durable_v2, Name)); buf->ccontext.NameLength = cpu_to_le16(4); - buf->dcontext.Timeout = 0; /* Should this be configurable by workload */ + /* + * NB: Handle timeout defaults to 0, which allows server to choose + * (most servers default to 120 seconds) and most clients default to 0. + * This can be overridden at mount ("handletimeout=") if the user wants + * a different persistent (or resilient) handle timeout for all opens + * opens on a particular SMB3 mount. + */ + buf->dcontext.Timeout = cpu_to_le32(oparms->tcon->handle_timeout); buf->dcontext.Flags = cpu_to_le32(SMB2_DHANDLE_FLAG_PERSISTENT); generate_random_uuid(buf->dcontext.CreateGuid); memcpy(pfid->create_guid, buf->dcontext.CreateGuid, 16); @@ -1927,7 +1935,7 @@ add_durable_v2_context(struct kvec *iov, unsigned int *num_iovec, struct smb2_create_req *req = iov[0].iov_base; unsigned int num = *num_iovec; - iov[num].iov_base = create_durable_v2_buf(oparms->fid); + iov[num].iov_base = create_durable_v2_buf(oparms); if (iov[num].iov_base == NULL) return -ENOMEM; iov[num].iov_len = sizeof(struct create_durable_v2); -- cgit v1.2.3 From 4811e3096daaa56e145a1d2bec45e2e9fe790729 Mon Sep 17 00:00:00 2001 From: Ronnie Sahlberg Date: Mon, 1 Apr 2019 09:53:44 +1000 Subject: cifs: a smb2_validate_and_copy_iov failure does not mean the handle is invalid. It only means that we do not have a valid cached value for the file_all_info structure. CC: Stable Signed-off-by: Ronnie Sahlberg Signed-off-by: Steve French Reviewed-by: Pavel Shilovsky --- fs/cifs/smb2ops.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c index b22be10ee980..00225e699d03 100644 --- a/fs/cifs/smb2ops.c +++ b/fs/cifs/smb2ops.c @@ -733,14 +733,12 @@ int open_shroot(unsigned int xid, struct cifs_tcon *tcon, struct cifs_fid *pfid) qi_rsp = (struct smb2_query_info_rsp *)rsp_iov[1].iov_base; if (le32_to_cpu(qi_rsp->OutputBufferLength) < sizeof(struct smb2_file_all_info)) goto oshr_exit; - rc = smb2_validate_and_copy_iov( + if (!smb2_validate_and_copy_iov( le16_to_cpu(qi_rsp->OutputBufferOffset), sizeof(struct smb2_file_all_info), &rsp_iov[1], sizeof(struct smb2_file_all_info), - (char *)&tcon->crfid.file_all_info); - if (rc) - goto oshr_exit; - tcon->crfid.file_all_info_is_valid = 1; + (char *)&tcon->crfid.file_all_info)) + tcon->crfid.file_all_info_is_valid = 1; oshr_exit: mutex_unlock(&tcon->crfid.fid_mutex); -- cgit v1.2.3 From 0d74e6a3b6421d98eeafbed26f29156d469bc0b5 Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Wed, 13 Mar 2019 07:56:02 -0400 Subject: dm integrity: change memcmp to strncmp in dm_integrity_ctr If the string opt_string is small, the function memcmp can access bytes that are beyond the terminating nul character. In theory, it could cause segfault, if opt_string were located just below some unmapped memory. Change from memcmp to strncmp so that we don't read bytes beyond the end of the string. Cc: stable@vger.kernel.org # v4.12+ Signed-off-by: Mikulas Patocka Signed-off-by: Mike Snitzer --- drivers/md/dm-integrity.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/md/dm-integrity.c b/drivers/md/dm-integrity.c index d57d997a52c8..33fac437569f 100644 --- a/drivers/md/dm-integrity.c +++ b/drivers/md/dm-integrity.c @@ -3185,7 +3185,7 @@ static int dm_integrity_ctr(struct dm_target *ti, unsigned argc, char **argv) journal_watermark = val; else if (sscanf(opt_string, "commit_time:%u%c", &val, &dummy) == 1) sync_msec = val; - else if (!memcmp(opt_string, "meta_device:", strlen("meta_device:"))) { + else if (!strncmp(opt_string, "meta_device:", strlen("meta_device:"))) { if (ic->meta_dev) { dm_put_device(ti, ic->meta_dev); ic->meta_dev = NULL; @@ -3204,17 +3204,17 @@ static int dm_integrity_ctr(struct dm_target *ti, unsigned argc, char **argv) goto bad; } ic->sectors_per_block = val >> SECTOR_SHIFT; - } else if (!memcmp(opt_string, "internal_hash:", strlen("internal_hash:"))) { + } else if (!strncmp(opt_string, "internal_hash:", strlen("internal_hash:"))) { r = get_alg_and_key(opt_string, &ic->internal_hash_alg, &ti->error, "Invalid internal_hash argument"); if (r) goto bad; - } else if (!memcmp(opt_string, "journal_crypt:", strlen("journal_crypt:"))) { + } else if (!strncmp(opt_string, "journal_crypt:", strlen("journal_crypt:"))) { r = get_alg_and_key(opt_string, &ic->journal_crypt_alg, &ti->error, "Invalid journal_crypt argument"); if (r) goto bad; - } else if (!memcmp(opt_string, "journal_mac:", strlen("journal_mac:"))) { + } else if (!strncmp(opt_string, "journal_mac:", strlen("journal_mac:"))) { r = get_alg_and_key(opt_string, &ic->journal_mac_alg, &ti->error, "Invalid journal_mac argument"); if (r) -- cgit v1.2.3 From 5efedc9b62b5cf0ccc84ed427a07f0d2485091c4 Mon Sep 17 00:00:00 2001 From: YueHaibing Date: Fri, 22 Mar 2019 22:16:34 +0800 Subject: dm integrity: make dm_integrity_init and dm_integrity_exit static Fix sparse warnings: drivers/md/dm-integrity.c:3619:12: warning: symbol 'dm_integrity_init' was not declared. Should it be static? drivers/md/dm-integrity.c:3638:6: warning: symbol 'dm_integrity_exit' was not declared. Should it be static? Signed-off-by: YueHaibing Signed-off-by: Mike Snitzer --- drivers/md/dm-integrity.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/md/dm-integrity.c b/drivers/md/dm-integrity.c index 33fac437569f..94b865c5ce71 100644 --- a/drivers/md/dm-integrity.c +++ b/drivers/md/dm-integrity.c @@ -3616,7 +3616,7 @@ static struct target_type integrity_target = { .io_hints = dm_integrity_io_hints, }; -int __init dm_integrity_init(void) +static int __init dm_integrity_init(void) { int r; @@ -3635,7 +3635,7 @@ int __init dm_integrity_init(void) return r; } -void dm_integrity_exit(void) +static void __exit dm_integrity_exit(void) { dm_unregister_target(&integrity_target); kmem_cache_destroy(journal_io_cache); -- cgit v1.2.3 From 93fc91675a6c84d6ab355188aea398bda2cc51f8 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Thu, 21 Mar 2019 15:00:09 -0700 Subject: dm init: fix const confusion for dm_allowed_targets array A non const pointer to const cannot be marked initconst. Mark the array actually const. Fixes: 6bbc923dfcf5 dm: add support to directly boot to a mapped device Signed-off-by: Andi Kleen Signed-off-by: Mike Snitzer --- drivers/md/dm-init.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/md/dm-init.c b/drivers/md/dm-init.c index b53f30f16b4d..4b76f84424c3 100644 --- a/drivers/md/dm-init.c +++ b/drivers/md/dm-init.c @@ -36,7 +36,7 @@ struct dm_device { struct list_head list; }; -const char *dm_allowed_targets[] __initconst = { +const char * const dm_allowed_targets[] __initconst = { "crypt", "delay", "linear", -- cgit v1.2.3 From 75ae193626de3238ca5fb895868ec91c94e63b1b Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Thu, 21 Mar 2019 16:46:12 -0400 Subject: dm: revert 8f50e358153d ("dm: limit the max bio size as BIO_MAX_PAGES * PAGE_SIZE") The limit was already incorporated to dm-crypt with commit 4e870e948fba ("dm crypt: fix error with too large bios"), so we don't need to apply it globally to all targets. The quantity BIO_MAX_PAGES * PAGE_SIZE is wrong anyway because the variable ti->max_io_len it is supposed to be in the units of 512-byte sectors not in bytes. Reduction of the limit to 1048576 sectors could even cause data corruption in rare cases - suppose that we have a dm-striped device with stripe size 768MiB. The target will call dm_set_target_max_io_len with the value 1572864. The buggy code would reduce it to 1048576. Now, the dm-core will errorneously split the bios on 1048576-sector boundary insetad of 1572864-sector boundary and pass these stripe-crossing bios to the striped target. Cc: stable@vger.kernel.org # v4.16+ Fixes: 8f50e358153d ("dm: limit the max bio size as BIO_MAX_PAGES * PAGE_SIZE") Signed-off-by: Mikulas Patocka Acked-by: Ming Lei Signed-off-by: Mike Snitzer --- drivers/md/dm.c | 10 +--------- 1 file changed, 1 insertion(+), 9 deletions(-) diff --git a/drivers/md/dm.c b/drivers/md/dm.c index 68d24056d0b1..89b04169658d 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -1042,15 +1042,7 @@ int dm_set_target_max_io_len(struct dm_target *ti, sector_t len) return -EINVAL; } - /* - * BIO based queue uses its own splitting. When multipage bvecs - * is switched on, size of the incoming bio may be too big to - * be handled in some targets, such as crypt. - * - * When these targets are ready for the big bio, we can remove - * the limit. - */ - ti->max_io_len = min_t(uint32_t, len, BIO_MAX_PAGES * PAGE_SIZE); + ti->max_io_len = (uint32_t) len; return 0; } -- cgit v1.2.3 From eb40c0acdc342b815d4d03ae6abb09e80c0f2988 Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Tue, 26 Mar 2019 20:20:58 +0100 Subject: dm table: propagate BDI_CAP_STABLE_WRITES to fix sporadic checksum errors Some devices don't use blk_integrity but still want stable pages because they do their own checksumming. Examples include rbd and iSCSI when data digests are negotiated. Stacking DM (and thus LVM) on top of these devices results in sporadic checksum errors. Set BDI_CAP_STABLE_WRITES if any underlying device has it set. Cc: stable@vger.kernel.org Signed-off-by: Ilya Dryomov Signed-off-by: Mike Snitzer --- drivers/md/dm-table.c | 39 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 39 insertions(+) diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c index ba9481f1bf3c..cde3b49b2a91 100644 --- a/drivers/md/dm-table.c +++ b/drivers/md/dm-table.c @@ -1844,6 +1844,36 @@ static bool dm_table_supports_secure_erase(struct dm_table *t) return true; } +static int device_requires_stable_pages(struct dm_target *ti, + struct dm_dev *dev, sector_t start, + sector_t len, void *data) +{ + struct request_queue *q = bdev_get_queue(dev->bdev); + + return q && bdi_cap_stable_pages_required(q->backing_dev_info); +} + +/* + * If any underlying device requires stable pages, a table must require + * them as well. Only targets that support iterate_devices are considered: + * don't want error, zero, etc to require stable pages. + */ +static bool dm_table_requires_stable_pages(struct dm_table *t) +{ + struct dm_target *ti; + unsigned i; + + for (i = 0; i < dm_table_get_num_targets(t); i++) { + ti = dm_table_get_target(t, i); + + if (ti->type->iterate_devices && + ti->type->iterate_devices(ti, device_requires_stable_pages, NULL)) + return true; + } + + return false; +} + void dm_table_set_restrictions(struct dm_table *t, struct request_queue *q, struct queue_limits *limits) { @@ -1896,6 +1926,15 @@ void dm_table_set_restrictions(struct dm_table *t, struct request_queue *q, dm_table_verify_integrity(t); + /* + * Some devices don't use blk_integrity but still want stable pages + * because they do their own checksumming. + */ + if (dm_table_requires_stable_pages(t)) + q->backing_dev_info->capabilities |= BDI_CAP_STABLE_WRITES; + else + q->backing_dev_info->capabilities &= ~BDI_CAP_STABLE_WRITES; + /* * Determine whether or not this queue's I/O timings contribute * to the entropy pool, Only request-based targets use this. -- cgit v1.2.3 From 556a888a14afe27164191955618990fb3ccc9aad Mon Sep 17 00:00:00 2001 From: Jann Horn Date: Sat, 30 Mar 2019 03:12:32 +0100 Subject: signal: don't silently convert SI_USER signals to non-current pidfd The current sys_pidfd_send_signal() silently turns signals with explicit SI_USER context that are sent to non-current tasks into signals with kernel-generated siginfo. This is unlike do_rt_sigqueueinfo(), which returns -EPERM in this case. If a user actually wants to send a signal with kernel-provided siginfo, they can do that with pidfd_send_signal(pidfd, sig, NULL, 0); so allowing this case is unnecessary. Instead of silently replacing the siginfo, just bail out with an error; this is consistent with other interfaces and avoids special-casing behavior based on security checks. Fixes: 3eb39f47934f ("signal: add pidfd_send_signal() syscall") Signed-off-by: Jann Horn Signed-off-by: Christian Brauner --- kernel/signal.c | 13 ++++--------- 1 file changed, 4 insertions(+), 9 deletions(-) diff --git a/kernel/signal.c b/kernel/signal.c index b7953934aa99..f98448cf2def 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -3605,16 +3605,11 @@ SYSCALL_DEFINE4(pidfd_send_signal, int, pidfd, int, sig, if (unlikely(sig != kinfo.si_signo)) goto err; + /* Only allow sending arbitrary signals to yourself. */ + ret = -EPERM; if ((task_pid(current) != pid) && - (kinfo.si_code >= 0 || kinfo.si_code == SI_TKILL)) { - /* Only allow sending arbitrary signals to yourself. */ - ret = -EPERM; - if (kinfo.si_code != SI_USER) - goto err; - - /* Turn this into a regular kill signal. */ - prepare_kill_siginfo(sig, &kinfo); - } + (kinfo.si_code >= 0 || kinfo.si_code == SI_TKILL)) + goto err; } else { prepare_kill_siginfo(sig, &kinfo); } -- cgit v1.2.3 From 0db6f8befc32c68bb13d7ffbb2e563c79e913e13 Mon Sep 17 00:00:00 2001 From: Nicolas Dichtel Date: Thu, 28 Mar 2019 10:35:06 +0100 Subject: net/sched: fix ->get helper of the matchall cls It returned always NULL, thus it was never possible to get the filter. Example: $ ip link add foo type dummy $ ip link add bar type dummy $ tc qdisc add dev foo clsact $ tc filter add dev foo protocol all pref 1 ingress handle 1234 \ matchall action mirred ingress mirror dev bar Before the patch: $ tc filter get dev foo protocol all pref 1 ingress handle 1234 matchall Error: Specified filter handle not found. We have an error talking to the kernel After: $ tc filter get dev foo protocol all pref 1 ingress handle 1234 matchall filter ingress protocol all pref 1 matchall chain 0 handle 0x4d2 not_in_hw action order 1: mirred (Ingress Mirror to device bar) pipe index 1 ref 1 bind 1 CC: Yotam Gigi CC: Jiri Pirko Fixes: fd62d9f5c575 ("net/sched: matchall: Fix configuration race") Signed-off-by: Nicolas Dichtel Signed-off-by: David S. Miller --- net/sched/cls_matchall.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/net/sched/cls_matchall.c b/net/sched/cls_matchall.c index 459921bd3d87..a13bc351a414 100644 --- a/net/sched/cls_matchall.c +++ b/net/sched/cls_matchall.c @@ -130,6 +130,11 @@ static void mall_destroy(struct tcf_proto *tp, bool rtnl_held, static void *mall_get(struct tcf_proto *tp, u32 handle) { + struct cls_mall_head *head = rtnl_dereference(tp->root); + + if (head && head->handle == handle) + return head; + return NULL; } -- cgit v1.2.3 From 4ab526468344c11d2d1807ae95feb1f5305dc014 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Mon, 1 Apr 2019 17:03:45 +0200 Subject: cpufreq/intel_pstate: Load only on Intel hardware This driver is Intel-only so loading on anything which is not Intel is pointless. Prevent it from doing so. While at it, correct the "not supported" print statement to say CPU "model" which is what that test does. Fixes: 076b862c7e44 (cpufreq: intel_pstate: Add reasons for failure and debug messages) Suggested-by: Erwan Velu Signed-off-by: Borislav Petkov Reviewed-by: Thomas Renninger Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/intel_pstate.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index b599c7318aab..2986119dd31f 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -2596,6 +2596,9 @@ static int __init intel_pstate_init(void) const struct x86_cpu_id *id; int rc; + if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL) + return -ENODEV; + if (no_load) return -ENODEV; @@ -2611,7 +2614,7 @@ static int __init intel_pstate_init(void) } else { id = x86_match_cpu(intel_pstate_cpu_ids); if (!id) { - pr_info("CPU ID not supported\n"); + pr_info("CPU model not supported\n"); return -ENODEV; } -- cgit v1.2.3 From 5dd431b6b92c0db324d134d2a4006dd4f87f2261 Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Thu, 28 Mar 2019 16:53:12 +0100 Subject: net: sched: introduce and use qstats read helpers Classful qdiscs can't access directly the child qdiscs backlog length: if such qdisc is NOLOCK, per CPU values should be accounted instead. Most qdiscs no not respect the above. As a result, qstats fetching for most classful qdisc is currently incorrect: if the child qdisc is NOLOCK, it always reports 0 len backlog. This change introduces a pair of helpers to safely fetch both backlog and qlen and use them in stats class dumping functions, fixing the above issue and cleaning a bit the code. DRR needs also to access the child qdisc queue length, so it needs custom handling. Fixes: c5ad119fb6c0 ("net: sched: pfifo_fast use skb_array") Signed-off-by: Paolo Abeni Signed-off-by: David S. Miller --- include/net/sch_generic.h | 18 ++++++++++++++++++ net/sched/sch_cbq.c | 4 +++- net/sched/sch_drr.c | 5 +++-- net/sched/sch_hfsc.c | 5 +++-- net/sched/sch_htb.c | 7 +++---- net/sched/sch_mq.c | 2 +- net/sched/sch_mqprio.c | 3 +-- net/sched/sch_multiq.c | 2 +- net/sched/sch_prio.c | 2 +- net/sched/sch_qfq.c | 3 +-- net/sched/sch_taprio.c | 2 +- 11 files changed, 36 insertions(+), 17 deletions(-) diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index 7d1a0483a17b..43e4e17aa938 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -923,6 +923,24 @@ static inline void qdisc_qstats_overlimit(struct Qdisc *sch) sch->qstats.overlimits++; } +static inline int qdisc_qstats_copy(struct gnet_dump *d, struct Qdisc *sch) +{ + __u32 qlen = qdisc_qlen_sum(sch); + + return gnet_stats_copy_queue(d, sch->cpu_qstats, &sch->qstats, qlen); +} + +static inline void qdisc_qstats_qlen_backlog(struct Qdisc *sch, __u32 *qlen, + __u32 *backlog) +{ + struct gnet_stats_queue qstats = { 0 }; + __u32 len = qdisc_qlen_sum(sch); + + __gnet_stats_copy_queue(&qstats, sch->cpu_qstats, &sch->qstats, len); + *qlen = qstats.qlen; + *backlog = qstats.backlog; +} + static inline void qdisc_skb_head_init(struct qdisc_skb_head *qh) { qh->head = NULL; diff --git a/net/sched/sch_cbq.c b/net/sched/sch_cbq.c index 4dc05409e3fb..651879c1b655 100644 --- a/net/sched/sch_cbq.c +++ b/net/sched/sch_cbq.c @@ -1358,9 +1358,11 @@ cbq_dump_class_stats(struct Qdisc *sch, unsigned long arg, { struct cbq_sched_data *q = qdisc_priv(sch); struct cbq_class *cl = (struct cbq_class *)arg; + __u32 qlen; cl->xstats.avgidle = cl->avgidle; cl->xstats.undertime = 0; + qdisc_qstats_qlen_backlog(cl->q, &qlen, &cl->qstats.backlog); if (cl->undertime != PSCHED_PASTPERFECT) cl->xstats.undertime = cl->undertime - q->now; @@ -1368,7 +1370,7 @@ cbq_dump_class_stats(struct Qdisc *sch, unsigned long arg, if (gnet_stats_copy_basic(qdisc_root_sleeping_running(sch), d, NULL, &cl->bstats) < 0 || gnet_stats_copy_rate_est(d, &cl->rate_est) < 0 || - gnet_stats_copy_queue(d, NULL, &cl->qstats, cl->q->q.qlen) < 0) + gnet_stats_copy_queue(d, NULL, &cl->qstats, qlen) < 0) return -1; return gnet_stats_copy_app(d, &cl->xstats, sizeof(cl->xstats)); diff --git a/net/sched/sch_drr.c b/net/sched/sch_drr.c index 09b800991065..8a181591b0ea 100644 --- a/net/sched/sch_drr.c +++ b/net/sched/sch_drr.c @@ -269,7 +269,8 @@ static int drr_dump_class_stats(struct Qdisc *sch, unsigned long arg, struct gnet_dump *d) { struct drr_class *cl = (struct drr_class *)arg; - __u32 qlen = cl->qdisc->q.qlen; + __u32 qlen = qdisc_qlen_sum(cl->qdisc); + struct Qdisc *cl_q = cl->qdisc; struct tc_drr_stats xstats; memset(&xstats, 0, sizeof(xstats)); @@ -279,7 +280,7 @@ static int drr_dump_class_stats(struct Qdisc *sch, unsigned long arg, if (gnet_stats_copy_basic(qdisc_root_sleeping_running(sch), d, NULL, &cl->bstats) < 0 || gnet_stats_copy_rate_est(d, &cl->rate_est) < 0 || - gnet_stats_copy_queue(d, NULL, &cl->qdisc->qstats, qlen) < 0) + gnet_stats_copy_queue(d, cl_q->cpu_qstats, &cl_q->qstats, qlen) < 0) return -1; return gnet_stats_copy_app(d, &xstats, sizeof(xstats)); diff --git a/net/sched/sch_hfsc.c b/net/sched/sch_hfsc.c index 24cc220a3218..a946a419d717 100644 --- a/net/sched/sch_hfsc.c +++ b/net/sched/sch_hfsc.c @@ -1328,8 +1328,9 @@ hfsc_dump_class_stats(struct Qdisc *sch, unsigned long arg, { struct hfsc_class *cl = (struct hfsc_class *)arg; struct tc_hfsc_stats xstats; + __u32 qlen; - cl->qstats.backlog = cl->qdisc->qstats.backlog; + qdisc_qstats_qlen_backlog(cl->qdisc, &qlen, &cl->qstats.backlog); xstats.level = cl->level; xstats.period = cl->cl_vtperiod; xstats.work = cl->cl_total; @@ -1337,7 +1338,7 @@ hfsc_dump_class_stats(struct Qdisc *sch, unsigned long arg, if (gnet_stats_copy_basic(qdisc_root_sleeping_running(sch), d, NULL, &cl->bstats) < 0 || gnet_stats_copy_rate_est(d, &cl->rate_est) < 0 || - gnet_stats_copy_queue(d, NULL, &cl->qstats, cl->qdisc->q.qlen) < 0) + gnet_stats_copy_queue(d, NULL, &cl->qstats, qlen) < 0) return -1; return gnet_stats_copy_app(d, &xstats, sizeof(xstats)); diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c index 30f9da7e1076..ed92836f528a 100644 --- a/net/sched/sch_htb.c +++ b/net/sched/sch_htb.c @@ -1127,10 +1127,9 @@ htb_dump_class_stats(struct Qdisc *sch, unsigned long arg, struct gnet_dump *d) }; __u32 qlen = 0; - if (!cl->level && cl->leaf.q) { - qlen = cl->leaf.q->q.qlen; - qs.backlog = cl->leaf.q->qstats.backlog; - } + if (!cl->level && cl->leaf.q) + qdisc_qstats_qlen_backlog(cl->leaf.q, &qlen, &qs.backlog); + cl->xstats.tokens = clamp_t(s64, PSCHED_NS2TICKS(cl->tokens), INT_MIN, INT_MAX); cl->xstats.ctokens = clamp_t(s64, PSCHED_NS2TICKS(cl->ctokens), diff --git a/net/sched/sch_mq.c b/net/sched/sch_mq.c index 203659bc3906..3a3312467692 100644 --- a/net/sched/sch_mq.c +++ b/net/sched/sch_mq.c @@ -249,7 +249,7 @@ static int mq_dump_class_stats(struct Qdisc *sch, unsigned long cl, sch = dev_queue->qdisc_sleeping; if (gnet_stats_copy_basic(&sch->running, d, NULL, &sch->bstats) < 0 || - gnet_stats_copy_queue(d, NULL, &sch->qstats, sch->q.qlen) < 0) + qdisc_qstats_copy(d, sch) < 0) return -1; return 0; } diff --git a/net/sched/sch_mqprio.c b/net/sched/sch_mqprio.c index d364e63c396d..ea0dc112b38d 100644 --- a/net/sched/sch_mqprio.c +++ b/net/sched/sch_mqprio.c @@ -561,8 +561,7 @@ static int mqprio_dump_class_stats(struct Qdisc *sch, unsigned long cl, sch = dev_queue->qdisc_sleeping; if (gnet_stats_copy_basic(qdisc_root_sleeping_running(sch), d, NULL, &sch->bstats) < 0 || - gnet_stats_copy_queue(d, NULL, - &sch->qstats, sch->q.qlen) < 0) + qdisc_qstats_copy(d, sch) < 0) return -1; } return 0; diff --git a/net/sched/sch_multiq.c b/net/sched/sch_multiq.c index 7410ce4d0321..53c918a11378 100644 --- a/net/sched/sch_multiq.c +++ b/net/sched/sch_multiq.c @@ -344,7 +344,7 @@ static int multiq_dump_class_stats(struct Qdisc *sch, unsigned long cl, cl_q = q->queues[cl - 1]; if (gnet_stats_copy_basic(qdisc_root_sleeping_running(sch), d, NULL, &cl_q->bstats) < 0 || - gnet_stats_copy_queue(d, NULL, &cl_q->qstats, cl_q->q.qlen) < 0) + qdisc_qstats_copy(d, cl_q) < 0) return -1; return 0; diff --git a/net/sched/sch_prio.c b/net/sched/sch_prio.c index 847141cd900f..dfb06d5bfacc 100644 --- a/net/sched/sch_prio.c +++ b/net/sched/sch_prio.c @@ -365,7 +365,7 @@ static int prio_dump_class_stats(struct Qdisc *sch, unsigned long cl, cl_q = q->queues[cl - 1]; if (gnet_stats_copy_basic(qdisc_root_sleeping_running(sch), d, NULL, &cl_q->bstats) < 0 || - gnet_stats_copy_queue(d, NULL, &cl_q->qstats, cl_q->q.qlen) < 0) + qdisc_qstats_copy(d, cl_q) < 0) return -1; return 0; diff --git a/net/sched/sch_qfq.c b/net/sched/sch_qfq.c index 29f5c4a24688..9fbda3ec5861 100644 --- a/net/sched/sch_qfq.c +++ b/net/sched/sch_qfq.c @@ -655,8 +655,7 @@ static int qfq_dump_class_stats(struct Qdisc *sch, unsigned long arg, if (gnet_stats_copy_basic(qdisc_root_sleeping_running(sch), d, NULL, &cl->bstats) < 0 || gnet_stats_copy_rate_est(d, &cl->rate_est) < 0 || - gnet_stats_copy_queue(d, NULL, - &cl->qdisc->qstats, cl->qdisc->q.qlen) < 0) + qdisc_qstats_copy(d, cl->qdisc) < 0) return -1; return gnet_stats_copy_app(d, &xstats, sizeof(xstats)); diff --git a/net/sched/sch_taprio.c b/net/sched/sch_taprio.c index 206e4dbed12f..c7041999eb5d 100644 --- a/net/sched/sch_taprio.c +++ b/net/sched/sch_taprio.c @@ -895,7 +895,7 @@ static int taprio_dump_class_stats(struct Qdisc *sch, unsigned long cl, sch = dev_queue->qdisc_sleeping; if (gnet_stats_copy_basic(&sch->running, d, NULL, &sch->bstats) < 0 || - gnet_stats_copy_queue(d, NULL, &sch->qstats, sch->q.qlen) < 0) + qdisc_qstats_copy(d, sch) < 0) return -1; return 0; } -- cgit v1.2.3 From e5f0e8f8e456589d56e4955154ed5d468cd6d286 Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Thu, 28 Mar 2019 16:53:13 +0100 Subject: net: sched: introduce and use qdisc tree flush/purge helpers The same code to flush qdisc tree and purge the qdisc queue is duplicated in many places and in most cases it does not respect NOLOCK qdisc: the global backlog len is used and the per CPU values are ignored. This change addresses the above, factoring-out the relevant code and using the helpers introduced by the previous patch to fetch the correct backlog len. Fixes: c5ad119fb6c0 ("net: sched: pfifo_fast use skb_array") Signed-off-by: Paolo Abeni Signed-off-by: David S. Miller --- include/net/sch_generic.h | 26 +++++++++++++++++++------- net/sched/sch_cbq.c | 6 +----- net/sched/sch_drr.c | 11 +---------- net/sched/sch_hfsc.c | 14 ++------------ net/sched/sch_htb.c | 15 +++------------ net/sched/sch_multiq.c | 8 +++----- net/sched/sch_prio.c | 8 ++------ net/sched/sch_qfq.c | 11 +---------- net/sched/sch_red.c | 3 +-- net/sched/sch_sfb.c | 3 +-- net/sched/sch_tbf.c | 3 +-- 11 files changed, 35 insertions(+), 73 deletions(-) diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index 43e4e17aa938..a2b38b3deeca 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -941,6 +941,23 @@ static inline void qdisc_qstats_qlen_backlog(struct Qdisc *sch, __u32 *qlen, *backlog = qstats.backlog; } +static inline void qdisc_tree_flush_backlog(struct Qdisc *sch) +{ + __u32 qlen, backlog; + + qdisc_qstats_qlen_backlog(sch, &qlen, &backlog); + qdisc_tree_reduce_backlog(sch, qlen, backlog); +} + +static inline void qdisc_purge_queue(struct Qdisc *sch) +{ + __u32 qlen, backlog; + + qdisc_qstats_qlen_backlog(sch, &qlen, &backlog); + qdisc_reset(sch); + qdisc_tree_reduce_backlog(sch, qlen, backlog); +} + static inline void qdisc_skb_head_init(struct qdisc_skb_head *qh) { qh->head = NULL; @@ -1124,13 +1141,8 @@ static inline struct Qdisc *qdisc_replace(struct Qdisc *sch, struct Qdisc *new, sch_tree_lock(sch); old = *pold; *pold = new; - if (old != NULL) { - unsigned int qlen = old->q.qlen; - unsigned int backlog = old->qstats.backlog; - - qdisc_reset(old); - qdisc_tree_reduce_backlog(old, qlen, backlog); - } + if (old != NULL) + qdisc_tree_flush_backlog(old); sch_tree_unlock(sch); return old; diff --git a/net/sched/sch_cbq.c b/net/sched/sch_cbq.c index 651879c1b655..114b9048ea7e 100644 --- a/net/sched/sch_cbq.c +++ b/net/sched/sch_cbq.c @@ -1667,17 +1667,13 @@ static int cbq_delete(struct Qdisc *sch, unsigned long arg) { struct cbq_sched_data *q = qdisc_priv(sch); struct cbq_class *cl = (struct cbq_class *)arg; - unsigned int qlen, backlog; if (cl->filters || cl->children || cl == &q->link) return -EBUSY; sch_tree_lock(sch); - qlen = cl->q->q.qlen; - backlog = cl->q->qstats.backlog; - qdisc_reset(cl->q); - qdisc_tree_reduce_backlog(cl->q, qlen, backlog); + qdisc_purge_queue(cl->q); if (cl->next_alive) cbq_deactivate_class(cl); diff --git a/net/sched/sch_drr.c b/net/sched/sch_drr.c index 8a181591b0ea..430df9a55ec4 100644 --- a/net/sched/sch_drr.c +++ b/net/sched/sch_drr.c @@ -50,15 +50,6 @@ static struct drr_class *drr_find_class(struct Qdisc *sch, u32 classid) return container_of(clc, struct drr_class, common); } -static void drr_purge_queue(struct drr_class *cl) -{ - unsigned int len = cl->qdisc->q.qlen; - unsigned int backlog = cl->qdisc->qstats.backlog; - - qdisc_reset(cl->qdisc); - qdisc_tree_reduce_backlog(cl->qdisc, len, backlog); -} - static const struct nla_policy drr_policy[TCA_DRR_MAX + 1] = { [TCA_DRR_QUANTUM] = { .type = NLA_U32 }, }; @@ -167,7 +158,7 @@ static int drr_delete_class(struct Qdisc *sch, unsigned long arg) sch_tree_lock(sch); - drr_purge_queue(cl); + qdisc_purge_queue(cl->qdisc); qdisc_class_hash_remove(&q->clhash, &cl->common); sch_tree_unlock(sch); diff --git a/net/sched/sch_hfsc.c b/net/sched/sch_hfsc.c index a946a419d717..d2ab463f22ae 100644 --- a/net/sched/sch_hfsc.c +++ b/net/sched/sch_hfsc.c @@ -844,16 +844,6 @@ qdisc_peek_len(struct Qdisc *sch) return len; } -static void -hfsc_purge_queue(struct Qdisc *sch, struct hfsc_class *cl) -{ - unsigned int len = cl->qdisc->q.qlen; - unsigned int backlog = cl->qdisc->qstats.backlog; - - qdisc_reset(cl->qdisc); - qdisc_tree_reduce_backlog(cl->qdisc, len, backlog); -} - static void hfsc_adjust_levels(struct hfsc_class *cl) { @@ -1076,7 +1066,7 @@ hfsc_change_class(struct Qdisc *sch, u32 classid, u32 parentid, qdisc_class_hash_insert(&q->clhash, &cl->cl_common); list_add_tail(&cl->siblings, &parent->children); if (parent->level == 0) - hfsc_purge_queue(sch, parent); + qdisc_purge_queue(parent->qdisc); hfsc_adjust_levels(parent); sch_tree_unlock(sch); @@ -1112,7 +1102,7 @@ hfsc_delete_class(struct Qdisc *sch, unsigned long arg) list_del(&cl->siblings); hfsc_adjust_levels(cl->cl_parent); - hfsc_purge_queue(sch, cl); + qdisc_purge_queue(cl->qdisc); qdisc_class_hash_remove(&q->clhash, &cl->cl_common); sch_tree_unlock(sch); diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c index ed92836f528a..2f9883b196e8 100644 --- a/net/sched/sch_htb.c +++ b/net/sched/sch_htb.c @@ -1269,13 +1269,8 @@ static int htb_delete(struct Qdisc *sch, unsigned long arg) sch_tree_lock(sch); - if (!cl->level) { - unsigned int qlen = cl->leaf.q->q.qlen; - unsigned int backlog = cl->leaf.q->qstats.backlog; - - qdisc_reset(cl->leaf.q); - qdisc_tree_reduce_backlog(cl->leaf.q, qlen, backlog); - } + if (!cl->level) + qdisc_purge_queue(cl->leaf.q); /* delete from hash and active; remainder in destroy_class */ qdisc_class_hash_remove(&q->clhash, &cl->common); @@ -1403,12 +1398,8 @@ static int htb_change_class(struct Qdisc *sch, u32 classid, classid, NULL); sch_tree_lock(sch); if (parent && !parent->level) { - unsigned int qlen = parent->leaf.q->q.qlen; - unsigned int backlog = parent->leaf.q->qstats.backlog; - /* turn parent into inner node */ - qdisc_reset(parent->leaf.q); - qdisc_tree_reduce_backlog(parent->leaf.q, qlen, backlog); + qdisc_purge_queue(parent->leaf.q); qdisc_put(parent->leaf.q); if (parent->prio_activity) htb_deactivate(q, parent); diff --git a/net/sched/sch_multiq.c b/net/sched/sch_multiq.c index 53c918a11378..35b03ae08e0f 100644 --- a/net/sched/sch_multiq.c +++ b/net/sched/sch_multiq.c @@ -201,9 +201,9 @@ static int multiq_tune(struct Qdisc *sch, struct nlattr *opt, for (i = q->bands; i < q->max_bands; i++) { if (q->queues[i] != &noop_qdisc) { struct Qdisc *child = q->queues[i]; + q->queues[i] = &noop_qdisc; - qdisc_tree_reduce_backlog(child, child->q.qlen, - child->qstats.backlog); + qdisc_tree_flush_backlog(child); qdisc_put(child); } } @@ -225,9 +225,7 @@ static int multiq_tune(struct Qdisc *sch, struct nlattr *opt, qdisc_hash_add(child, true); if (old != &noop_qdisc) { - qdisc_tree_reduce_backlog(old, - old->q.qlen, - old->qstats.backlog); + qdisc_tree_flush_backlog(old); qdisc_put(old); } sch_tree_unlock(sch); diff --git a/net/sched/sch_prio.c b/net/sched/sch_prio.c index dfb06d5bfacc..d519b21535b3 100644 --- a/net/sched/sch_prio.c +++ b/net/sched/sch_prio.c @@ -216,12 +216,8 @@ static int prio_tune(struct Qdisc *sch, struct nlattr *opt, q->bands = qopt->bands; memcpy(q->prio2band, qopt->priomap, TC_PRIO_MAX+1); - for (i = q->bands; i < oldbands; i++) { - struct Qdisc *child = q->queues[i]; - - qdisc_tree_reduce_backlog(child, child->q.qlen, - child->qstats.backlog); - } + for (i = q->bands; i < oldbands; i++) + qdisc_tree_flush_backlog(q->queues[i]); for (i = oldbands; i < q->bands; i++) { q->queues[i] = queues[i]; diff --git a/net/sched/sch_qfq.c b/net/sched/sch_qfq.c index 9fbda3ec5861..1589364b54da 100644 --- a/net/sched/sch_qfq.c +++ b/net/sched/sch_qfq.c @@ -217,15 +217,6 @@ static struct qfq_class *qfq_find_class(struct Qdisc *sch, u32 classid) return container_of(clc, struct qfq_class, common); } -static void qfq_purge_queue(struct qfq_class *cl) -{ - unsigned int len = cl->qdisc->q.qlen; - unsigned int backlog = cl->qdisc->qstats.backlog; - - qdisc_reset(cl->qdisc); - qdisc_tree_reduce_backlog(cl->qdisc, len, backlog); -} - static const struct nla_policy qfq_policy[TCA_QFQ_MAX + 1] = { [TCA_QFQ_WEIGHT] = { .type = NLA_U32 }, [TCA_QFQ_LMAX] = { .type = NLA_U32 }, @@ -551,7 +542,7 @@ static int qfq_delete_class(struct Qdisc *sch, unsigned long arg) sch_tree_lock(sch); - qfq_purge_queue(cl); + qdisc_purge_queue(cl->qdisc); qdisc_class_hash_remove(&q->clhash, &cl->common); sch_tree_unlock(sch); diff --git a/net/sched/sch_red.c b/net/sched/sch_red.c index 9df9942340ea..4e8c0abf6194 100644 --- a/net/sched/sch_red.c +++ b/net/sched/sch_red.c @@ -233,8 +233,7 @@ static int red_change(struct Qdisc *sch, struct nlattr *opt, q->flags = ctl->flags; q->limit = ctl->limit; if (child) { - qdisc_tree_reduce_backlog(q->qdisc, q->qdisc->q.qlen, - q->qdisc->qstats.backlog); + qdisc_tree_flush_backlog(q->qdisc); old_child = q->qdisc; q->qdisc = child; } diff --git a/net/sched/sch_sfb.c b/net/sched/sch_sfb.c index bab506b01a32..2419fdb75966 100644 --- a/net/sched/sch_sfb.c +++ b/net/sched/sch_sfb.c @@ -521,8 +521,7 @@ static int sfb_change(struct Qdisc *sch, struct nlattr *opt, qdisc_hash_add(child, true); sch_tree_lock(sch); - qdisc_tree_reduce_backlog(q->qdisc, q->qdisc->q.qlen, - q->qdisc->qstats.backlog); + qdisc_tree_flush_backlog(q->qdisc); qdisc_put(q->qdisc); q->qdisc = child; diff --git a/net/sched/sch_tbf.c b/net/sched/sch_tbf.c index 7f272a9070c5..f71578dbb9e3 100644 --- a/net/sched/sch_tbf.c +++ b/net/sched/sch_tbf.c @@ -391,8 +391,7 @@ static int tbf_change(struct Qdisc *sch, struct nlattr *opt, sch_tree_lock(sch); if (child) { - qdisc_tree_reduce_backlog(q->qdisc, q->qdisc->q.qlen, - q->qdisc->qstats.backlog); + qdisc_tree_flush_backlog(q->qdisc); qdisc_put(q->qdisc); q->qdisc = child; } -- cgit v1.2.3 From 3c446e6f96997f2a95bf0037ef463802162d2323 Mon Sep 17 00:00:00 2001 From: Jiri Slaby Date: Fri, 29 Mar 2019 12:19:46 +0100 Subject: kcm: switch order of device registration to fix a crash When kcm is loaded while many processes try to create a KCM socket, a crash occurs: BUG: unable to handle kernel NULL pointer dereference at 000000000000000e IP: mutex_lock+0x27/0x40 kernel/locking/mutex.c:240 PGD 8000000016ef2067 P4D 8000000016ef2067 PUD 3d6e9067 PMD 0 Oops: 0002 [#1] SMP KASAN PTI CPU: 0 PID: 7005 Comm: syz-executor.5 Not tainted 4.12.14-396-default #1 SLE15-SP1 (unreleased) RIP: 0010:mutex_lock+0x27/0x40 kernel/locking/mutex.c:240 RSP: 0018:ffff88000d487a00 EFLAGS: 00010246 RAX: 0000000000000000 RBX: 000000000000000e RCX: 1ffff100082b0719 ... CR2: 000000000000000e CR3: 000000004b1bc003 CR4: 0000000000060ef0 Call Trace: kcm_create+0x600/0xbf0 [kcm] __sock_create+0x324/0x750 net/socket.c:1272 ... This is due to race between sock_create and unfinished register_pernet_device. kcm_create tries to do "net_generic(net, kcm_net_id)". but kcm_net_id is not initialized yet. So switch the order of the two to close the race. This can be reproduced with mutiple processes doing socket(PF_KCM, ...) and one process doing module removal. Fixes: ab7ac4eb9832 ("kcm: Kernel Connection Multiplexor module") Reviewed-by: Michal Kubecek Signed-off-by: Jiri Slaby Signed-off-by: David S. Miller --- net/kcm/kcmsock.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/net/kcm/kcmsock.c b/net/kcm/kcmsock.c index c5c5ab6c5a1c..44fdc641710d 100644 --- a/net/kcm/kcmsock.c +++ b/net/kcm/kcmsock.c @@ -2054,14 +2054,14 @@ static int __init kcm_init(void) if (err) goto fail; - err = sock_register(&kcm_family_ops); - if (err) - goto sock_register_fail; - err = register_pernet_device(&kcm_net_ops); if (err) goto net_ops_fail; + err = sock_register(&kcm_family_ops); + if (err) + goto sock_register_fail; + err = kcm_proc_init(); if (err) goto proc_init_fail; @@ -2069,12 +2069,12 @@ static int __init kcm_init(void) return 0; proc_init_fail: - unregister_pernet_device(&kcm_net_ops); - -net_ops_fail: sock_unregister(PF_KCM); sock_register_fail: + unregister_pernet_device(&kcm_net_ops); + +net_ops_fail: proto_unregister(&kcm_proto); fail: @@ -2090,8 +2090,8 @@ fail: static void __exit kcm_exit(void) { kcm_proc_exit(); - unregister_pernet_device(&kcm_net_ops); sock_unregister(PF_KCM); + unregister_pernet_device(&kcm_net_ops); proto_unregister(&kcm_proto); destroy_workqueue(kcm_wq); -- cgit v1.2.3 From f7ee799a51ddbcc205ef615fe424fb5084e9e0aa Mon Sep 17 00:00:00 2001 From: Pieter Jansen van Vuuren Date: Fri, 29 Mar 2019 19:04:43 -0700 Subject: nfp: flower: replace CFI with vlan present Replace vlan CFI bit with a vlan present bit that indicates the presence of a vlan tag. Previously the driver incorrectly assumed that an vlan id of 0 is not matchable, therefore we indicate vlan presence with a vlan present bit. Fixes: 5571e8c9f241 ("nfp: extend flower matching capabilities") Signed-off-by: Pieter Jansen van Vuuren Signed-off-by: Louis Peens Reviewed-by: Jakub Kicinski Signed-off-by: David S. Miller --- drivers/net/ethernet/netronome/nfp/flower/cmsg.h | 2 +- drivers/net/ethernet/netronome/nfp/flower/match.c | 27 +++++++++++------------ 2 files changed, 14 insertions(+), 15 deletions(-) diff --git a/drivers/net/ethernet/netronome/nfp/flower/cmsg.h b/drivers/net/ethernet/netronome/nfp/flower/cmsg.h index 4fcaf11ed56e..08ed777b86d2 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/cmsg.h +++ b/drivers/net/ethernet/netronome/nfp/flower/cmsg.h @@ -26,7 +26,7 @@ #define NFP_FLOWER_LAYER2_GENEVE_OP BIT(6) #define NFP_FLOWER_MASK_VLAN_PRIO GENMASK(15, 13) -#define NFP_FLOWER_MASK_VLAN_CFI BIT(12) +#define NFP_FLOWER_MASK_VLAN_PRESENT BIT(12) #define NFP_FLOWER_MASK_VLAN_VID GENMASK(11, 0) #define NFP_FLOWER_MASK_MPLS_LB GENMASK(31, 12) diff --git a/drivers/net/ethernet/netronome/nfp/flower/match.c b/drivers/net/ethernet/netronome/nfp/flower/match.c index e03c8ef2c28c..9b8b843d0340 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/match.c +++ b/drivers/net/ethernet/netronome/nfp/flower/match.c @@ -30,20 +30,19 @@ nfp_flower_compile_meta_tci(struct nfp_flower_meta_tci *ext, flow_rule_match_vlan(rule, &match); /* Populate the tci field. */ - if (match.key->vlan_id || match.key->vlan_priority) { - tmp_tci = FIELD_PREP(NFP_FLOWER_MASK_VLAN_PRIO, - match.key->vlan_priority) | - FIELD_PREP(NFP_FLOWER_MASK_VLAN_VID, - match.key->vlan_id) | - NFP_FLOWER_MASK_VLAN_CFI; - ext->tci = cpu_to_be16(tmp_tci); - tmp_tci = FIELD_PREP(NFP_FLOWER_MASK_VLAN_PRIO, - match.mask->vlan_priority) | - FIELD_PREP(NFP_FLOWER_MASK_VLAN_VID, - match.mask->vlan_id) | - NFP_FLOWER_MASK_VLAN_CFI; - msk->tci = cpu_to_be16(tmp_tci); - } + tmp_tci = NFP_FLOWER_MASK_VLAN_PRESENT; + tmp_tci |= FIELD_PREP(NFP_FLOWER_MASK_VLAN_PRIO, + match.key->vlan_priority) | + FIELD_PREP(NFP_FLOWER_MASK_VLAN_VID, + match.key->vlan_id); + ext->tci = cpu_to_be16(tmp_tci); + + tmp_tci = NFP_FLOWER_MASK_VLAN_PRESENT; + tmp_tci |= FIELD_PREP(NFP_FLOWER_MASK_VLAN_PRIO, + match.mask->vlan_priority) | + FIELD_PREP(NFP_FLOWER_MASK_VLAN_VID, + match.mask->vlan_id); + msk->tci = cpu_to_be16(tmp_tci); } } -- cgit v1.2.3 From 42cd5484a22f1a1b947e21e2af65fa7dab09d017 Mon Sep 17 00:00:00 2001 From: Pieter Jansen van Vuuren Date: Fri, 29 Mar 2019 19:04:44 -0700 Subject: nfp: flower: remove vlan CFI bit from push vlan action We no longer set CFI when pushing vlan tags, therefore we remove the CFI bit from push vlan. Fixes: 1a1e586f54bf ("nfp: add basic action capabilities to flower offloads") Signed-off-by: Pieter Jansen van Vuuren Signed-off-by: Louis Peens Reviewed-by: Jakub Kicinski Signed-off-by: David S. Miller --- drivers/net/ethernet/netronome/nfp/flower/action.c | 3 +-- drivers/net/ethernet/netronome/nfp/flower/cmsg.h | 1 - 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/net/ethernet/netronome/nfp/flower/action.c b/drivers/net/ethernet/netronome/nfp/flower/action.c index eeda4ed98333..e336f6ee94f5 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/action.c +++ b/drivers/net/ethernet/netronome/nfp/flower/action.c @@ -48,8 +48,7 @@ nfp_fl_push_vlan(struct nfp_fl_push_vlan *push_vlan, tmp_push_vlan_tci = FIELD_PREP(NFP_FL_PUSH_VLAN_PRIO, act->vlan.prio) | - FIELD_PREP(NFP_FL_PUSH_VLAN_VID, act->vlan.vid) | - NFP_FL_PUSH_VLAN_CFI; + FIELD_PREP(NFP_FL_PUSH_VLAN_VID, act->vlan.vid); push_vlan->vlan_tci = cpu_to_be16(tmp_push_vlan_tci); } diff --git a/drivers/net/ethernet/netronome/nfp/flower/cmsg.h b/drivers/net/ethernet/netronome/nfp/flower/cmsg.h index 08ed777b86d2..0ed51e79db00 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/cmsg.h +++ b/drivers/net/ethernet/netronome/nfp/flower/cmsg.h @@ -82,7 +82,6 @@ #define NFP_FL_OUT_FLAGS_TYPE_IDX GENMASK(2, 0) #define NFP_FL_PUSH_VLAN_PRIO GENMASK(15, 13) -#define NFP_FL_PUSH_VLAN_CFI BIT(12) #define NFP_FL_PUSH_VLAN_VID GENMASK(11, 0) #define IPV6_FLOW_LABEL_MASK cpu_to_be32(0x000fffff) -- cgit v1.2.3 From 09279e615c81ce55e04835970601ae286e3facbe Mon Sep 17 00:00:00 2001 From: Xin Long Date: Sun, 31 Mar 2019 16:58:15 +0800 Subject: sctp: initialize _pad of sockaddr_in before copying to user memory Syzbot report a kernel-infoleak: BUG: KMSAN: kernel-infoleak in _copy_to_user+0x16b/0x1f0 lib/usercopy.c:32 Call Trace: _copy_to_user+0x16b/0x1f0 lib/usercopy.c:32 copy_to_user include/linux/uaccess.h:174 [inline] sctp_getsockopt_peer_addrs net/sctp/socket.c:5911 [inline] sctp_getsockopt+0x1668e/0x17f70 net/sctp/socket.c:7562 ... Uninit was stored to memory at: sctp_transport_init net/sctp/transport.c:61 [inline] sctp_transport_new+0x16d/0x9a0 net/sctp/transport.c:115 sctp_assoc_add_peer+0x532/0x1f70 net/sctp/associola.c:637 sctp_process_param net/sctp/sm_make_chunk.c:2548 [inline] sctp_process_init+0x1a1b/0x3ed0 net/sctp/sm_make_chunk.c:2361 ... Bytes 8-15 of 16 are uninitialized It was caused by that th _pad field (the 8-15 bytes) of a v4 addr (saved in struct sockaddr_in) wasn't initialized, but directly copied to user memory in sctp_getsockopt_peer_addrs(). So fix it by calling memset(addr->v4.sin_zero, 0, 8) to initialize _pad of sockaddr_in before copying it to user memory in sctp_v4_addr_to_user(), as sctp_v6_addr_to_user() does. Reported-by: syzbot+86b5c7c236a22616a72f@syzkaller.appspotmail.com Signed-off-by: Xin Long Tested-by: Alexander Potapenko Acked-by: Neil Horman Signed-off-by: David S. Miller --- net/sctp/protocol.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c index 6abc8b274270..951afdeea5e9 100644 --- a/net/sctp/protocol.c +++ b/net/sctp/protocol.c @@ -600,6 +600,7 @@ out: static int sctp_v4_addr_to_user(struct sctp_sock *sp, union sctp_addr *addr) { /* No address mapping for V4 sockets */ + memset(addr->v4.sin_zero, 0, sizeof(addr->v4.sin_zero)); return sizeof(struct sockaddr_in); } -- cgit v1.2.3 From 1d3ff0950e2b40dc861b1739029649d03f591820 Mon Sep 17 00:00:00 2001 From: YueHaibing Date: Mon, 1 Apr 2019 09:35:54 +0800 Subject: dccp: Fix memleak in __feat_register_sp If dccp_feat_push_change fails, we forget free the mem which is alloced by kmemdup in dccp_feat_clone_sp_val. Reported-by: Hulk Robot Fixes: e8ef967a54f4 ("dccp: Registration routines for changing feature values") Reviewed-by: Mukesh Ojha Signed-off-by: YueHaibing Signed-off-by: David S. Miller --- net/dccp/feat.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/net/dccp/feat.c b/net/dccp/feat.c index f227f002c73d..db87d9f58019 100644 --- a/net/dccp/feat.c +++ b/net/dccp/feat.c @@ -738,7 +738,12 @@ static int __feat_register_sp(struct list_head *fn, u8 feat, u8 is_local, if (dccp_feat_clone_sp_val(&fval, sp_val, sp_len)) return -ENOMEM; - return dccp_feat_push_change(fn, feat, is_local, mandatory, &fval); + if (dccp_feat_push_change(fn, feat, is_local, mandatory, &fval)) { + kfree(fval.sp.vec); + return -ENOMEM; + } + + return 0; } /** -- cgit v1.2.3 From 2d85978341e6a32e7443d9f28639da254d53f400 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Thu, 28 Mar 2019 17:31:30 +0300 Subject: drm/mediatek: Fix an error code in mtk_hdmi_dt_parse_pdata() We don't want to overwrite "ret", it already holds the correct error code. The "regmap" variable might be a valid pointer as this point. Fixes: 8f83f26891e1 ("drm/mediatek: Add HDMI support") Signed-off-by: Dan Carpenter Signed-off-by: CK Hu --- drivers/gpu/drm/mediatek/mtk_hdmi.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/gpu/drm/mediatek/mtk_hdmi.c b/drivers/gpu/drm/mediatek/mtk_hdmi.c index 915cc84621ae..543a25e5765e 100644 --- a/drivers/gpu/drm/mediatek/mtk_hdmi.c +++ b/drivers/gpu/drm/mediatek/mtk_hdmi.c @@ -1480,7 +1480,6 @@ static int mtk_hdmi_dt_parse_pdata(struct mtk_hdmi *hdmi, if (IS_ERR(regmap)) ret = PTR_ERR(regmap); if (ret) { - ret = PTR_ERR(regmap); dev_err(dev, "Failed to get system configuration registers: %d\n", ret); -- cgit v1.2.3 From 20bb907f7dc82ecc9e135ad7067ac7eb69c81222 Mon Sep 17 00:00:00 2001 From: Andreas Kemnade Date: Sat, 23 Feb 2019 12:47:54 +0100 Subject: mfd: twl-core: Disable IRQ while suspended Since commit 6e2bd956936 ("i2c: omap: Use noirq system sleep pm ops to idle device for suspend") on gta04 we have handle_twl4030_pih() called in situations where pm_runtime_get() in i2c-omap.c returns -EACCES. [ 86.474365] Freezing remaining freezable tasks ... (elapsed 0.002 seconds) done. [ 86.485473] printk: Suspending console(s) (use no_console_suspend to debug) [ 86.555572] Disabling non-boot CPUs ... [ 86.555664] Successfully put all powerdomains to target state [ 86.563720] twl: Read failed (mod 1, reg 0x01 count 1) [ 86.563751] twl4030: I2C error -13 reading PIH ISR [ 86.563812] twl: Read failed (mod 1, reg 0x01 count 1) [ 86.563812] twl4030: I2C error -13 reading PIH ISR [ 86.563873] twl: Read failed (mod 1, reg 0x01 count 1) [ 86.563903] twl4030: I2C error -13 reading PIH ISR This happens when we wakeup via something behing twl4030 (powerbutton or rtc alarm). This goes on for minutes until the system is finally resumed. Disable the irq on suspend and enable it on resume to avoid having i2c access problems when the irq registers are checked. Fixes: 6e2bd956936 ("i2c: omap: Use noirq system sleep pm ops to idle device for suspend") Signed-off-by: Andreas Kemnade Tested-by: Tony Lindgren Signed-off-by: Lee Jones --- drivers/mfd/twl-core.c | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/drivers/mfd/twl-core.c b/drivers/mfd/twl-core.c index 299016bc46d9..104477b512a2 100644 --- a/drivers/mfd/twl-core.c +++ b/drivers/mfd/twl-core.c @@ -1245,6 +1245,28 @@ free: return status; } +static int __maybe_unused twl_suspend(struct device *dev) +{ + struct i2c_client *client = to_i2c_client(dev); + + if (client->irq) + disable_irq(client->irq); + + return 0; +} + +static int __maybe_unused twl_resume(struct device *dev) +{ + struct i2c_client *client = to_i2c_client(dev); + + if (client->irq) + enable_irq(client->irq); + + return 0; +} + +static SIMPLE_DEV_PM_OPS(twl_dev_pm_ops, twl_suspend, twl_resume); + static const struct i2c_device_id twl_ids[] = { { "twl4030", TWL4030_VAUX2 }, /* "Triton 2" */ { "twl5030", 0 }, /* T2 updated */ @@ -1262,6 +1284,7 @@ static const struct i2c_device_id twl_ids[] = { /* One Client Driver , 4 Clients */ static struct i2c_driver twl_driver = { .driver.name = DRIVER_NAME, + .driver.pm = &twl_dev_pm_ops, .id_table = twl_ids, .probe = twl_probe, .remove = twl_remove, -- cgit v1.2.3 From 1d71670e5e09680202c975c2332bcd2b64e8091f Mon Sep 17 00:00:00 2001 From: Baolin Wang Date: Mon, 18 Mar 2019 11:26:51 +0800 Subject: mfd: sc27xx: Use SoC compatible string for PMIC devices We should use SoC compatible string in stead of wildcard string for PMIC child devices. Fixes: 0419a75b1808 (arm64: dts: sprd: Remove wildcard compatible string) Signed-off-by: Baolin Wang Signed-off-by: Lee Jones --- drivers/mfd/sprd-sc27xx-spi.c | 42 +++++++++++++++++++++--------------------- 1 file changed, 21 insertions(+), 21 deletions(-) diff --git a/drivers/mfd/sprd-sc27xx-spi.c b/drivers/mfd/sprd-sc27xx-spi.c index 69df27769c21..43ac71691fe4 100644 --- a/drivers/mfd/sprd-sc27xx-spi.c +++ b/drivers/mfd/sprd-sc27xx-spi.c @@ -53,67 +53,67 @@ static const struct sprd_pmic_data sc2731_data = { static const struct mfd_cell sprd_pmic_devs[] = { { .name = "sc27xx-wdt", - .of_compatible = "sprd,sc27xx-wdt", + .of_compatible = "sprd,sc2731-wdt", }, { .name = "sc27xx-rtc", - .of_compatible = "sprd,sc27xx-rtc", + .of_compatible = "sprd,sc2731-rtc", }, { .name = "sc27xx-charger", - .of_compatible = "sprd,sc27xx-charger", + .of_compatible = "sprd,sc2731-charger", }, { .name = "sc27xx-chg-timer", - .of_compatible = "sprd,sc27xx-chg-timer", + .of_compatible = "sprd,sc2731-chg-timer", }, { .name = "sc27xx-fast-chg", - .of_compatible = "sprd,sc27xx-fast-chg", + .of_compatible = "sprd,sc2731-fast-chg", }, { .name = "sc27xx-chg-wdt", - .of_compatible = "sprd,sc27xx-chg-wdt", + .of_compatible = "sprd,sc2731-chg-wdt", }, { .name = "sc27xx-typec", - .of_compatible = "sprd,sc27xx-typec", + .of_compatible = "sprd,sc2731-typec", }, { .name = "sc27xx-flash", - .of_compatible = "sprd,sc27xx-flash", + .of_compatible = "sprd,sc2731-flash", }, { .name = "sc27xx-eic", - .of_compatible = "sprd,sc27xx-eic", + .of_compatible = "sprd,sc2731-eic", }, { .name = "sc27xx-efuse", - .of_compatible = "sprd,sc27xx-efuse", + .of_compatible = "sprd,sc2731-efuse", }, { .name = "sc27xx-thermal", - .of_compatible = "sprd,sc27xx-thermal", + .of_compatible = "sprd,sc2731-thermal", }, { .name = "sc27xx-adc", - .of_compatible = "sprd,sc27xx-adc", + .of_compatible = "sprd,sc2731-adc", }, { .name = "sc27xx-audio-codec", - .of_compatible = "sprd,sc27xx-audio-codec", + .of_compatible = "sprd,sc2731-audio-codec", }, { .name = "sc27xx-regulator", - .of_compatible = "sprd,sc27xx-regulator", + .of_compatible = "sprd,sc2731-regulator", }, { .name = "sc27xx-vibrator", - .of_compatible = "sprd,sc27xx-vibrator", + .of_compatible = "sprd,sc2731-vibrator", }, { .name = "sc27xx-keypad-led", - .of_compatible = "sprd,sc27xx-keypad-led", + .of_compatible = "sprd,sc2731-keypad-led", }, { .name = "sc27xx-bltc", - .of_compatible = "sprd,sc27xx-bltc", + .of_compatible = "sprd,sc2731-bltc", }, { .name = "sc27xx-fgu", - .of_compatible = "sprd,sc27xx-fgu", + .of_compatible = "sprd,sc2731-fgu", }, { .name = "sc27xx-7sreset", - .of_compatible = "sprd,sc27xx-7sreset", + .of_compatible = "sprd,sc2731-7sreset", }, { .name = "sc27xx-poweroff", - .of_compatible = "sprd,sc27xx-poweroff", + .of_compatible = "sprd,sc2731-poweroff", }, { .name = "sc27xx-syscon", - .of_compatible = "sprd,sc27xx-syscon", + .of_compatible = "sprd,sc2731-syscon", }, }; -- cgit v1.2.3 From 6246f283d5e02ac757bd8d9bacde8fdc54c4582d Mon Sep 17 00:00:00 2001 From: Jerome Brunet Date: Mon, 1 Apr 2019 15:03:54 +0200 Subject: ASoC: dpcm: skip missing substream while applying symmetry If for any reason, the backend does not have the requested substream (like capture on a playback only backend), the BE will be skipped in dpcm_be_dai_startup(). However, dpcm_apply_symmetry() does not skip those BE and will dereference the be_substream (NULL) pointer anyway. Like in dpcm_be_dai_startup(), just skip those BE. Fixes: 906c7d690c3b ("ASoC: dpcm: Apply symmetry for DPCM") Signed-off-by: Jerome Brunet Signed-off-by: Mark Brown --- sound/soc/soc-pcm.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/sound/soc/soc-pcm.c b/sound/soc/soc-pcm.c index 7fe5321000e8..2d5d5cac4ba6 100644 --- a/sound/soc/soc-pcm.c +++ b/sound/soc/soc-pcm.c @@ -1911,10 +1911,15 @@ static int dpcm_apply_symmetry(struct snd_pcm_substream *fe_substream, struct snd_soc_pcm_runtime *be = dpcm->be; struct snd_pcm_substream *be_substream = snd_soc_dpcm_get_substream(be, stream); - struct snd_soc_pcm_runtime *rtd = be_substream->private_data; + struct snd_soc_pcm_runtime *rtd; struct snd_soc_dai *codec_dai; int i; + /* A backend may not have the requested substream */ + if (!be_substream) + continue; + + rtd = be_substream->private_data; if (rtd->dai_link->be_hw_params_fixup) continue; -- cgit v1.2.3 From 6e3bfcff191ec9476ca5ef9b2ad85a15ba829374 Mon Sep 17 00:00:00 2001 From: Ranjani Sridharan Date: Fri, 1 Mar 2019 19:08:53 -0600 Subject: ASoC: dapm: set power_check callback for widgets that shouldnt be always on Currently, buffers, schedulers, src's, encoders, decoders and effect type dapm widgets remain always on as their power_check method is not set. Setting this callback allows these widgets in the audio path to be powered managed properly. Signed-off-by: Ranjani Sridharan Signed-off-by: Pierre-Louis Bossart Signed-off-by: Mark Brown --- sound/soc/soc-dapm.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/sound/soc/soc-dapm.c b/sound/soc/soc-dapm.c index 67b032ca1601..0382a47b30bd 100644 --- a/sound/soc/soc-dapm.c +++ b/sound/soc/soc-dapm.c @@ -3650,6 +3650,13 @@ snd_soc_dapm_new_control_unlocked(struct snd_soc_dapm_context *dapm, case snd_soc_dapm_dac: case snd_soc_dapm_aif_in: case snd_soc_dapm_pga: + case snd_soc_dapm_buffer: + case snd_soc_dapm_scheduler: + case snd_soc_dapm_effect: + case snd_soc_dapm_src: + case snd_soc_dapm_asrc: + case snd_soc_dapm_encoder: + case snd_soc_dapm_decoder: case snd_soc_dapm_out_drv: case snd_soc_dapm_micbias: case snd_soc_dapm_line: -- cgit v1.2.3 From 8742dc86d0c7a9628117a989c11f04a9b6b898f3 Mon Sep 17 00:00:00 2001 From: Steffen Klassert Date: Tue, 26 Feb 2019 07:04:50 +0100 Subject: xfrm4: Fix uninitialized memory read in _decode_session4 We currently don't reload pointers pointing into skb header after doing pskb_may_pull() in _decode_session4(). So in case pskb_may_pull() changed the pointers, we read from random memory. Fix this by putting all the needed infos on the stack, so that we don't need to access the header pointers after doing pskb_may_pull(). Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Steffen Klassert --- net/ipv4/xfrm4_policy.c | 24 +++++++++++++----------- 1 file changed, 13 insertions(+), 11 deletions(-) diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c index d73a6d6652f6..2b144b92ae46 100644 --- a/net/ipv4/xfrm4_policy.c +++ b/net/ipv4/xfrm4_policy.c @@ -111,7 +111,8 @@ static void _decode_session4(struct sk_buff *skb, struct flowi *fl, int reverse) { const struct iphdr *iph = ip_hdr(skb); - u8 *xprth = skb_network_header(skb) + iph->ihl * 4; + int ihl = iph->ihl; + u8 *xprth = skb_network_header(skb) + ihl * 4; struct flowi4 *fl4 = &fl->u.ip4; int oif = 0; @@ -122,6 +123,11 @@ _decode_session4(struct sk_buff *skb, struct flowi *fl, int reverse) fl4->flowi4_mark = skb->mark; fl4->flowi4_oif = reverse ? skb->skb_iif : oif; + fl4->flowi4_proto = iph->protocol; + fl4->daddr = reverse ? iph->saddr : iph->daddr; + fl4->saddr = reverse ? iph->daddr : iph->saddr; + fl4->flowi4_tos = iph->tos; + if (!ip_is_fragment(iph)) { switch (iph->protocol) { case IPPROTO_UDP: @@ -133,7 +139,7 @@ _decode_session4(struct sk_buff *skb, struct flowi *fl, int reverse) pskb_may_pull(skb, xprth + 4 - skb->data)) { __be16 *ports; - xprth = skb_network_header(skb) + iph->ihl * 4; + xprth = skb_network_header(skb) + ihl * 4; ports = (__be16 *)xprth; fl4->fl4_sport = ports[!!reverse]; @@ -146,7 +152,7 @@ _decode_session4(struct sk_buff *skb, struct flowi *fl, int reverse) pskb_may_pull(skb, xprth + 2 - skb->data)) { u8 *icmp; - xprth = skb_network_header(skb) + iph->ihl * 4; + xprth = skb_network_header(skb) + ihl * 4; icmp = xprth; fl4->fl4_icmp_type = icmp[0]; @@ -159,7 +165,7 @@ _decode_session4(struct sk_buff *skb, struct flowi *fl, int reverse) pskb_may_pull(skb, xprth + 4 - skb->data)) { __be32 *ehdr; - xprth = skb_network_header(skb) + iph->ihl * 4; + xprth = skb_network_header(skb) + ihl * 4; ehdr = (__be32 *)xprth; fl4->fl4_ipsec_spi = ehdr[0]; @@ -171,7 +177,7 @@ _decode_session4(struct sk_buff *skb, struct flowi *fl, int reverse) pskb_may_pull(skb, xprth + 8 - skb->data)) { __be32 *ah_hdr; - xprth = skb_network_header(skb) + iph->ihl * 4; + xprth = skb_network_header(skb) + ihl * 4; ah_hdr = (__be32 *)xprth; fl4->fl4_ipsec_spi = ah_hdr[1]; @@ -183,7 +189,7 @@ _decode_session4(struct sk_buff *skb, struct flowi *fl, int reverse) pskb_may_pull(skb, xprth + 4 - skb->data)) { __be16 *ipcomp_hdr; - xprth = skb_network_header(skb) + iph->ihl * 4; + xprth = skb_network_header(skb) + ihl * 4; ipcomp_hdr = (__be16 *)xprth; fl4->fl4_ipsec_spi = htonl(ntohs(ipcomp_hdr[1])); @@ -196,7 +202,7 @@ _decode_session4(struct sk_buff *skb, struct flowi *fl, int reverse) __be16 *greflags; __be32 *gre_hdr; - xprth = skb_network_header(skb) + iph->ihl * 4; + xprth = skb_network_header(skb) + ihl * 4; greflags = (__be16 *)xprth; gre_hdr = (__be32 *)xprth; @@ -213,10 +219,6 @@ _decode_session4(struct sk_buff *skb, struct flowi *fl, int reverse) break; } } - fl4->flowi4_proto = iph->protocol; - fl4->daddr = reverse ? iph->saddr : iph->daddr; - fl4->saddr = reverse ? iph->daddr : iph->saddr; - fl4->flowi4_tos = iph->tos; } static void xfrm4_update_pmtu(struct dst_entry *dst, struct sock *sk, -- cgit v1.2.3 From ea5c7eba216e832906e594799b8670f1954a588c Mon Sep 17 00:00:00 2001 From: Jian-Hong Pan Date: Mon, 1 Apr 2019 11:25:05 +0800 Subject: ALSA: hda/realtek: Enable headset MIC of Acer TravelMate B114-21 with ALC233 The Acer TravelMate B114-21 laptop cannot detect and record sound from headset MIC. This patch adds the ALC233_FIXUP_ACER_HEADSET_MIC HDA verb quirk chained with ALC233_FIXUP_ASUS_MIC_NO_PRESENCE pin quirk to fix this issue. [ fixed the missing brace and reordered the entry -- tiwai ] Signed-off-by: Jian-Hong Pan Signed-off-by: Daniel Drake Reviewed-by: Kailang Yang Cc: Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_realtek.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index a3fb3d4c5730..715ab2342151 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -5663,6 +5663,7 @@ enum { ALC233_FIXUP_ASUS_MIC_NO_PRESENCE, ALC233_FIXUP_EAPD_COEF_AND_MIC_NO_PRESENCE, ALC233_FIXUP_LENOVO_MULTI_CODECS, + ALC233_FIXUP_ACER_HEADSET_MIC, ALC294_FIXUP_LENOVO_MIC_LOCATION, ALC225_FIXUP_DELL_WYSE_MIC_NO_PRESENCE, ALC700_FIXUP_INTEL_REFERENCE, @@ -6490,6 +6491,16 @@ static const struct hda_fixup alc269_fixups[] = { .type = HDA_FIXUP_FUNC, .v.func = alc233_alc662_fixup_lenovo_dual_codecs, }, + [ALC233_FIXUP_ACER_HEADSET_MIC] = { + .type = HDA_FIXUP_VERBS, + .v.verbs = (const struct hda_verb[]) { + { 0x20, AC_VERB_SET_COEF_INDEX, 0x45 }, + { 0x20, AC_VERB_SET_PROC_COEF, 0x5089 }, + { } + }, + .chained = true, + .chain_id = ALC233_FIXUP_ASUS_MIC_NO_PRESENCE + }, [ALC294_FIXUP_LENOVO_MIC_LOCATION] = { .type = HDA_FIXUP_PINS, .v.pins = (const struct hda_pintbl[]) { @@ -6737,6 +6748,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x1025, 0x1290, "Acer Veriton Z4860G", ALC286_FIXUP_ACER_AIO_HEADSET_MIC), SND_PCI_QUIRK(0x1025, 0x1291, "Acer Veriton Z4660G", ALC286_FIXUP_ACER_AIO_HEADSET_MIC), SND_PCI_QUIRK(0x1025, 0x1308, "Acer Aspire Z24-890", ALC286_FIXUP_ACER_AIO_HEADSET_MIC), + SND_PCI_QUIRK(0x1025, 0x132a, "Acer TravelMate B114-21", ALC233_FIXUP_ACER_HEADSET_MIC), SND_PCI_QUIRK(0x1025, 0x1330, "Acer TravelMate X514-51T", ALC255_FIXUP_ACER_HEADSET_MIC), SND_PCI_QUIRK(0x1028, 0x0470, "Dell M101z", ALC269_FIXUP_DELL_M101Z), SND_PCI_QUIRK(0x1028, 0x054b, "Dell XPS one 2710", ALC275_FIXUP_DELL_XPS), -- cgit v1.2.3 From b5bdbb6ccd1117896bf4fba6bff75336ca423e8c Mon Sep 17 00:00:00 2001 From: Daniel Mentz Date: Fri, 29 Mar 2019 15:48:54 -0700 Subject: ALSA: uapi: #include in asound.h The uapi header asound.h defines types based on struct timespec. We need to #include to get access to the definition of this struct. Previously, we encountered the following error message when building applications with a clang/bionic toolchain: kernel-headers/sound/asound.h:350:19: error: field has incomplete type 'struct timespec' struct timespec trigger_tstamp; ^ The absence of the time.h #include statement does not cause build errors with glibc, because its version of stdlib.h indirectly includes time.h. Signed-off-by: Daniel Mentz Signed-off-by: Takashi Iwai --- include/uapi/sound/asound.h | 1 + 1 file changed, 1 insertion(+) diff --git a/include/uapi/sound/asound.h b/include/uapi/sound/asound.h index 404d4b9ffe76..df1153cea0b7 100644 --- a/include/uapi/sound/asound.h +++ b/include/uapi/sound/asound.h @@ -32,6 +32,7 @@ #ifndef __KERNEL__ #include +#include #endif /* -- cgit v1.2.3 From b9a1ff504b9492ad6beb7d5606e0e3365d4d8499 Mon Sep 17 00:00:00 2001 From: Shenghui Wang Date: Mon, 1 Apr 2019 21:40:36 +0800 Subject: block: use blk_free_flush_queue() to free hctx->fq in blk_mq_init_hctx kfree() can leak the hctx->fq->flush_rq field. Reviewed-by: Ming Lei Signed-off-by: Shenghui Wang Signed-off-by: Jens Axboe --- block/blk-mq.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/block/blk-mq.c b/block/blk-mq.c index 3ff3d7b49969..f3b0d33bdf88 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -2332,7 +2332,7 @@ static int blk_mq_init_hctx(struct request_queue *q, return 0; free_fq: - kfree(hctx->fq); + blk_free_flush_queue(hctx->fq); exit_hctx: if (set->ops->exit_hctx) set->ops->exit_hctx(hctx, hctx_idx); -- cgit v1.2.3 From ff3b74b8e1675c802e09157a56c97ca38a659b9d Mon Sep 17 00:00:00 2001 From: Yufen Yu Date: Tue, 26 Mar 2019 21:19:25 +0800 Subject: blk-mq: add trace block plug and unplug for multiple queues For now, we just trace plug for single queue device or drivers provide .commit_rqs, and have not trace plug for multiple queues device. But, unplug events will be recorded when call blk_mq_flush_plug_list(). Then, trace events will be asymmetrical, just have unplug and without plug. This patch add trace plug and unplug for multiple queues device in blk_mq_make_request(). After that, we can accurately trace plug and unplug for multiple queues. Reviewed-by: Christoph Hellwig Signed-off-by: Yufen Yu Signed-off-by: Jens Axboe --- block/blk-mq.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/block/blk-mq.c b/block/blk-mq.c index f3b0d33bdf88..22074a1e37cd 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -2003,11 +2003,13 @@ static blk_qc_t blk_mq_make_request(struct request_queue *q, struct bio *bio) plug->rq_count--; } blk_add_rq_to_plug(plug, rq); + trace_block_plug(q); blk_mq_put_ctx(data.ctx); if (same_queue_rq) { data.hctx = same_queue_rq->mq_hctx; + trace_block_unplug(q, 1, true); blk_mq_try_issue_directly(data.hctx, same_queue_rq, &cookie, false, true); } -- cgit v1.2.3 From 01b76c32e3f30d54ab8ec1efeed3c6ecef7f6027 Mon Sep 17 00:00:00 2001 From: Bo YU Date: Thu, 28 Mar 2019 03:47:37 -0400 Subject: misc: fastrpc: add checked value for dma_set_mask There be should check return value from dma_set_mask to throw some info if fail to set dma mask. Detected by CoverityScan, CID# 1443983: Error handling issues (CHECKED_RETURN) Fixes: f6f9279f2bf0 ("misc: fastrpc: Add Qualcomm fastrpc basic driver model") Signed-off-by: Bo YU Signed-off-by: Greg Kroah-Hartman --- drivers/misc/fastrpc.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/misc/fastrpc.c b/drivers/misc/fastrpc.c index 39f832d27288..36d0d5c9cfba 100644 --- a/drivers/misc/fastrpc.c +++ b/drivers/misc/fastrpc.c @@ -1184,6 +1184,7 @@ static int fastrpc_cb_probe(struct platform_device *pdev) struct fastrpc_session_ctx *sess; struct device *dev = &pdev->dev; int i, sessions = 0; + int rc; cctx = dev_get_drvdata(dev->parent); if (!cctx) @@ -1213,7 +1214,11 @@ static int fastrpc_cb_probe(struct platform_device *pdev) } cctx->sesscount++; spin_unlock(&cctx->lock); - dma_set_mask(dev, DMA_BIT_MASK(32)); + rc = dma_set_mask(dev, DMA_BIT_MASK(32)); + if (rc) { + dev_err(dev, "32-bit DMA enable failed\n"); + return rc; + } return 0; } -- cgit v1.2.3 From 131ac62253dba79daf4a6d83ab12293d2b9863d3 Mon Sep 17 00:00:00 2001 From: Christian Gromm Date: Tue, 2 Apr 2019 13:39:57 +0200 Subject: staging: most: core: use device description as name This patch uses the device description to clearly identity a device attached to the bus. It is needed as the currently useed mdevX notation is not sufficiant in case more than one network interface controller is being used at the same time. Cc: stable@vger.kernel.org Signed-off-by: Christian Gromm Signed-off-by: Greg Kroah-Hartman --- drivers/staging/most/core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/staging/most/core.c b/drivers/staging/most/core.c index 18936cdb1083..956daf8c3bd2 100644 --- a/drivers/staging/most/core.c +++ b/drivers/staging/most/core.c @@ -1431,7 +1431,7 @@ int most_register_interface(struct most_interface *iface) INIT_LIST_HEAD(&iface->p->channel_list); iface->p->dev_id = id; - snprintf(iface->p->name, STRING_SIZE, "mdev%d", id); + strcpy(iface->p->name, iface->description); iface->dev.init_name = iface->p->name; iface->dev.bus = &mc.bus; iface->dev.parent = &mc.dev; -- cgit v1.2.3 From 99bd5fcc505d65ea9c60619202f0b2d926eabbe9 Mon Sep 17 00:00:00 2001 From: Vineet Gupta Date: Thu, 21 Mar 2019 17:19:37 -0700 Subject: ARC: PAE40: don't panic and instead turn off hw ioc HSDK currently panics when built for HIGHMEM/ARC_HAS_PAE40 because ioc is enabled with default which doesn't work for the 2 non contiguous memory nodes. So get PAE working by disabling ioc instead. Tested with !PAE40 by forcing @ioc_enable=0 and running the glibc testsuite over ssh Signed-off-by: Vineet Gupta --- arch/arc/mm/cache.c | 31 ++++++++++++++++--------------- 1 file changed, 16 insertions(+), 15 deletions(-) diff --git a/arch/arc/mm/cache.c b/arch/arc/mm/cache.c index 4135abec3fb0..63e6e6504699 100644 --- a/arch/arc/mm/cache.c +++ b/arch/arc/mm/cache.c @@ -113,10 +113,24 @@ static void read_decode_cache_bcr_arcv2(int cpu) } READ_BCR(ARC_REG_CLUSTER_BCR, cbcr); - if (cbcr.c) + if (cbcr.c) { ioc_exists = 1; - else + + /* + * As for today we don't support both IOC and ZONE_HIGHMEM enabled + * simultaneously. This happens because as of today IOC aperture covers + * only ZONE_NORMAL (low mem) and any dma transactions outside this + * region won't be HW coherent. + * If we want to use both IOC and ZONE_HIGHMEM we can use + * bounce_buffer to handle dma transactions to HIGHMEM. + * Also it is possible to modify dma_direct cache ops or increase IOC + * aperture size if we are planning to use HIGHMEM without PAE. + */ + if (IS_ENABLED(CONFIG_HIGHMEM) || is_pae40_enabled()) + ioc_enable = 0; + } else { ioc_enable = 0; + } /* HS 2.0 didn't have AUX_VOL */ if (cpuinfo_arc700[cpu].core.family > 0x51) { @@ -1158,19 +1172,6 @@ noinline void __init arc_ioc_setup(void) if (!ioc_enable) return; - /* - * As for today we don't support both IOC and ZONE_HIGHMEM enabled - * simultaneously. This happens because as of today IOC aperture covers - * only ZONE_NORMAL (low mem) and any dma transactions outside this - * region won't be HW coherent. - * If we want to use both IOC and ZONE_HIGHMEM we can use - * bounce_buffer to handle dma transactions to HIGHMEM. - * Also it is possible to modify dma_direct cache ops or increase IOC - * aperture size if we are planning to use HIGHMEM without PAE. - */ - if (IS_ENABLED(CONFIG_HIGHMEM)) - panic("IOC and HIGHMEM can't be used simultaneously"); - /* Flush + invalidate + disable L1 dcache */ __dc_disable(); -- cgit v1.2.3 From 21cee1bd1594b2af6798ddffa97555b4bc3586e1 Mon Sep 17 00:00:00 2001 From: Vineet Gupta Date: Tue, 2 Apr 2019 12:10:44 -0700 Subject: ARC: [hsdk] Make it easier to add PAE40 region to DTB 1. Bump top level address-cells/size-cells nodes to 2 (to ensure all down stream addresses are 64-bits, unless explicitly specified otherwise (in "soc" bus with all peripherals) 2. "memory" also specified with address/size 2 3. Add a commented reference for PAE40 region beyond 4GB physical address space Signed-off-by: Vineet Gupta --- arch/arc/boot/dts/hsdk.dts | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/arch/arc/boot/dts/hsdk.dts b/arch/arc/boot/dts/hsdk.dts index 69bc1c9e8e50..7425bb0f2d1b 100644 --- a/arch/arc/boot/dts/hsdk.dts +++ b/arch/arc/boot/dts/hsdk.dts @@ -18,8 +18,8 @@ model = "snps,hsdk"; compatible = "snps,hsdk"; - #address-cells = <1>; - #size-cells = <1>; + #address-cells = <2>; + #size-cells = <2>; chosen { bootargs = "earlycon=uart8250,mmio32,0xf0005000,115200n8 console=ttyS0,115200n8 debug print-fatal-signals=1"; @@ -105,7 +105,7 @@ #size-cells = <1>; interrupt-parent = <&idu_intc>; - ranges = <0x00000000 0xf0000000 0x10000000>; + ranges = <0x00000000 0x0 0xf0000000 0x10000000>; cgu_rst: reset-controller@8a0 { compatible = "snps,hsdk-reset"; @@ -269,9 +269,10 @@ }; memory@80000000 { - #address-cells = <1>; - #size-cells = <1>; + #address-cells = <2>; + #size-cells = <2>; device_type = "memory"; - reg = <0x80000000 0x40000000>; /* 1 GiB */ + reg = <0x0 0x80000000 0x0 0x40000000>; /* 1 GB lowmem */ + /* 0x1 0x00000000 0x0 0x40000000>; 1 GB highmem */ }; }; -- cgit v1.2.3 From b2e54b09a3d29c4db883b920274ca8dca4d9f04d Mon Sep 17 00:00:00 2001 From: Sheena Mira-ato Date: Mon, 1 Apr 2019 13:04:42 +1300 Subject: ip6_tunnel: Match to ARPHRD_TUNNEL6 for dev type The device type for ip6 tunnels is set to ARPHRD_TUNNEL6. However, the ip4ip6_err function is expecting the device type of the tunnel to be ARPHRD_TUNNEL. Since the device types do not match, the function exits and the ICMP error packet is not sent to the originating host. Note that the device type for IPv4 tunnels is set to ARPHRD_TUNNEL. Fix is to expect a tunnel device type of ARPHRD_TUNNEL6 instead. Now the tunnel device type matches and the ICMP error packet is sent to the originating host. Signed-off-by: Sheena Mira-ato Signed-off-by: David S. Miller --- net/ipv6/ip6_tunnel.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index 0c6403cf8b52..ade1390c6348 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -627,7 +627,7 @@ ip4ip6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, rt = ip_route_output_ports(dev_net(skb->dev), &fl4, NULL, eiph->daddr, eiph->saddr, 0, 0, IPPROTO_IPIP, RT_TOS(eiph->tos), 0); - if (IS_ERR(rt) || rt->dst.dev->type != ARPHRD_TUNNEL) { + if (IS_ERR(rt) || rt->dst.dev->type != ARPHRD_TUNNEL6) { if (!IS_ERR(rt)) ip_rt_put(rt); goto out; @@ -636,7 +636,7 @@ ip4ip6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, } else { if (ip_route_input(skb2, eiph->daddr, eiph->saddr, eiph->tos, skb2->dev) || - skb_dst(skb2)->dev->type != ARPHRD_TUNNEL) + skb_dst(skb2)->dev->type != ARPHRD_TUNNEL6) goto out; } -- cgit v1.2.3 From d939f44d4a7f910755165458da20407d2139f581 Mon Sep 17 00:00:00 2001 From: Le Ma Date: Mon, 1 Apr 2019 18:08:30 +0800 Subject: drm/amdgpu: remove unnecessary rlc reset function on gfx9 The rlc reset function is not necessary during gfx9 initialization/resume phase. And this function would even cause rlc fw loading failed on some gfx9 ASIC. Remove this function safely with verification well on Vega/Raven platform. Signed-off-by: Le Ma Reviewed-by: Feifei Xu Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c index d0309e8c9d12..a11db2b1a63f 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c @@ -2405,8 +2405,6 @@ static int gfx_v9_0_rlc_resume(struct amdgpu_device *adev) /* disable CG */ WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, 0); - adev->gfx.rlc.funcs->reset(adev); - gfx_v9_0_init_pg(adev); if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) { -- cgit v1.2.3 From 882c5e552ffd06856de42261460f46e18319d259 Mon Sep 17 00:00:00 2001 From: Alexandre Belloni Date: Tue, 2 Apr 2019 12:26:36 +0200 Subject: rtc: da9063: set uie_unsupported when relevant The DA9063AD doesn't support alarms on any seconds and its granularity is the minute. Set uie_unsupported in that case. Reported-by: Wolfram Sang Reported-by: Geert Uytterhoeven Reviewed-by: Wolfram Sang Tested-by: Wolfram Sang Acked-by: Steve Twiss Signed-off-by: Alexandre Belloni --- drivers/rtc/rtc-da9063.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/rtc/rtc-da9063.c b/drivers/rtc/rtc-da9063.c index b4e054c64bad..69b54e5556c0 100644 --- a/drivers/rtc/rtc-da9063.c +++ b/drivers/rtc/rtc-da9063.c @@ -480,6 +480,13 @@ static int da9063_rtc_probe(struct platform_device *pdev) da9063_data_to_tm(data, &rtc->alarm_time, rtc); rtc->rtc_sync = false; + /* + * TODO: some models have alarms on a minute boundary but still support + * real hardware interrupts. Add this once the core supports it. + */ + if (config->rtc_data_start != RTC_SEC) + rtc->rtc_dev->uie_unsupported = 1; + irq_alarm = platform_get_irq_byname(pdev, "ALARM"); ret = devm_request_threaded_irq(&pdev->dev, irq_alarm, NULL, da9063_alarm_event, -- cgit v1.2.3 From 43d147be5738a9ed6cfb25c285ac50d6dd5793be Mon Sep 17 00:00:00 2001 From: Charles Keepax Date: Tue, 2 Apr 2019 13:49:14 +0100 Subject: ASoC: wm_adsp: Check for buffer in trigger stop Trigger stop can be called in situations where trigger start failed and as such it can't be assumed the buffer is already attached to the compressed stream or a NULL pointer may be dereferenced. Fixes: 639e5eb3c7d6 ("ASoC: wm_adsp: Correct handling of compressed streams that restart") Signed-off-by: Charles Keepax Signed-off-by: Mark Brown --- sound/soc/codecs/wm_adsp.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/sound/soc/codecs/wm_adsp.c b/sound/soc/codecs/wm_adsp.c index 5608ed5decca..b0b48eb9c7c9 100644 --- a/sound/soc/codecs/wm_adsp.c +++ b/sound/soc/codecs/wm_adsp.c @@ -3587,7 +3587,8 @@ int wm_adsp_compr_trigger(struct snd_compr_stream *stream, int cmd) } break; case SNDRV_PCM_TRIGGER_STOP: - wm_adsp_buffer_clear(compr->buf); + if (wm_adsp_compr_attached(compr)) + wm_adsp_buffer_clear(compr->buf); break; default: ret = -EINVAL; -- cgit v1.2.3 From 9f3bd8fe8f9d39e27e29c7134b21e335d1c7db6c Mon Sep 17 00:00:00 2001 From: Nicolas Pitre Date: Tue, 2 Apr 2019 13:18:45 -0400 Subject: Update Nicolas Pitre's email address The @linaro version won't be valid much longer. Signed-off-by: Nicolas Pitre Signed-off-by: Linus Torvalds --- .mailmap | 2 ++ MAINTAINERS | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/.mailmap b/.mailmap index b2cde8668dcc..ae2bcad06f4b 100644 --- a/.mailmap +++ b/.mailmap @@ -156,6 +156,8 @@ Morten Welinder Morten Welinder Mythri P K Nguyen Anh Quynh +Nicolas Pitre +Nicolas Pitre Paolo 'Blaisorblade' Giarrusso Patrick Mochel Paul Burton diff --git a/MAINTAINERS b/MAINTAINERS index 43b36dbed48e..391405091c6b 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -4129,7 +4129,7 @@ F: drivers/cpuidle/* F: include/linux/cpuidle.h CRAMFS FILESYSTEM -M: Nicolas Pitre +M: Nicolas Pitre S: Maintained F: Documentation/filesystems/cramfs.txt F: fs/cramfs/ -- cgit v1.2.3 From 4bcdec39c454c4e8f9512115bdcc3efec1ba5f55 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Tue, 2 Apr 2019 12:20:49 +0200 Subject: ASoC: Intel: cht_bsw_max98090_ti: Enable codec clock once and keep it enabled Users have been seeing sound stability issues with max98090 codecs since: commit 648e921888ad ("clk: x86: Stop marking clocks as CLK_IS_CRITICAL") At first that commit broke sound for Chromebook Swanky and Clapper models, the problem was that the machine-driver has been controlling the wrong clock on those models since support for them was added. This was hidden by clk-pmc-atom.c keeping the actual clk on unconditionally. With the machine-driver controlling the proper clock, sound works again but we are seeing bug reports describing it as: low volume, "sounds like played at 10x speed" and instable. When these issues are hit the following message is seen in dmesg: "max98090 i2c-193C9890:00: PLL unlocked". Attempts have been made to fix this by inserting a delay between enabling the clk and enabling and checking the pll, but this has not helped. It seems that at least on boards which use pmc_plt_clk_0 as clock, if we ever disable the clk, the pll looses its lock and after that we get various issues. This commit fixes this by enabling the clock once at probe time on these boards. In essence this restores the old behavior of clk-pmc-atom.c always keeping the clk on on these boards. Fixes: 648e921888ad ("clk: x86: Stop marking clocks as CLK_IS_CRITICAL") Reported-by: Mogens Jensen Reported-by: Dean Wallace Signed-off-by: Hans de Goede Acked-by: Pierre-Louis Bossart Signed-off-by: Mark Brown --- sound/soc/intel/boards/cht_bsw_max98090_ti.c | 47 ++++++++++++++++++++++++---- 1 file changed, 41 insertions(+), 6 deletions(-) diff --git a/sound/soc/intel/boards/cht_bsw_max98090_ti.c b/sound/soc/intel/boards/cht_bsw_max98090_ti.c index 3263b0495853..c0e0844f75b9 100644 --- a/sound/soc/intel/boards/cht_bsw_max98090_ti.c +++ b/sound/soc/intel/boards/cht_bsw_max98090_ti.c @@ -43,6 +43,7 @@ struct cht_mc_private { struct clk *mclk; struct snd_soc_jack jack; bool ts3a227e_present; + int quirks; }; static int platform_clock_control(struct snd_soc_dapm_widget *w, @@ -54,6 +55,10 @@ static int platform_clock_control(struct snd_soc_dapm_widget *w, struct cht_mc_private *ctx = snd_soc_card_get_drvdata(card); int ret; + /* See the comment in snd_cht_mc_probe() */ + if (ctx->quirks & QUIRK_PMC_PLT_CLK_0) + return 0; + codec_dai = snd_soc_card_get_codec_dai(card, CHT_CODEC_DAI); if (!codec_dai) { dev_err(card->dev, "Codec dai not found; Unable to set platform clock\n"); @@ -223,6 +228,10 @@ static int cht_codec_init(struct snd_soc_pcm_runtime *runtime) "jack detection gpios not added, error %d\n", ret); } + /* See the comment in snd_cht_mc_probe() */ + if (ctx->quirks & QUIRK_PMC_PLT_CLK_0) + return 0; + /* * The firmware might enable the clock at * boot (this information may or may not @@ -423,16 +432,15 @@ static int snd_cht_mc_probe(struct platform_device *pdev) const char *mclk_name; struct snd_soc_acpi_mach *mach; const char *platform_name; - int quirks = 0; - - dmi_id = dmi_first_match(cht_max98090_quirk_table); - if (dmi_id) - quirks = (unsigned long)dmi_id->driver_data; drv = devm_kzalloc(&pdev->dev, sizeof(*drv), GFP_KERNEL); if (!drv) return -ENOMEM; + dmi_id = dmi_first_match(cht_max98090_quirk_table); + if (dmi_id) + drv->quirks = (unsigned long)dmi_id->driver_data; + drv->ts3a227e_present = acpi_dev_found("104C227E"); if (!drv->ts3a227e_present) { /* no need probe TI jack detection chip */ @@ -458,7 +466,7 @@ static int snd_cht_mc_probe(struct platform_device *pdev) snd_soc_card_cht.dev = &pdev->dev; snd_soc_card_set_drvdata(&snd_soc_card_cht, drv); - if (quirks & QUIRK_PMC_PLT_CLK_0) + if (drv->quirks & QUIRK_PMC_PLT_CLK_0) mclk_name = "pmc_plt_clk_0"; else mclk_name = "pmc_plt_clk_3"; @@ -471,6 +479,21 @@ static int snd_cht_mc_probe(struct platform_device *pdev) return PTR_ERR(drv->mclk); } + /* + * Boards which have the MAX98090's clk connected to clk_0 do not seem + * to like it if we muck with the clock. If we disable the clock when + * it is unused we get "max98090 i2c-193C9890:00: PLL unlocked" errors + * and the PLL never seems to lock again. + * So for these boards we enable it here once and leave it at that. + */ + if (drv->quirks & QUIRK_PMC_PLT_CLK_0) { + ret_val = clk_prepare_enable(drv->mclk); + if (ret_val < 0) { + dev_err(&pdev->dev, "MCLK enable error: %d\n", ret_val); + return ret_val; + } + } + ret_val = devm_snd_soc_register_card(&pdev->dev, &snd_soc_card_cht); if (ret_val) { dev_err(&pdev->dev, @@ -481,11 +504,23 @@ static int snd_cht_mc_probe(struct platform_device *pdev) return ret_val; } +static int snd_cht_mc_remove(struct platform_device *pdev) +{ + struct snd_soc_card *card = platform_get_drvdata(pdev); + struct cht_mc_private *ctx = snd_soc_card_get_drvdata(card); + + if (ctx->quirks & QUIRK_PMC_PLT_CLK_0) + clk_disable_unprepare(ctx->mclk); + + return 0; +} + static struct platform_driver snd_cht_mc_driver = { .driver = { .name = "cht-bsw-max98090", }, .probe = snd_cht_mc_probe, + .remove = snd_cht_mc_remove, }; module_platform_driver(snd_cht_mc_driver) -- cgit v1.2.3 From a05a2e7998ab1badcf80aed47b5313934fd131fa Mon Sep 17 00:00:00 2001 From: Maxime Ripard Date: Fri, 22 Mar 2019 10:16:50 +0100 Subject: mfd: sun6i-prcm: Allow to compile with COMPILE_TEST Since this driver only has a dependency on ARCH_SUNXI just because it doesn't make any sense to run it on something else, we can definitely enable it through COMPILE_TEST as well to get some build coverage. Signed-off-by: Maxime Ripard Signed-off-by: Lee Jones --- drivers/mfd/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/mfd/Kconfig b/drivers/mfd/Kconfig index 0ce2d8dfc5f1..26ad6468d13a 100644 --- a/drivers/mfd/Kconfig +++ b/drivers/mfd/Kconfig @@ -1246,7 +1246,7 @@ config MFD_STA2X11 config MFD_SUN6I_PRCM bool "Allwinner A31 PRCM controller" - depends on ARCH_SUNXI + depends on ARCH_SUNXI || COMPILE_TEST select MFD_CORE help Support for the PRCM (Power/Reset/Clock Management) unit available -- cgit v1.2.3 From 583feb08e7f7ac9d533b446882eb3a54737a6dbb Mon Sep 17 00:00:00 2001 From: Stephane Eranian Date: Wed, 6 Mar 2019 11:50:48 -0800 Subject: perf/x86/intel: Fix handling of wakeup_events for multi-entry PEBS When an event is programmed with attr.wakeup_events=N (N>0), it means the caller is interested in getting a user level notification after N samples have been recorded in the kernel sampling buffer. With precise events on Intel processors, the kernel uses PEBS. The kernel tries minimize sampling overhead by verifying if the event configuration is compatible with multi-entry PEBS mode. If so, the kernel is notified only when the buffer has reached its threshold. Other PEBS operates in single-entry mode, the kenrel is notified for each PEBS sample. The problem is that the current implementation look at frequency mode and event sample_type but ignores the wakeup_events field. Thus, it may not be possible to receive a notification after each precise event. This patch fixes this problem by disabling multi-entry PEBS if wakeup_events is non-zero. Signed-off-by: Stephane Eranian Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Andi Kleen Cc: Alexander Shishkin Cc: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Vince Weaver Cc: kan.liang@intel.com Link: https://lkml.kernel.org/r/20190306195048.189514-1-eranian@google.com Signed-off-by: Ingo Molnar --- arch/x86/events/intel/core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c index 8baa441d8000..1539647ea39d 100644 --- a/arch/x86/events/intel/core.c +++ b/arch/x86/events/intel/core.c @@ -3185,7 +3185,7 @@ static int intel_pmu_hw_config(struct perf_event *event) return ret; if (event->attr.precise_ip) { - if (!event->attr.freq) { + if (!(event->attr.freq || event->attr.wakeup_events)) { event->hw.flags |= PERF_X86_EVENT_AUTO_RELOAD; if (!(event->attr.sample_type & ~intel_pmu_large_pebs_flags(event))) -- cgit v1.2.3 From f5ae2f932e2f8f4f79796f44832ae8fca26f188a Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 5 Mar 2019 13:32:30 +0100 Subject: iwlwifi: mvm: avoid possible deadlock in TX path iwl_mvm_tx_mpdu() may run from iwl_mvm_add_new_dqa_stream_wk(), where soft-IRQs aren't disabled. In this case, it may hold the station lock and be interrupted by a soft-IRQ that also wants to acquire said lock, leading to a deadlock. Fix it by disabling soft-IRQs in iwl_mvm_add_new_dqa_stream_wk(). Signed-off-by: Johannes Berg Signed-off-by: Luca Coelho --- drivers/net/wireless/intel/iwlwifi/mvm/sta.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/sta.c b/drivers/net/wireless/intel/iwlwifi/mvm/sta.c index db26f0041a81..98d123dd7177 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/sta.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/sta.c @@ -1399,7 +1399,9 @@ void iwl_mvm_add_new_dqa_stream_wk(struct work_struct *wk) iwl_mvm_sta_alloc_queue(mvm, txq->sta, txq->ac, tid); list_del_init(&mvmtxq->list); + local_bh_disable(); iwl_mvm_mac_itxq_xmit(mvm->hw, txq); + local_bh_enable(); } mutex_unlock(&mvm->mutex); -- cgit v1.2.3 From dcfe3b103dd1348706bbdcfb9921c65452a6144e Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 12 Mar 2019 13:22:43 +0100 Subject: iwlwifi: mvm: update offloaded rate control on changes With offloaded rate control, if the station parameters (rates, NSS, bandwidth) change (sta_rc_update method), call iwl_mvm_rs_rate_init() to propagate those change to the firmware. Signed-off-by: Johannes Berg Signed-off-by: Luca Coelho --- drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c b/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c index eeaeb8475666..6a3b11dd2edf 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c @@ -3258,6 +3258,13 @@ static void iwl_mvm_sta_rc_update(struct ieee80211_hw *hw, struct ieee80211_sta *sta, u32 changed) { struct iwl_mvm *mvm = IWL_MAC80211_GET_MVM(hw); + struct iwl_mvm_vif *mvmvif = iwl_mvm_vif_from_mac80211(vif); + + if (changed & (IEEE80211_RC_BW_CHANGED | + IEEE80211_RC_SUPP_RATES_CHANGED | + IEEE80211_RC_NSS_CHANGED)) + iwl_mvm_rs_rate_init(mvm, sta, mvmvif->phy_ctxt->channel->band, + true); if (vif->type == NL80211_IFTYPE_STATION && changed & IEEE80211_RC_NSS_CHANGED) -- cgit v1.2.3 From debec2f23910cb17f2c0f6d5e30a8da00bb5f515 Mon Sep 17 00:00:00 2001 From: Luca Coelho Date: Thu, 14 Mar 2019 14:57:08 +0200 Subject: iwlwifi: add support for quz firmwares Add a new configuration with a new firmware name for quz devices. And, since these devices have the same PCI device and subsystem IDs, we need to add some code to switch from a normal qu firmware to the quz firmware. Signed-off-by: Luca Coelho --- drivers/net/wireless/intel/iwlwifi/cfg/22000.c | 18 ++++++++++++++++-- drivers/net/wireless/intel/iwlwifi/iwl-config.h | 1 + drivers/net/wireless/intel/iwlwifi/iwl-csr.h | 1 + drivers/net/wireless/intel/iwlwifi/pcie/trans.c | 4 ++++ 4 files changed, 22 insertions(+), 2 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/cfg/22000.c b/drivers/net/wireless/intel/iwlwifi/cfg/22000.c index d5c29298ae3e..eb6defb6d0cd 100644 --- a/drivers/net/wireless/intel/iwlwifi/cfg/22000.c +++ b/drivers/net/wireless/intel/iwlwifi/cfg/22000.c @@ -82,6 +82,7 @@ #define IWL_22000_HR_A0_FW_PRE "iwlwifi-QuQnj-a0-hr-a0-" #define IWL_22000_SU_Z0_FW_PRE "iwlwifi-su-z0-" #define IWL_QU_B_JF_B_FW_PRE "iwlwifi-Qu-b0-jf-b0-" +#define IWL_QUZ_A_HR_B_FW_PRE "iwlwifi-QuZ-a0-hr-b0-" #define IWL_QNJ_B_JF_B_FW_PRE "iwlwifi-QuQnj-b0-jf-b0-" #define IWL_CC_A_FW_PRE "iwlwifi-cc-a0-" #define IWL_22000_SO_A_JF_B_FW_PRE "iwlwifi-so-a0-jf-b0-" @@ -105,8 +106,8 @@ IWL_22000_HR_A0_FW_PRE __stringify(api) ".ucode" #define IWL_22000_SU_Z0_MODULE_FIRMWARE(api) \ IWL_22000_SU_Z0_FW_PRE __stringify(api) ".ucode" -#define IWL_QU_B_JF_B_MODULE_FIRMWARE(api) \ - IWL_QU_B_JF_B_FW_PRE __stringify(api) ".ucode" +#define IWL_QUZ_A_HR_B_MODULE_FIRMWARE(api) \ + IWL_QUZ_A_HR_B_FW_PRE __stringify(api) ".ucode" #define IWL_QU_B_JF_B_MODULE_FIRMWARE(api) \ IWL_QU_B_JF_B_FW_PRE __stringify(api) ".ucode" #define IWL_QNJ_B_JF_B_MODULE_FIRMWARE(api) \ @@ -235,6 +236,18 @@ const struct iwl_cfg iwl_ax101_cfg_qu_hr = { .max_tx_agg_size = IEEE80211_MAX_AMPDU_BUF_HT, }; +const struct iwl_cfg iwl_ax101_cfg_quz_hr = { + .name = "Intel(R) Wi-Fi 6 AX101", + .fw_name_pre = IWL_QUZ_A_HR_B_FW_PRE, + IWL_DEVICE_22500, + /* + * This device doesn't support receiving BlockAck with a large bitmap + * so we need to restrict the size of transmitted aggregation to the + * HT size; mac80211 would otherwise pick the HE max (256) by default. + */ + .max_tx_agg_size = IEEE80211_MAX_AMPDU_BUF_HT, +}; + const struct iwl_cfg iwl_ax200_cfg_cc = { .name = "Intel(R) Wi-Fi 6 AX200 160MHz", .fw_name_pre = IWL_CC_A_FW_PRE, @@ -444,6 +457,7 @@ MODULE_FIRMWARE(IWL_22000_HR_B_QNJ_MODULE_FIRMWARE(IWL_22000_UCODE_API_MAX)); MODULE_FIRMWARE(IWL_22000_HR_A0_QNJ_MODULE_FIRMWARE(IWL_22000_UCODE_API_MAX)); MODULE_FIRMWARE(IWL_22000_SU_Z0_MODULE_FIRMWARE(IWL_22000_UCODE_API_MAX)); MODULE_FIRMWARE(IWL_QU_B_JF_B_MODULE_FIRMWARE(IWL_22000_UCODE_API_MAX)); +MODULE_FIRMWARE(IWL_QUZ_A_HR_B_MODULE_FIRMWARE(IWL_22000_UCODE_API_MAX)); MODULE_FIRMWARE(IWL_QNJ_B_JF_B_MODULE_FIRMWARE(IWL_22000_UCODE_API_MAX)); MODULE_FIRMWARE(IWL_CC_A_MODULE_FIRMWARE(IWL_22000_UCODE_API_MAX)); MODULE_FIRMWARE(IWL_22000_SO_A_JF_B_MODULE_FIRMWARE(IWL_22000_UCODE_API_MAX)); diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-config.h b/drivers/net/wireless/intel/iwlwifi/iwl-config.h index c91b537fa7ff..93070848280a 100644 --- a/drivers/net/wireless/intel/iwlwifi/iwl-config.h +++ b/drivers/net/wireless/intel/iwlwifi/iwl-config.h @@ -549,6 +549,7 @@ extern const struct iwl_cfg iwl22000_2ac_cfg_hr; extern const struct iwl_cfg iwl22000_2ac_cfg_hr_cdb; extern const struct iwl_cfg iwl22000_2ac_cfg_jf; extern const struct iwl_cfg iwl_ax101_cfg_qu_hr; +extern const struct iwl_cfg iwl_ax101_cfg_quz_hr; extern const struct iwl_cfg iwl22000_2ax_cfg_hr; extern const struct iwl_cfg iwl_ax200_cfg_cc; extern const struct iwl_cfg killer1650s_2ax_cfg_qu_b0_hr_b0; diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-csr.h b/drivers/net/wireless/intel/iwlwifi/iwl-csr.h index aea6d03e545a..e539bc94eff7 100644 --- a/drivers/net/wireless/intel/iwlwifi/iwl-csr.h +++ b/drivers/net/wireless/intel/iwlwifi/iwl-csr.h @@ -327,6 +327,7 @@ enum { #define CSR_HW_REV_TYPE_NONE (0x00001F0) #define CSR_HW_REV_TYPE_QNJ (0x0000360) #define CSR_HW_REV_TYPE_QNJ_B0 (0x0000364) +#define CSR_HW_REV_TYPE_QUZ (0x0000354) #define CSR_HW_REV_TYPE_HR_CDB (0x0000340) #define CSR_HW_REV_TYPE_SO (0x0000370) #define CSR_HW_REV_TYPE_TY (0x0000420) diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/trans.c b/drivers/net/wireless/intel/iwlwifi/pcie/trans.c index 1d6f3053f233..79c1dc05f948 100644 --- a/drivers/net/wireless/intel/iwlwifi/pcie/trans.c +++ b/drivers/net/wireless/intel/iwlwifi/pcie/trans.c @@ -3543,6 +3543,10 @@ struct iwl_trans *iwl_trans_pcie_alloc(struct pci_dev *pdev, } } else if (cfg == &iwl_ax101_cfg_qu_hr) { if (CSR_HW_RF_ID_TYPE_CHIP_ID(trans->hw_rf_id) == + CSR_HW_RF_ID_TYPE_CHIP_ID(CSR_HW_RF_ID_TYPE_HR) && + trans->hw_rev == CSR_HW_REV_TYPE_QNJ_B0) { + trans->cfg = &iwl22000_2ax_cfg_qnj_hr_b0; + } else if (CSR_HW_RF_ID_TYPE_CHIP_ID(trans->hw_rf_id) == CSR_HW_RF_ID_TYPE_CHIP_ID(CSR_HW_RF_ID_TYPE_HR)) { trans->cfg = &iwl_ax101_cfg_qu_hr; } else if (CSR_HW_RF_ID_TYPE_CHIP_ID(trans->hw_rf_id) == -- cgit v1.2.3 From d7262457e35dbe239659e62654e56f8ddb814bed Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 21 Mar 2019 13:38:49 +0100 Subject: perf/x86/intel: Initialize TFA MSR Stephane reported that the TFA MSR is not initialized by the kernel, but the TFA bit could set by firmware or as a leftover from a kexec, which makes the state inconsistent. Reported-by: Stephane Eranian Tested-by: Nelson DSouza Signed-off-by: Peter Zijlstra (Intel) Cc: Alexander Shishkin Cc: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Vince Weaver Cc: tonyj@suse.com Link: https://lkml.kernel.org/r/20190321123849.GN6521@hirez.programming.kicks-ass.net Signed-off-by: Ingo Molnar --- arch/x86/events/intel/core.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c index 1539647ea39d..f61dcbef20ff 100644 --- a/arch/x86/events/intel/core.c +++ b/arch/x86/events/intel/core.c @@ -3575,6 +3575,12 @@ static void intel_pmu_cpu_starting(int cpu) cpuc->lbr_sel = NULL; + if (x86_pmu.flags & PMU_FL_TFA) { + WARN_ON_ONCE(cpuc->tfa_shadow); + cpuc->tfa_shadow = ~0ULL; + intel_set_tfa(cpuc, false); + } + if (x86_pmu.version > 1) flip_smm_bit(&x86_pmu.attr_freeze_on_smi); -- cgit v1.2.3 From 914123fa39042e651d79eaf86bbf63a1b938dddf Mon Sep 17 00:00:00 2001 From: "Lendacky, Thomas" Date: Tue, 2 Apr 2019 15:21:14 +0000 Subject: x86/perf/amd: Resolve race condition when disabling PMC On AMD processors, the detection of an overflowed counter in the NMI handler relies on the current value of the counter. So, for example, to check for overflow on a 48 bit counter, bit 47 is checked to see if it is 1 (not overflowed) or 0 (overflowed). There is currently a race condition present when disabling and then updating the PMC. Increased NMI latency in newer AMD processors makes this race condition more pronounced. If the counter value has overflowed, it is possible to update the PMC value before the NMI handler can run. The updated PMC value is not an overflowed value, so when the perf NMI handler does run, it will not find an overflowed counter. This may appear as an unknown NMI resulting in either a panic or a series of messages, depending on how the kernel is configured. To eliminate this race condition, the PMC value must be checked after disabling the counter. Add an AMD function, amd_pmu_disable_all(), that will wait for the NMI handler to reset any active and overflowed counter after calling x86_pmu_disable_all(). Signed-off-by: Tom Lendacky Signed-off-by: Peter Zijlstra (Intel) Cc: # 4.14.x- Cc: Alexander Shishkin Cc: Arnaldo Carvalho de Melo Cc: Arnaldo Carvalho de Melo Cc: Borislav Petkov Cc: Jiri Olsa Cc: Linus Torvalds Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Thomas Gleixner Cc: Vince Weaver Link: https://lkml.kernel.org/r/Message-ID: Signed-off-by: Ingo Molnar --- arch/x86/events/amd/core.c | 65 +++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 62 insertions(+), 3 deletions(-) diff --git a/arch/x86/events/amd/core.c b/arch/x86/events/amd/core.c index 7d2d7c801dba..c09ee88b0eed 100644 --- a/arch/x86/events/amd/core.c +++ b/arch/x86/events/amd/core.c @@ -3,6 +3,7 @@ #include #include #include +#include #include #include "../perf_event.h" @@ -429,6 +430,64 @@ static void amd_pmu_cpu_dead(int cpu) } } +/* + * When a PMC counter overflows, an NMI is used to process the event and + * reset the counter. NMI latency can result in the counter being updated + * before the NMI can run, which can result in what appear to be spurious + * NMIs. This function is intended to wait for the NMI to run and reset + * the counter to avoid possible unhandled NMI messages. + */ +#define OVERFLOW_WAIT_COUNT 50 + +static void amd_pmu_wait_on_overflow(int idx) +{ + unsigned int i; + u64 counter; + + /* + * Wait for the counter to be reset if it has overflowed. This loop + * should exit very, very quickly, but just in case, don't wait + * forever... + */ + for (i = 0; i < OVERFLOW_WAIT_COUNT; i++) { + rdmsrl(x86_pmu_event_addr(idx), counter); + if (counter & (1ULL << (x86_pmu.cntval_bits - 1))) + break; + + /* Might be in IRQ context, so can't sleep */ + udelay(1); + } +} + +static void amd_pmu_disable_all(void) +{ + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); + int idx; + + x86_pmu_disable_all(); + + /* + * This shouldn't be called from NMI context, but add a safeguard here + * to return, since if we're in NMI context we can't wait for an NMI + * to reset an overflowed counter value. + */ + if (in_nmi()) + return; + + /* + * Check each counter for overflow and wait for it to be reset by the + * NMI if it has overflowed. This relies on the fact that all active + * counters are always enabled when this function is caled and + * ARCH_PERFMON_EVENTSEL_INT is always set. + */ + for (idx = 0; idx < x86_pmu.num_counters; idx++) { + if (!test_bit(idx, cpuc->active_mask)) + continue; + + amd_pmu_wait_on_overflow(idx); + } +} + static struct event_constraint * amd_get_event_constraints(struct cpu_hw_events *cpuc, int idx, struct perf_event *event) @@ -622,7 +681,7 @@ static ssize_t amd_event_sysfs_show(char *page, u64 config) static __initconst const struct x86_pmu amd_pmu = { .name = "AMD", .handle_irq = x86_pmu_handle_irq, - .disable_all = x86_pmu_disable_all, + .disable_all = amd_pmu_disable_all, .enable_all = x86_pmu_enable_all, .enable = x86_pmu_enable_event, .disable = x86_pmu_disable_event, @@ -732,7 +791,7 @@ void amd_pmu_enable_virt(void) cpuc->perf_ctr_virt_mask = 0; /* Reload all events */ - x86_pmu_disable_all(); + amd_pmu_disable_all(); x86_pmu_enable_all(0); } EXPORT_SYMBOL_GPL(amd_pmu_enable_virt); @@ -750,7 +809,7 @@ void amd_pmu_disable_virt(void) cpuc->perf_ctr_virt_mask = AMD64_EVENTSEL_HOSTONLY; /* Reload all events */ - x86_pmu_disable_all(); + amd_pmu_disable_all(); x86_pmu_enable_all(0); } EXPORT_SYMBOL_GPL(amd_pmu_disable_virt); -- cgit v1.2.3 From 6d3edaae16c6c7d238360f2841212c2b26774d5e Mon Sep 17 00:00:00 2001 From: "Lendacky, Thomas" Date: Tue, 2 Apr 2019 15:21:16 +0000 Subject: x86/perf/amd: Resolve NMI latency issues for active PMCs On AMD processors, the detection of an overflowed PMC counter in the NMI handler relies on the current value of the PMC. So, for example, to check for overflow on a 48-bit counter, bit 47 is checked to see if it is 1 (not overflowed) or 0 (overflowed). When the perf NMI handler executes it does not know in advance which PMC counters have overflowed. As such, the NMI handler will process all active PMC counters that have overflowed. NMI latency in newer AMD processors can result in multiple overflowed PMC counters being processed in one NMI and then a subsequent NMI, that does not appear to be a back-to-back NMI, not finding any PMC counters that have overflowed. This may appear to be an unhandled NMI resulting in either a panic or a series of messages, depending on how the kernel was configured. To mitigate this issue, add an AMD handle_irq callback function, amd_pmu_handle_irq(), that will invoke the common x86_pmu_handle_irq() function and upon return perform some additional processing that will indicate if the NMI has been handled or would have been handled had an earlier NMI not handled the overflowed PMC. Using a per-CPU variable, a minimum value of the number of active PMCs or 2 will be set whenever a PMC is active. This is used to indicate the possible number of NMIs that can still occur. The value of 2 is used for when an NMI does not arrive at the LAPIC in time to be collapsed into an already pending NMI. Each time the function is called without having handled an overflowed counter, the per-CPU value is checked. If the value is non-zero, it is decremented and the NMI indicates that it handled the NMI. If the value is zero, then the NMI indicates that it did not handle the NMI. Signed-off-by: Tom Lendacky Signed-off-by: Peter Zijlstra (Intel) Cc: # 4.14.x- Cc: Alexander Shishkin Cc: Arnaldo Carvalho de Melo Cc: Arnaldo Carvalho de Melo Cc: Borislav Petkov Cc: Jiri Olsa Cc: Linus Torvalds Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Thomas Gleixner Cc: Vince Weaver Link: https://lkml.kernel.org/r/Message-ID: Signed-off-by: Ingo Molnar --- arch/x86/events/amd/core.c | 56 +++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 55 insertions(+), 1 deletion(-) diff --git a/arch/x86/events/amd/core.c b/arch/x86/events/amd/core.c index c09ee88b0eed..34c191453ce3 100644 --- a/arch/x86/events/amd/core.c +++ b/arch/x86/events/amd/core.c @@ -4,10 +4,13 @@ #include #include #include +#include #include #include "../perf_event.h" +static DEFINE_PER_CPU(unsigned int, perf_nmi_counter); + static __initconst const u64 amd_hw_cache_event_ids [PERF_COUNT_HW_CACHE_MAX] [PERF_COUNT_HW_CACHE_OP_MAX] @@ -488,6 +491,57 @@ static void amd_pmu_disable_all(void) } } +/* + * Because of NMI latency, if multiple PMC counters are active or other sources + * of NMIs are received, the perf NMI handler can handle one or more overflowed + * PMC counters outside of the NMI associated with the PMC overflow. If the NMI + * doesn't arrive at the LAPIC in time to become a pending NMI, then the kernel + * back-to-back NMI support won't be active. This PMC handler needs to take into + * account that this can occur, otherwise this could result in unknown NMI + * messages being issued. Examples of this is PMC overflow while in the NMI + * handler when multiple PMCs are active or PMC overflow while handling some + * other source of an NMI. + * + * Attempt to mitigate this by using the number of active PMCs to determine + * whether to return NMI_HANDLED if the perf NMI handler did not handle/reset + * any PMCs. The per-CPU perf_nmi_counter variable is set to a minimum of the + * number of active PMCs or 2. The value of 2 is used in case an NMI does not + * arrive at the LAPIC in time to be collapsed into an already pending NMI. + */ +static int amd_pmu_handle_irq(struct pt_regs *regs) +{ + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); + int active, handled; + + /* + * Obtain the active count before calling x86_pmu_handle_irq() since + * it is possible that x86_pmu_handle_irq() may make a counter + * inactive (through x86_pmu_stop). + */ + active = __bitmap_weight(cpuc->active_mask, X86_PMC_IDX_MAX); + + /* Process any counter overflows */ + handled = x86_pmu_handle_irq(regs); + + /* + * If a counter was handled, record the number of possible remaining + * NMIs that can occur. + */ + if (handled) { + this_cpu_write(perf_nmi_counter, + min_t(unsigned int, 2, active)); + + return handled; + } + + if (!this_cpu_read(perf_nmi_counter)) + return NMI_DONE; + + this_cpu_dec(perf_nmi_counter); + + return NMI_HANDLED; +} + static struct event_constraint * amd_get_event_constraints(struct cpu_hw_events *cpuc, int idx, struct perf_event *event) @@ -680,7 +734,7 @@ static ssize_t amd_event_sysfs_show(char *page, u64 config) static __initconst const struct x86_pmu amd_pmu = { .name = "AMD", - .handle_irq = x86_pmu_handle_irq, + .handle_irq = amd_pmu_handle_irq, .disable_all = amd_pmu_disable_all, .enable_all = x86_pmu_enable_all, .enable = x86_pmu_enable_event, -- cgit v1.2.3 From a0fe2c6479aab5723239b315ef1b552673f434a3 Mon Sep 17 00:00:00 2001 From: Jann Horn Date: Fri, 29 Mar 2019 22:46:49 +0100 Subject: linux/kernel.h: Use parentheses around argument in u64_to_user_ptr() Use parentheses around uses of the argument in u64_to_user_ptr() to ensure that the cast doesn't apply to part of the argument. There are existing uses of the macro of the form u64_to_user_ptr(A + B) which expands to (void __user *)(uintptr_t)A + B (the cast applies to the first operand of the addition, the addition is a pointer addition). This happens to still work as intended, the semantic difference doesn't cause a difference in behavior. But I want to use u64_to_user_ptr() with a ternary operator in the argument, like so: u64_to_user_ptr(A ? B : C) This currently doesn't work as intended. Signed-off-by: Jann Horn Signed-off-by: Borislav Petkov Reviewed-by: Mukesh Ojha Cc: Andrei Vagin Cc: Andrew Morton Cc: Dan Carpenter Cc: Greg Kroah-Hartman Cc: "H. Peter Anvin" Cc: Ingo Molnar Cc: Jani Nikula Cc: Kees Cook Cc: Masahiro Yamada Cc: NeilBrown Cc: Peter Zijlstra Cc: Qiaowei Ren Cc: Thomas Gleixner Cc: x86-ml Link: https://lkml.kernel.org/r/20190329214652.258477-1-jannh@google.com --- include/linux/kernel.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/linux/kernel.h b/include/linux/kernel.h index 34a5036debd3..2d14e21c16c0 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -47,8 +47,8 @@ #define u64_to_user_ptr(x) ( \ { \ - typecheck(u64, x); \ - (void __user *)(uintptr_t)x; \ + typecheck(u64, (x)); \ + (void __user *)(uintptr_t)(x); \ } \ ) -- cgit v1.2.3 From 8983eb602af511fc5822f5ff4a82074c68816fd9 Mon Sep 17 00:00:00 2001 From: Kailang Yang Date: Wed, 3 Apr 2019 15:31:49 +0800 Subject: ALSA: hda/realtek - Move to ACT_INIT state It will be lose Mic JD state when Chrome OS boot and headset was plugged. Just Implement of reset combo jack JD verb for ACT_PRE_PROBE state. Intel test result was also failed. It test passed until changed the initial state to ACT_INIT. Mic JD will show every time. This patch also changed the model name as 'alc-chrome-book' for application of Chrome OS. Fixes: 10f5b1b85ed1 ("ALSA: hda/realtek - Fixed Headset Mic JD not stable") Signed-off-by: Kailang Yang Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_realtek.c | 41 +++++++++++++++++++++++++++++------------ 1 file changed, 29 insertions(+), 12 deletions(-) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 715ab2342151..c235b26add07 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -5491,7 +5491,7 @@ static void alc_headset_btn_callback(struct hda_codec *codec, jack->jack->button_state = report; } -static void alc295_fixup_chromebook(struct hda_codec *codec, +static void alc_fixup_headset_jack(struct hda_codec *codec, const struct hda_fixup *fix, int action) { @@ -5501,16 +5501,6 @@ static void alc295_fixup_chromebook(struct hda_codec *codec, alc_headset_btn_callback); snd_hda_jack_add_kctl(codec, 0x55, "Headset Jack", false, SND_JACK_HEADSET, alc_headset_btn_keymap); - switch (codec->core.vendor_id) { - case 0x10ec0295: - alc_update_coef_idx(codec, 0x4a, 0x8000, 1 << 15); /* Reset HP JD */ - alc_update_coef_idx(codec, 0x4a, 0x8000, 0 << 15); - break; - case 0x10ec0236: - alc_update_coef_idx(codec, 0x1b, 0x8000, 1 << 15); /* Reset HP JD */ - alc_update_coef_idx(codec, 0x1b, 0x8000, 0 << 15); - break; - } break; case HDA_FIXUP_ACT_INIT: switch (codec->core.vendor_id) { @@ -5531,6 +5521,25 @@ static void alc295_fixup_chromebook(struct hda_codec *codec, } } +static void alc295_fixup_chromebook(struct hda_codec *codec, + const struct hda_fixup *fix, int action) +{ + switch (action) { + case HDA_FIXUP_ACT_INIT: + switch (codec->core.vendor_id) { + case 0x10ec0295: + alc_update_coef_idx(codec, 0x4a, 0x8000, 1 << 15); /* Reset HP JD */ + alc_update_coef_idx(codec, 0x4a, 0x8000, 0 << 15); + break; + case 0x10ec0236: + alc_update_coef_idx(codec, 0x1b, 0x8000, 1 << 15); /* Reset HP JD */ + alc_update_coef_idx(codec, 0x1b, 0x8000, 0 << 15); + break; + } + break; + } +} + static void alc_fixup_disable_mic_vref(struct hda_codec *codec, const struct hda_fixup *fix, int action) { @@ -5685,6 +5694,7 @@ enum { ALC285_FIXUP_LENOVO_PC_BEEP_IN_NOISE, ALC255_FIXUP_ACER_HEADSET_MIC, ALC295_FIXUP_CHROME_BOOK, + ALC225_FIXUP_HEADSET_JACK, ALC225_FIXUP_DELL_WYSE_AIO_MIC_NO_PRESENCE, ALC225_FIXUP_WYSE_AUTO_MUTE, ALC225_FIXUP_WYSE_DISABLE_MIC_VREF, @@ -6646,6 +6656,12 @@ static const struct hda_fixup alc269_fixups[] = { [ALC295_FIXUP_CHROME_BOOK] = { .type = HDA_FIXUP_FUNC, .v.func = alc295_fixup_chromebook, + .chained = true, + .chain_id = ALC225_FIXUP_HEADSET_JACK + }, + [ALC225_FIXUP_HEADSET_JACK] = { + .type = HDA_FIXUP_FUNC, + .v.func = alc_fixup_headset_jack, }, [ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE] = { .type = HDA_FIXUP_PINS, @@ -7144,7 +7160,8 @@ static const struct hda_model_fixup alc269_fixup_models[] = { {.id = ALC255_FIXUP_DUMMY_LINEOUT_VERB, .name = "alc255-dummy-lineout"}, {.id = ALC255_FIXUP_DELL_HEADSET_MIC, .name = "alc255-dell-headset"}, {.id = ALC295_FIXUP_HP_X360, .name = "alc295-hp-x360"}, - {.id = ALC295_FIXUP_CHROME_BOOK, .name = "alc-sense-combo"}, + {.id = ALC225_FIXUP_HEADSET_JACK, .name = "alc-headset-jack"}, + {.id = ALC295_FIXUP_CHROME_BOOK, .name = "alc-chrome-book"}, {.id = ALC299_FIXUP_PREDATOR_SPK, .name = "predator-spk"}, {} }; -- cgit v1.2.3 From 80690a276f444a68a332136d98bfea1c338bc263 Mon Sep 17 00:00:00 2001 From: Richard Sailer Date: Tue, 2 Apr 2019 15:52:04 +0200 Subject: ALSA: hda/realtek - Add quirk for Tuxedo XC 1509 This adds a SND_PCI_QUIRK(...) line for the Tuxedo XC 1509. The Tuxedo XC 1509 and the System76 oryp5 are the same barebone notebooks manufactured by Clevo. To name the fixups both use after the actual underlying hardware, this patch also changes System76_orpy5 to clevo_pb51ed in 2 enum symbols and one function name, matching the other pci_quirk entries which are also named after the device ODM. Fixes: 7f665b1c3283 ("ALSA: hda/realtek - Headset microphone and internal speaker support for System76 oryp5") Signed-off-by: Richard Sailer Cc: Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_realtek.c | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index c235b26add07..810479766090 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -1864,8 +1864,8 @@ enum { ALC887_FIXUP_BASS_CHMAP, ALC1220_FIXUP_GB_DUAL_CODECS, ALC1220_FIXUP_CLEVO_P950, - ALC1220_FIXUP_SYSTEM76_ORYP5, - ALC1220_FIXUP_SYSTEM76_ORYP5_PINS, + ALC1220_FIXUP_CLEVO_PB51ED, + ALC1220_FIXUP_CLEVO_PB51ED_PINS, }; static void alc889_fixup_coef(struct hda_codec *codec, @@ -2070,7 +2070,7 @@ static void alc1220_fixup_clevo_p950(struct hda_codec *codec, static void alc_fixup_headset_mode_no_hp_mic(struct hda_codec *codec, const struct hda_fixup *fix, int action); -static void alc1220_fixup_system76_oryp5(struct hda_codec *codec, +static void alc1220_fixup_clevo_pb51ed(struct hda_codec *codec, const struct hda_fixup *fix, int action) { @@ -2322,18 +2322,18 @@ static const struct hda_fixup alc882_fixups[] = { .type = HDA_FIXUP_FUNC, .v.func = alc1220_fixup_clevo_p950, }, - [ALC1220_FIXUP_SYSTEM76_ORYP5] = { + [ALC1220_FIXUP_CLEVO_PB51ED] = { .type = HDA_FIXUP_FUNC, - .v.func = alc1220_fixup_system76_oryp5, + .v.func = alc1220_fixup_clevo_pb51ed, }, - [ALC1220_FIXUP_SYSTEM76_ORYP5_PINS] = { + [ALC1220_FIXUP_CLEVO_PB51ED_PINS] = { .type = HDA_FIXUP_PINS, .v.pins = (const struct hda_pintbl[]) { { 0x19, 0x01a1913c }, /* use as headset mic, without its own jack detect */ {} }, .chained = true, - .chain_id = ALC1220_FIXUP_SYSTEM76_ORYP5, + .chain_id = ALC1220_FIXUP_CLEVO_PB51ED, }, }; @@ -2411,8 +2411,9 @@ static const struct snd_pci_quirk alc882_fixup_tbl[] = { SND_PCI_QUIRK(0x1558, 0x9501, "Clevo P950HR", ALC1220_FIXUP_CLEVO_P950), SND_PCI_QUIRK(0x1558, 0x95e1, "Clevo P95xER", ALC1220_FIXUP_CLEVO_P950), SND_PCI_QUIRK(0x1558, 0x95e2, "Clevo P950ER", ALC1220_FIXUP_CLEVO_P950), - SND_PCI_QUIRK(0x1558, 0x96e1, "System76 Oryx Pro (oryp5)", ALC1220_FIXUP_SYSTEM76_ORYP5_PINS), - SND_PCI_QUIRK(0x1558, 0x97e1, "System76 Oryx Pro (oryp5)", ALC1220_FIXUP_SYSTEM76_ORYP5_PINS), + SND_PCI_QUIRK(0x1558, 0x96e1, "System76 Oryx Pro (oryp5)", ALC1220_FIXUP_CLEVO_PB51ED_PINS), + SND_PCI_QUIRK(0x1558, 0x97e1, "System76 Oryx Pro (oryp5)", ALC1220_FIXUP_CLEVO_PB51ED_PINS), + SND_PCI_QUIRK(0x1558, 0x65d1, "Tuxedo Book XC1509", ALC1220_FIXUP_CLEVO_PB51ED_PINS), SND_PCI_QUIRK_VENDOR(0x1558, "Clevo laptop", ALC882_FIXUP_EAPD), SND_PCI_QUIRK(0x161f, 0x2054, "Medion laptop", ALC883_FIXUP_EAPD), SND_PCI_QUIRK(0x17aa, 0x3a0d, "Lenovo Y530", ALC882_FIXUP_LENOVO_Y530), -- cgit v1.2.3 From ce856634af8cda3490947df8ac1ef5843e6356af Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Tue, 2 Apr 2019 09:57:13 -0700 Subject: HID: input: add mapping for Assistant key According to HUTRR89 usage 0x1cb from the consumer page was assigned to allow launching desktop-aware assistant application, so let's add the mapping. Signed-off-by: Dmitry Torokhov Signed-off-by: Jiri Kosina --- drivers/hid/hid-input.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/hid/hid-input.c b/drivers/hid/hid-input.c index b10b1922c5bd..1fce0076e7dc 100644 --- a/drivers/hid/hid-input.c +++ b/drivers/hid/hid-input.c @@ -998,6 +998,7 @@ static void hidinput_configure_usage(struct hid_input *hidinput, struct hid_fiel case 0x1b8: map_key_clear(KEY_VIDEO); break; case 0x1bc: map_key_clear(KEY_MESSENGER); break; case 0x1bd: map_key_clear(KEY_INFO); break; + case 0x1cb: map_key_clear(KEY_ASSISTANT); break; case 0x201: map_key_clear(KEY_NEW); break; case 0x202: map_key_clear(KEY_OPEN); break; case 0x203: map_key_clear(KEY_CLOSE); break; -- cgit v1.2.3 From 2c3af7d901c61c101c02f431cfb520af9ff56ab4 Mon Sep 17 00:00:00 2001 From: Stanislav Fomichev Date: Mon, 1 Apr 2019 13:57:30 -0700 Subject: selftests/bpf: fix vlan handling in flow dissector program When we tail call PROG(VLAN) from parse_eth_proto we don't need to peek back to handle vlan proto because we didn't adjust nhoff/thoff yet. Use flow_keys->n_proto, that we set in parse_eth_proto instead and properly increment nhoff as well. Also, always use skb->protocol and don't look at skb->vlan_present. skb->vlan_present indicates that vlan information is stored out-of-band in skb->vlan_{tci,proto} and vlan header is already pulled from skb. That means, skb->vlan_present == true is not relevant for BPF flow dissector. Add simple test cases with VLAN tagged frames: * single vlan for ipv4 * double vlan for ipv6 Signed-off-by: Stanislav Fomichev Signed-off-by: Daniel Borkmann --- .../selftests/bpf/prog_tests/flow_dissector.c | 68 ++++++++++++++++++++++ tools/testing/selftests/bpf/progs/bpf_flow.c | 15 ++--- 2 files changed, 72 insertions(+), 11 deletions(-) diff --git a/tools/testing/selftests/bpf/prog_tests/flow_dissector.c b/tools/testing/selftests/bpf/prog_tests/flow_dissector.c index bcbd928c96ab..fc818bc1d729 100644 --- a/tools/testing/selftests/bpf/prog_tests/flow_dissector.c +++ b/tools/testing/selftests/bpf/prog_tests/flow_dissector.c @@ -39,6 +39,58 @@ static struct bpf_flow_keys pkt_v6_flow_keys = { .n_proto = __bpf_constant_htons(ETH_P_IPV6), }; +#define VLAN_HLEN 4 + +static struct { + struct ethhdr eth; + __u16 vlan_tci; + __u16 vlan_proto; + struct iphdr iph; + struct tcphdr tcp; +} __packed pkt_vlan_v4 = { + .eth.h_proto = __bpf_constant_htons(ETH_P_8021Q), + .vlan_proto = __bpf_constant_htons(ETH_P_IP), + .iph.ihl = 5, + .iph.protocol = IPPROTO_TCP, + .iph.tot_len = __bpf_constant_htons(MAGIC_BYTES), + .tcp.urg_ptr = 123, + .tcp.doff = 5, +}; + +static struct bpf_flow_keys pkt_vlan_v4_flow_keys = { + .nhoff = VLAN_HLEN, + .thoff = VLAN_HLEN + sizeof(struct iphdr), + .addr_proto = ETH_P_IP, + .ip_proto = IPPROTO_TCP, + .n_proto = __bpf_constant_htons(ETH_P_IP), +}; + +static struct { + struct ethhdr eth; + __u16 vlan_tci; + __u16 vlan_proto; + __u16 vlan_tci2; + __u16 vlan_proto2; + struct ipv6hdr iph; + struct tcphdr tcp; +} __packed pkt_vlan_v6 = { + .eth.h_proto = __bpf_constant_htons(ETH_P_8021AD), + .vlan_proto = __bpf_constant_htons(ETH_P_8021Q), + .vlan_proto2 = __bpf_constant_htons(ETH_P_IPV6), + .iph.nexthdr = IPPROTO_TCP, + .iph.payload_len = __bpf_constant_htons(MAGIC_BYTES), + .tcp.urg_ptr = 123, + .tcp.doff = 5, +}; + +static struct bpf_flow_keys pkt_vlan_v6_flow_keys = { + .nhoff = VLAN_HLEN * 2, + .thoff = VLAN_HLEN * 2 + sizeof(struct ipv6hdr), + .addr_proto = ETH_P_IPV6, + .ip_proto = IPPROTO_TCP, + .n_proto = __bpf_constant_htons(ETH_P_IPV6), +}; + void test_flow_dissector(void) { struct bpf_flow_keys flow_keys; @@ -68,5 +120,21 @@ void test_flow_dissector(void) err, errno, retval, duration, size, sizeof(flow_keys)); CHECK_FLOW_KEYS("ipv6_flow_keys", flow_keys, pkt_v6_flow_keys); + err = bpf_prog_test_run(prog_fd, 10, &pkt_vlan_v4, sizeof(pkt_vlan_v4), + &flow_keys, &size, &retval, &duration); + CHECK(size != sizeof(flow_keys) || err || retval != 1, "vlan_ipv4", + "err %d errno %d retval %d duration %d size %u/%lu\n", + err, errno, retval, duration, size, sizeof(flow_keys)); + CHECK_FLOW_KEYS("vlan_ipv4_flow_keys", flow_keys, + pkt_vlan_v4_flow_keys); + + err = bpf_prog_test_run(prog_fd, 10, &pkt_vlan_v6, sizeof(pkt_vlan_v6), + &flow_keys, &size, &retval, &duration); + CHECK(size != sizeof(flow_keys) || err || retval != 1, "vlan_ipv6", + "err %d errno %d retval %d duration %d size %u/%lu\n", + err, errno, retval, duration, size, sizeof(flow_keys)); + CHECK_FLOW_KEYS("vlan_ipv6_flow_keys", flow_keys, + pkt_vlan_v6_flow_keys); + bpf_object__close(obj); } diff --git a/tools/testing/selftests/bpf/progs/bpf_flow.c b/tools/testing/selftests/bpf/progs/bpf_flow.c index 284660f5aa95..f177c7a6a6c7 100644 --- a/tools/testing/selftests/bpf/progs/bpf_flow.c +++ b/tools/testing/selftests/bpf/progs/bpf_flow.c @@ -119,10 +119,7 @@ static __always_inline int parse_eth_proto(struct __sk_buff *skb, __be16 proto) SEC("flow_dissector") int _dissect(struct __sk_buff *skb) { - if (!skb->vlan_present) - return parse_eth_proto(skb, skb->protocol); - else - return parse_eth_proto(skb, skb->vlan_proto); + return parse_eth_proto(skb, skb->protocol); } /* Parses on IPPROTO_* */ @@ -336,15 +333,9 @@ PROG(VLAN)(struct __sk_buff *skb) { struct bpf_flow_keys *keys = skb->flow_keys; struct vlan_hdr *vlan, _vlan; - __be16 proto; - - /* Peek back to see if single or double-tagging */ - if (bpf_skb_load_bytes(skb, keys->thoff - sizeof(proto), &proto, - sizeof(proto))) - return BPF_DROP; /* Account for double-tagging */ - if (proto == bpf_htons(ETH_P_8021AD)) { + if (keys->n_proto == bpf_htons(ETH_P_8021AD)) { vlan = bpf_flow_dissect_get_header(skb, sizeof(*vlan), &_vlan); if (!vlan) return BPF_DROP; @@ -352,6 +343,7 @@ PROG(VLAN)(struct __sk_buff *skb) if (vlan->h_vlan_encapsulated_proto != bpf_htons(ETH_P_8021Q)) return BPF_DROP; + keys->nhoff += sizeof(*vlan); keys->thoff += sizeof(*vlan); } @@ -359,6 +351,7 @@ PROG(VLAN)(struct __sk_buff *skb) if (!vlan) return BPF_DROP; + keys->nhoff += sizeof(*vlan); keys->thoff += sizeof(*vlan); /* Only allow 8021AD + 8021Q double tagging and no triple tagging.*/ if (vlan->h_vlan_encapsulated_proto == bpf_htons(ETH_P_8021AD) || -- cgit v1.2.3 From 822fe61795018265ae14731d4e5399e5bde36864 Mon Sep 17 00:00:00 2001 From: Stanislav Fomichev Date: Mon, 1 Apr 2019 13:57:31 -0700 Subject: net/flow_dissector: pass flow_keys->n_proto to BPF programs This is a preparation for the next commit that would prohibit access to the most fields of __sk_buff from the BPF programs. Instead of requiring BPF flow dissector programs to look into skb, pass all input data in the flow_keys. Signed-off-by: Stanislav Fomichev Signed-off-by: Daniel Borkmann --- net/core/flow_dissector.c | 1 + tools/testing/selftests/bpf/progs/bpf_flow.c | 6 ++++-- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c index bb1a54747d64..9b84250039df 100644 --- a/net/core/flow_dissector.c +++ b/net/core/flow_dissector.c @@ -707,6 +707,7 @@ bool __skb_flow_bpf_dissect(struct bpf_prog *prog, /* Pass parameters to the BPF program */ memset(flow_keys, 0, sizeof(*flow_keys)); cb->qdisc_cb.flow_keys = flow_keys; + flow_keys->n_proto = skb->protocol; flow_keys->nhoff = skb_network_offset(skb); flow_keys->thoff = flow_keys->nhoff; diff --git a/tools/testing/selftests/bpf/progs/bpf_flow.c b/tools/testing/selftests/bpf/progs/bpf_flow.c index f177c7a6a6c7..75b17cada539 100644 --- a/tools/testing/selftests/bpf/progs/bpf_flow.c +++ b/tools/testing/selftests/bpf/progs/bpf_flow.c @@ -92,7 +92,6 @@ static __always_inline int parse_eth_proto(struct __sk_buff *skb, __be16 proto) { struct bpf_flow_keys *keys = skb->flow_keys; - keys->n_proto = proto; switch (proto) { case bpf_htons(ETH_P_IP): bpf_tail_call(skb, &jmp_table, IP); @@ -119,7 +118,9 @@ static __always_inline int parse_eth_proto(struct __sk_buff *skb, __be16 proto) SEC("flow_dissector") int _dissect(struct __sk_buff *skb) { - return parse_eth_proto(skb, skb->protocol); + struct bpf_flow_keys *keys = skb->flow_keys; + + return parse_eth_proto(skb, keys->n_proto); } /* Parses on IPPROTO_* */ @@ -358,6 +359,7 @@ PROG(VLAN)(struct __sk_buff *skb) vlan->h_vlan_encapsulated_proto == bpf_htons(ETH_P_8021Q)) return BPF_DROP; + keys->n_proto = vlan->h_vlan_encapsulated_proto; return parse_eth_proto(skb, vlan->h_vlan_encapsulated_proto); } -- cgit v1.2.3 From b9e9c8599f0f23e3d2051befc9966a84b639f64f Mon Sep 17 00:00:00 2001 From: Stanislav Fomichev Date: Mon, 1 Apr 2019 13:57:32 -0700 Subject: flow_dissector: fix clamping of BPF flow_keys for non-zero nhoff Don't allow BPF program to set flow_keys->nhoff to less than initial value. We currently don't read the value afterwards in anything but the tests, but it's still a good practice to return consistent values to the test programs. Signed-off-by: Stanislav Fomichev Signed-off-by: Daniel Borkmann --- net/core/flow_dissector.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c index 9b84250039df..94a450b2191a 100644 --- a/net/core/flow_dissector.c +++ b/net/core/flow_dissector.c @@ -717,7 +717,8 @@ bool __skb_flow_bpf_dissect(struct bpf_prog *prog, /* Restore state */ memcpy(cb, &cb_saved, sizeof(cb_saved)); - flow_keys->nhoff = clamp_t(u16, flow_keys->nhoff, 0, skb->len); + flow_keys->nhoff = clamp_t(u16, flow_keys->nhoff, + skb_network_offset(skb), skb->len); flow_keys->thoff = clamp_t(u16, flow_keys->thoff, flow_keys->nhoff, skb->len); -- cgit v1.2.3 From 2ee7fba0d62d638d8b6dbe30cada3a531ec042af Mon Sep 17 00:00:00 2001 From: Stanislav Fomichev Date: Mon, 1 Apr 2019 13:57:33 -0700 Subject: flow_dissector: allow access only to a subset of __sk_buff fields Use whitelist instead of a blacklist and allow only a small set of fields that might be relevant in the context of flow dissector: * data * data_end * flow_keys This is required for the eth_get_headlen case where we have only a chunk of data to dissect (i.e. trying to read the other skb fields doesn't make sense). Note, that it is a breaking API change! However, we've provided flow_keys->n_proto as a substitute for skb->protocol; and there is no need to manually handle skb->vlan_present. So even if we break somebody, the migration is trivial. Unfortunately, we can't support eth_get_headlen use-case without those breaking changes. Signed-off-by: Stanislav Fomichev Signed-off-by: Daniel Borkmann --- net/core/filter.c | 16 +++------------- 1 file changed, 3 insertions(+), 13 deletions(-) diff --git a/net/core/filter.c b/net/core/filter.c index 647c63a7b25b..fc92ebc4e200 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -6613,14 +6613,8 @@ static bool flow_dissector_is_valid_access(int off, int size, const struct bpf_prog *prog, struct bpf_insn_access_aux *info) { - if (type == BPF_WRITE) { - switch (off) { - case bpf_ctx_range_till(struct __sk_buff, cb[0], cb[4]): - break; - default: - return false; - } - } + if (type == BPF_WRITE) + return false; switch (off) { case bpf_ctx_range(struct __sk_buff, data): @@ -6632,11 +6626,7 @@ static bool flow_dissector_is_valid_access(int off, int size, case bpf_ctx_range_ptr(struct __sk_buff, flow_keys): info->reg_type = PTR_TO_FLOW_KEYS; break; - case bpf_ctx_range(struct __sk_buff, tc_classid): - case bpf_ctx_range(struct __sk_buff, data_meta): - case bpf_ctx_range_till(struct __sk_buff, family, local_port): - case bpf_ctx_range(struct __sk_buff, tstamp): - case bpf_ctx_range(struct __sk_buff, wire_len): + default: return false; } -- cgit v1.2.3 From ae82899bbe92a7777ded9a562ee602dd5917bcd8 Mon Sep 17 00:00:00 2001 From: Stanislav Fomichev Date: Mon, 1 Apr 2019 13:57:34 -0700 Subject: flow_dissector: document BPF flow dissector environment Short doc on what BPF flow dissector should expect in the input __sk_buff and flow_keys. Signed-off-by: Stanislav Fomichev Signed-off-by: Daniel Borkmann --- Documentation/networking/bpf_flow_dissector.txt | 115 ++++++++++++++++++++++++ 1 file changed, 115 insertions(+) create mode 100644 Documentation/networking/bpf_flow_dissector.txt diff --git a/Documentation/networking/bpf_flow_dissector.txt b/Documentation/networking/bpf_flow_dissector.txt new file mode 100644 index 000000000000..70f66a2d57e7 --- /dev/null +++ b/Documentation/networking/bpf_flow_dissector.txt @@ -0,0 +1,115 @@ +================== +BPF Flow Dissector +================== + +Overview +======== + +Flow dissector is a routine that parses metadata out of the packets. It's +used in the various places in the networking subsystem (RFS, flow hash, etc). + +BPF flow dissector is an attempt to reimplement C-based flow dissector logic +in BPF to gain all the benefits of BPF verifier (namely, limits on the +number of instructions and tail calls). + +API +=== + +BPF flow dissector programs operate on an __sk_buff. However, only the +limited set of fields is allowed: data, data_end and flow_keys. flow_keys +is 'struct bpf_flow_keys' and contains flow dissector input and +output arguments. + +The inputs are: + * nhoff - initial offset of the networking header + * thoff - initial offset of the transport header, initialized to nhoff + * n_proto - L3 protocol type, parsed out of L2 header + +Flow dissector BPF program should fill out the rest of the 'struct +bpf_flow_keys' fields. Input arguments nhoff/thoff/n_proto should be also +adjusted accordingly. + +The return code of the BPF program is either BPF_OK to indicate successful +dissection, or BPF_DROP to indicate parsing error. + +__sk_buff->data +=============== + +In the VLAN-less case, this is what the initial state of the BPF flow +dissector looks like: ++------+------+------------+-----------+ +| DMAC | SMAC | ETHER_TYPE | L3_HEADER | ++------+------+------------+-----------+ + ^ + | + +-- flow dissector starts here + +skb->data + flow_keys->nhoff point to the first byte of L3_HEADER. +flow_keys->thoff = nhoff +flow_keys->n_proto = ETHER_TYPE + + +In case of VLAN, flow dissector can be called with the two different states. + +Pre-VLAN parsing: ++------+------+------+-----+-----------+-----------+ +| DMAC | SMAC | TPID | TCI |ETHER_TYPE | L3_HEADER | ++------+------+------+-----+-----------+-----------+ + ^ + | + +-- flow dissector starts here + +skb->data + flow_keys->nhoff point the to first byte of TCI. +flow_keys->thoff = nhoff +flow_keys->n_proto = TPID + +Please note that TPID can be 802.1AD and, hence, BPF program would +have to parse VLAN information twice for double tagged packets. + + +Post-VLAN parsing: ++------+------+------+-----+-----------+-----------+ +| DMAC | SMAC | TPID | TCI |ETHER_TYPE | L3_HEADER | ++------+------+------+-----+-----------+-----------+ + ^ + | + +-- flow dissector starts here + +skb->data + flow_keys->nhoff point the to first byte of L3_HEADER. +flow_keys->thoff = nhoff +flow_keys->n_proto = ETHER_TYPE + +In this case VLAN information has been processed before the flow dissector +and BPF flow dissector is not required to handle it. + + +The takeaway here is as follows: BPF flow dissector program can be called with +the optional VLAN header and should gracefully handle both cases: when single +or double VLAN is present and when it is not present. The same program +can be called for both cases and would have to be written carefully to +handle both cases. + + +Reference Implementation +======================== + +See tools/testing/selftests/bpf/progs/bpf_flow.c for the reference +implementation and tools/testing/selftests/bpf/flow_dissector_load.[hc] for +the loader. bpftool can be used to load BPF flow dissector program as well. + +The reference implementation is organized as follows: +* jmp_table map that contains sub-programs for each supported L3 protocol +* _dissect routine - entry point; it does input n_proto parsing and does + bpf_tail_call to the appropriate L3 handler + +Since BPF at this point doesn't support looping (or any jumping back), +jmp_table is used instead to handle multiple levels of encapsulation (and +IPv6 options). + + +Current Limitations +=================== +BPF flow dissector doesn't support exporting all the metadata that in-kernel +C-based implementation can export. Notable example is single VLAN (802.1Q) +and double VLAN (802.1AD) tags. Please refer to the 'struct bpf_flow_keys' +for a set of information that's currently can be exported from the BPF context. -- cgit v1.2.3 From 25adf50fe25d506d3fc12070a5ff4be858a1ac1b Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Wed, 3 Apr 2019 09:52:40 -0600 Subject: io_uring: fix double free in case of fileset regitration failure Will Deacon reported the following KASAN complaint: [ 149.890370] ================================================================== [ 149.891266] BUG: KASAN: double-free or invalid-free in io_sqe_files_unregister+0xa8/0x140 [ 149.892218] [ 149.892411] CPU: 113 PID: 3974 Comm: io_uring_regist Tainted: G B 5.1.0-rc3-00012-g40b114779944 #3 [ 149.893623] Hardware name: linux,dummy-virt (DT) [ 149.894169] Call trace: [ 149.894539] dump_backtrace+0x0/0x228 [ 149.895172] show_stack+0x14/0x20 [ 149.895747] dump_stack+0xe8/0x124 [ 149.896335] print_address_description+0x60/0x258 [ 149.897148] kasan_report_invalid_free+0x78/0xb8 [ 149.897936] __kasan_slab_free+0x1fc/0x228 [ 149.898641] kasan_slab_free+0x10/0x18 [ 149.899283] kfree+0x70/0x1f8 [ 149.899798] io_sqe_files_unregister+0xa8/0x140 [ 149.900574] io_ring_ctx_wait_and_kill+0x190/0x3c0 [ 149.901402] io_uring_release+0x2c/0x48 [ 149.902068] __fput+0x18c/0x510 [ 149.902612] ____fput+0xc/0x18 [ 149.903146] task_work_run+0xf0/0x148 [ 149.903778] do_notify_resume+0x554/0x748 [ 149.904467] work_pending+0x8/0x10 [ 149.905060] [ 149.905331] Allocated by task 3974: [ 149.905934] __kasan_kmalloc.isra.0.part.1+0x48/0xf8 [ 149.906786] __kasan_kmalloc.isra.0+0xb8/0xd8 [ 149.907531] kasan_kmalloc+0xc/0x18 [ 149.908134] __kmalloc+0x168/0x248 [ 149.908724] __arm64_sys_io_uring_register+0x2b8/0x15a8 [ 149.909622] el0_svc_common+0x100/0x258 [ 149.910281] el0_svc_handler+0x48/0xc0 [ 149.910928] el0_svc+0x8/0xc [ 149.911425] [ 149.911696] Freed by task 3974: [ 149.912242] __kasan_slab_free+0x114/0x228 [ 149.912955] kasan_slab_free+0x10/0x18 [ 149.913602] kfree+0x70/0x1f8 [ 149.914118] __arm64_sys_io_uring_register+0xc2c/0x15a8 [ 149.915009] el0_svc_common+0x100/0x258 [ 149.915670] el0_svc_handler+0x48/0xc0 [ 149.916317] el0_svc+0x8/0xc [ 149.916817] [ 149.917101] The buggy address belongs to the object at ffff8004ce07ed00 [ 149.917101] which belongs to the cache kmalloc-128 of size 128 [ 149.919197] The buggy address is located 0 bytes inside of [ 149.919197] 128-byte region [ffff8004ce07ed00, ffff8004ce07ed80) [ 149.921142] The buggy address belongs to the page: [ 149.921953] page:ffff7e0013381f00 count:1 mapcount:0 mapping:ffff800503417c00 index:0x0 compound_mapcount: 0 [ 149.923595] flags: 0x1ffff00000010200(slab|head) [ 149.924388] raw: 1ffff00000010200 dead000000000100 dead000000000200 ffff800503417c00 [ 149.925706] raw: 0000000000000000 0000000080400040 00000001ffffffff 0000000000000000 [ 149.927011] page dumped because: kasan: bad access detected [ 149.927956] [ 149.928224] Memory state around the buggy address: [ 149.929054] ffff8004ce07ec00: 00 00 00 00 00 00 00 00 fc fc fc fc fc fc fc fc [ 149.930274] ffff8004ce07ec80: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc [ 149.931494] >ffff8004ce07ed00: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb [ 149.932712] ^ [ 149.933281] ffff8004ce07ed80: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc [ 149.934508] ffff8004ce07ee00: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc [ 149.935725] ================================================================== which is due to a failure in registrering a fileset. This frees the ctx->user_files pointer, but doesn't clear it. When the io_uring instance is later freed through the normal channels, we free this pointer again. At this point it's invalid. Ensure we clear the pointer when we free it for the error case. Reported-by: Will Deacon Tested-by: Will Deacon Signed-off-by: Jens Axboe --- fs/io_uring.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/io_uring.c b/fs/io_uring.c index bbdbd56cf2ac..07d6ef195d05 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -2215,6 +2215,7 @@ static int io_sqe_files_register(struct io_ring_ctx *ctx, void __user *arg, fput(ctx->user_files[i]); kfree(ctx->user_files); + ctx->user_files = NULL; ctx->nr_user_files = 0; return ret; } -- cgit v1.2.3 From 58ccd2d31e502c37e108b285bf3d343eb00c235b Mon Sep 17 00:00:00 2001 From: YueHaibing Date: Wed, 3 Apr 2019 11:37:07 +0800 Subject: paride/pf: Fix potential NULL pointer dereference Syzkaller report this: pf: pf version 1.04, major 47, cluster 64, nice 0 pf: No ATAPI disk detected kasan: CONFIG_KASAN_INLINE enabled kasan: GPF could be caused by NULL-ptr deref or user memory access general protection fault: 0000 [#1] SMP KASAN PTI CPU: 0 PID: 9887 Comm: syz-executor.0 Tainted: G C 5.1.0-rc3+ #8 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.10.2-1ubuntu1 04/01/2014 RIP: 0010:pf_init+0x7af/0x1000 [pf] Code: 46 77 d2 48 89 d8 48 c1 e8 03 80 3c 28 00 74 08 48 89 df e8 03 25 a6 d2 4c 8b 23 49 8d bc 24 80 05 00 00 48 89 f8 48 c1 e8 03 <80> 3c 28 00 74 05 e8 e6 24 a6 d2 49 8b bc 24 80 05 00 00 e8 79 34 RSP: 0018:ffff8881abcbf998 EFLAGS: 00010202 RAX: 00000000000000b0 RBX: ffffffffc1e4a8a8 RCX: ffffffffaec50788 RDX: 0000000000039b10 RSI: ffffc9000153c000 RDI: 0000000000000580 RBP: dffffc0000000000 R08: ffffed103ee44e59 R09: ffffed103ee44e59 R10: 0000000000000001 R11: ffffed103ee44e58 R12: 0000000000000000 R13: ffffffffc1e4b028 R14: 0000000000000000 R15: 0000000000000020 FS: 00007f1b78a91700(0000) GS:ffff8881f7200000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007f6d72b207f8 CR3: 00000001d5790004 CR4: 00000000007606f0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 PKRU: 55555554 Call Trace: ? 0xffffffffc1e50000 do_one_initcall+0xbc/0x47d init/main.c:901 do_init_module+0x1b5/0x547 kernel/module.c:3456 load_module+0x6405/0x8c10 kernel/module.c:3804 __do_sys_finit_module+0x162/0x190 kernel/module.c:3898 do_syscall_64+0x9f/0x450 arch/x86/entry/common.c:290 entry_SYSCALL_64_after_hwframe+0x49/0xbe RIP: 0033:0x462e99 Code: f7 d8 64 89 02 b8 ff ff ff ff c3 66 0f 1f 44 00 00 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 c7 c1 bc ff ff ff f7 d8 64 89 01 48 RSP: 002b:00007f1b78a90c58 EFLAGS: 00000246 ORIG_RAX: 0000000000000139 RAX: ffffffffffffffda RBX: 000000000073bf00 RCX: 0000000000462e99 RDX: 0000000000000000 RSI: 0000000020000180 RDI: 0000000000000003 RBP: 00007f1b78a90c70 R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000246 R12: 00007f1b78a916bc R13: 00000000004bcefa R14: 00000000006f6fb0 R15: 0000000000000004 Modules linked in: pf(+) paride gpio_tps65218 tps65218 i2c_cht_wc ati_remote dc395x act_meta_skbtcindex act_ife ife ecdh_generic rc_xbox_dvd sky81452_regulator v4l2_fwnode leds_blinkm snd_usb_hiface comedi(C) aes_ti slhc cfi_cmdset_0020 mtd cfi_util sx8654 mdio_gpio of_mdio fixed_phy mdio_bitbang libphy alcor_pci matrix_keymap hid_uclogic usbhid scsi_transport_fc videobuf2_v4l2 videobuf2_dma_sg snd_soc_pcm179x_spi snd_soc_pcm179x_codec i2c_demux_pinctrl mdev snd_indigodj isl6405 mii enc28j60 cmac adt7316_i2c(C) adt7316(C) fmc_trivial fmc nf_reject_ipv4 authenc rc_dtt200u rtc_ds1672 dvb_usb_dibusb_mc dvb_usb_dibusb_mc_common dib3000mc dibx000_common dvb_usb_dibusb_common dvb_usb dvb_core videobuf2_common videobuf2_vmalloc videobuf2_memops regulator_haptic adf7242 mac802154 ieee802154 s5h1409 da9034_ts snd_intel8x0m wmi cx24120 usbcore sdhci_cadence sdhci_pltfm sdhci mmc_core joydev i2c_algo_bit scsi_transport_iscsi iscsi_boot_sysfs ves1820 lockd grace nfs_acl auth_rpcgss sunrp c ip_vs snd_soc_adau7002 snd_cs4281 snd_rawmidi gameport snd_opl3_lib snd_seq_device snd_hwdep snd_ac97_codec ad7418 hid_primax hid snd_soc_cs4265 snd_soc_core snd_pcm_dmaengine snd_pcm snd_timer ac97_bus snd_compress snd soundcore ti_adc108s102 eeprom_93cx6 i2c_algo_pca mlxreg_hotplug st_pressure st_sensors industrialio_triggered_buffer kfifo_buf industrialio v4l2_common videodev media snd_soc_adau_utils rc_pinnacle_grey rc_core pps_gpio leds_lm3692x nandcore ledtrig_pattern iptable_security iptable_raw iptable_mangle iptable_nat nf_nat nf_conntrack nf_defrag_ipv6 nf_defrag_ipv4 iptable_filter bpfilter ip6_vti ip_vti ip_gre ipip sit tunnel4 ip_tunnel hsr veth netdevsim vxcan batman_adv cfg80211 rfkill chnl_net caif nlmon dummy team bonding vcan bridge stp llc ip6_gre gre ip6_tunnel tunnel6 tun mousedev ppdev tpm kvm_intel kvm irqbypass crct10dif_pclmul crc32_pclmul crc32c_intel ghash_clmulni_intel aesni_intel ide_pci_generic aes_x86_64 piix crypto_simd input_leds psmouse cryp td glue_helper ide_core intel_agp serio_raw intel_gtt agpgart ata_generic i2c_piix4 pata_acpi parport_pc parport rtc_cmos floppy sch_fq_codel ip_tables x_tables sha1_ssse3 sha1_generic ipv6 [last unloaded: paride] Dumping ftrace buffer: (ftrace buffer empty) ---[ end trace 7a818cf5f210d79e ]--- If alloc_disk fails in pf_init_units, pf->disk will be NULL, however in pf_detect and pf_exit, it's not check this before free.It may result a NULL pointer dereference. Also when register_blkdev failed, blk_cleanup_queue() and blk_mq_free_tag_set() should be called to free resources. Reported-by: Hulk Robot Fixes: 6ce59025f118 ("paride/pf: cleanup queues when detection fails") Signed-off-by: YueHaibing Signed-off-by: Jens Axboe --- drivers/block/paride/pf.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/drivers/block/paride/pf.c b/drivers/block/paride/pf.c index 103b617cdc31..35e6e271b219 100644 --- a/drivers/block/paride/pf.c +++ b/drivers/block/paride/pf.c @@ -762,6 +762,8 @@ static int pf_detect(void) printk("%s: No ATAPI disk detected\n", name); for (pf = units, unit = 0; unit < PF_UNITS; pf++, unit++) { + if (!pf->disk) + continue; blk_cleanup_queue(pf->disk->queue); pf->disk->queue = NULL; blk_mq_free_tag_set(&pf->tag_set); @@ -1029,8 +1031,13 @@ static int __init pf_init(void) pf_busy = 0; if (register_blkdev(major, name)) { - for (pf = units, unit = 0; unit < PF_UNITS; pf++, unit++) + for (pf = units, unit = 0; unit < PF_UNITS; pf++, unit++) { + if (!pf->disk) + continue; + blk_cleanup_queue(pf->disk->queue); + blk_mq_free_tag_set(&pf->tag_set); put_disk(pf->disk); + } return -EBUSY; } @@ -1051,6 +1058,9 @@ static void __exit pf_exit(void) int unit; unregister_blkdev(major, name); for (pf = units, unit = 0; unit < PF_UNITS; pf++, unit++) { + if (!pf->disk) + continue; + if (pf->present) del_gendisk(pf->disk); -- cgit v1.2.3 From 18bfb9c6a8a5fbfe1a732130bb8f65fcfc4e8aa2 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Wed, 3 Apr 2019 09:22:35 +0300 Subject: aio: Fix an error code in __io_submit_one() This accidentally returns the wrong variable. The "req->ki_eventfd" pointer is NULL so this return success. Fixes: 7316b49c2a11 ("aio: move sanity checks and request allocation to io_submit_one()") Signed-off-by: Dan Carpenter Signed-off-by: Al Viro --- fs/aio.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/aio.c b/fs/aio.c index a4cc2a1cccb7..7ccecaab487a 100644 --- a/fs/aio.c +++ b/fs/aio.c @@ -1794,7 +1794,7 @@ static int __io_submit_one(struct kioctx *ctx, const struct iocb *iocb, */ eventfd = eventfd_ctx_fdget(iocb->aio_resfd); if (IS_ERR(eventfd)) - return PTR_ERR(req->ki_eventfd); + return PTR_ERR(eventfd); req->ki_eventfd = eventfd; } -- cgit v1.2.3 From 426b046b748d1f47e096e05bdcc6fb4172791307 Mon Sep 17 00:00:00 2001 From: Louis Taylor Date: Wed, 3 Apr 2019 12:36:20 -0600 Subject: vfio/pci: use correct format characters When compiling with -Wformat, clang emits the following warnings: drivers/vfio/pci/vfio_pci.c:1601:5: warning: format specifies type 'unsigned short' but the argument has type 'unsigned int' [-Wformat] vendor, device, subvendor, subdevice, ^~~~~~ drivers/vfio/pci/vfio_pci.c:1601:13: warning: format specifies type 'unsigned short' but the argument has type 'unsigned int' [-Wformat] vendor, device, subvendor, subdevice, ^~~~~~ drivers/vfio/pci/vfio_pci.c:1601:21: warning: format specifies type 'unsigned short' but the argument has type 'unsigned int' [-Wformat] vendor, device, subvendor, subdevice, ^~~~~~~~~ drivers/vfio/pci/vfio_pci.c:1601:32: warning: format specifies type 'unsigned short' but the argument has type 'unsigned int' [-Wformat] vendor, device, subvendor, subdevice, ^~~~~~~~~ drivers/vfio/pci/vfio_pci.c:1605:5: warning: format specifies type 'unsigned short' but the argument has type 'unsigned int' [-Wformat] vendor, device, subvendor, subdevice, ^~~~~~ drivers/vfio/pci/vfio_pci.c:1605:13: warning: format specifies type 'unsigned short' but the argument has type 'unsigned int' [-Wformat] vendor, device, subvendor, subdevice, ^~~~~~ drivers/vfio/pci/vfio_pci.c:1605:21: warning: format specifies type 'unsigned short' but the argument has type 'unsigned int' [-Wformat] vendor, device, subvendor, subdevice, ^~~~~~~~~ drivers/vfio/pci/vfio_pci.c:1605:32: warning: format specifies type 'unsigned short' but the argument has type 'unsigned int' [-Wformat] vendor, device, subvendor, subdevice, ^~~~~~~~~ The types of these arguments are unconditionally defined, so this patch updates the format character to the correct ones for unsigned ints. Link: https://github.com/ClangBuiltLinux/linux/issues/378 Signed-off-by: Louis Taylor Reviewed-by: Nick Desaulniers Signed-off-by: Alex Williamson --- drivers/vfio/pci/vfio_pci.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/vfio/pci/vfio_pci.c b/drivers/vfio/pci/vfio_pci.c index a25659b5a5d1..3fa20e95a6bb 100644 --- a/drivers/vfio/pci/vfio_pci.c +++ b/drivers/vfio/pci/vfio_pci.c @@ -1661,11 +1661,11 @@ static void __init vfio_pci_fill_ids(void) rc = pci_add_dynid(&vfio_pci_driver, vendor, device, subvendor, subdevice, class, class_mask, 0); if (rc) - pr_warn("failed to add dynamic id [%04hx:%04hx[%04hx:%04hx]] class %#08x/%08x (%d)\n", + pr_warn("failed to add dynamic id [%04x:%04x[%04x:%04x]] class %#08x/%08x (%d)\n", vendor, device, subvendor, subdevice, class, class_mask, rc); else - pr_info("add [%04hx:%04hx[%04hx:%04hx]] class %#08x/%08x\n", + pr_info("add [%04x:%04x[%04x:%04x]] class %#08x/%08x\n", vendor, device, subvendor, subdevice, class, class_mask); } -- cgit v1.2.3 From e39dd513d5f2ae2041c593d42fd0d8b24e7e950b Mon Sep 17 00:00:00 2001 From: Wang Hai Date: Wed, 3 Apr 2019 12:36:21 -0600 Subject: vfio/spapr_tce: Make symbol 'tce_iommu_driver_ops' static Fixes the following sparse warning: drivers/vfio/vfio_iommu_spapr_tce.c:1401:36: warning: symbol 'tce_iommu_driver_ops' was not declared. Should it be static? Fixes: 5ffd229c0273 ("powerpc/vfio: Implement IOMMU driver for VFIO") Signed-off-by: Wang Hai Reviewed-by: Alexey Kardashevskiy Signed-off-by: Alex Williamson --- drivers/vfio/vfio_iommu_spapr_tce.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/vfio/vfio_iommu_spapr_tce.c b/drivers/vfio/vfio_iommu_spapr_tce.c index 8dbb270998f4..6b64e45a5269 100644 --- a/drivers/vfio/vfio_iommu_spapr_tce.c +++ b/drivers/vfio/vfio_iommu_spapr_tce.c @@ -1398,7 +1398,7 @@ unlock_exit: mutex_unlock(&container->lock); } -const struct vfio_iommu_driver_ops tce_iommu_driver_ops = { +static const struct vfio_iommu_driver_ops tce_iommu_driver_ops = { .name = "iommu-vfio-powerpc", .owner = THIS_MODULE, .open = tce_iommu_open, -- cgit v1.2.3 From 492855939bdb59c6f947b0b5b44af9ad82b7e38c Mon Sep 17 00:00:00 2001 From: Alex Williamson Date: Wed, 3 Apr 2019 12:36:21 -0600 Subject: vfio/type1: Limit DMA mappings per container Memory backed DMA mappings are accounted against a user's locked memory limit, including multiple mappings of the same memory. This accounting bounds the number of such mappings that a user can create. However, DMA mappings that are not backed by memory, such as DMA mappings of device MMIO via mmaps, do not make use of page pinning and therefore do not count against the user's locked memory limit. These mappings still consume memory, but the memory is not well associated to the process for the purpose of oom killing a task. To add bounding on this use case, we introduce a limit to the total number of concurrent DMA mappings that a user is allowed to create. This limit is exposed as a tunable module option where the default value of 64K is expected to be well in excess of any reasonable use case (a large virtual machine configuration would typically only make use of tens of concurrent mappings). This fixes CVE-2019-3882. Reviewed-by: Eric Auger Tested-by: Eric Auger Reviewed-by: Peter Xu Reviewed-by: Cornelia Huck Signed-off-by: Alex Williamson --- drivers/vfio/vfio_iommu_type1.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c index 73652e21efec..d0f731c9920a 100644 --- a/drivers/vfio/vfio_iommu_type1.c +++ b/drivers/vfio/vfio_iommu_type1.c @@ -58,12 +58,18 @@ module_param_named(disable_hugepages, MODULE_PARM_DESC(disable_hugepages, "Disable VFIO IOMMU support for IOMMU hugepages."); +static unsigned int dma_entry_limit __read_mostly = U16_MAX; +module_param_named(dma_entry_limit, dma_entry_limit, uint, 0644); +MODULE_PARM_DESC(dma_entry_limit, + "Maximum number of user DMA mappings per container (65535)."); + struct vfio_iommu { struct list_head domain_list; struct vfio_domain *external_domain; /* domain for external user */ struct mutex lock; struct rb_root dma_list; struct blocking_notifier_head notifier; + unsigned int dma_avail; bool v2; bool nesting; }; @@ -836,6 +842,7 @@ static void vfio_remove_dma(struct vfio_iommu *iommu, struct vfio_dma *dma) vfio_unlink_dma(iommu, dma); put_task_struct(dma->task); kfree(dma); + iommu->dma_avail++; } static unsigned long vfio_pgsize_bitmap(struct vfio_iommu *iommu) @@ -1081,12 +1088,18 @@ static int vfio_dma_do_map(struct vfio_iommu *iommu, goto out_unlock; } + if (!iommu->dma_avail) { + ret = -ENOSPC; + goto out_unlock; + } + dma = kzalloc(sizeof(*dma), GFP_KERNEL); if (!dma) { ret = -ENOMEM; goto out_unlock; } + iommu->dma_avail--; dma->iova = iova; dma->vaddr = vaddr; dma->prot = prot; @@ -1583,6 +1596,7 @@ static void *vfio_iommu_type1_open(unsigned long arg) INIT_LIST_HEAD(&iommu->domain_list); iommu->dma_list = RB_ROOT; + iommu->dma_avail = dma_entry_limit; mutex_init(&iommu->lock); BLOCKING_INIT_NOTIFIER_HEAD(&iommu->notifier); -- cgit v1.2.3 From bf2a7ca39fd3ab47ef71c621a7ee69d1813b1f97 Mon Sep 17 00:00:00 2001 From: Anson Huang Date: Wed, 3 Apr 2019 15:14:44 -0700 Subject: Input: snvs_pwrkey - initialize necessary driver data before enabling IRQ SNVS IRQ is requested before necessary driver data initialized, if there is a pending IRQ during driver probe phase, kernel NULL pointer panic will occur in IRQ handler. To avoid such scenario, just initialize necessary driver data before enabling IRQ. This patch is inspired by NXP's internal kernel tree. Fixes: d3dc6e232215 ("input: keyboard: imx: add snvs power key driver") Signed-off-by: Anson Huang Signed-off-by: Dmitry Torokhov --- drivers/input/keyboard/snvs_pwrkey.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/input/keyboard/snvs_pwrkey.c b/drivers/input/keyboard/snvs_pwrkey.c index effb63205d3d..4c67cf30a5d9 100644 --- a/drivers/input/keyboard/snvs_pwrkey.c +++ b/drivers/input/keyboard/snvs_pwrkey.c @@ -148,6 +148,9 @@ static int imx_snvs_pwrkey_probe(struct platform_device *pdev) return error; } + pdata->input = input; + platform_set_drvdata(pdev, pdata); + error = devm_request_irq(&pdev->dev, pdata->irq, imx_snvs_pwrkey_interrupt, 0, pdev->name, pdev); @@ -163,9 +166,6 @@ static int imx_snvs_pwrkey_probe(struct platform_device *pdev) return error; } - pdata->input = input; - platform_set_drvdata(pdev, pdata); - device_init_wakeup(&pdev->dev, pdata->wakeup); return 0; -- cgit v1.2.3 From 7f1a93b1f1d1d2603a49a9e4226259db9272f305 Mon Sep 17 00:00:00 2001 From: Xiong Zhang Date: Mon, 25 Mar 2019 16:29:19 +0800 Subject: drm/i915/gvt: Correct the calculation of plane size stride isn't in unit of pixel, it is bytes, so calculation of plane size doesn't need to multiple bpp. Fixes: e546e281d33d ("drm/i915/gvt: Dmabuf support for GVT-g") Signed-off-by: Xiong Zhang Signed-off-by: Zhenyu Wang --- drivers/gpu/drm/i915/gvt/dmabuf.c | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/i915/gvt/dmabuf.c b/drivers/gpu/drm/i915/gvt/dmabuf.c index 3e7e2b80c857..5d887f7cc0d5 100644 --- a/drivers/gpu/drm/i915/gvt/dmabuf.c +++ b/drivers/gpu/drm/i915/gvt/dmabuf.c @@ -238,9 +238,6 @@ static int vgpu_get_plane_info(struct drm_device *dev, default: gvt_vgpu_err("invalid tiling mode: %x\n", p.tiled); } - - info->size = (((p.stride * p.height * p.bpp) / 8) + - (PAGE_SIZE - 1)) >> PAGE_SHIFT; } else if (plane_id == DRM_PLANE_TYPE_CURSOR) { ret = intel_vgpu_decode_cursor_plane(vgpu, &c); if (ret) @@ -262,14 +259,13 @@ static int vgpu_get_plane_info(struct drm_device *dev, info->x_hot = UINT_MAX; info->y_hot = UINT_MAX; } - - info->size = (((info->stride * c.height * c.bpp) / 8) - + (PAGE_SIZE - 1)) >> PAGE_SHIFT; } else { gvt_vgpu_err("invalid plane id:%d\n", plane_id); return -EINVAL; } + info->size = (info->stride * info->height + PAGE_SIZE - 1) + >> PAGE_SHIFT; if (info->size == 0) { gvt_vgpu_err("fb size is zero\n"); return -EINVAL; -- cgit v1.2.3 From cf9ed66671ec5f6cacc7b6efbad9d7c9e5e31776 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 5 Feb 2019 20:30:33 +0000 Subject: drm/i915/gvt: Fix kerneldoc typo for intel_vgpu_emulate_hotplug drivers/gpu/drm/i915/gvt/display.c:457: warning: Function parameter or member 'connected' not described in 'intel_vgpu_emulate_hotplug' drivers/gpu/drm/i915/gvt/display.c:457: warning: Excess function parameter 'conncted' description in 'intel_vgpu_emulate_hotplug' Fixes: 1ca20f33df42 ("drm/i915/gvt: add hotplug emulation") Signed-off-by: Chris Wilson Cc: Hang Yuan Cc: Zhenyu Wang Cc: Zhi Wang Signed-off-by: Zhenyu Wang --- drivers/gpu/drm/i915/gvt/display.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/gvt/display.c b/drivers/gpu/drm/i915/gvt/display.c index 035479e273be..e3f9caa7839f 100644 --- a/drivers/gpu/drm/i915/gvt/display.c +++ b/drivers/gpu/drm/i915/gvt/display.c @@ -448,7 +448,7 @@ void intel_gvt_emulate_vblank(struct intel_gvt *gvt) /** * intel_vgpu_emulate_hotplug - trigger hotplug event for vGPU * @vgpu: a vGPU - * @conncted: link state + * @connected: link state * * This function is used to trigger hotplug interrupt for vGPU * -- cgit v1.2.3 From 27fad74a5a77fe2e1f876db7bf27efcf2ec304b2 Mon Sep 17 00:00:00 2001 From: YueHaibing Date: Thu, 4 Apr 2019 10:31:14 +0800 Subject: iov_iter: Fix build error without CONFIG_CRYPTO If CONFIG_CRYPTO is not set or set to m, gcc building warn this: lib/iov_iter.o: In function `hash_and_copy_to_iter': iov_iter.c:(.text+0x9129): undefined reference to `crypto_stats_get' iov_iter.c:(.text+0x9152): undefined reference to `crypto_stats_ahash_update' Reported-by: Hulk Robot Fixes: d05f443554b3 ("iov_iter: introduce hash_and_copy_to_iter helper") Suggested-by: Al Viro Signed-off-by: YueHaibing Signed-off-by: Al Viro --- lib/iov_iter.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/lib/iov_iter.c b/lib/iov_iter.c index ea36dc355da1..b396d328a764 100644 --- a/lib/iov_iter.c +++ b/lib/iov_iter.c @@ -1528,6 +1528,7 @@ EXPORT_SYMBOL(csum_and_copy_to_iter); size_t hash_and_copy_to_iter(const void *addr, size_t bytes, void *hashp, struct iov_iter *i) { +#ifdef CONFIG_CRYPTO struct ahash_request *hash = hashp; struct scatterlist sg; size_t copied; @@ -1537,6 +1538,9 @@ size_t hash_and_copy_to_iter(const void *addr, size_t bytes, void *hashp, ahash_request_set_crypt(hash, &sg, NULL, copied); crypto_ahash_update(hash); return copied; +#else + return 0; +#endif } EXPORT_SYMBOL(hash_and_copy_to_iter); -- cgit v1.2.3 From 1cb1d2c64e812928fe0a40b8f7e74523d0283dbe Mon Sep 17 00:00:00 2001 From: Xose Vazquez Perez Date: Sat, 30 Mar 2019 15:43:31 +0100 Subject: scsi: core: add new RDAC LENOVO/DE_Series device Blacklist "Universal Xport" LUN. It's used for in-band storage array management. Also add model to the rdac dh family. Cc: Martin Wilck Cc: Hannes Reinecke Cc: NetApp RDAC team Cc: Christophe Varoqui Cc: James E.J. Bottomley Cc: Martin K. Petersen Cc: SCSI ML Cc: DM ML Signed-off-by: Xose Vazquez Perez Reviewed-by: Martin Wilck Signed-off-by: Martin K. Petersen --- drivers/scsi/scsi_devinfo.c | 1 + drivers/scsi/scsi_dh.c | 1 + 2 files changed, 2 insertions(+) diff --git a/drivers/scsi/scsi_devinfo.c b/drivers/scsi/scsi_devinfo.c index c4cbfd07b916..a08ff3bd6310 100644 --- a/drivers/scsi/scsi_devinfo.c +++ b/drivers/scsi/scsi_devinfo.c @@ -238,6 +238,7 @@ static struct { {"NETAPP", "Universal Xport", "*", BLIST_NO_ULD_ATTACH}, {"LSI", "Universal Xport", "*", BLIST_NO_ULD_ATTACH}, {"ENGENIO", "Universal Xport", "*", BLIST_NO_ULD_ATTACH}, + {"LENOVO", "Universal Xport", "*", BLIST_NO_ULD_ATTACH}, {"SMSC", "USB 2 HS-CF", NULL, BLIST_SPARSELUN | BLIST_INQUIRY_36}, {"SONY", "CD-ROM CDU-8001", NULL, BLIST_BORKEN}, {"SONY", "TSL", NULL, BLIST_FORCELUN}, /* DDS3 & DDS4 autoloaders */ diff --git a/drivers/scsi/scsi_dh.c b/drivers/scsi/scsi_dh.c index 5a58cbf3a75d..c14006ac98f9 100644 --- a/drivers/scsi/scsi_dh.c +++ b/drivers/scsi/scsi_dh.c @@ -75,6 +75,7 @@ static const struct scsi_dh_blist scsi_dh_blist[] = { {"NETAPP", "INF-01-00", "rdac", }, {"LSI", "INF-01-00", "rdac", }, {"ENGENIO", "INF-01-00", "rdac", }, + {"LENOVO", "DE_Series", "rdac", }, {NULL, NULL, NULL }, }; -- cgit v1.2.3 From 382e06d11e075a40b4094b6ef809f8d4bcc7ab2a Mon Sep 17 00:00:00 2001 From: Michael Kelley Date: Mon, 1 Apr 2019 16:10:52 +0000 Subject: scsi: storvsc: Fix calculation of sub-channel count When the number of sub-channels offered by Hyper-V is >= the number of CPUs in the VM, calculate the correct number of sub-channels. The current code produces one too many. This scenario arises only when the number of CPUs is artificially restricted (for example, with maxcpus= on the kernel boot line), because Hyper-V normally offers a sub-channel count < number of CPUs. While the current code doesn't break, the extra sub-channel is unbalanced across the CPUs (for example, a total of 5 channels on a VM with 4 CPUs). Signed-off-by: Michael Kelley Reviewed-by: Vitaly Kuznetsov Reviewed-by: Long Li Signed-off-by: Martin K. Petersen --- drivers/scsi/storvsc_drv.c | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/drivers/scsi/storvsc_drv.c b/drivers/scsi/storvsc_drv.c index 84380bae20f1..e186743033f4 100644 --- a/drivers/scsi/storvsc_drv.c +++ b/drivers/scsi/storvsc_drv.c @@ -668,13 +668,22 @@ static void handle_multichannel_storage(struct hv_device *device, int max_chns) { struct device *dev = &device->device; struct storvsc_device *stor_device; - int num_cpus = num_online_cpus(); int num_sc; struct storvsc_cmd_request *request; struct vstor_packet *vstor_packet; int ret, t; - num_sc = ((max_chns > num_cpus) ? num_cpus : max_chns); + /* + * If the number of CPUs is artificially restricted, such as + * with maxcpus=1 on the kernel boot line, Hyper-V could offer + * sub-channels >= the number of CPUs. These sub-channels + * should not be created. The primary channel is already created + * and assigned to one CPU, so check against # CPUs - 1. + */ + num_sc = min((int)(num_online_cpus() - 1), max_chns); + if (!num_sc) + return; + stor_device = get_out_stor_device(device); if (!stor_device) return; -- cgit v1.2.3 From ed2e63aaec4fbf248275d2d83c4cfd65e069594f Mon Sep 17 00:00:00 2001 From: Michael Kelley Date: Mon, 1 Apr 2019 21:42:06 +0000 Subject: scsi: storvsc: Reduce default ring buffer size to 128 Kbytes Reduce the default VMbus channel ring buffer size for storvsc SCSI devices from 1 Mbyte to 128 Kbytes. Measurements show that ring buffer sizes above 128 Kbytes do not increase performance even at very high IOPS rates, so don't waste the memory. Also remove the dependence on PAGE_SIZE, since the ring buffer size should not change on architectures where PAGE_SIZE is not 4 Kbytes. Signed-off-by: Michael Kelley Reviewed-by: Haiyang Zhang Signed-off-by: Martin K. Petersen --- drivers/scsi/storvsc_drv.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/storvsc_drv.c b/drivers/scsi/storvsc_drv.c index e186743033f4..8472de1007ff 100644 --- a/drivers/scsi/storvsc_drv.c +++ b/drivers/scsi/storvsc_drv.c @@ -385,7 +385,7 @@ enum storvsc_request_type { * This is the end of Protocol specific defines. */ -static int storvsc_ringbuffer_size = (256 * PAGE_SIZE); +static int storvsc_ringbuffer_size = (128 * 1024); static u32 max_outstanding_req_per_channel; static int storvsc_vcpus_per_sub_channel = 4; -- cgit v1.2.3 From 4eb01535886644d79a58fe652e0782a194bc3402 Mon Sep 17 00:00:00 2001 From: James Smart Date: Wed, 3 Apr 2019 11:10:34 -0700 Subject: scsi: lpfc: Fix missing wakeups on abort threads Abort thread wakeups, on some wqe types, are not happening. The thread wakeup logic is dependent upon the LPFC_DRIVER_ABORTED flag. However, on these wqes, the completion handler running prior to the io completion routine ends up clearing the flag. Rework the wakeup logic to look at a non-null waitq element which must be set if the abort thread is waiting. This is reverting the change in the indicated patch. Fixes: c2017260eea2d ("scsi: lpfc: Rework locking on SCSI io completion") Signed-off-by: Dick Kennedy Signed-off-by: James Smart Signed-off-by: Martin K. Petersen --- drivers/scsi/lpfc/lpfc_scsi.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/drivers/scsi/lpfc/lpfc_scsi.c b/drivers/scsi/lpfc/lpfc_scsi.c index c98f264f1d83..a497b2c0cb79 100644 --- a/drivers/scsi/lpfc/lpfc_scsi.c +++ b/drivers/scsi/lpfc/lpfc_scsi.c @@ -3878,10 +3878,9 @@ lpfc_scsi_cmd_iocb_cmpl(struct lpfc_hba *phba, struct lpfc_iocbq *pIocbIn, * wake up the thread. */ spin_lock(&lpfc_cmd->buf_lock); - if (unlikely(lpfc_cmd->cur_iocbq.iocb_flag & LPFC_DRIVER_ABORTED)) { - lpfc_cmd->cur_iocbq.iocb_flag &= ~LPFC_DRIVER_ABORTED; - if (lpfc_cmd->waitq) - wake_up(lpfc_cmd->waitq); + lpfc_cmd->cur_iocbq.iocb_flag &= ~LPFC_DRIVER_ABORTED; + if (lpfc_cmd->waitq) { + wake_up(lpfc_cmd->waitq); lpfc_cmd->waitq = NULL; } spin_unlock(&lpfc_cmd->buf_lock); -- cgit v1.2.3 From 0ab03f353d3613ea49d1f924faf98559003670a8 Mon Sep 17 00:00:00 2001 From: Steffen Klassert Date: Tue, 2 Apr 2019 08:16:03 +0200 Subject: net-gro: Fix GRO flush when receiving a GSO packet. Currently we may merge incorrectly a received GSO packet or a packet with frag_list into a packet sitting in the gro_hash list. skb_segment() may crash case because the assumptions on the skb layout are not met. The correct behaviour would be to flush the packet in the gro_hash list and send the received GSO packet directly afterwards. Commit d61d072e87c8e ("net-gro: avoid reorders") sets NAPI_GRO_CB(skb)->flush in this case, but this is not checked before merging. This patch makes sure to check this flag and to not merge in that case. Fixes: d61d072e87c8e ("net-gro: avoid reorders") Signed-off-by: Steffen Klassert Signed-off-by: David S. Miller --- net/core/skbuff.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 2415d9cb9b89..ef2cd5712098 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -3801,7 +3801,7 @@ int skb_gro_receive(struct sk_buff *p, struct sk_buff *skb) unsigned int delta_truesize; struct sk_buff *lp; - if (unlikely(p->len + len >= 65536)) + if (unlikely(p->len + len >= 65536 || NAPI_GRO_CB(skb)->flush)) return -E2BIG; lp = NAPI_GRO_CB(p)->last; -- cgit v1.2.3 From ef0efcd3bd3fd0589732b67fb586ffd3c8705806 Mon Sep 17 00:00:00 2001 From: Junwei Hu Date: Tue, 2 Apr 2019 19:38:04 +0800 Subject: ipv6: Fix dangling pointer when ipv6 fragment At the beginning of ip6_fragment func, the prevhdr pointer is obtained in the ip6_find_1stfragopt func. However, all the pointers pointing into skb header may change when calling skb_checksum_help func with skb->ip_summed = CHECKSUM_PARTIAL condition. The prevhdr pointe will be dangling if it is not reloaded after calling __skb_linearize func in skb_checksum_help func. Here, I add a variable, nexthdr_offset, to evaluate the offset, which does not changes even after calling __skb_linearize func. Fixes: 405c92f7a541 ("ipv6: add defensive check for CHECKSUM_PARTIAL skbs in ip_fragment") Signed-off-by: Junwei Hu Reported-by: Wenhao Zhang Reported-by: syzbot+e8ce541d095e486074fc@syzkaller.appspotmail.com Reviewed-by: Zhiqiang Liu Acked-by: Martin KaFai Lau Signed-off-by: David S. Miller --- net/ipv6/ip6_output.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index edbd12067170..e51f3c648b09 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -601,7 +601,7 @@ int ip6_fragment(struct net *net, struct sock *sk, struct sk_buff *skb, inet6_sk(skb->sk) : NULL; struct ipv6hdr *tmp_hdr; struct frag_hdr *fh; - unsigned int mtu, hlen, left, len; + unsigned int mtu, hlen, left, len, nexthdr_offset; int hroom, troom; __be32 frag_id; int ptr, offset = 0, err = 0; @@ -612,6 +612,7 @@ int ip6_fragment(struct net *net, struct sock *sk, struct sk_buff *skb, goto fail; hlen = err; nexthdr = *prevhdr; + nexthdr_offset = prevhdr - skb_network_header(skb); mtu = ip6_skb_dst_mtu(skb); @@ -646,6 +647,7 @@ int ip6_fragment(struct net *net, struct sock *sk, struct sk_buff *skb, (err = skb_checksum_help(skb))) goto fail; + prevhdr = skb_network_header(skb) + nexthdr_offset; hroom = LL_RESERVED_SPACE(rt->dst.dev); if (skb_has_frag_list(skb)) { unsigned int first_len = skb_pagelen(skb); -- cgit v1.2.3 From 7297ba6c74c5b9e78d8e936af82eecfcf7d32dfb Mon Sep 17 00:00:00 2001 From: Annaliese McDermond Date: Wed, 3 Apr 2019 21:17:15 -0700 Subject: ASoC: tlv320aic32x4: Change author's name The author of these files has changed her name. Update instances in the code of her dead name to current legal name. Signed-off-by: Annaliese McDermond Signed-off-by: Mark Brown --- sound/soc/codecs/tlv320aic32x4-i2c.c | 4 ++-- sound/soc/codecs/tlv320aic32x4-spi.c | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/sound/soc/codecs/tlv320aic32x4-i2c.c b/sound/soc/codecs/tlv320aic32x4-i2c.c index 385fa2e9525a..22c3a6bc0b6c 100644 --- a/sound/soc/codecs/tlv320aic32x4-i2c.c +++ b/sound/soc/codecs/tlv320aic32x4-i2c.c @@ -3,7 +3,7 @@ * * Copyright 2011 NW Digital Radio * - * Author: Jeremy McDermond + * Author: Annaliese McDermond * * Based on sound/soc/codecs/wm8974 and TI driver for kernel 2.6.27. * @@ -72,5 +72,5 @@ static struct i2c_driver aic32x4_i2c_driver = { module_i2c_driver(aic32x4_i2c_driver); MODULE_DESCRIPTION("ASoC TLV320AIC32x4 codec driver I2C"); -MODULE_AUTHOR("Jeremy McDermond "); +MODULE_AUTHOR("Annaliese McDermond "); MODULE_LICENSE("GPL"); diff --git a/sound/soc/codecs/tlv320aic32x4-spi.c b/sound/soc/codecs/tlv320aic32x4-spi.c index 07d78ae51e05..aa5b7ba0254b 100644 --- a/sound/soc/codecs/tlv320aic32x4-spi.c +++ b/sound/soc/codecs/tlv320aic32x4-spi.c @@ -3,7 +3,7 @@ * * Copyright 2011 NW Digital Radio * - * Author: Jeremy McDermond + * Author: Annaliese McDermond * * Based on sound/soc/codecs/wm8974 and TI driver for kernel 2.6.27. * @@ -74,5 +74,5 @@ static struct spi_driver aic32x4_spi_driver = { module_spi_driver(aic32x4_spi_driver); MODULE_DESCRIPTION("ASoC TLV320AIC32x4 codec driver SPI"); -MODULE_AUTHOR("Jeremy McDermond "); +MODULE_AUTHOR("Annaliese McDermond "); MODULE_LICENSE("GPL"); -- cgit v1.2.3 From 2201f31f2c6d6030cbd2f7085455e2172725b1c5 Mon Sep 17 00:00:00 2001 From: Max Filippov Date: Wed, 3 Apr 2019 20:19:25 -0700 Subject: xtensa: use actual syscall number in do_syscall_trace_leave Syscall may alter pt_regs structure passed to it, resulting in a mismatch between syscall entry end syscall exit entries in the ftrace. Temporary restore syscall field of the pt_regs for the duration of do_syscall_trace_leave. Signed-off-by: Max Filippov --- arch/xtensa/kernel/entry.S | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/arch/xtensa/kernel/entry.S b/arch/xtensa/kernel/entry.S index e50f5124dc6f..e54af8b7e0f8 100644 --- a/arch/xtensa/kernel/entry.S +++ b/arch/xtensa/kernel/entry.S @@ -1860,6 +1860,8 @@ ENTRY(system_call) l32i a7, a2, PT_SYSCALL 1: + s32i a7, a1, 4 + /* syscall = sys_call_table[syscall_nr] */ movi a4, sys_call_table @@ -1893,8 +1895,12 @@ ENTRY(system_call) retw 1: + l32i a4, a1, 4 + l32i a3, a2, PT_SYSCALL + s32i a4, a2, PT_SYSCALL mov a6, a2 call4 do_syscall_trace_leave + s32i a3, a2, PT_SYSCALL retw ENDPROC(system_call) -- cgit v1.2.3 From 2663147dc7465cb29040a05cc4286fdd839978b5 Mon Sep 17 00:00:00 2001 From: Max Filippov Date: Wed, 3 Apr 2019 20:22:42 -0700 Subject: xtensa: fix initialization of pt_regs::syscall in start_thread New pt_regs should indicate that there's no syscall, not that there's syscall #0. While at it wrap macro body in do/while and parenthesize macro arguments. Signed-off-by: Max Filippov --- arch/xtensa/include/asm/processor.h | 21 ++++++++++++--------- 1 file changed, 12 insertions(+), 9 deletions(-) diff --git a/arch/xtensa/include/asm/processor.h b/arch/xtensa/include/asm/processor.h index f7dd895b2353..0c14018d1c26 100644 --- a/arch/xtensa/include/asm/processor.h +++ b/arch/xtensa/include/asm/processor.h @@ -187,15 +187,18 @@ struct thread_struct { /* Clearing a0 terminates the backtrace. */ #define start_thread(regs, new_pc, new_sp) \ - memset(regs, 0, sizeof(*regs)); \ - regs->pc = new_pc; \ - regs->ps = USER_PS_VALUE; \ - regs->areg[1] = new_sp; \ - regs->areg[0] = 0; \ - regs->wmask = 1; \ - regs->depc = 0; \ - regs->windowbase = 0; \ - regs->windowstart = 1; + do { \ + memset((regs), 0, sizeof(*(regs))); \ + (regs)->pc = (new_pc); \ + (regs)->ps = USER_PS_VALUE; \ + (regs)->areg[1] = (new_sp); \ + (regs)->areg[0] = 0; \ + (regs)->wmask = 1; \ + (regs)->depc = 0; \ + (regs)->windowbase = 0; \ + (regs)->windowstart = 1; \ + (regs)->syscall = NO_SYSCALL; \ + } while (0) /* Forward declaration */ struct task_struct; -- cgit v1.2.3 From c2c616021d64d952dc9d37793924ce57833d7754 Mon Sep 17 00:00:00 2001 From: Kuninori Morimoto Date: Thu, 4 Apr 2019 09:52:15 +0900 Subject: ASoC: audio-graph-card: don't select DPCM via audio-graph-card commit ae3cb5790906b ("ASoC: audio-graph-card: merge audio-graph-scu-card") merged audio-graph-scu-card which can handle DPCM into audio-graph-card. By this patch, the judgement to select "normal sound card" or "DPCM sound card" is based on its OF-graph endpoint connection. But, because of it, existing "audio-graph-card" user who is assuming "normal sound card" might select DPCM unintentionally. To solve this issue, this patch allows "audio-graph-card" user can select "normal sound card", and "audio-graph-scu-card" user can select both "normal sound card" and "DPCM sound card". This keeps compatibility collectry. Fixes: ae3cb5790906b ("ASoC: audio-graph-card: merge audio-graph-scu-card") Reported-by: Arnaud Pouliquen Signed-off-by: Kuninori Morimoto Acked-by: Arnaud Pouliquen Signed-off-by: Mark Brown --- sound/soc/generic/audio-graph-card.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/sound/soc/generic/audio-graph-card.c b/sound/soc/generic/audio-graph-card.c index bb12351330e8..69bc4848d787 100644 --- a/sound/soc/generic/audio-graph-card.c +++ b/sound/soc/generic/audio-graph-card.c @@ -20,6 +20,8 @@ #include #include +#define DPCM_SELECTABLE 1 + struct graph_priv { struct snd_soc_card snd_card; struct graph_dai_props { @@ -440,6 +442,7 @@ static int graph_for_each_link(struct graph_priv *priv, struct device_node *codec_port; struct device_node *codec_port_old = NULL; struct asoc_simple_card_data adata; + uintptr_t dpcm_selectable = (uintptr_t)of_device_get_match_data(dev); int rc, ret; /* loop for all listed CPU port */ @@ -470,8 +473,9 @@ static int graph_for_each_link(struct graph_priv *priv, * if Codec port has many endpoints, * or has convert-xxx property */ - if ((of_get_child_count(codec_port) > 1) || - adata.convert_rate || adata.convert_channels) + if (dpcm_selectable && + ((of_get_child_count(codec_port) > 1) || + adata.convert_rate || adata.convert_channels)) ret = func_dpcm(priv, cpu_ep, codec_ep, li, (codec_port_old == codec_port)); /* else normal sound */ @@ -732,7 +736,8 @@ static int graph_remove(struct platform_device *pdev) static const struct of_device_id graph_of_match[] = { { .compatible = "audio-graph-card", }, - { .compatible = "audio-graph-scu-card", }, + { .compatible = "audio-graph-scu-card", + .data = (void *)DPCM_SELECTABLE }, {}, }; MODULE_DEVICE_TABLE(of, graph_of_match); -- cgit v1.2.3 From 42bf029a55a9bb8036f1d738a28dba2f7ec1e79d Mon Sep 17 00:00:00 2001 From: Kuninori Morimoto Date: Thu, 4 Apr 2019 09:52:52 +0900 Subject: ASoC: simple-card: don't select DPCM via simple-audio-card commit da215354eb55c ("ASoC: simple-card: merge simple-scu-card") merged simple-scu-audio-card which can handle DPCM into simple-audio-card. By this patch, the judgement to select "normal sound card" or "DPCM sound card" is based on its CPU/Codec DAI count. But, because of it, existing "simple-audio-card" user who is assuming "normal sound card" might select DPCM unintentionally. To solve this issue, this patch allows "simple-audio-card" user can select "normal sound card", and "simple-scu-audio-card" user can select both "normal sound card" and "DPCM sound card". This keeps compatibility collectry. Fixes: da215354eb55c ("ASoC: simple-card: merge simple-scu-card") Signed-off-by: Kuninori Morimoto Signed-off-by: Mark Brown --- sound/soc/generic/simple-card.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/sound/soc/generic/simple-card.c b/sound/soc/generic/simple-card.c index 7147bba45a2a..34de32efc4c4 100644 --- a/sound/soc/generic/simple-card.c +++ b/sound/soc/generic/simple-card.c @@ -9,12 +9,15 @@ #include #include #include +#include #include #include #include #include #include +#define DPCM_SELECTABLE 1 + struct simple_priv { struct snd_soc_card snd_card; struct simple_dai_props { @@ -441,6 +444,7 @@ static int simple_for_each_link(struct simple_priv *priv, struct device *dev = simple_priv_to_dev(priv); struct device_node *top = dev->of_node; struct device_node *node; + uintptr_t dpcm_selectable = (uintptr_t)of_device_get_match_data(dev); bool is_top = 0; int ret = 0; @@ -480,8 +484,9 @@ static int simple_for_each_link(struct simple_priv *priv, * if it has many CPUs, * or has convert-xxx property */ - if (num > 2 || - adata.convert_rate || adata.convert_channels) + if (dpcm_selectable && + (num > 2 || + adata.convert_rate || adata.convert_channels)) ret = func_dpcm(priv, np, codec, li, is_top); /* else normal sound */ else @@ -822,7 +827,8 @@ static int simple_remove(struct platform_device *pdev) static const struct of_device_id simple_of_match[] = { { .compatible = "simple-audio-card", }, - { .compatible = "simple-scu-audio-card", }, + { .compatible = "simple-scu-audio-card", + .data = (void *)DPCM_SELECTABLE }, {}, }; MODULE_DEVICE_TABLE(of, simple_of_match); -- cgit v1.2.3 From c85064435fe7a216ec0f0238ef2b8f7cd850a450 Mon Sep 17 00:00:00 2001 From: Sugar Zhang Date: Wed, 3 Apr 2019 21:40:45 +0800 Subject: ASoC: rockchip: pdm: fix regmap_ops hang issue This is because set_fmt ops maybe called when PD is off, and in such case, regmap_ops will lead system hang. enale PD before doing regmap_ops. Signed-off-by: Sugar Zhang Signed-off-by: Mark Brown --- sound/soc/rockchip/rockchip_pdm.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sound/soc/rockchip/rockchip_pdm.c b/sound/soc/rockchip/rockchip_pdm.c index 400e29edb1c9..8a2e3bbce3a1 100644 --- a/sound/soc/rockchip/rockchip_pdm.c +++ b/sound/soc/rockchip/rockchip_pdm.c @@ -208,7 +208,9 @@ static int rockchip_pdm_set_fmt(struct snd_soc_dai *cpu_dai, return -EINVAL; } + pm_runtime_get_sync(cpu_dai->dev); regmap_update_bits(pdm->regmap, PDM_CLK_CTRL, mask, val); + pm_runtime_put(cpu_dai->dev); return 0; } -- cgit v1.2.3 From 86a7b6ffd90095d81d9fa0d8b48955b7c83b2e2f Mon Sep 17 00:00:00 2001 From: Sugar Zhang Date: Thu, 4 Apr 2019 11:48:11 +0800 Subject: ASoC: rockchip: pdm: change dma burst to 8 This patch decreases the transfer bursts to avoid the fifo overrun. Signed-off-by: Sugar Zhang Signed-off-by: Mark Brown --- sound/soc/rockchip/rockchip_pdm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/soc/rockchip/rockchip_pdm.c b/sound/soc/rockchip/rockchip_pdm.c index 8a2e3bbce3a1..d0b403a0e27b 100644 --- a/sound/soc/rockchip/rockchip_pdm.c +++ b/sound/soc/rockchip/rockchip_pdm.c @@ -24,7 +24,7 @@ #include "rockchip_pdm.h" -#define PDM_DMA_BURST_SIZE (16) /* size * width: 16*4 = 64 bytes */ +#define PDM_DMA_BURST_SIZE (8) /* size * width: 8*4 = 32 bytes */ struct rk_pdm_dev { struct device *dev; -- cgit v1.2.3 From a3f98bb22cbfaaf67717e156f79e2bfeb42d4cac Mon Sep 17 00:00:00 2001 From: Sean Paul Date: Wed, 3 Apr 2019 16:56:45 -0400 Subject: Documentation/gpu/meson: Remove link to meson_canvas.c The file was removed in the below patch and is causing this error: WARNING: kernel-doc '../scripts/kernel-doc -rst -enable-lineno -function Canvas ../drivers/gpu/drm/meson/meson_canvas.c' failed with return code Fixes: 2bf6b5b0e374 ("drm/meson: exclusively use the canvas provider module") Cc: Maxime Jourdan Cc: Neil Armstrong Cc: Kevin Hilman Cc: dri-devel@lists.freedesktop.org Cc: linux-amlogic@lists.infradead.org Cc: linux-arm-kernel@lists.infradead.org Signed-off-by: Sean Paul Acked-by: Neil Armstrong Signed-off-by: Neil Armstrong Link: https://patchwork.freedesktop.org/patch/msgid/20190403205652.183496-1-sean@poorly.run --- Documentation/gpu/meson.rst | 6 ------ 1 file changed, 6 deletions(-) diff --git a/Documentation/gpu/meson.rst b/Documentation/gpu/meson.rst index 479f6f51a13b..b9e2f9aa3bd8 100644 --- a/Documentation/gpu/meson.rst +++ b/Documentation/gpu/meson.rst @@ -42,12 +42,6 @@ Video Encoder .. kernel-doc:: drivers/gpu/drm/meson/meson_venc.c :doc: Video Encoder -Video Canvas Management -======================= - -.. kernel-doc:: drivers/gpu/drm/meson/meson_canvas.c - :doc: Canvas - Video Clocks ============ -- cgit v1.2.3 From ba5e60c9b75dec92d4c695b928f69300b17d7686 Mon Sep 17 00:00:00 2001 From: Peng Hao Date: Tue, 2 Apr 2019 22:12:38 +0800 Subject: arm/mach-at91/pm : fix possible object reference leak of_find_device_by_node() takes a reference to the struct device when it finds a match via get_device. When returning error we should call put_device. Reviewed-by: Mukesh Ojha Signed-off-by: Peng Hao Signed-off-by: Ludovic Desroches --- arch/arm/mach-at91/pm.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/arch/arm/mach-at91/pm.c b/arch/arm/mach-at91/pm.c index 51e808adb00c..2a757dcaa1a5 100644 --- a/arch/arm/mach-at91/pm.c +++ b/arch/arm/mach-at91/pm.c @@ -591,13 +591,13 @@ static int __init at91_pm_backup_init(void) np = of_find_compatible_node(NULL, NULL, "atmel,sama5d2-securam"); if (!np) - goto securam_fail; + goto securam_fail_no_ref_dev; pdev = of_find_device_by_node(np); of_node_put(np); if (!pdev) { pr_warn("%s: failed to find securam device!\n", __func__); - goto securam_fail; + goto securam_fail_no_ref_dev; } sram_pool = gen_pool_get(&pdev->dev, NULL); @@ -620,6 +620,8 @@ static int __init at91_pm_backup_init(void) return 0; securam_fail: + put_device(&pdev->dev); +securam_fail_no_ref_dev: iounmap(pm_data.sfrbu); pm_data.sfrbu = NULL; return ret; -- cgit v1.2.3 From 6b0868c820ff7370d15d6ddfe71b1ce6bbe8a25d Mon Sep 17 00:00:00 2001 From: Mel Gorman Date: Thu, 4 Apr 2019 11:54:09 +0100 Subject: mm/compaction.c: correct zone boundary handling when resetting pageblock skip hints Mikhail Gavrilo reported the following bug being triggered in a Fedora kernel based on 5.1-rc1 but it is relevant to a vanilla kernel. kernel: page dumped because: VM_BUG_ON_PAGE(PagePoisoned(p)) kernel: ------------[ cut here ]------------ kernel: kernel BUG at include/linux/mm.h:1021! kernel: invalid opcode: 0000 [#1] SMP NOPTI kernel: CPU: 6 PID: 116 Comm: kswapd0 Tainted: G C 5.1.0-0.rc1.git1.3.fc31.x86_64 #1 kernel: Hardware name: System manufacturer System Product Name/ROG STRIX X470-I GAMING, BIOS 1201 12/07/2018 kernel: RIP: 0010:__reset_isolation_pfn+0x244/0x2b0 kernel: Code: fe 06 e8 0f 8e fc ff 44 0f b6 4c 24 04 48 85 c0 0f 85 dc fe ff ff e9 68 fe ff ff 48 c7 c6 58 b7 2e 8c 4c 89 ff e8 0c 75 00 00 <0f> 0b 48 c7 c6 58 b7 2e 8c e8 fe 74 00 00 0f 0b 48 89 fa 41 b8 01 kernel: RSP: 0018:ffff9e2d03f0fde8 EFLAGS: 00010246 kernel: RAX: 0000000000000034 RBX: 000000000081f380 RCX: ffff8cffbddd6c20 kernel: RDX: 0000000000000000 RSI: 0000000000000006 RDI: ffff8cffbddd6c20 kernel: RBP: 0000000000000001 R08: 0000009898b94613 R09: 0000000000000000 kernel: R10: 0000000000000000 R11: 0000000000000000 R12: 0000000000100000 kernel: R13: 0000000000100000 R14: 0000000000000001 R15: ffffca7de07ce000 kernel: FS: 0000000000000000(0000) GS:ffff8cffbdc00000(0000) knlGS:0000000000000000 kernel: CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 kernel: CR2: 00007fc1670e9000 CR3: 00000007f5276000 CR4: 00000000003406e0 kernel: Call Trace: kernel: __reset_isolation_suitable+0x62/0x120 kernel: reset_isolation_suitable+0x3b/0x40 kernel: kswapd+0x147/0x540 kernel: ? finish_wait+0x90/0x90 kernel: kthread+0x108/0x140 kernel: ? balance_pgdat+0x560/0x560 kernel: ? kthread_park+0x90/0x90 kernel: ret_from_fork+0x27/0x50 He bisected it down to e332f741a8dd ("mm, compaction: be selective about what pageblocks to clear skip hints"). The problem is that the patch in question was sloppy with respect to the handling of zone boundaries. In some instances, it was possible for PFNs outside of a zone to be examined and if those were not properly initialised or poisoned then it would trigger the VM_BUG_ON. This patch corrects the zone boundary issues when resetting pageblock skip hints and Mikhail reported that the bug did not trigger after 30 hours of testing. Link: http://lkml.kernel.org/r/20190327085424.GL3189@techsingularity.net Fixes: e332f741a8dd ("mm, compaction: be selective about what pageblocks to clear skip hints") Reported-by: Mikhail Gavrilov Tested-by: Mikhail Gavrilov Cc: Daniel Jordan Cc: Qian Cai Cc: Vlastimil Babka Signed-off-by: Mel Gorman --- mm/compaction.c | 27 +++++++++++++++++---------- 1 file changed, 17 insertions(+), 10 deletions(-) diff --git a/mm/compaction.c b/mm/compaction.c index f171a83707ce..b4930bf93c8a 100644 --- a/mm/compaction.c +++ b/mm/compaction.c @@ -242,6 +242,7 @@ __reset_isolation_pfn(struct zone *zone, unsigned long pfn, bool check_source, bool check_target) { struct page *page = pfn_to_online_page(pfn); + struct page *block_page; struct page *end_page; unsigned long block_pfn; @@ -267,20 +268,26 @@ __reset_isolation_pfn(struct zone *zone, unsigned long pfn, bool check_source, get_pageblock_migratetype(page) != MIGRATE_MOVABLE) return false; + /* Ensure the start of the pageblock or zone is online and valid */ + block_pfn = pageblock_start_pfn(pfn); + block_page = pfn_to_online_page(max(block_pfn, zone->zone_start_pfn)); + if (block_page) { + page = block_page; + pfn = block_pfn; + } + + /* Ensure the end of the pageblock or zone is online and valid */ + block_pfn += pageblock_nr_pages; + block_pfn = min(block_pfn, zone_end_pfn(zone) - 1); + end_page = pfn_to_online_page(block_pfn); + if (!end_page) + return false; + /* * Only clear the hint if a sample indicates there is either a * free page or an LRU page in the block. One or other condition * is necessary for the block to be a migration source/target. */ - block_pfn = pageblock_start_pfn(pfn); - pfn = max(block_pfn, zone->zone_start_pfn); - page = pfn_to_page(pfn); - if (zone != page_zone(page)) - return false; - pfn = block_pfn + pageblock_nr_pages; - pfn = min(pfn, zone_end_pfn(zone)); - end_page = pfn_to_page(pfn); - do { if (pfn_valid_within(pfn)) { if (check_source && PageLRU(page)) { @@ -309,7 +316,7 @@ __reset_isolation_pfn(struct zone *zone, unsigned long pfn, bool check_source, static void __reset_isolation_suitable(struct zone *zone) { unsigned long migrate_pfn = zone->zone_start_pfn; - unsigned long free_pfn = zone_end_pfn(zone); + unsigned long free_pfn = zone_end_pfn(zone) - 1; unsigned long reset_migrate = free_pfn; unsigned long reset_free = migrate_pfn; bool source_set = false; -- cgit v1.2.3 From 5b56d996dd50a9d2ca87c25ebb50c07b255b7e04 Mon Sep 17 00:00:00 2001 From: Qian Cai Date: Thu, 4 Apr 2019 11:54:41 +0100 Subject: mm/compaction.c: abort search if isolation fails Running LTP oom01 in a tight loop or memory stress testing put the system in a low-memory situation could triggers random memory corruption like page flag corruption below due to in fast_isolate_freepages(), if isolation fails, next_search_order() does not abort the search immediately could lead to improper accesses. UBSAN: Undefined behaviour in ./include/linux/mm.h:1195:50 index 7 is out of range for type 'zone [5]' Call Trace: dump_stack+0x62/0x9a ubsan_epilogue+0xd/0x7f __ubsan_handle_out_of_bounds+0x14d/0x192 __isolate_free_page+0x52c/0x600 compaction_alloc+0x886/0x25f0 unmap_and_move+0x37/0x1e70 migrate_pages+0x2ca/0xb20 compact_zone+0x19cb/0x3620 kcompactd_do_work+0x2df/0x680 kcompactd+0x1d8/0x6c0 kthread+0x32c/0x3f0 ret_from_fork+0x35/0x40 ------------[ cut here ]------------ kernel BUG at mm/page_alloc.c:3124! invalid opcode: 0000 [#1] SMP DEBUG_PAGEALLOC KASAN PTI RIP: 0010:__isolate_free_page+0x464/0x600 RSP: 0000:ffff888b9e1af848 EFLAGS: 00010007 RAX: 0000000030000000 RBX: ffff888c39fcf0f8 RCX: 0000000000000000 RDX: 1ffff111873f9e25 RSI: 0000000000000004 RDI: ffffed1173c35ef6 RBP: ffff888b9e1af898 R08: fffffbfff4fc2461 R09: fffffbfff4fc2460 R10: fffffbfff4fc2460 R11: ffffffffa7e12303 R12: 0000000000000008 R13: dffffc0000000000 R14: 0000000000000000 R15: 0000000000000007 FS: 0000000000000000(0000) GS:ffff888ba8e80000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007fc7abc00000 CR3: 0000000752416004 CR4: 00000000001606a0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: compaction_alloc+0x886/0x25f0 unmap_and_move+0x37/0x1e70 migrate_pages+0x2ca/0xb20 compact_zone+0x19cb/0x3620 kcompactd_do_work+0x2df/0x680 kcompactd+0x1d8/0x6c0 kthread+0x32c/0x3f0 ret_from_fork+0x35/0x40 Link: http://lkml.kernel.org/r/20190320192648.52499-1-cai@lca.pw Fixes: dbe2d4e4f12e ("mm, compaction: round-robin the order while searching the free lists for a target") Signed-off-by: Qian Cai Acked-by: Mel Gorman Cc: Daniel Jordan Cc: Mikhail Gavrilov Cc: Vlastimil Babka Cc: Pavel Tatashin Signed-off-by: Mel Gorman --- mm/compaction.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/compaction.c b/mm/compaction.c index b4930bf93c8a..3319e0872d01 100644 --- a/mm/compaction.c +++ b/mm/compaction.c @@ -1370,7 +1370,7 @@ fast_isolate_freepages(struct compact_control *cc) count_compact_events(COMPACTISOLATED, nr_isolated); } else { /* If isolation fails, abort the search */ - order = -1; + order = cc->search_order + 1; page = NULL; } } -- cgit v1.2.3 From 8b030a57e35a0efc1a8aa18bb10555bc5066ac40 Mon Sep 17 00:00:00 2001 From: Oleksandr Andrushchenko Date: Thu, 4 Apr 2019 15:38:38 +0300 Subject: ALSA: xen-front: Do not use stream buffer size before it is set This fixes the regression introduced while moving to Xen shared buffer implementation. Fixes: 58f9d806d16a ("ALSA: xen-front: Use Xen common shared buffer implementation") Reviewed-by: Juergen Gross Signed-off-by: Oleksandr Andrushchenko Cc: # v5.0+ Signed-off-by: Takashi Iwai --- sound/xen/xen_snd_front_alsa.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/xen/xen_snd_front_alsa.c b/sound/xen/xen_snd_front_alsa.c index a7f413cb704d..b14ab512c2ce 100644 --- a/sound/xen/xen_snd_front_alsa.c +++ b/sound/xen/xen_snd_front_alsa.c @@ -441,7 +441,7 @@ static int shbuf_setup_backstore(struct xen_snd_front_pcm_stream_info *stream, { int i; - stream->buffer = alloc_pages_exact(stream->buffer_sz, GFP_KERNEL); + stream->buffer = alloc_pages_exact(buffer_sz, GFP_KERNEL); if (!stream->buffer) return -ENOMEM; -- cgit v1.2.3 From 631b7abacd02b88f4b0795c08b54ad4fc3e7c7c0 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Red Hat)" Date: Mon, 7 Nov 2016 16:26:35 -0500 Subject: ptrace: Remove maxargs from task_current_syscall() task_current_syscall() has a single user that passes in 6 for maxargs, which is the maximum arguments that can be used to get system calls from syscall_get_arguments(). Instead of passing in a number of arguments to grab, just get 6 arguments. The args argument even specifies that it's an array of 6 items. This will also allow changing syscall_get_arguments() to not get a variable number of arguments, but always grab 6. Linus also suggested not passing in a bunch of arguments to task_current_syscall() but to instead pass in a pointer to a structure, and just fill the structure. struct seccomp_data has almost all the parameters that is needed except for the stack pointer (sp). As seccomp_data is part of uapi, and I'm afraid to change it, a new structure was created "syscall_info", which includes seccomp_data and adds the "sp" field. Link: http://lkml.kernel.org/r/20161107213233.466776454@goodmis.org Cc: Andy Lutomirski Cc: Alexey Dobriyan Cc: Oleg Nesterov Cc: Kees Cook Cc: Al Viro Cc: linux-fsdevel@vger.kernel.org Reviewed-by: Thomas Gleixner Signed-off-by: Steven Rostedt (VMware) --- fs/proc/base.c | 17 ++++++++------- include/linux/ptrace.h | 11 +++++++--- lib/syscall.c | 57 ++++++++++++++++++++++---------------------------- 3 files changed, 42 insertions(+), 43 deletions(-) diff --git a/fs/proc/base.c b/fs/proc/base.c index ddef482f1334..6a803a0b75df 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -616,24 +616,25 @@ static int proc_pid_limits(struct seq_file *m, struct pid_namespace *ns, static int proc_pid_syscall(struct seq_file *m, struct pid_namespace *ns, struct pid *pid, struct task_struct *task) { - long nr; - unsigned long args[6], sp, pc; + struct syscall_info info; + u64 *args = &info.data.args[0]; int res; res = lock_trace(task); if (res) return res; - if (task_current_syscall(task, &nr, args, 6, &sp, &pc)) + if (task_current_syscall(task, &info)) seq_puts(m, "running\n"); - else if (nr < 0) - seq_printf(m, "%ld 0x%lx 0x%lx\n", nr, sp, pc); + else if (info.data.nr < 0) + seq_printf(m, "%d 0x%llx 0x%llx\n", + info.data.nr, info.sp, info.data.instruction_pointer); else seq_printf(m, - "%ld 0x%lx 0x%lx 0x%lx 0x%lx 0x%lx 0x%lx 0x%lx 0x%lx\n", - nr, + "%d 0x%llx 0x%llx 0x%llx 0x%llx 0x%llx 0x%llx 0x%llx 0x%llx\n", + info.data.nr, args[0], args[1], args[2], args[3], args[4], args[5], - sp, pc); + info.sp, info.data.instruction_pointer); unlock_trace(task); return 0; diff --git a/include/linux/ptrace.h b/include/linux/ptrace.h index edb9b040c94c..d5084ebd9f03 100644 --- a/include/linux/ptrace.h +++ b/include/linux/ptrace.h @@ -9,6 +9,13 @@ #include /* For BUG_ON. */ #include /* For task_active_pid_ns. */ #include +#include + +/* Add sp to seccomp_data, as seccomp is user API, we don't want to modify it */ +struct syscall_info { + __u64 sp; + struct seccomp_data data; +}; extern int ptrace_access_vm(struct task_struct *tsk, unsigned long addr, void *buf, int len, unsigned int gup_flags); @@ -407,9 +414,7 @@ static inline void user_single_step_report(struct pt_regs *regs) #define current_user_stack_pointer() user_stack_pointer(current_pt_regs()) #endif -extern int task_current_syscall(struct task_struct *target, long *callno, - unsigned long args[6], unsigned int maxargs, - unsigned long *sp, unsigned long *pc); +extern int task_current_syscall(struct task_struct *target, struct syscall_info *info); extern void sigaction_compat_abi(struct k_sigaction *act, struct k_sigaction *oact); #endif diff --git a/lib/syscall.c b/lib/syscall.c index 1a7077f20eae..e8467e17b9a2 100644 --- a/lib/syscall.c +++ b/lib/syscall.c @@ -5,16 +5,14 @@ #include #include -static int collect_syscall(struct task_struct *target, long *callno, - unsigned long args[6], unsigned int maxargs, - unsigned long *sp, unsigned long *pc) +static int collect_syscall(struct task_struct *target, struct syscall_info *info) { struct pt_regs *regs; if (!try_get_task_stack(target)) { /* Task has no stack, so the task isn't in a syscall. */ - *sp = *pc = 0; - *callno = -1; + memset(info, 0, sizeof(*info)); + info->data.nr = -1; return 0; } @@ -24,12 +22,13 @@ static int collect_syscall(struct task_struct *target, long *callno, return -EAGAIN; } - *sp = user_stack_pointer(regs); - *pc = instruction_pointer(regs); + info->sp = user_stack_pointer(regs); + info->data.instruction_pointer = instruction_pointer(regs); - *callno = syscall_get_nr(target, regs); - if (*callno != -1L && maxargs > 0) - syscall_get_arguments(target, regs, 0, maxargs, args); + info->data.nr = syscall_get_nr(target, regs); + if (info->data.nr != -1L) + syscall_get_arguments(target, regs, 0, 6, + (unsigned long *)&info->data.args[0]); put_task_stack(target); return 0; @@ -38,41 +37,35 @@ static int collect_syscall(struct task_struct *target, long *callno, /** * task_current_syscall - Discover what a blocked task is doing. * @target: thread to examine - * @callno: filled with system call number or -1 - * @args: filled with @maxargs system call arguments - * @maxargs: number of elements in @args to fill - * @sp: filled with user stack pointer - * @pc: filled with user PC + * @info: structure with the following fields: + * .sp - filled with user stack pointer + * .data.nr - filled with system call number or -1 + * .data.args - filled with @maxargs system call arguments + * .data.instruction_pointer - filled with user PC * - * If @target is blocked in a system call, returns zero with *@callno - * set to the the call's number and @args filled in with its arguments. - * Registers not used for system call arguments may not be available and - * it is not kosher to use &struct user_regset calls while the system + * If @target is blocked in a system call, returns zero with @info.data.nr + * set to the the call's number and @info.data.args filled in with its + * arguments. Registers not used for system call arguments may not be available + * and it is not kosher to use &struct user_regset calls while the system * call is still in progress. Note we may get this result if @target * has finished its system call but not yet returned to user mode, such * as when it's stopped for signal handling or syscall exit tracing. * * If @target is blocked in the kernel during a fault or exception, - * returns zero with *@callno set to -1 and does not fill in @args. - * If so, it's now safe to examine @target using &struct user_regset - * get() calls as long as we're sure @target won't return to user mode. + * returns zero with *@info.data.nr set to -1 and does not fill in + * @info.data.args. If so, it's now safe to examine @target using + * &struct user_regset get() calls as long as we're sure @target won't return + * to user mode. * * Returns -%EAGAIN if @target does not remain blocked. - * - * Returns -%EINVAL if @maxargs is too large (maximum is six). */ -int task_current_syscall(struct task_struct *target, long *callno, - unsigned long args[6], unsigned int maxargs, - unsigned long *sp, unsigned long *pc) +int task_current_syscall(struct task_struct *target, struct syscall_info *info) { long state; unsigned long ncsw; - if (unlikely(maxargs > 6)) - return -EINVAL; - if (target == current) - return collect_syscall(target, callno, args, maxargs, sp, pc); + return collect_syscall(target, info); state = target->state; if (unlikely(!state)) @@ -80,7 +73,7 @@ int task_current_syscall(struct task_struct *target, long *callno, ncsw = wait_task_inactive(target, state); if (unlikely(!ncsw) || - unlikely(collect_syscall(target, callno, args, maxargs, sp, pc)) || + unlikely(collect_syscall(target, info)) || unlikely(wait_task_inactive(target, state) != ncsw)) return -EAGAIN; -- cgit v1.2.3 From d08e411397cb6fcb3d3fb075c27a41975c99e88f Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Red Hat)" Date: Mon, 7 Nov 2016 16:26:36 -0500 Subject: tracing/syscalls: Pass in hardcoded 6 into syscall_get_arguments() The only users that calls syscall_get_arguments() with a variable and not a hard coded '6' is ftrace_syscall_enter(). syscall_get_arguments() can be optimized by removing a variable input, and always grabbing 6 arguments regardless of what the system call actually uses. Change ftrace_syscall_enter() to pass the 6 args into a local stack array and copy the necessary arguments into the trace event as needed. This is needed to remove two parameters from syscall_get_arguments(). Link: http://lkml.kernel.org/r/20161107213233.627583542@goodmis.org Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/trace_syscalls.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c index f93a56d2db27..e9f5bbbad6d9 100644 --- a/kernel/trace/trace_syscalls.c +++ b/kernel/trace/trace_syscalls.c @@ -314,6 +314,7 @@ static void ftrace_syscall_enter(void *data, struct pt_regs *regs, long id) struct ring_buffer_event *event; struct ring_buffer *buffer; unsigned long irq_flags; + unsigned long args[6]; int pc; int syscall_nr; int size; @@ -347,7 +348,8 @@ static void ftrace_syscall_enter(void *data, struct pt_regs *regs, long id) entry = ring_buffer_event_data(event); entry->nr = syscall_nr; - syscall_get_arguments(current, regs, 0, sys_data->nb_args, entry->args); + syscall_get_arguments(current, regs, 0, 6, args); + memcpy(entry->args, args, sizeof(unsigned long) * sys_data->nb_args); event_trigger_unlock_commit(trace_file, buffer, event, entry, irq_flags, pc); @@ -583,6 +585,7 @@ static void perf_syscall_enter(void *ignore, struct pt_regs *regs, long id) struct syscall_metadata *sys_data; struct syscall_trace_enter *rec; struct hlist_head *head; + unsigned long args[6]; bool valid_prog_array; int syscall_nr; int rctx; @@ -613,8 +616,8 @@ static void perf_syscall_enter(void *ignore, struct pt_regs *regs, long id) return; rec->nr = syscall_nr; - syscall_get_arguments(current, regs, 0, sys_data->nb_args, - (unsigned long *)&rec->args); + syscall_get_arguments(current, regs, 0, 6, args); + memcpy(&rec->args, args, sizeof(unsigned long) * sys_data->nb_args); if ((valid_prog_array && !perf_call_bpf_enter(sys_data->enter_event, regs, sys_data, rec)) || -- cgit v1.2.3 From 5eed7898626bedd6405421550c0c6e8ab9591bb2 Mon Sep 17 00:00:00 2001 From: Stanislav Fomichev Date: Wed, 3 Apr 2019 13:53:18 -0700 Subject: flow_dissector: rst'ify documentation Rename bpf_flow_dissector.txt to bpf_flow_dissector.rst and fix formatting. Also, link it from the Documentation/networking/index.rst. Tested with 'make htmldocs' to make sure it looks reasonable. Fixes: ae82899bbe92 ("flow_dissector: document BPF flow dissector environment") Signed-off-by: Stanislav Fomichev Acked-by: Martin KaFai Lau Signed-off-by: Daniel Borkmann --- Documentation/networking/bpf_flow_dissector.rst | 126 ++++++++++++++++++++++++ Documentation/networking/bpf_flow_dissector.txt | 115 --------------------- Documentation/networking/index.rst | 1 + 3 files changed, 127 insertions(+), 115 deletions(-) create mode 100644 Documentation/networking/bpf_flow_dissector.rst delete mode 100644 Documentation/networking/bpf_flow_dissector.txt diff --git a/Documentation/networking/bpf_flow_dissector.rst b/Documentation/networking/bpf_flow_dissector.rst new file mode 100644 index 000000000000..b375ae2ec2c4 --- /dev/null +++ b/Documentation/networking/bpf_flow_dissector.rst @@ -0,0 +1,126 @@ +.. SPDX-License-Identifier: GPL-2.0 + +================== +BPF Flow Dissector +================== + +Overview +======== + +Flow dissector is a routine that parses metadata out of the packets. It's +used in the various places in the networking subsystem (RFS, flow hash, etc). + +BPF flow dissector is an attempt to reimplement C-based flow dissector logic +in BPF to gain all the benefits of BPF verifier (namely, limits on the +number of instructions and tail calls). + +API +=== + +BPF flow dissector programs operate on an ``__sk_buff``. However, only the +limited set of fields is allowed: ``data``, ``data_end`` and ``flow_keys``. +``flow_keys`` is ``struct bpf_flow_keys`` and contains flow dissector input +and output arguments. + +The inputs are: + * ``nhoff`` - initial offset of the networking header + * ``thoff`` - initial offset of the transport header, initialized to nhoff + * ``n_proto`` - L3 protocol type, parsed out of L2 header + +Flow dissector BPF program should fill out the rest of the ``struct +bpf_flow_keys`` fields. Input arguments ``nhoff/thoff/n_proto`` should be +also adjusted accordingly. + +The return code of the BPF program is either BPF_OK to indicate successful +dissection, or BPF_DROP to indicate parsing error. + +__sk_buff->data +=============== + +In the VLAN-less case, this is what the initial state of the BPF flow +dissector looks like:: + + +------+------+------------+-----------+ + | DMAC | SMAC | ETHER_TYPE | L3_HEADER | + +------+------+------------+-----------+ + ^ + | + +-- flow dissector starts here + + +.. code:: c + + skb->data + flow_keys->nhoff point to the first byte of L3_HEADER + flow_keys->thoff = nhoff + flow_keys->n_proto = ETHER_TYPE + +In case of VLAN, flow dissector can be called with the two different states. + +Pre-VLAN parsing:: + + +------+------+------+-----+-----------+-----------+ + | DMAC | SMAC | TPID | TCI |ETHER_TYPE | L3_HEADER | + +------+------+------+-----+-----------+-----------+ + ^ + | + +-- flow dissector starts here + +.. code:: c + + skb->data + flow_keys->nhoff point the to first byte of TCI + flow_keys->thoff = nhoff + flow_keys->n_proto = TPID + +Please note that TPID can be 802.1AD and, hence, BPF program would +have to parse VLAN information twice for double tagged packets. + + +Post-VLAN parsing:: + + +------+------+------+-----+-----------+-----------+ + | DMAC | SMAC | TPID | TCI |ETHER_TYPE | L3_HEADER | + +------+------+------+-----+-----------+-----------+ + ^ + | + +-- flow dissector starts here + +.. code:: c + + skb->data + flow_keys->nhoff point the to first byte of L3_HEADER + flow_keys->thoff = nhoff + flow_keys->n_proto = ETHER_TYPE + +In this case VLAN information has been processed before the flow dissector +and BPF flow dissector is not required to handle it. + + +The takeaway here is as follows: BPF flow dissector program can be called with +the optional VLAN header and should gracefully handle both cases: when single +or double VLAN is present and when it is not present. The same program +can be called for both cases and would have to be written carefully to +handle both cases. + + +Reference Implementation +======================== + +See ``tools/testing/selftests/bpf/progs/bpf_flow.c`` for the reference +implementation and ``tools/testing/selftests/bpf/flow_dissector_load.[hc]`` +for the loader. bpftool can be used to load BPF flow dissector program as well. + +The reference implementation is organized as follows: + * ``jmp_table`` map that contains sub-programs for each supported L3 protocol + * ``_dissect`` routine - entry point; it does input ``n_proto`` parsing and + does ``bpf_tail_call`` to the appropriate L3 handler + +Since BPF at this point doesn't support looping (or any jumping back), +jmp_table is used instead to handle multiple levels of encapsulation (and +IPv6 options). + + +Current Limitations +=================== +BPF flow dissector doesn't support exporting all the metadata that in-kernel +C-based implementation can export. Notable example is single VLAN (802.1Q) +and double VLAN (802.1AD) tags. Please refer to the ``struct bpf_flow_keys`` +for a set of information that's currently can be exported from the BPF context. diff --git a/Documentation/networking/bpf_flow_dissector.txt b/Documentation/networking/bpf_flow_dissector.txt deleted file mode 100644 index 70f66a2d57e7..000000000000 --- a/Documentation/networking/bpf_flow_dissector.txt +++ /dev/null @@ -1,115 +0,0 @@ -================== -BPF Flow Dissector -================== - -Overview -======== - -Flow dissector is a routine that parses metadata out of the packets. It's -used in the various places in the networking subsystem (RFS, flow hash, etc). - -BPF flow dissector is an attempt to reimplement C-based flow dissector logic -in BPF to gain all the benefits of BPF verifier (namely, limits on the -number of instructions and tail calls). - -API -=== - -BPF flow dissector programs operate on an __sk_buff. However, only the -limited set of fields is allowed: data, data_end and flow_keys. flow_keys -is 'struct bpf_flow_keys' and contains flow dissector input and -output arguments. - -The inputs are: - * nhoff - initial offset of the networking header - * thoff - initial offset of the transport header, initialized to nhoff - * n_proto - L3 protocol type, parsed out of L2 header - -Flow dissector BPF program should fill out the rest of the 'struct -bpf_flow_keys' fields. Input arguments nhoff/thoff/n_proto should be also -adjusted accordingly. - -The return code of the BPF program is either BPF_OK to indicate successful -dissection, or BPF_DROP to indicate parsing error. - -__sk_buff->data -=============== - -In the VLAN-less case, this is what the initial state of the BPF flow -dissector looks like: -+------+------+------------+-----------+ -| DMAC | SMAC | ETHER_TYPE | L3_HEADER | -+------+------+------------+-----------+ - ^ - | - +-- flow dissector starts here - -skb->data + flow_keys->nhoff point to the first byte of L3_HEADER. -flow_keys->thoff = nhoff -flow_keys->n_proto = ETHER_TYPE - - -In case of VLAN, flow dissector can be called with the two different states. - -Pre-VLAN parsing: -+------+------+------+-----+-----------+-----------+ -| DMAC | SMAC | TPID | TCI |ETHER_TYPE | L3_HEADER | -+------+------+------+-----+-----------+-----------+ - ^ - | - +-- flow dissector starts here - -skb->data + flow_keys->nhoff point the to first byte of TCI. -flow_keys->thoff = nhoff -flow_keys->n_proto = TPID - -Please note that TPID can be 802.1AD and, hence, BPF program would -have to parse VLAN information twice for double tagged packets. - - -Post-VLAN parsing: -+------+------+------+-----+-----------+-----------+ -| DMAC | SMAC | TPID | TCI |ETHER_TYPE | L3_HEADER | -+------+------+------+-----+-----------+-----------+ - ^ - | - +-- flow dissector starts here - -skb->data + flow_keys->nhoff point the to first byte of L3_HEADER. -flow_keys->thoff = nhoff -flow_keys->n_proto = ETHER_TYPE - -In this case VLAN information has been processed before the flow dissector -and BPF flow dissector is not required to handle it. - - -The takeaway here is as follows: BPF flow dissector program can be called with -the optional VLAN header and should gracefully handle both cases: when single -or double VLAN is present and when it is not present. The same program -can be called for both cases and would have to be written carefully to -handle both cases. - - -Reference Implementation -======================== - -See tools/testing/selftests/bpf/progs/bpf_flow.c for the reference -implementation and tools/testing/selftests/bpf/flow_dissector_load.[hc] for -the loader. bpftool can be used to load BPF flow dissector program as well. - -The reference implementation is organized as follows: -* jmp_table map that contains sub-programs for each supported L3 protocol -* _dissect routine - entry point; it does input n_proto parsing and does - bpf_tail_call to the appropriate L3 handler - -Since BPF at this point doesn't support looping (or any jumping back), -jmp_table is used instead to handle multiple levels of encapsulation (and -IPv6 options). - - -Current Limitations -=================== -BPF flow dissector doesn't support exporting all the metadata that in-kernel -C-based implementation can export. Notable example is single VLAN (802.1Q) -and double VLAN (802.1AD) tags. Please refer to the 'struct bpf_flow_keys' -for a set of information that's currently can be exported from the BPF context. diff --git a/Documentation/networking/index.rst b/Documentation/networking/index.rst index 5449149be496..984e68f9e026 100644 --- a/Documentation/networking/index.rst +++ b/Documentation/networking/index.rst @@ -9,6 +9,7 @@ Contents: netdev-FAQ af_xdp batman-adv + bpf_flow_dissector can can_ucan_protocol device_drivers/freescale/dpaa2/index -- cgit v1.2.3 From 10a16997db3d99fc02c026cf2c6e6c670acafab0 Mon Sep 17 00:00:00 2001 From: "Dmitry V. Levin" Date: Fri, 29 Mar 2019 20:12:21 +0300 Subject: riscv: Fix syscall_get_arguments() and syscall_set_arguments() RISC-V syscall arguments are located in orig_a0,a1..a5 fields of struct pt_regs. Due to an off-by-one bug and a bug in pointer arithmetic syscall_get_arguments() was reading s3..s7 fields instead of a1..a5. Likewise, syscall_set_arguments() was writing s3..s7 fields instead of a1..a5. Link: http://lkml.kernel.org/r/20190329171221.GA32456@altlinux.org Fixes: e2c0cdfba7f69 ("RISC-V: User-facing API") Cc: Ingo Molnar Cc: Kees Cook Cc: Andy Lutomirski Cc: Will Drewry Cc: Albert Ou Cc: linux-riscv@lists.infradead.org Cc: stable@vger.kernel.org # v4.15+ Acked-by: Palmer Dabbelt Signed-off-by: Dmitry V. Levin Signed-off-by: Steven Rostedt (VMware) --- arch/riscv/include/asm/syscall.h | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/arch/riscv/include/asm/syscall.h b/arch/riscv/include/asm/syscall.h index bba3da6ef157..6ea9e1804233 100644 --- a/arch/riscv/include/asm/syscall.h +++ b/arch/riscv/include/asm/syscall.h @@ -79,10 +79,11 @@ static inline void syscall_get_arguments(struct task_struct *task, if (i == 0) { args[0] = regs->orig_a0; args++; - i++; n--; + } else { + i--; } - memcpy(args, ®s->a1 + i * sizeof(regs->a1), n * sizeof(args[0])); + memcpy(args, ®s->a1 + i, n * sizeof(args[0])); } static inline void syscall_set_arguments(struct task_struct *task, @@ -94,10 +95,11 @@ static inline void syscall_set_arguments(struct task_struct *task, if (i == 0) { regs->orig_a0 = args[0]; args++; - i++; n--; - } - memcpy(®s->a1 + i * sizeof(regs->a1), args, n * sizeof(regs->a0)); + } else { + i--; + } + memcpy(®s->a1 + i, args, n * sizeof(regs->a1)); } static inline int syscall_get_arch(void) -- cgit v1.2.3 From ed3bb007021b9bddb90afae28a19f08ed8890add Mon Sep 17 00:00:00 2001 From: "Dmitry V. Levin" Date: Fri, 29 Mar 2019 20:12:30 +0300 Subject: csky: Fix syscall_get_arguments() and syscall_set_arguments() C-SKY syscall arguments are located in orig_a0,a1,a2,a3,regs[0],regs[1] fields of struct pt_regs. Due to an off-by-one bug and a bug in pointer arithmetic syscall_get_arguments() was reading orig_a0,regs[1..5] fields instead. Likewise, syscall_set_arguments() was writing orig_a0,regs[1..5] fields instead. Link: http://lkml.kernel.org/r/20190329171230.GB32456@altlinux.org Fixes: 4859bfca11c7d ("csky: System Call") Cc: Ingo Molnar Cc: Kees Cook Cc: Andy Lutomirski Cc: Will Drewry Cc: stable@vger.kernel.org # v4.20+ Tested-by: Guo Ren Acked-by: Guo Ren Signed-off-by: Dmitry V. Levin Signed-off-by: Steven Rostedt (VMware) --- arch/csky/include/asm/syscall.h | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/arch/csky/include/asm/syscall.h b/arch/csky/include/asm/syscall.h index d637445737b7..9a9cd81e66c1 100644 --- a/arch/csky/include/asm/syscall.h +++ b/arch/csky/include/asm/syscall.h @@ -49,10 +49,11 @@ syscall_get_arguments(struct task_struct *task, struct pt_regs *regs, if (i == 0) { args[0] = regs->orig_a0; args++; - i++; n--; + } else { + i--; } - memcpy(args, ®s->a1 + i * sizeof(regs->a1), n * sizeof(args[0])); + memcpy(args, ®s->a1 + i, n * sizeof(args[0])); } static inline void @@ -63,10 +64,11 @@ syscall_set_arguments(struct task_struct *task, struct pt_regs *regs, if (i == 0) { regs->orig_a0 = args[0]; args++; - i++; n--; + } else { + i--; } - memcpy(®s->a1 + i * sizeof(regs->a1), args, n * sizeof(regs->a0)); + memcpy(®s->a1 + i, args, n * sizeof(regs->a1)); } static inline int -- cgit v1.2.3 From bcc816dfe51ab86ca94663c7b225f2d6eb0fddb9 Mon Sep 17 00:00:00 2001 From: Dongli Zhang Date: Thu, 4 Apr 2019 10:57:44 +0800 Subject: blk-mq: do not reset plug->rq_count before the list is sorted We would never be able to sort the list if we first reset plug->rq_count which is used in conditional check later. Fixes: ce5b009cff19 ("block: improve logic around when to sort a plug list") Reviewed-by: Ming Lei Signed-off-by: Dongli Zhang Signed-off-by: Jens Axboe --- block/blk-mq.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/block/blk-mq.c b/block/blk-mq.c index 22074a1e37cd..ac61bcc67a89 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -1711,11 +1711,12 @@ void blk_mq_flush_plug_list(struct blk_plug *plug, bool from_schedule) unsigned int depth; list_splice_init(&plug->mq_list, &list); - plug->rq_count = 0; if (plug->rq_count > 2 && plug->multiple_queues) list_sort(NULL, &list, plug_rq_cmp); + plug->rq_count = 0; + this_q = NULL; this_hctx = NULL; this_ctx = NULL; -- cgit v1.2.3 From 1c41860864c8ae0387ef7d44f0000e99cbb2e06d Mon Sep 17 00:00:00 2001 From: Wei Li Date: Mon, 1 Apr 2019 11:55:57 +0800 Subject: arm64: fix wrong check of on_sdei_stack in nmi context When doing unwind_frame() in the context of pseudo nmi (need enable CONFIG_ARM64_PSEUDO_NMI), reaching the bottom of the stack (fp == 0, pc != 0), function on_sdei_stack() will return true while the sdei acpi table is not inited in fact. This will cause a "NULL pointer dereference" oops when going on. Reviewed-by: Julien Thierry Signed-off-by: Wei Li Signed-off-by: Catalin Marinas --- arch/arm64/kernel/sdei.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/arch/arm64/kernel/sdei.c b/arch/arm64/kernel/sdei.c index 5ba4465e44f0..ea94cf8f9dc6 100644 --- a/arch/arm64/kernel/sdei.c +++ b/arch/arm64/kernel/sdei.c @@ -94,6 +94,9 @@ static bool on_sdei_normal_stack(unsigned long sp, struct stack_info *info) unsigned long low = (unsigned long)raw_cpu_read(sdei_stack_normal_ptr); unsigned long high = low + SDEI_STACK_SIZE; + if (!low) + return false; + if (sp < low || sp >= high) return false; @@ -111,6 +114,9 @@ static bool on_sdei_critical_stack(unsigned long sp, struct stack_info *info) unsigned long low = (unsigned long)raw_cpu_read(sdei_stack_critical_ptr); unsigned long high = low + SDEI_STACK_SIZE; + if (!low) + return false; + if (sp < low || sp >= high) return false; -- cgit v1.2.3 From e7ad88553aa1d48e950ca9a4934d246c1bee4be4 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Wed, 3 Apr 2019 12:30:32 -0500 Subject: drm/amdkfd: Add picasso pci id Picasso is a new raven variant. Reviewed-by: Kent Russell Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdkfd/kfd_device.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c index 8be9677c0c07..cf9a49f49d3a 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c @@ -320,6 +320,7 @@ static const struct kfd_deviceid supported_devices[] = { { 0x9876, &carrizo_device_info }, /* Carrizo */ { 0x9877, &carrizo_device_info }, /* Carrizo */ { 0x15DD, &raven_device_info }, /* Raven */ + { 0x15D8, &raven_device_info }, /* Raven */ #endif { 0x67A0, &hawaii_device_info }, /* Hawaii */ { 0x67A1, &hawaii_device_info }, /* Hawaii */ -- cgit v1.2.3 From d4162c61e253177936fcfe6c29f7b224d9a1efb8 Mon Sep 17 00:00:00 2001 From: shaoyunl Date: Mon, 1 Apr 2019 16:09:34 -0400 Subject: drm/amdgpu: Adjust IB test timeout for XGMI configuration MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit On XGMI configuration the ib test may take longer to finish Signed-off-by: shaoyunl Reviewed-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c index 0b8ef2d27d6b..fe393a46f881 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c @@ -35,6 +35,7 @@ #include "amdgpu_trace.h" #define AMDGPU_IB_TEST_TIMEOUT msecs_to_jiffies(1000) +#define AMDGPU_IB_TEST_GFX_XGMI_TIMEOUT msecs_to_jiffies(2000) /* * IB @@ -344,6 +345,8 @@ int amdgpu_ib_ring_tests(struct amdgpu_device *adev) * cost waiting for it coming back under RUNTIME only */ tmo_gfx = 8 * AMDGPU_IB_TEST_TIMEOUT; + } else if (adev->gmc.xgmi.hive_id) { + tmo_gfx = AMDGPU_IB_TEST_GFX_XGMI_TIMEOUT; } for (i = 0; i < adev->num_rings; ++i) { -- cgit v1.2.3 From 1712fb1a2f6829150032ac76eb0e39b82a549cfb Mon Sep 17 00:00:00 2001 From: wentalou Date: Tue, 2 Apr 2019 17:13:05 +0800 Subject: drm/amdgpu: amdgpu_device_recover_vram always failed if only one node in shadow_list MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit amdgpu_bo_restore_shadow would assign zero to r if succeeded. r would remain zero if there is only one node in shadow_list. current code would always return failure when r <= 0. restart the timeout for each wait was a rather problematic bug as well. The value of tmo SHOULD be changed, otherwise we wait tmo jiffies on each loop. Signed-off-by: Wentao Lou Reviewed-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index ac0d646a7b74..5d8b30fd4534 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -3173,11 +3173,16 @@ static int amdgpu_device_recover_vram(struct amdgpu_device *adev) break; if (fence) { - r = dma_fence_wait_timeout(fence, false, tmo); + tmo = dma_fence_wait_timeout(fence, false, tmo); dma_fence_put(fence); fence = next; - if (r <= 0) + if (tmo == 0) { + r = -ETIMEDOUT; break; + } else if (tmo < 0) { + r = tmo; + break; + } } else { fence = next; } @@ -3188,8 +3193,8 @@ static int amdgpu_device_recover_vram(struct amdgpu_device *adev) tmo = dma_fence_wait_timeout(fence, false, tmo); dma_fence_put(fence); - if (r <= 0 || tmo <= 0) { - DRM_ERROR("recover vram bo from shadow failed\n"); + if (r < 0 || tmo <= 0) { + DRM_ERROR("recover vram bo from shadow failed, r is %ld, tmo is %ld\n", r, tmo); return -EIO; } -- cgit v1.2.3 From c1cefe115d1cdc460014483319d440b2f0d07c68 Mon Sep 17 00:00:00 2001 From: tiancyin Date: Mon, 1 Apr 2019 10:15:31 +0800 Subject: drm/amd/display: fix cursor black issue [Why] the member sdr_white_level of struct dc_cursor_attributes was not initialized, then the random value result that dcn10_set_cursor_sdr_white_level() set error hw_scale value 0x20D9(normal value is 0x3c00), this cause the black cursor issue. [how] just initilize the obj of struct dc_cursor_attributes to zero to avoid the random value. Reviewed-by: Nicholas Kazlauskas Signed-off-by: Tianci Yin Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index 81127f7d6ed1..3082b55b1e77 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -4533,6 +4533,7 @@ static void handle_cursor_update(struct drm_plane *plane, amdgpu_crtc->cursor_width = plane->state->crtc_w; amdgpu_crtc->cursor_height = plane->state->crtc_h; + memset(&attributes, 0, sizeof(attributes)); attributes.address.high_part = upper_32_bits(address); attributes.address.low_part = lower_32_bits(address); attributes.width = plane->state->crtc_w; -- cgit v1.2.3 From 50398fde997f6be8faebdb5f38e9c9c467370f51 Mon Sep 17 00:00:00 2001 From: Anand Jain Date: Tue, 2 Apr 2019 18:07:38 +0800 Subject: btrfs: prop: fix zstd compression parameter validation We let pass zstd compression parameter even if it is not fully valid. For example: $ btrfs prop set /btrfs compression zst $ btrfs prop get /btrfs compression compression=zst zlib and lzo are fine. Fix it by checking the correct prefix length. Fixes: 5c1aab1dd544 ("btrfs: Add zstd support") CC: stable@vger.kernel.org # 4.14+ Reviewed-by: Nikolay Borisov Signed-off-by: Anand Jain Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/props.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/btrfs/props.c b/fs/btrfs/props.c index dc6140013ae8..fd19f3078566 100644 --- a/fs/btrfs/props.c +++ b/fs/btrfs/props.c @@ -396,7 +396,7 @@ static int prop_compression_apply(struct inode *inode, btrfs_set_fs_incompat(fs_info, COMPRESS_LZO); } else if (!strncmp("zlib", value, 4)) { type = BTRFS_COMPRESS_ZLIB; - } else if (!strncmp("zstd", value, len)) { + } else if (!strncmp("zstd", value, 4)) { type = BTRFS_COMPRESS_ZSTD; btrfs_set_fs_incompat(fs_info, COMPRESS_ZSTD); } else { -- cgit v1.2.3 From 272e5326c7837697882ce3162029ba893059b616 Mon Sep 17 00:00:00 2001 From: Anand Jain Date: Tue, 2 Apr 2019 18:07:40 +0800 Subject: btrfs: prop: fix vanished compression property after failed set The compression property resets to NULL, instead of the old value if we fail to set the new compression parameter. $ btrfs prop get /btrfs compression compression=lzo $ btrfs prop set /btrfs compression zli ERROR: failed to set compression for /btrfs: Invalid argument $ btrfs prop get /btrfs compression This is because the compression property ->validate() is successful for 'zli' as the strncmp() used the length passed from the userspace. Fix it by using the expected string length in strncmp(). Fixes: 63541927c8d1 ("Btrfs: add support for inode properties") Fixes: 5c1aab1dd544 ("btrfs: Add zstd support") CC: stable@vger.kernel.org # 4.14+ Reviewed-by: Nikolay Borisov Signed-off-by: Anand Jain Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/props.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/fs/btrfs/props.c b/fs/btrfs/props.c index fd19f3078566..61d22a56c0ba 100644 --- a/fs/btrfs/props.c +++ b/fs/btrfs/props.c @@ -366,11 +366,11 @@ int btrfs_subvol_inherit_props(struct btrfs_trans_handle *trans, static int prop_compression_validate(const char *value, size_t len) { - if (!strncmp("lzo", value, len)) + if (!strncmp("lzo", value, 3)) return 0; - else if (!strncmp("zlib", value, len)) + else if (!strncmp("zlib", value, 4)) return 0; - else if (!strncmp("zstd", value, len)) + else if (!strncmp("zstd", value, 4)) return 0; return -EINVAL; -- cgit v1.2.3 From 3a39a12ad364a9acd1038ba8da67cd8430f30de4 Mon Sep 17 00:00:00 2001 From: Liubin Shu Date: Thu, 4 Apr 2019 16:46:42 +0800 Subject: net: hns: fix KASAN: use-after-free in hns_nic_net_xmit_hw() This patch is trying to fix the issue due to: [27237.844750] BUG: KASAN: use-after-free in hns_nic_net_xmit_hw+0x708/0xa18[hns_enet_drv] After hnae_queue_xmit() in hns_nic_net_xmit_hw(), can be interrupted by interruptions, and than call hns_nic_tx_poll_one() to handle the new packets, and free the skb. So, when turn back to hns_nic_net_xmit_hw(), calling skb->len will cause use-after-free. This patch update tx ring statistics in hns_nic_tx_poll_one() to fix the bug. Signed-off-by: Liubin Shu Signed-off-by: Zhen Lei Signed-off-by: Yonglong Liu Signed-off-by: Peng Li Signed-off-by: David S. Miller --- drivers/net/ethernet/hisilicon/hns/hns_enet.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/hisilicon/hns/hns_enet.c b/drivers/net/ethernet/hisilicon/hns/hns_enet.c index 60e7d7ae3787..e5a7c0761dbd 100644 --- a/drivers/net/ethernet/hisilicon/hns/hns_enet.c +++ b/drivers/net/ethernet/hisilicon/hns/hns_enet.c @@ -376,8 +376,6 @@ netdev_tx_t hns_nic_net_xmit_hw(struct net_device *ndev, wmb(); /* commit all data before submit */ assert(skb->queue_mapping < priv->ae_handle->q_num); hnae_queue_xmit(priv->ae_handle->qs[skb->queue_mapping], buf_num); - ring->stats.tx_pkts++; - ring->stats.tx_bytes += skb->len; return NETDEV_TX_OK; @@ -999,6 +997,9 @@ static int hns_nic_tx_poll_one(struct hns_nic_ring_data *ring_data, /* issue prefetch for next Tx descriptor */ prefetch(&ring->desc_cb[ring->next_to_clean]); } + /* update tx ring statistics. */ + ring->stats.tx_pkts += pkts; + ring->stats.tx_bytes += bytes; NETIF_TX_UNLOCK(ring); -- cgit v1.2.3 From acb1ce15a61154aa501891d67ebf79bc9ea26818 Mon Sep 17 00:00:00 2001 From: Yonglong Liu Date: Thu, 4 Apr 2019 16:46:43 +0800 Subject: net: hns: Use NAPI_POLL_WEIGHT for hns driver When the HNS driver loaded, always have an error print: "netif_napi_add() called with weight 256" This is because the kernel checks the NAPI polling weights requested by drivers and it prints an error message if a driver requests a weight bigger than 64. So use NAPI_POLL_WEIGHT to fix it. Signed-off-by: Yonglong Liu Signed-off-by: Peng Li Signed-off-by: David S. Miller --- drivers/net/ethernet/hisilicon/hns/hns_enet.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/hisilicon/hns/hns_enet.c b/drivers/net/ethernet/hisilicon/hns/hns_enet.c index e5a7c0761dbd..4cd86ba1f050 100644 --- a/drivers/net/ethernet/hisilicon/hns/hns_enet.c +++ b/drivers/net/ethernet/hisilicon/hns/hns_enet.c @@ -29,9 +29,6 @@ #define SERVICE_TIMER_HZ (1 * HZ) -#define NIC_TX_CLEAN_MAX_NUM 256 -#define NIC_RX_CLEAN_MAX_NUM 64 - #define RCB_IRQ_NOT_INITED 0 #define RCB_IRQ_INITED 1 #define HNS_BUFFER_SIZE_2048 2048 @@ -2153,7 +2150,7 @@ static int hns_nic_init_ring_data(struct hns_nic_priv *priv) hns_nic_tx_fini_pro_v2; netif_napi_add(priv->netdev, &rd->napi, - hns_nic_common_poll, NIC_TX_CLEAN_MAX_NUM); + hns_nic_common_poll, NAPI_POLL_WEIGHT); rd->ring->irq_init_flag = RCB_IRQ_NOT_INITED; } for (i = h->q_num; i < h->q_num * 2; i++) { @@ -2166,7 +2163,7 @@ static int hns_nic_init_ring_data(struct hns_nic_priv *priv) hns_nic_rx_fini_pro_v2; netif_napi_add(priv->netdev, &rd->napi, - hns_nic_common_poll, NIC_RX_CLEAN_MAX_NUM); + hns_nic_common_poll, NAPI_POLL_WEIGHT); rd->ring->irq_init_flag = RCB_IRQ_NOT_INITED; } -- cgit v1.2.3 From c0b0984426814f3a9251873b689e67d34d8ccd84 Mon Sep 17 00:00:00 2001 From: Yonglong Liu Date: Thu, 4 Apr 2019 16:46:44 +0800 Subject: net: hns: Fix probabilistic memory overwrite when HNS driver initialized When reboot the system again and again, may cause a memory overwrite. [ 15.638922] systemd[1]: Reached target Swap. [ 15.667561] tun: Universal TUN/TAP device driver, 1.6 [ 15.676756] Bridge firewalling registered [ 17.344135] Unable to handle kernel paging request at virtual address 0000000200000040 [ 17.352179] Mem abort info: [ 17.355007] ESR = 0x96000004 [ 17.358105] Exception class = DABT (current EL), IL = 32 bits [ 17.364112] SET = 0, FnV = 0 [ 17.367209] EA = 0, S1PTW = 0 [ 17.370393] Data abort info: [ 17.373315] ISV = 0, ISS = 0x00000004 [ 17.377206] CM = 0, WnR = 0 [ 17.380214] user pgtable: 4k pages, 48-bit VAs, pgdp = (____ptrval____) [ 17.386926] [0000000200000040] pgd=0000000000000000 [ 17.391878] Internal error: Oops: 96000004 [#1] SMP [ 17.396824] CPU: 23 PID: 95 Comm: kworker/u130:0 Tainted: G E 4.19.25-1.2.78.aarch64 #1 [ 17.414175] Hardware name: Huawei TaiShan 2280 /BC11SPCD, BIOS 1.54 08/16/2018 [ 17.425615] Workqueue: events_unbound async_run_entry_fn [ 17.435151] pstate: 00000005 (nzcv daif -PAN -UAO) [ 17.444139] pc : __mutex_lock.isra.1+0x74/0x540 [ 17.453002] lr : __mutex_lock.isra.1+0x3c/0x540 [ 17.461701] sp : ffff000100d9bb60 [ 17.469146] x29: ffff000100d9bb60 x28: 0000000000000000 [ 17.478547] x27: 0000000000000000 x26: ffff802fb8945000 [ 17.488063] x25: 0000000000000000 x24: ffff802fa32081a8 [ 17.497381] x23: 0000000000000002 x22: ffff801fa2b15220 [ 17.506701] x21: ffff000009809000 x20: ffff802fa23a0888 [ 17.515980] x19: ffff801fa2b15220 x18: 0000000000000000 [ 17.525272] x17: 0000000200000000 x16: 0000000200000000 [ 17.534511] x15: 0000000000000000 x14: 0000000000000000 [ 17.543652] x13: ffff000008d95db8 x12: 000000000000000d [ 17.552780] x11: ffff000008d95d90 x10: 0000000000000b00 [ 17.561819] x9 : ffff000100d9bb90 x8 : ffff802fb89d6560 [ 17.570829] x7 : 0000000000000004 x6 : 00000004a1801d05 [ 17.579839] x5 : 0000000000000000 x4 : 0000000000000000 [ 17.588852] x3 : ffff802fb89d5a00 x2 : 0000000000000000 [ 17.597734] x1 : 0000000200000000 x0 : 0000000200000000 [ 17.606631] Process kworker/u130:0 (pid: 95, stack limit = 0x(____ptrval____)) [ 17.617438] Call trace: [ 17.623349] __mutex_lock.isra.1+0x74/0x540 [ 17.630927] __mutex_lock_slowpath+0x24/0x30 [ 17.638602] mutex_lock+0x50/0x60 [ 17.645295] drain_workqueue+0x34/0x198 [ 17.652623] __sas_drain_work+0x7c/0x168 [ 17.659903] sas_drain_work+0x60/0x68 [ 17.666947] hisi_sas_scan_finished+0x30/0x40 [hisi_sas_main] [ 17.676129] do_scsi_scan_host+0x70/0xb0 [ 17.683534] do_scan_async+0x20/0x228 [ 17.690586] async_run_entry_fn+0x4c/0x1d0 [ 17.697997] process_one_work+0x1b4/0x3f8 [ 17.705296] worker_thread+0x54/0x470 Every time the call trace is not the same, but the overwrite address is always the same: Unable to handle kernel paging request at virtual address 0000000200000040 The root cause is, when write the reg XGMAC_MAC_TX_LF_RF_CONTROL_REG, didn't use the io_base offset. Signed-off-by: Yonglong Liu Signed-off-by: David S. Miller --- drivers/net/ethernet/hisilicon/hns/hns_dsaf_xgmac.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_xgmac.c b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_xgmac.c index ba4316910dea..a60f207768fc 100644 --- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_xgmac.c +++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_xgmac.c @@ -129,7 +129,7 @@ static void hns_xgmac_lf_rf_control_init(struct mac_driver *mac_drv) dsaf_set_bit(val, XGMAC_UNIDIR_EN_B, 0); dsaf_set_bit(val, XGMAC_RF_TX_EN_B, 1); dsaf_set_field(val, XGMAC_LF_RF_INSERT_M, XGMAC_LF_RF_INSERT_S, 0); - dsaf_write_reg(mac_drv, XGMAC_MAC_TX_LF_RF_CONTROL_REG, val); + dsaf_write_dev(mac_drv, XGMAC_MAC_TX_LF_RF_CONTROL_REG, val); } /** -- cgit v1.2.3 From f058e46855dcbc28edb2ed4736f38a71fd19cadb Mon Sep 17 00:00:00 2001 From: Yonglong Liu Date: Thu, 4 Apr 2019 16:46:45 +0800 Subject: net: hns: fix ICMP6 neighbor solicitation messages discard problem ICMP6 neighbor solicitation messages will be discard by the Hip06 chips, because of not setting forwarding pool. Enable promisc mode has the same problem. This patch fix the wrong forwarding table configs for the multicast vague matching when enable promisc mode, and add forwarding pool for the forwarding table. Signed-off-by: Yonglong Liu Signed-off-by: David S. Miller --- drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.c | 33 ++++++++++++++++++---- 1 file changed, 27 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.c b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.c index ac55db065f16..f5ff07cb2b72 100644 --- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.c +++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.c @@ -2750,6 +2750,17 @@ int hns_dsaf_get_regs_count(void) return DSAF_DUMP_REGS_NUM; } +static int hns_dsaf_get_port_id(u8 port) +{ + if (port < DSAF_SERVICE_NW_NUM) + return port; + + if (port >= DSAF_BASE_INNER_PORT_NUM) + return port - DSAF_BASE_INNER_PORT_NUM + DSAF_SERVICE_NW_NUM; + + return -EINVAL; +} + static void set_promisc_tcam_enable(struct dsaf_device *dsaf_dev, u32 port) { struct dsaf_tbl_tcam_ucast_cfg tbl_tcam_ucast = {0, 1, 0, 0, 0x80}; @@ -2815,23 +2826,33 @@ static void set_promisc_tcam_enable(struct dsaf_device *dsaf_dev, u32 port) memset(&temp_key, 0x0, sizeof(temp_key)); mask_entry.addr[0] = 0x01; hns_dsaf_set_mac_key(dsaf_dev, &mask_key, mask_entry.in_vlan_id, - port, mask_entry.addr); + 0xf, mask_entry.addr); tbl_tcam_mcast.tbl_mcast_item_vld = 1; tbl_tcam_mcast.tbl_mcast_old_en = 0; - if (port < DSAF_SERVICE_NW_NUM) { - mskid = port; - } else if (port >= DSAF_BASE_INNER_PORT_NUM) { - mskid = port - DSAF_BASE_INNER_PORT_NUM + DSAF_SERVICE_NW_NUM; - } else { + /* set MAC port to handle multicast */ + mskid = hns_dsaf_get_port_id(port); + if (mskid == -EINVAL) { dev_err(dsaf_dev->dev, "%s,pnum(%d)error,key(%#x:%#x)\n", dsaf_dev->ae_dev.name, port, mask_key.high.val, mask_key.low.val); return; } + dsaf_set_bit(tbl_tcam_mcast.tbl_mcast_port_msk[mskid / 32], + mskid % 32, 1); + /* set pool bit map to handle multicast */ + mskid = hns_dsaf_get_port_id(port_num); + if (mskid == -EINVAL) { + dev_err(dsaf_dev->dev, + "%s, pool bit map pnum(%d)error,key(%#x:%#x)\n", + dsaf_dev->ae_dev.name, port_num, + mask_key.high.val, mask_key.low.val); + return; + } dsaf_set_bit(tbl_tcam_mcast.tbl_mcast_port_msk[mskid / 32], mskid % 32, 1); + memcpy(&temp_key, &mask_key, sizeof(mask_key)); hns_dsaf_tcam_mc_cfg_vague(dsaf_dev, entry_index, &tbl_tcam_data_mc, (struct dsaf_tbl_tcam_data *)(&mask_key), -- cgit v1.2.3 From 8601a99d7c0256b7a7fdd1ab14cf6c1f1dfcadc6 Mon Sep 17 00:00:00 2001 From: Yonglong Liu Date: Thu, 4 Apr 2019 16:46:46 +0800 Subject: net: hns: Fix WARNING when remove HNS driver with SMMU enabled When enable SMMU, remove HNS driver will cause a WARNING: [ 141.924177] WARNING: CPU: 36 PID: 2708 at drivers/iommu/dma-iommu.c:443 __iommu_dma_unmap+0xc0/0xc8 [ 141.954673] Modules linked in: hns_enet_drv(-) [ 141.963615] CPU: 36 PID: 2708 Comm: rmmod Tainted: G W 5.0.0-rc1-28723-gb729c57de95c-dirty #32 [ 141.983593] Hardware name: Huawei D05/D05, BIOS Hisilicon D05 UEFI Nemo 1.8 RC0 08/31/2017 [ 142.000244] pstate: 60000005 (nZCv daif -PAN -UAO) [ 142.009886] pc : __iommu_dma_unmap+0xc0/0xc8 [ 142.018476] lr : __iommu_dma_unmap+0xc0/0xc8 [ 142.027066] sp : ffff000013533b90 [ 142.033728] x29: ffff000013533b90 x28: ffff8013e6983600 [ 142.044420] x27: 0000000000000000 x26: 0000000000000000 [ 142.055113] x25: 0000000056000000 x24: 0000000000000015 [ 142.065806] x23: 0000000000000028 x22: ffff8013e66eee68 [ 142.076499] x21: ffff8013db919800 x20: 0000ffffefbff000 [ 142.087192] x19: 0000000000001000 x18: 0000000000000007 [ 142.097885] x17: 000000000000000e x16: 0000000000000001 [ 142.108578] x15: 0000000000000019 x14: 363139343a70616d [ 142.119270] x13: 6e75656761705f67 x12: 0000000000000000 [ 142.129963] x11: 00000000ffffffff x10: 0000000000000006 [ 142.140656] x9 : 1346c1aa88093500 x8 : ffff0000114de4e0 [ 142.151349] x7 : 6662666578303d72 x6 : ffff0000105ffec8 [ 142.162042] x5 : 0000000000000000 x4 : 0000000000000000 [ 142.172734] x3 : 00000000ffffffff x2 : ffff0000114de500 [ 142.183427] x1 : 0000000000000000 x0 : 0000000000000035 [ 142.194120] Call trace: [ 142.199030] __iommu_dma_unmap+0xc0/0xc8 [ 142.206920] iommu_dma_unmap_page+0x20/0x28 [ 142.215335] __iommu_unmap_page+0x40/0x60 [ 142.223399] hnae_unmap_buffer+0x110/0x134 [ 142.231639] hnae_free_desc+0x6c/0x10c [ 142.239177] hnae_fini_ring+0x14/0x34 [ 142.246540] hnae_fini_queue+0x2c/0x40 [ 142.254080] hnae_put_handle+0x38/0xcc [ 142.261619] hns_nic_dev_remove+0x54/0xfc [hns_enet_drv] [ 142.272312] platform_drv_remove+0x24/0x64 [ 142.280552] device_release_driver_internal+0x17c/0x20c [ 142.291070] driver_detach+0x4c/0x90 [ 142.298259] bus_remove_driver+0x5c/0xd8 [ 142.306148] driver_unregister+0x2c/0x54 [ 142.314037] platform_driver_unregister+0x10/0x18 [ 142.323505] hns_nic_dev_driver_exit+0x14/0xf0c [hns_enet_drv] [ 142.335248] __arm64_sys_delete_module+0x214/0x25c [ 142.344891] el0_svc_common+0xb0/0x10c [ 142.352430] el0_svc_handler+0x24/0x80 [ 142.359968] el0_svc+0x8/0x7c0 [ 142.366104] ---[ end trace 60ad1cd58e63c407 ]--- The tx ring buffer map when xmit and unmap when xmit done. So in hnae_init_ring() did not map tx ring buffer, but in hnae_fini_ring() have a unmap operation for tx ring buffer, which is already unmapped when xmit done, than cause this WARNING. The hnae_alloc_buffers() is called in hnae_init_ring(), so the hnae_free_buffers() should be in hnae_fini_ring(), not in hnae_free_desc(). In hnae_fini_ring(), adds a check is_rx_ring() as in hnae_init_ring(). When the ring buffer is tx ring, adds a piece of code to ensure that the tx ring is unmap. Signed-off-by: Yonglong Liu Signed-off-by: Peng Li Signed-off-by: David S. Miller --- drivers/net/ethernet/hisilicon/hns/hnae.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/hisilicon/hns/hnae.c b/drivers/net/ethernet/hisilicon/hns/hnae.c index 79d03f8ee7b1..c7fa97a7e1f4 100644 --- a/drivers/net/ethernet/hisilicon/hns/hnae.c +++ b/drivers/net/ethernet/hisilicon/hns/hnae.c @@ -150,7 +150,6 @@ out_buffer_fail: /* free desc along with its attached buffer */ static void hnae_free_desc(struct hnae_ring *ring) { - hnae_free_buffers(ring); dma_unmap_single(ring_to_dev(ring), ring->desc_dma_addr, ring->desc_num * sizeof(ring->desc[0]), ring_to_dma_dir(ring)); @@ -183,6 +182,9 @@ static int hnae_alloc_desc(struct hnae_ring *ring) /* fini ring, also free the buffer for the ring */ static void hnae_fini_ring(struct hnae_ring *ring) { + if (is_rx_ring(ring)) + hnae_free_buffers(ring); + hnae_free_desc(ring); kfree(ring->desc_cb); ring->desc_cb = NULL; -- cgit v1.2.3 From 15400663aba5de11e99a9a2a35bfb2bae65e28e0 Mon Sep 17 00:00:00 2001 From: Yonglong Liu Date: Thu, 4 Apr 2019 16:46:47 +0800 Subject: net: hns: Fix sparse: some warnings in HNS drivers There are some sparse warnings in the HNS drivers: warning: incorrect type in assignment (different address spaces) expected void [noderef] *io_base got void *vaddr warning: cast removes address space '' of expression [...] Add __iomem and change all the u8 __iomem to void __iomem to fix these kind of warnings. warning: incorrect type in argument 1 (different address spaces) expected void [noderef] *base got unsigned char [usertype] *base_addr warning: cast to restricted __le16 warning: incorrect type in assignment (different base types) expected unsigned int [usertype] tbl_tcam_data_high got restricted __le32 [usertype] warning: cast to restricted __le32 [...] These variables used u32/u16 as their type, and finally as a parameter of writel(), writel() will do the cpu_to_le32 coversion so remove the little endian covert code to fix these kind of warnings. Signed-off-by: Yonglong Liu Signed-off-by: David S. Miller --- drivers/net/ethernet/hisilicon/hns/hnae.h | 2 +- drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.c | 2 +- drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.h | 4 ++-- drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.c | 20 ++++++-------------- drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.h | 2 ++ drivers/net/ethernet/hisilicon/hns/hns_dsaf_misc.c | 2 +- drivers/net/ethernet/hisilicon/hns/hns_dsaf_ppe.c | 6 +++--- drivers/net/ethernet/hisilicon/hns/hns_dsaf_ppe.h | 4 ++-- drivers/net/ethernet/hisilicon/hns/hns_dsaf_rcb.c | 4 ++-- drivers/net/ethernet/hisilicon/hns/hns_dsaf_reg.h | 12 ++++++------ drivers/net/ethernet/hisilicon/hns_mdio.c | 18 +++++++----------- 11 files changed, 33 insertions(+), 43 deletions(-) diff --git a/drivers/net/ethernet/hisilicon/hns/hnae.h b/drivers/net/ethernet/hisilicon/hns/hnae.h index 08a750fb60c4..d6fb83437230 100644 --- a/drivers/net/ethernet/hisilicon/hns/hnae.h +++ b/drivers/net/ethernet/hisilicon/hns/hnae.h @@ -357,7 +357,7 @@ struct hnae_buf_ops { }; struct hnae_queue { - void __iomem *io_base; + u8 __iomem *io_base; phys_addr_t phy_base; struct hnae_ae_dev *dev; /* the device who use this queue */ struct hnae_ring rx_ring ____cacheline_internodealigned_in_smp; diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.c b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.c index a97228c93831..6c0507921623 100644 --- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.c +++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.c @@ -370,7 +370,7 @@ int hns_mac_clr_multicast(struct hns_mac_cb *mac_cb, int vfn) static void hns_mac_param_get(struct mac_params *param, struct hns_mac_cb *mac_cb) { - param->vaddr = (void *)mac_cb->vaddr; + param->vaddr = mac_cb->vaddr; param->mac_mode = hns_get_enet_interface(mac_cb); ether_addr_copy(param->addr, mac_cb->addr_entry_idx[0].addr); param->mac_id = mac_cb->mac_id; diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.h b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.h index fbc75341bef7..22589799f1a5 100644 --- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.h +++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.h @@ -187,7 +187,7 @@ struct mac_statistics { /*mac para struct ,mac get param from nic or dsaf when initialize*/ struct mac_params { char addr[ETH_ALEN]; - void *vaddr; /*virtual address*/ + u8 __iomem *vaddr; /*virtual address*/ struct device *dev; u8 mac_id; /**< Ethernet operation mode (MAC-PHY interface and speed) */ @@ -402,7 +402,7 @@ struct mac_driver { enum mac_mode mac_mode; u8 mac_id; struct hns_mac_cb *mac_cb; - void __iomem *io_base; + u8 __iomem *io_base; unsigned int mac_en_flg;/*you'd better don't enable mac twice*/ unsigned int virt_dev_num; struct device *dev; diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.c b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.c index f5ff07cb2b72..61eea6ac846f 100644 --- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.c +++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.c @@ -1602,8 +1602,6 @@ static void hns_dsaf_set_mac_key( DSAF_TBL_TCAM_KEY_VLAN_S, vlan_id); dsaf_set_field(mac_key->low.bits.port_vlan, DSAF_TBL_TCAM_KEY_PORT_M, DSAF_TBL_TCAM_KEY_PORT_S, port); - - mac_key->low.bits.port_vlan = le16_to_cpu(mac_key->low.bits.port_vlan); } /** @@ -1663,8 +1661,8 @@ int hns_dsaf_set_mac_uc_entry( /* default config dvc to 0 */ mac_data.tbl_ucast_dvc = 0; mac_data.tbl_ucast_out_port = mac_entry->port_num; - tcam_data.tbl_tcam_data_high = cpu_to_le32(mac_key.high.val); - tcam_data.tbl_tcam_data_low = cpu_to_le32(mac_key.low.val); + tcam_data.tbl_tcam_data_high = mac_key.high.val; + tcam_data.tbl_tcam_data_low = mac_key.low.val; hns_dsaf_tcam_uc_cfg(dsaf_dev, entry_index, &tcam_data, &mac_data); @@ -1786,9 +1784,6 @@ int hns_dsaf_add_mac_mc_port(struct dsaf_device *dsaf_dev, 0xff, mc_mask); - mask_key.high.val = le32_to_cpu(mask_key.high.val); - mask_key.low.val = le32_to_cpu(mask_key.low.val); - pmask_key = (struct dsaf_tbl_tcam_data *)(&mask_key); } @@ -1840,8 +1835,8 @@ int hns_dsaf_add_mac_mc_port(struct dsaf_device *dsaf_dev, dsaf_dev->ae_dev.name, mac_key.high.val, mac_key.low.val, entry_index); - tcam_data.tbl_tcam_data_high = cpu_to_le32(mac_key.high.val); - tcam_data.tbl_tcam_data_low = cpu_to_le32(mac_key.low.val); + tcam_data.tbl_tcam_data_high = mac_key.high.val; + tcam_data.tbl_tcam_data_low = mac_key.low.val; /* config mc entry with mask */ hns_dsaf_tcam_mc_cfg(dsaf_dev, entry_index, &tcam_data, @@ -1956,9 +1951,6 @@ int hns_dsaf_del_mac_mc_port(struct dsaf_device *dsaf_dev, /* config key mask */ hns_dsaf_set_mac_key(dsaf_dev, &mask_key, 0x00, 0xff, mc_mask); - mask_key.high.val = le32_to_cpu(mask_key.high.val); - mask_key.low.val = le32_to_cpu(mask_key.low.val); - pmask_key = (struct dsaf_tbl_tcam_data *)(&mask_key); } @@ -2012,8 +2004,8 @@ int hns_dsaf_del_mac_mc_port(struct dsaf_device *dsaf_dev, soft_mac_entry += entry_index; soft_mac_entry->index = DSAF_INVALID_ENTRY_IDX; } else { /* not zero, just del port, update */ - tcam_data.tbl_tcam_data_high = cpu_to_le32(mac_key.high.val); - tcam_data.tbl_tcam_data_low = cpu_to_le32(mac_key.low.val); + tcam_data.tbl_tcam_data_high = mac_key.high.val; + tcam_data.tbl_tcam_data_low = mac_key.low.val; hns_dsaf_tcam_mc_cfg(dsaf_dev, entry_index, &tcam_data, diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.h b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.h index 0e1cd99831a6..76cc8887e1a8 100644 --- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.h +++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.h @@ -467,4 +467,6 @@ int hns_dsaf_clr_mac_mc_port(struct dsaf_device *dsaf_dev, u8 mac_id, u8 port_num); int hns_dsaf_wait_pkt_clean(struct dsaf_device *dsaf_dev, int port); +int hns_dsaf_roce_reset(struct fwnode_handle *dsaf_fwnode, bool dereset); + #endif /* __HNS_DSAF_MAIN_H__ */ diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_misc.c b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_misc.c index 16294cd3c954..19b94879691f 100644 --- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_misc.c +++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_misc.c @@ -670,7 +670,7 @@ static int hns_mac_config_sds_loopback(struct hns_mac_cb *mac_cb, bool en) dsaf_set_field(origin, 1ull << 10, 10, en); dsaf_write_syscon(mac_cb->serdes_ctrl, reg_offset, origin); } else { - u8 *base_addr = (u8 *)mac_cb->serdes_vaddr + + u8 __iomem *base_addr = mac_cb->serdes_vaddr + (mac_cb->mac_id <= 3 ? 0x00280000 : 0x00200000); dsaf_set_reg_field(base_addr, reg_offset, 1ull << 10, 10, en); } diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_ppe.c b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_ppe.c index 3d07c8a7639d..17c019106e6e 100644 --- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_ppe.c +++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_ppe.c @@ -61,7 +61,7 @@ void hns_ppe_set_indir_table(struct hns_ppe_cb *ppe_cb, } } -static void __iomem * +static u8 __iomem * hns_ppe_common_get_ioaddr(struct ppe_common_cb *ppe_common) { return ppe_common->dsaf_dev->ppe_base + PPE_COMMON_REG_OFFSET; @@ -111,8 +111,8 @@ hns_ppe_common_free_cfg(struct dsaf_device *dsaf_dev, u32 comm_index) dsaf_dev->ppe_common[comm_index] = NULL; } -static void __iomem *hns_ppe_get_iobase(struct ppe_common_cb *ppe_common, - int ppe_idx) +static u8 __iomem *hns_ppe_get_iobase(struct ppe_common_cb *ppe_common, + int ppe_idx) { return ppe_common->dsaf_dev->ppe_base + ppe_idx * PPE_REG_OFFSET; } diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_ppe.h b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_ppe.h index f670e63a5a01..110c6e8222c7 100644 --- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_ppe.h +++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_ppe.h @@ -80,7 +80,7 @@ struct hns_ppe_cb { struct hns_ppe_hw_stats hw_stats; u8 index; /* index in a ppe common device */ - void __iomem *io_base; + u8 __iomem *io_base; int virq; u32 rss_indir_table[HNS_PPEV2_RSS_IND_TBL_SIZE]; /*shadow indir tab */ u32 rss_key[HNS_PPEV2_RSS_KEY_NUM]; /* rss hash key */ @@ -89,7 +89,7 @@ struct hns_ppe_cb { struct ppe_common_cb { struct device *dev; struct dsaf_device *dsaf_dev; - void __iomem *io_base; + u8 __iomem *io_base; enum ppe_common_mode ppe_mode; diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_rcb.c b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_rcb.c index 6bf346c11b25..ac3518ca4d7b 100644 --- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_rcb.c +++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_rcb.c @@ -458,7 +458,7 @@ static void hns_rcb_ring_get_cfg(struct hnae_queue *q, int ring_type) mdnum_ppkt = HNS_RCB_RING_MAX_BD_PER_PKT; } else { ring = &q->tx_ring; - ring->io_base = (u8 __iomem *)ring_pair_cb->q.io_base + + ring->io_base = ring_pair_cb->q.io_base + HNS_RCB_TX_REG_OFFSET; irq_idx = HNS_RCB_IRQ_IDX_TX; mdnum_ppkt = is_ver1 ? HNS_RCB_RING_MAX_TXBD_PER_PKT : @@ -764,7 +764,7 @@ static int hns_rcb_get_ring_num(struct dsaf_device *dsaf_dev) } } -static void __iomem *hns_rcb_common_get_vaddr(struct rcb_common_cb *rcb_common) +static u8 __iomem *hns_rcb_common_get_vaddr(struct rcb_common_cb *rcb_common) { struct dsaf_device *dsaf_dev = rcb_common->dsaf_dev; diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_reg.h b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_reg.h index b9733b0b8482..b9e7f11f0896 100644 --- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_reg.h +++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_reg.h @@ -1018,7 +1018,7 @@ #define XGMAC_PAUSE_CTL_RSP_MODE_B 2 #define XGMAC_PAUSE_CTL_TX_XOFF_B 3 -static inline void dsaf_write_reg(void __iomem *base, u32 reg, u32 value) +static inline void dsaf_write_reg(u8 __iomem *base, u32 reg, u32 value) { writel(value, base + reg); } @@ -1053,7 +1053,7 @@ static inline int dsaf_read_syscon(struct regmap *base, u32 reg, u32 *val) #define dsaf_set_bit(origin, shift, val) \ dsaf_set_field((origin), (1ull << (shift)), (shift), (val)) -static inline void dsaf_set_reg_field(void __iomem *base, u32 reg, u32 mask, +static inline void dsaf_set_reg_field(u8 __iomem *base, u32 reg, u32 mask, u32 shift, u32 val) { u32 origin = dsaf_read_reg(base, reg); @@ -1073,7 +1073,7 @@ static inline void dsaf_set_reg_field(void __iomem *base, u32 reg, u32 mask, #define dsaf_get_bit(origin, shift) \ dsaf_get_field((origin), (1ull << (shift)), (shift)) -static inline u32 dsaf_get_reg_field(void __iomem *base, u32 reg, u32 mask, +static inline u32 dsaf_get_reg_field(u8 __iomem *base, u32 reg, u32 mask, u32 shift) { u32 origin; @@ -1089,11 +1089,11 @@ static inline u32 dsaf_get_reg_field(void __iomem *base, u32 reg, u32 mask, dsaf_get_reg_field((dev)->io_base, (reg), (1ull << (bit)), (bit)) #define dsaf_write_b(addr, data)\ - writeb((data), (__iomem unsigned char *)(addr)) + writeb((data), (__iomem u8 *)(addr)) #define dsaf_read_b(addr)\ - readb((__iomem unsigned char *)(addr)) + readb((__iomem u8 *)(addr)) #define hns_mac_reg_read64(drv, offset) \ - readq((__iomem void *)(((u8 *)(drv)->io_base + 0xc00 + (offset)))) + readq((__iomem void *)(((drv)->io_base + 0xc00 + (offset)))) #endif /* _DSAF_REG_H */ diff --git a/drivers/net/ethernet/hisilicon/hns_mdio.c b/drivers/net/ethernet/hisilicon/hns_mdio.c index baf5cc251f32..8b8a7d00e8e0 100644 --- a/drivers/net/ethernet/hisilicon/hns_mdio.c +++ b/drivers/net/ethernet/hisilicon/hns_mdio.c @@ -39,7 +39,7 @@ struct hns_mdio_sc_reg { }; struct hns_mdio_device { - void *vbase; /* mdio reg base address */ + u8 __iomem *vbase; /* mdio reg base address */ struct regmap *subctrl_vbase; struct hns_mdio_sc_reg sc_reg; }; @@ -96,21 +96,17 @@ enum mdio_c45_op_seq { #define MDIO_SC_CLK_ST 0x531C #define MDIO_SC_RESET_ST 0x5A1C -static void mdio_write_reg(void *base, u32 reg, u32 value) +static void mdio_write_reg(u8 __iomem *base, u32 reg, u32 value) { - u8 __iomem *reg_addr = (u8 __iomem *)base; - - writel_relaxed(value, reg_addr + reg); + writel_relaxed(value, base + reg); } #define MDIO_WRITE_REG(a, reg, value) \ mdio_write_reg((a)->vbase, (reg), (value)) -static u32 mdio_read_reg(void *base, u32 reg) +static u32 mdio_read_reg(u8 __iomem *base, u32 reg) { - u8 __iomem *reg_addr = (u8 __iomem *)base; - - return readl_relaxed(reg_addr + reg); + return readl_relaxed(base + reg); } #define mdio_set_field(origin, mask, shift, val) \ @@ -121,7 +117,7 @@ static u32 mdio_read_reg(void *base, u32 reg) #define mdio_get_field(origin, mask, shift) (((origin) >> (shift)) & (mask)) -static void mdio_set_reg_field(void *base, u32 reg, u32 mask, u32 shift, +static void mdio_set_reg_field(u8 __iomem *base, u32 reg, u32 mask, u32 shift, u32 val) { u32 origin = mdio_read_reg(base, reg); @@ -133,7 +129,7 @@ static void mdio_set_reg_field(void *base, u32 reg, u32 mask, u32 shift, #define MDIO_SET_REG_FIELD(dev, reg, mask, shift, val) \ mdio_set_reg_field((dev)->vbase, (reg), (mask), (shift), (val)) -static u32 mdio_get_reg_field(void *base, u32 reg, u32 mask, u32 shift) +static u32 mdio_get_reg_field(u8 __iomem *base, u32 reg, u32 mask, u32 shift) { u32 origin; -- cgit v1.2.3 From 2ec1ed2aa68782b342458681aa4d16b65c9014d6 Mon Sep 17 00:00:00 2001 From: Lorenzo Bianconi Date: Thu, 4 Apr 2019 12:16:27 +0200 Subject: net: thunderx: fix NULL pointer dereference in nicvf_open/nicvf_stop When a bpf program is uploaded, the driver computes the number of xdp tx queues resulting in the allocation of additional qsets. Starting from commit '2ecbe4f4a027 ("net: thunderx: replace global nicvf_rx_mode_wq work queue for all VFs to private for each of them")' the driver runs link state polling for each VF resulting in the following NULL pointer dereference: [ 56.169256] Unable to handle kernel NULL pointer dereference at virtual address 0000000000000020 [ 56.178032] Mem abort info: [ 56.180834] ESR = 0x96000005 [ 56.183877] Exception class = DABT (current EL), IL = 32 bits [ 56.189792] SET = 0, FnV = 0 [ 56.192834] EA = 0, S1PTW = 0 [ 56.195963] Data abort info: [ 56.198831] ISV = 0, ISS = 0x00000005 [ 56.202662] CM = 0, WnR = 0 [ 56.205619] user pgtable: 64k pages, 48-bit VAs, pgdp = 0000000021f0c7a0 [ 56.212315] [0000000000000020] pgd=0000000000000000, pud=0000000000000000 [ 56.219094] Internal error: Oops: 96000005 [#1] SMP [ 56.260459] CPU: 39 PID: 2034 Comm: ip Not tainted 5.1.0-rc3+ #3 [ 56.266452] Hardware name: GIGABYTE R120-T33/MT30-GS1, BIOS T49 02/02/2018 [ 56.273315] pstate: 80000005 (Nzcv daif -PAN -UAO) [ 56.278098] pc : __ll_sc___cmpxchg_case_acq_64+0x4/0x20 [ 56.283312] lr : mutex_lock+0x2c/0x50 [ 56.286962] sp : ffff0000219af1b0 [ 56.290264] x29: ffff0000219af1b0 x28: ffff800f64de49a0 [ 56.295565] x27: 0000000000000000 x26: 0000000000000015 [ 56.300865] x25: 0000000000000000 x24: 0000000000000000 [ 56.306165] x23: 0000000000000000 x22: ffff000011117000 [ 56.311465] x21: ffff800f64dfc080 x20: 0000000000000020 [ 56.316766] x19: 0000000000000020 x18: 0000000000000001 [ 56.322066] x17: 0000000000000000 x16: ffff800f2e077080 [ 56.327367] x15: 0000000000000004 x14: 0000000000000000 [ 56.332667] x13: ffff000010964438 x12: 0000000000000002 [ 56.337967] x11: 0000000000000000 x10: 0000000000000c70 [ 56.343268] x9 : ffff0000219af120 x8 : ffff800f2e077d50 [ 56.348568] x7 : 0000000000000027 x6 : 000000062a9d6a84 [ 56.353869] x5 : 0000000000000000 x4 : ffff800f2e077480 [ 56.359169] x3 : 0000000000000008 x2 : ffff800f2e077080 [ 56.364469] x1 : 0000000000000000 x0 : 0000000000000020 [ 56.369770] Process ip (pid: 2034, stack limit = 0x00000000c862da3a) [ 56.376110] Call trace: [ 56.378546] __ll_sc___cmpxchg_case_acq_64+0x4/0x20 [ 56.383414] drain_workqueue+0x34/0x198 [ 56.387247] nicvf_open+0x48/0x9e8 [nicvf] [ 56.391334] nicvf_open+0x898/0x9e8 [nicvf] [ 56.395507] nicvf_xdp+0x1bc/0x238 [nicvf] [ 56.399595] dev_xdp_install+0x68/0x90 [ 56.403333] dev_change_xdp_fd+0xc8/0x240 [ 56.407333] do_setlink+0x8e0/0xbe8 [ 56.410810] __rtnl_newlink+0x5b8/0x6d8 [ 56.414634] rtnl_newlink+0x54/0x80 [ 56.418112] rtnetlink_rcv_msg+0x22c/0x2f8 [ 56.422199] netlink_rcv_skb+0x60/0x120 [ 56.426023] rtnetlink_rcv+0x28/0x38 [ 56.429587] netlink_unicast+0x1c8/0x258 [ 56.433498] netlink_sendmsg+0x1b4/0x350 [ 56.437410] sock_sendmsg+0x4c/0x68 [ 56.440887] ___sys_sendmsg+0x240/0x280 [ 56.444711] __sys_sendmsg+0x68/0xb0 [ 56.448275] __arm64_sys_sendmsg+0x2c/0x38 [ 56.452361] el0_svc_handler+0x9c/0x128 [ 56.456186] el0_svc+0x8/0xc [ 56.459056] Code: 35ffff91 2a1003e0 d65f03c0 f9800011 (c85ffc10) [ 56.465166] ---[ end trace 4a57fdc27b0a572c ]--- [ 56.469772] Kernel panic - not syncing: Fatal exception Fix it by checking nicvf_rx_mode_wq pointer in nicvf_open and nicvf_stop Fixes: 2ecbe4f4a027 ("net: thunderx: replace global nicvf_rx_mode_wq work queue for all VFs to private for each of them") Fixes: 2c632ad8bc74 ("net: thunderx: move link state polling function to VF") Reported-by: Matteo Croce Signed-off-by: Lorenzo Bianconi Tested-by: Matteo Croce Signed-off-by: David S. Miller --- drivers/net/ethernet/cavium/thunder/nicvf_main.c | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/drivers/net/ethernet/cavium/thunder/nicvf_main.c b/drivers/net/ethernet/cavium/thunder/nicvf_main.c index aa2be4807191..28eac9056211 100644 --- a/drivers/net/ethernet/cavium/thunder/nicvf_main.c +++ b/drivers/net/ethernet/cavium/thunder/nicvf_main.c @@ -1328,10 +1328,11 @@ int nicvf_stop(struct net_device *netdev) struct nicvf_cq_poll *cq_poll = NULL; union nic_mbx mbx = {}; - cancel_delayed_work_sync(&nic->link_change_work); - /* wait till all queued set_rx_mode tasks completes */ - drain_workqueue(nic->nicvf_rx_mode_wq); + if (nic->nicvf_rx_mode_wq) { + cancel_delayed_work_sync(&nic->link_change_work); + drain_workqueue(nic->nicvf_rx_mode_wq); + } mbx.msg.msg = NIC_MBOX_MSG_SHUTDOWN; nicvf_send_msg_to_pf(nic, &mbx); @@ -1452,7 +1453,8 @@ int nicvf_open(struct net_device *netdev) struct nicvf_cq_poll *cq_poll = NULL; /* wait till all queued set_rx_mode tasks completes if any */ - drain_workqueue(nic->nicvf_rx_mode_wq); + if (nic->nicvf_rx_mode_wq) + drain_workqueue(nic->nicvf_rx_mode_wq); netif_carrier_off(netdev); @@ -1550,10 +1552,12 @@ int nicvf_open(struct net_device *netdev) /* Send VF config done msg to PF */ nicvf_send_cfg_done(nic); - INIT_DELAYED_WORK(&nic->link_change_work, - nicvf_link_status_check_task); - queue_delayed_work(nic->nicvf_rx_mode_wq, - &nic->link_change_work, 0); + if (nic->nicvf_rx_mode_wq) { + INIT_DELAYED_WORK(&nic->link_change_work, + nicvf_link_status_check_task); + queue_delayed_work(nic->nicvf_rx_mode_wq, + &nic->link_change_work, 0); + } return 0; cleanup: -- cgit v1.2.3 From fae2708174ae95d98d19f194e03d6e8f688ae195 Mon Sep 17 00:00:00 2001 From: Davide Caratti Date: Thu, 4 Apr 2019 12:31:35 +0200 Subject: net/sched: act_sample: fix divide by zero in the traffic path the control path of 'sample' action does not validate the value of 'rate' provided by the user, but then it uses it as divisor in the traffic path. Validate it in tcf_sample_init(), and return -EINVAL with a proper extack message in case that value is zero, to fix a splat with the script below: # tc f a dev test0 egress matchall action sample rate 0 group 1 index 2 # tc -s a s action sample total acts 1 action order 0: sample rate 1/0 group 1 pipe index 2 ref 1 bind 1 installed 19 sec used 19 sec Action statistics: Sent 0 bytes 0 pkt (dropped 0, overlimits 0 requeues 0) backlog 0b 0p requeues 0 # ping 192.0.2.1 -I test0 -c1 -q divide error: 0000 [#1] SMP PTI CPU: 1 PID: 6192 Comm: ping Not tainted 5.1.0-rc2.diag2+ #591 Hardware name: Red Hat KVM, BIOS 0.5.1 01/01/2011 RIP: 0010:tcf_sample_act+0x9e/0x1e0 [act_sample] Code: 6a f1 85 c0 74 0d 80 3d 83 1a 00 00 00 0f 84 9c 00 00 00 4d 85 e4 0f 84 85 00 00 00 e8 9b d7 9c f1 44 8b 8b e0 00 00 00 31 d2 <41> f7 f1 85 d2 75 70 f6 85 83 00 00 00 10 48 8b 45 10 8b 88 08 01 RSP: 0018:ffffae320190ba30 EFLAGS: 00010246 RAX: 00000000b0677d21 RBX: ffff8af1ed9ec000 RCX: 0000000059a9fe49 RDX: 0000000000000000 RSI: 000000000c7e33b7 RDI: ffff8af23daa0af0 RBP: ffff8af1ee11b200 R08: 0000000074fcaf7e R09: 0000000000000000 R10: 0000000000000050 R11: ffffffffb3088680 R12: ffff8af232307f80 R13: 0000000000000003 R14: ffff8af1ed9ec000 R15: 0000000000000000 FS: 00007fe9c6d2f740(0000) GS:ffff8af23da80000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007fff6772f000 CR3: 00000000746a2004 CR4: 00000000001606e0 Call Trace: tcf_action_exec+0x7c/0x1c0 tcf_classify+0x57/0x160 __dev_queue_xmit+0x3dc/0xd10 ip_finish_output2+0x257/0x6d0 ip_output+0x75/0x280 ip_send_skb+0x15/0x40 raw_sendmsg+0xae3/0x1410 sock_sendmsg+0x36/0x40 __sys_sendto+0x10e/0x140 __x64_sys_sendto+0x24/0x30 do_syscall_64+0x60/0x210 entry_SYSCALL_64_after_hwframe+0x49/0xbe [...] Kernel panic - not syncing: Fatal exception in interrupt Add a TDC selftest to document that 'rate' is now being validated. Reported-by: Matteo Croce Fixes: 5c5670fae430 ("net/sched: Introduce sample tc action") Signed-off-by: Davide Caratti Acked-by: Yotam Gigi Signed-off-by: David S. Miller --- net/sched/act_sample.c | 10 +++++++-- .../tc-testing/tc-tests/actions/sample.json | 24 ++++++++++++++++++++++ 2 files changed, 32 insertions(+), 2 deletions(-) diff --git a/net/sched/act_sample.c b/net/sched/act_sample.c index 4060b0955c97..0f82d50ea232 100644 --- a/net/sched/act_sample.c +++ b/net/sched/act_sample.c @@ -45,8 +45,8 @@ static int tcf_sample_init(struct net *net, struct nlattr *nla, struct nlattr *tb[TCA_SAMPLE_MAX + 1]; struct psample_group *psample_group; struct tcf_chain *goto_ch = NULL; + u32 psample_group_num, rate; struct tc_sample *parm; - u32 psample_group_num; struct tcf_sample *s; bool exists = false; int ret, err; @@ -85,6 +85,12 @@ static int tcf_sample_init(struct net *net, struct nlattr *nla, if (err < 0) goto release_idr; + rate = nla_get_u32(tb[TCA_SAMPLE_RATE]); + if (!rate) { + NL_SET_ERR_MSG(extack, "invalid sample rate"); + err = -EINVAL; + goto put_chain; + } psample_group_num = nla_get_u32(tb[TCA_SAMPLE_PSAMPLE_GROUP]); psample_group = psample_group_get(net, psample_group_num); if (!psample_group) { @@ -96,7 +102,7 @@ static int tcf_sample_init(struct net *net, struct nlattr *nla, spin_lock_bh(&s->tcf_lock); goto_ch = tcf_action_set_ctrlact(*a, parm->action, goto_ch); - s->rate = nla_get_u32(tb[TCA_SAMPLE_RATE]); + s->rate = rate; s->psample_group_num = psample_group_num; RCU_INIT_POINTER(s->psample_group, psample_group); diff --git a/tools/testing/selftests/tc-testing/tc-tests/actions/sample.json b/tools/testing/selftests/tc-testing/tc-tests/actions/sample.json index 27f0acaed880..ddabb160a11b 100644 --- a/tools/testing/selftests/tc-testing/tc-tests/actions/sample.json +++ b/tools/testing/selftests/tc-testing/tc-tests/actions/sample.json @@ -143,6 +143,30 @@ "$TC actions flush action sample" ] }, + { + "id": "7571", + "name": "Add sample action with invalid rate", + "category": [ + "actions", + "sample" + ], + "setup": [ + [ + "$TC actions flush action sample", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC actions add action sample rate 0 group 1 index 2", + "expExitCode": "255", + "verifyCmd": "$TC actions get action sample index 2", + "matchPattern": "action order [0-9]+: sample rate 1/0 group 1.*index 2 ref", + "matchCount": "0", + "teardown": [ + "$TC actions flush action sample" + ] + }, { "id": "b6d4", "name": "Add sample action with mandatory arguments and invalid control action", -- cgit v1.2.3 From aecfde23108b8e637d9f5c5e523b24fb97035dc3 Mon Sep 17 00:00:00 2001 From: Koen De Schepper Date: Thu, 4 Apr 2019 12:24:02 +0000 Subject: tcp: Ensure DCTCP reacts to losses MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit RFC8257 §3.5 explicitly states that "A DCTCP sender MUST react to loss episodes in the same way as conventional TCP". Currently, Linux DCTCP performs no cwnd reduction when losses are encountered. Optionally, the dctcp_clamp_alpha_on_loss resets alpha to its maximal value if a RTO happens. This behavior is sub-optimal for at least two reasons: i) it ignores losses triggering fast retransmissions; and ii) it causes unnecessary large cwnd reduction in the future if the loss was isolated as it resets the historical term of DCTCP's alpha EWMA to its maximal value (i.e., denoting a total congestion). The second reason has an especially noticeable effect when using DCTCP in high BDP environments, where alpha normally stays at low values. This patch replace the clamping of alpha by setting ssthresh to half of cwnd for both fast retransmissions and RTOs, at most once per RTT. Consequently, the dctcp_clamp_alpha_on_loss module parameter has been removed. The table below shows experimental results where we measured the drop probability of a PIE AQM (not applying ECN marks) at a bottleneck in the presence of a single TCP flow with either the alpha-clamping option enabled or the cwnd halving proposed by this patch. Results using reno or cubic are given for comparison. | Link | RTT | Drop TCP CC | speed | base+AQM | probability ==================|=========|==========|============ CUBIC | 40Mbps | 7+20ms | 0.21% RENO | | | 0.19% DCTCP-CLAMP-ALPHA | | | 25.80% DCTCP-HALVE-CWND | | | 0.22% ------------------|---------|----------|------------ CUBIC | 100Mbps | 7+20ms | 0.03% RENO | | | 0.02% DCTCP-CLAMP-ALPHA | | | 23.30% DCTCP-HALVE-CWND | | | 0.04% ------------------|---------|----------|------------ CUBIC | 800Mbps | 1+1ms | 0.04% RENO | | | 0.05% DCTCP-CLAMP-ALPHA | | | 18.70% DCTCP-HALVE-CWND | | | 0.06% We see that, without halving its cwnd for all source of losses, DCTCP drives the AQM to large drop probabilities in order to keep the queue length under control (i.e., it repeatedly faces RTOs). Instead, if DCTCP reacts to all source of losses, it can then be controlled by the AQM using similar drop levels than cubic or reno. Signed-off-by: Koen De Schepper Signed-off-by: Olivier Tilmans Cc: Bob Briscoe Cc: Lawrence Brakmo Cc: Florian Westphal Cc: Daniel Borkmann Cc: Yuchung Cheng Cc: Neal Cardwell Cc: Eric Dumazet Cc: Andrew Shewmaker Cc: Glenn Judd Acked-by: Florian Westphal Acked-by: Neal Cardwell Acked-by: Daniel Borkmann Signed-off-by: David S. Miller --- net/ipv4/tcp_dctcp.c | 36 ++++++++++++++++++------------------ 1 file changed, 18 insertions(+), 18 deletions(-) diff --git a/net/ipv4/tcp_dctcp.c b/net/ipv4/tcp_dctcp.c index cd4814f7e962..359da68d7c06 100644 --- a/net/ipv4/tcp_dctcp.c +++ b/net/ipv4/tcp_dctcp.c @@ -67,11 +67,6 @@ static unsigned int dctcp_alpha_on_init __read_mostly = DCTCP_MAX_ALPHA; module_param(dctcp_alpha_on_init, uint, 0644); MODULE_PARM_DESC(dctcp_alpha_on_init, "parameter for initial alpha value"); -static unsigned int dctcp_clamp_alpha_on_loss __read_mostly; -module_param(dctcp_clamp_alpha_on_loss, uint, 0644); -MODULE_PARM_DESC(dctcp_clamp_alpha_on_loss, - "parameter for clamping alpha on loss"); - static struct tcp_congestion_ops dctcp_reno; static void dctcp_reset(const struct tcp_sock *tp, struct dctcp *ca) @@ -164,21 +159,23 @@ static void dctcp_update_alpha(struct sock *sk, u32 flags) } } -static void dctcp_state(struct sock *sk, u8 new_state) +static void dctcp_react_to_loss(struct sock *sk) { - if (dctcp_clamp_alpha_on_loss && new_state == TCP_CA_Loss) { - struct dctcp *ca = inet_csk_ca(sk); + struct dctcp *ca = inet_csk_ca(sk); + struct tcp_sock *tp = tcp_sk(sk); - /* If this extension is enabled, we clamp dctcp_alpha to - * max on packet loss; the motivation is that dctcp_alpha - * is an indicator to the extend of congestion and packet - * loss is an indicator of extreme congestion; setting - * this in practice turned out to be beneficial, and - * effectively assumes total congestion which reduces the - * window by half. - */ - ca->dctcp_alpha = DCTCP_MAX_ALPHA; - } + ca->loss_cwnd = tp->snd_cwnd; + tp->snd_ssthresh = max(tp->snd_cwnd >> 1U, 2U); +} + +static void dctcp_state(struct sock *sk, u8 new_state) +{ + if (new_state == TCP_CA_Recovery && + new_state != inet_csk(sk)->icsk_ca_state) + dctcp_react_to_loss(sk); + /* We handle RTO in dctcp_cwnd_event to ensure that we perform only + * one loss-adjustment per RTT. + */ } static void dctcp_cwnd_event(struct sock *sk, enum tcp_ca_event ev) @@ -190,6 +187,9 @@ static void dctcp_cwnd_event(struct sock *sk, enum tcp_ca_event ev) case CA_EVENT_ECN_NO_CE: dctcp_ece_ack_update(sk, ev, &ca->prior_rcv_nxt, &ca->ce_state); break; + case CA_EVENT_LOSS: + dctcp_react_to_loss(sk); + break; default: /* Don't care for the rest. */ break; -- cgit v1.2.3 From b2100cc56fca8c51d28aa42a9f1fbcb2cf351996 Mon Sep 17 00:00:00 2001 From: Toke Høiland-Jørgensen Date: Thu, 4 Apr 2019 15:01:33 +0200 Subject: sch_cake: Use tc_skb_protocol() helper for getting packet protocol MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We shouldn't be using skb->protocol directly as that will miss cases with hardware-accelerated VLAN tags. Use the helper instead to get the right protocol number. Reported-by: Kevin Darbyshire-Bryant Signed-off-by: Toke Høiland-Jørgensen Signed-off-by: David S. Miller --- net/sched/sch_cake.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/sched/sch_cake.c b/net/sched/sch_cake.c index acc9b9da985f..a3b55e18df04 100644 --- a/net/sched/sch_cake.c +++ b/net/sched/sch_cake.c @@ -1519,7 +1519,7 @@ static u8 cake_handle_diffserv(struct sk_buff *skb, u16 wash) { u8 dscp; - switch (skb->protocol) { + switch (tc_skb_protocol(skb)) { case htons(ETH_P_IP): dscp = ipv4_get_dsfield(ip_hdr(skb)) >> 2; if (wash && dscp) -- cgit v1.2.3 From c87b4ecdbe8db27867a7b7f840291cd843406bd7 Mon Sep 17 00:00:00 2001 From: Toke Høiland-Jørgensen Date: Thu, 4 Apr 2019 15:01:33 +0200 Subject: sch_cake: Make sure we can write the IP header before changing DSCP bits MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit There is not actually any guarantee that the IP headers are valid before we access the DSCP bits of the packets. Fix this using the same approach taken in sch_dsmark. Reported-by: Kevin Darbyshire-Bryant Signed-off-by: Toke Høiland-Jørgensen Signed-off-by: David S. Miller --- net/sched/sch_cake.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/net/sched/sch_cake.c b/net/sched/sch_cake.c index a3b55e18df04..259d97bc2abd 100644 --- a/net/sched/sch_cake.c +++ b/net/sched/sch_cake.c @@ -1517,16 +1517,27 @@ static unsigned int cake_drop(struct Qdisc *sch, struct sk_buff **to_free) static u8 cake_handle_diffserv(struct sk_buff *skb, u16 wash) { + int wlen = skb_network_offset(skb); u8 dscp; switch (tc_skb_protocol(skb)) { case htons(ETH_P_IP): + wlen += sizeof(struct iphdr); + if (!pskb_may_pull(skb, wlen) || + skb_try_make_writable(skb, wlen)) + return 0; + dscp = ipv4_get_dsfield(ip_hdr(skb)) >> 2; if (wash && dscp) ipv4_change_dsfield(ip_hdr(skb), INET_ECN_MASK, 0); return dscp; case htons(ETH_P_IPV6): + wlen += sizeof(struct ipv6hdr); + if (!pskb_may_pull(skb, wlen) || + skb_try_make_writable(skb, wlen)) + return 0; + dscp = ipv6_get_dsfield(ipv6_hdr(skb)) >> 2; if (wash && dscp) ipv6_change_dsfield(ipv6_hdr(skb), INET_ECN_MASK, 0); -- cgit v1.2.3 From 6e3572e83dc3563e3b7e742bcb225b42a60cdaeb Mon Sep 17 00:00:00 2001 From: Horatiu Vultur Date: Thu, 4 Apr 2019 10:25:28 +0200 Subject: MIPS: generic: Add switchdev, pinctrl and fit to ocelot_defconfig Some of the configuration were not selected by default anymore, therefore enable them again. Also remove some configs which are used for MSCC Ocelot. Signed-off-by: Horatiu Vultur Signed-off-by: Paul Burton Cc: Cc: Cc: Cc: Cc: Cc: --- arch/mips/configs/generic/board-ocelot.config | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/arch/mips/configs/generic/board-ocelot.config b/arch/mips/configs/generic/board-ocelot.config index f607888d2483..184eb65a6ba7 100644 --- a/arch/mips/configs/generic/board-ocelot.config +++ b/arch/mips/configs/generic/board-ocelot.config @@ -1,6 +1,10 @@ # require CONFIG_CPU_MIPS32_R2=y CONFIG_LEGACY_BOARD_OCELOT=y +CONFIG_FIT_IMAGE_FDT_OCELOT=y + +CONFIG_BRIDGE=y +CONFIG_GENERIC_PHY=y CONFIG_MTD=y CONFIG_MTD_CMDLINE_PARTS=y @@ -19,6 +23,8 @@ CONFIG_SERIAL_8250_CONSOLE=y CONFIG_SERIAL_OF_PLATFORM=y CONFIG_NETDEVICES=y +CONFIG_NET_SWITCHDEV=y +CONFIG_NET_DSA=y CONFIG_MSCC_OCELOT_SWITCH=y CONFIG_MSCC_OCELOT_SWITCH_OCELOT=y CONFIG_MDIO_MSCC_MIIM=y @@ -35,6 +41,8 @@ CONFIG_SPI_DESIGNWARE=y CONFIG_SPI_DW_MMIO=y CONFIG_SPI_SPIDEV=y +CONFIG_PINCTRL_OCELOT=y + CONFIG_GPIO_SYSFS=y CONFIG_POWER_RESET=y -- cgit v1.2.3 From ada770b1e74a77fff2d5f539bf6c42c25f4784db Mon Sep 17 00:00:00 2001 From: Max Filippov Date: Thu, 4 Apr 2019 11:08:40 -0700 Subject: xtensa: fix return_address return_address returns the address that is one level higher in the call stack than requested in its argument, because level 0 corresponds to its caller's return address. Use requested level as the number of stack frames to skip. This fixes the address reported by might_sleep and friends. Cc: stable@vger.kernel.org Signed-off-by: Max Filippov --- arch/xtensa/kernel/stacktrace.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/arch/xtensa/kernel/stacktrace.c b/arch/xtensa/kernel/stacktrace.c index 174c11f13bba..b9f82510c650 100644 --- a/arch/xtensa/kernel/stacktrace.c +++ b/arch/xtensa/kernel/stacktrace.c @@ -253,10 +253,14 @@ static int return_address_cb(struct stackframe *frame, void *data) return 1; } +/* + * level == 0 is for the return address from the caller of this function, + * not from this function itself. + */ unsigned long return_address(unsigned level) { struct return_addr_data r = { - .skip = level + 1, + .skip = level, }; walk_stackframe(stack_pointer(NULL), return_address_cb, &r); return r.addr; -- cgit v1.2.3 From bcb44433bba5eaff293888ef22ffa07f1f0347d6 Mon Sep 17 00:00:00 2001 From: Mike Snitzer Date: Wed, 3 Apr 2019 12:23:11 -0400 Subject: dm: disable DISCARD if the underlying storage no longer supports it Storage devices which report supporting discard commands like WRITE_SAME_16 with unmap, but reject discard commands sent to the storage device. This is a clear storage firmware bug but it doesn't change the fact that should a program cause discards to be sent to a multipath device layered on this buggy storage, all paths can end up failed at the same time from the discards, causing possible I/O loss. The first discard to a path will fail with Illegal Request, Invalid field in cdb, e.g.: kernel: sd 8:0:8:19: [sdfn] tag#0 FAILED Result: hostbyte=DID_OK driverbyte=DRIVER_SENSE kernel: sd 8:0:8:19: [sdfn] tag#0 Sense Key : Illegal Request [current] kernel: sd 8:0:8:19: [sdfn] tag#0 Add. Sense: Invalid field in cdb kernel: sd 8:0:8:19: [sdfn] tag#0 CDB: Write same(16) 93 08 00 00 00 00 00 a0 08 00 00 00 80 00 00 00 kernel: blk_update_request: critical target error, dev sdfn, sector 10487808 The SCSI layer converts this to the BLK_STS_TARGET error number, the sd device disables its support for discard on this path, and because of the BLK_STS_TARGET error multipath fails the discard without failing any path or retrying down a different path. But subsequent discards can cause path failures. Any discards sent to the path which already failed a discard ends up failing with EIO from blk_cloned_rq_check_limits with an "over max size limit" error since the discard limit was set to 0 by the sd driver for the path. As the error is EIO, this now fails the path and multipath tries to send the discard down the next path. This cycle continues as discards are sent until all paths fail. Fix this by training DM core to disable DISCARD if the underlying storage already did so. Also, fix branching in dm_done() and clone_endio() to reflect the mutually exclussive nature of the IO operations in question. Cc: stable@vger.kernel.org Reported-by: David Jeffery Signed-off-by: Mike Snitzer --- drivers/md/dm-core.h | 1 + drivers/md/dm-rq.c | 11 +++++++---- drivers/md/dm.c | 20 ++++++++++++++++---- 3 files changed, 24 insertions(+), 8 deletions(-) diff --git a/drivers/md/dm-core.h b/drivers/md/dm-core.h index 95c6d86ab5e8..c4ef1fceead6 100644 --- a/drivers/md/dm-core.h +++ b/drivers/md/dm-core.h @@ -115,6 +115,7 @@ struct mapped_device { struct srcu_struct io_barrier; }; +void disable_discard(struct mapped_device *md); void disable_write_same(struct mapped_device *md); void disable_write_zeroes(struct mapped_device *md); diff --git a/drivers/md/dm-rq.c b/drivers/md/dm-rq.c index 09773636602d..b66745bd08bb 100644 --- a/drivers/md/dm-rq.c +++ b/drivers/md/dm-rq.c @@ -222,11 +222,14 @@ static void dm_done(struct request *clone, blk_status_t error, bool mapped) } if (unlikely(error == BLK_STS_TARGET)) { - if (req_op(clone) == REQ_OP_WRITE_SAME && - !clone->q->limits.max_write_same_sectors) + if (req_op(clone) == REQ_OP_DISCARD && + !clone->q->limits.max_discard_sectors) + disable_discard(tio->md); + else if (req_op(clone) == REQ_OP_WRITE_SAME && + !clone->q->limits.max_write_same_sectors) disable_write_same(tio->md); - if (req_op(clone) == REQ_OP_WRITE_ZEROES && - !clone->q->limits.max_write_zeroes_sectors) + else if (req_op(clone) == REQ_OP_WRITE_ZEROES && + !clone->q->limits.max_write_zeroes_sectors) disable_write_zeroes(tio->md); } diff --git a/drivers/md/dm.c b/drivers/md/dm.c index 89b04169658d..043f0761e4a0 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -945,6 +945,15 @@ static void dec_pending(struct dm_io *io, blk_status_t error) } } +void disable_discard(struct mapped_device *md) +{ + struct queue_limits *limits = dm_get_queue_limits(md); + + /* device doesn't really support DISCARD, disable it */ + limits->max_discard_sectors = 0; + blk_queue_flag_clear(QUEUE_FLAG_DISCARD, md->queue); +} + void disable_write_same(struct mapped_device *md) { struct queue_limits *limits = dm_get_queue_limits(md); @@ -970,11 +979,14 @@ static void clone_endio(struct bio *bio) dm_endio_fn endio = tio->ti->type->end_io; if (unlikely(error == BLK_STS_TARGET) && md->type != DM_TYPE_NVME_BIO_BASED) { - if (bio_op(bio) == REQ_OP_WRITE_SAME && - !bio->bi_disk->queue->limits.max_write_same_sectors) + if (bio_op(bio) == REQ_OP_DISCARD && + !bio->bi_disk->queue->limits.max_discard_sectors) + disable_discard(md); + else if (bio_op(bio) == REQ_OP_WRITE_SAME && + !bio->bi_disk->queue->limits.max_write_same_sectors) disable_write_same(md); - if (bio_op(bio) == REQ_OP_WRITE_ZEROES && - !bio->bi_disk->queue->limits.max_write_zeroes_sectors) + else if (bio_op(bio) == REQ_OP_WRITE_ZEROES && + !bio->bi_disk->queue->limits.max_write_zeroes_sectors) disable_write_zeroes(md); } -- cgit v1.2.3 From d9b8a67b3b95a5c5aae6422b8113adc1c2485f2b Mon Sep 17 00:00:00 2001 From: Liu Jian Date: Sun, 3 Mar 2019 15:04:18 +0800 Subject: mtd: cfi: fix deadloop in cfi_cmdset_0002.c do_write_buffer In function do_write_buffer(), in the for loop, there is a case chip_ready() returns 1 while chip_good() returns 0, so it never break the loop. To fix this, chip_good() is enough and it should timeout if it stay bad for a while. Fixes: dfeae1073583("mtd: cfi_cmdset_0002: Change write buffer to check correct value") Signed-off-by: Yi Huaijie Signed-off-by: Liu Jian Reviewed-by: Tokunori Ikegami Signed-off-by: Richard Weinberger --- drivers/mtd/chips/cfi_cmdset_0002.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/mtd/chips/cfi_cmdset_0002.c b/drivers/mtd/chips/cfi_cmdset_0002.c index 72428b6bfc47..7b7286b4d81e 100644 --- a/drivers/mtd/chips/cfi_cmdset_0002.c +++ b/drivers/mtd/chips/cfi_cmdset_0002.c @@ -1876,7 +1876,11 @@ static int __xipram do_write_buffer(struct map_info *map, struct flchip *chip, continue; } - if (time_after(jiffies, timeo) && !chip_ready(map, adr)) + /* + * We check "time_after" and "!chip_good" before checking "chip_good" to avoid + * the failure due to scheduling. + */ + if (time_after(jiffies, timeo) && !chip_good(map, adr, datum)) break; if (chip_good(map, adr, datum)) { -- cgit v1.2.3 From 6af1c849dfb1f1d326fbdd157c9bc882b921f450 Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Thu, 4 Apr 2019 08:44:05 +0000 Subject: aio: use kmem_cache_free() instead of kfree() memory allocated by kmem_cache_alloc() should be freed using kmem_cache_free(), not kfree(). Fixes: fa0ca2aee3be ("deal with get_reqs_available() in aio_get_req() itself") Signed-off-by: Wei Yongjun Signed-off-by: Al Viro --- fs/aio.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/aio.c b/fs/aio.c index 7ccecaab487a..3490d1fa0e16 100644 --- a/fs/aio.c +++ b/fs/aio.c @@ -1034,7 +1034,7 @@ static inline struct aio_kiocb *aio_get_req(struct kioctx *ctx) return NULL; if (unlikely(!get_reqs_available(ctx))) { - kfree(req); + kmem_cache_free(kiocb_cachep, req); return NULL; } -- cgit v1.2.3 From 0a89eb92d8c335da92bd5f54d8463b87dd440d45 Mon Sep 17 00:00:00 2001 From: Chris Leech Date: Tue, 2 Apr 2019 15:06:12 -0700 Subject: vlan: conditional inclusion of FCoE hooks to match netdevice.h and bnx2x Way back in 3c9c36bcedd426f2be2826da43e5163de61735f7 the ndo_fcoe_get_wwn pointer was switched from depending on CONFIG_FCOE to CONFIG_LIBFCOE in order to allow building FCoE support into the bnx2x driver and used by bnx2fc without including the generic software fcoe module. But, FCoE is generally used over an 802.1q VLAN, and the implementation of ndo_fcoe_get_wwn in the 8021q module was not similarly changed. The result is that if CONFIG_FCOE is disabled, then bnz2fc cannot make a call to ndo_fcoe_get_wwn through the 8021q interface to the underlying bnx2x interface. The bnx2fc driver then falls back to a potentially different mapping of Ethernet MAC to Fibre Channel WWN, creating an incompatibility with the fabric and target configurations when compared to the WWNs used by pre-boot firmware and differently-configured kernels. So make the conditional inclusion of FCoE code in 8021q match the conditional inclusion in netdevice.h Signed-off-by: Chris Leech Signed-off-by: David S. Miller --- net/8021q/vlan_dev.c | 26 +++++++++++++++----------- 1 file changed, 15 insertions(+), 11 deletions(-) diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c index 15293c2a5dd8..8d77b6ee4477 100644 --- a/net/8021q/vlan_dev.c +++ b/net/8021q/vlan_dev.c @@ -443,27 +443,29 @@ static int vlan_dev_fcoe_disable(struct net_device *dev) return rc; } -static int vlan_dev_fcoe_get_wwn(struct net_device *dev, u64 *wwn, int type) +static int vlan_dev_fcoe_ddp_target(struct net_device *dev, u16 xid, + struct scatterlist *sgl, unsigned int sgc) { struct net_device *real_dev = vlan_dev_priv(dev)->real_dev; const struct net_device_ops *ops = real_dev->netdev_ops; - int rc = -EINVAL; + int rc = 0; + + if (ops->ndo_fcoe_ddp_target) + rc = ops->ndo_fcoe_ddp_target(real_dev, xid, sgl, sgc); - if (ops->ndo_fcoe_get_wwn) - rc = ops->ndo_fcoe_get_wwn(real_dev, wwn, type); return rc; } +#endif -static int vlan_dev_fcoe_ddp_target(struct net_device *dev, u16 xid, - struct scatterlist *sgl, unsigned int sgc) +#ifdef NETDEV_FCOE_WWNN +static int vlan_dev_fcoe_get_wwn(struct net_device *dev, u64 *wwn, int type) { struct net_device *real_dev = vlan_dev_priv(dev)->real_dev; const struct net_device_ops *ops = real_dev->netdev_ops; - int rc = 0; - - if (ops->ndo_fcoe_ddp_target) - rc = ops->ndo_fcoe_ddp_target(real_dev, xid, sgl, sgc); + int rc = -EINVAL; + if (ops->ndo_fcoe_get_wwn) + rc = ops->ndo_fcoe_get_wwn(real_dev, wwn, type); return rc; } #endif @@ -794,9 +796,11 @@ static const struct net_device_ops vlan_netdev_ops = { .ndo_fcoe_ddp_done = vlan_dev_fcoe_ddp_done, .ndo_fcoe_enable = vlan_dev_fcoe_enable, .ndo_fcoe_disable = vlan_dev_fcoe_disable, - .ndo_fcoe_get_wwn = vlan_dev_fcoe_get_wwn, .ndo_fcoe_ddp_target = vlan_dev_fcoe_ddp_target, #endif +#ifdef NETDEV_FCOE_WWNN + .ndo_fcoe_get_wwn = vlan_dev_fcoe_get_wwn, +#endif #ifdef CONFIG_NET_POLL_CONTROLLER .ndo_poll_controller = vlan_dev_poll_controller, .ndo_netpoll_setup = vlan_dev_netpoll_setup, -- cgit v1.2.3 From cc5a726c79158bd307150e8d4176ec79b52001ea Mon Sep 17 00:00:00 2001 From: Varun Prakash Date: Wed, 3 Apr 2019 17:30:14 +0530 Subject: libcxgb: fix incorrect ppmax calculation BITS_TO_LONGS() uses DIV_ROUND_UP() because of this ppmax value can be greater than available per cpu page pods. This patch removes BITS_TO_LONGS() to fix this issue. Signed-off-by: Varun Prakash Signed-off-by: David S. Miller --- drivers/net/ethernet/chelsio/libcxgb/libcxgb_ppm.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/chelsio/libcxgb/libcxgb_ppm.c b/drivers/net/ethernet/chelsio/libcxgb/libcxgb_ppm.c index 74849be5f004..e2919005ead3 100644 --- a/drivers/net/ethernet/chelsio/libcxgb/libcxgb_ppm.c +++ b/drivers/net/ethernet/chelsio/libcxgb/libcxgb_ppm.c @@ -354,7 +354,10 @@ static struct cxgbi_ppm_pool *ppm_alloc_cpu_pool(unsigned int *total, ppmax = max; /* pool size must be multiple of unsigned long */ - bmap = BITS_TO_LONGS(ppmax); + bmap = ppmax / BITS_PER_TYPE(unsigned long); + if (!bmap) + return NULL; + ppmax = (bmap * sizeof(unsigned long)) << 3; alloc_sz = sizeof(*pools) + sizeof(unsigned long) * bmap; @@ -402,6 +405,10 @@ int cxgbi_ppm_init(void **ppm_pp, struct net_device *ndev, if (reserve_factor) { ppmax_pool = ppmax / reserve_factor; pool = ppm_alloc_cpu_pool(&ppmax_pool, &pool_index_max); + if (!pool) { + ppmax_pool = 0; + reserve_factor = 0; + } pr_debug("%s: ppmax %u, cpu total %u, per cpu %u.\n", ndev->name, ppmax, ppmax_pool, pool_index_max); -- cgit v1.2.3 From 1515a63fc413f160d20574ab0894e7f1020c7be2 Mon Sep 17 00:00:00 2001 From: Nikolay Aleksandrov Date: Wed, 3 Apr 2019 23:27:24 +0300 Subject: net: bridge: always clear mcast matching struct on reports and leaves We need to be careful and always zero the whole br_ip struct when it is used for matching since the rhashtable change. This patch fixes all the places which didn't properly clear it which in turn might've caused mismatches. Thanks for the great bug report with reproducing steps and bisection. Steps to reproduce (from the bug report): ip link add br0 type bridge mcast_querier 1 ip link set br0 up ip link add v2 type veth peer name v3 ip link set v2 master br0 ip link set v2 up ip link set v3 up ip addr add 3.0.0.2/24 dev v3 ip netns add test ip link add v1 type veth peer name v1 netns test ip link set v1 master br0 ip link set v1 up ip -n test link set v1 up ip -n test addr add 3.0.0.1/24 dev v1 # Multicast receiver ip netns exec test socat UDP4-RECVFROM:5588,ip-add-membership=224.224.224.224:3.0.0.1,fork - # Multicast sender echo hello | nc -u -s 3.0.0.2 224.224.224.224 5588 Reported-by: liam.mcbirnie@boeing.com Fixes: 19e3a9c90c53 ("net: bridge: convert multicast to generic rhashtable") Signed-off-by: Nikolay Aleksandrov Signed-off-by: David S. Miller --- net/bridge/br_multicast.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c index a0e369179f6d..02da21d771c9 100644 --- a/net/bridge/br_multicast.c +++ b/net/bridge/br_multicast.c @@ -601,6 +601,7 @@ static int br_ip4_multicast_add_group(struct net_bridge *br, if (ipv4_is_local_multicast(group)) return 0; + memset(&br_group, 0, sizeof(br_group)); br_group.u.ip4 = group; br_group.proto = htons(ETH_P_IP); br_group.vid = vid; @@ -1497,6 +1498,7 @@ static void br_ip4_multicast_leave_group(struct net_bridge *br, own_query = port ? &port->ip4_own_query : &br->ip4_own_query; + memset(&br_group, 0, sizeof(br_group)); br_group.u.ip4 = group; br_group.proto = htons(ETH_P_IP); br_group.vid = vid; @@ -1520,6 +1522,7 @@ static void br_ip6_multicast_leave_group(struct net_bridge *br, own_query = port ? &port->ip6_own_query : &br->ip6_own_query; + memset(&br_group, 0, sizeof(br_group)); br_group.u.ip6 = *group; br_group.proto = htons(ETH_P_IPV6); br_group.vid = vid; -- cgit v1.2.3 From 86baf800de84eb89615c138d368b14bff5ee7d8a Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 6 Mar 2019 12:08:57 +0100 Subject: extcon: ptn5150: fix COMPILE_TEST dependencies The PTN5150 dependencies look like they were meant to do the right thing, but they actually should not allow building without I2C for compile testing, as that results in a Kconfig warning and subsequent build failure: WARNING: unmet direct dependencies detected for REGMAP_I2C Depends on [m]: I2C [=m] Selected by [y]: - EXTCON_PTN5150 [=y] && EXTCON [=y] && (I2C [=m] && GPIOLIB [=y] || COMPILE_TEST [=y]) Selected by [m]: - EEPROM_AT24 [=m] && I2C [=m] && SYSFS [=y] - KEYBOARD_CAP11XX [=m] && !UML && INPUT [=y] && INPUT_KEYBOARD [=y] && OF [=y] && I2C [=m] - INPUT_DRV260X_HAPTICS [=m] && !UML && INPUT_MISC [=y] && INPUT [=y] && I2C [=m] && (GPIOLIB [=y] || COMPILE_TEST [=y]) - ... [many others] Add parentheses around the expression so we can compile-test without GPIOLIB but not without I2C. Fixes: 4ed754de2d66 ("extcon: Add support for ptn5150 extcon driver") Signed-off-by: Arnd Bergmann Signed-off-by: Chanwoo Choi --- drivers/extcon/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/extcon/Kconfig b/drivers/extcon/Kconfig index 8e17149655f0..540e8cd16ee6 100644 --- a/drivers/extcon/Kconfig +++ b/drivers/extcon/Kconfig @@ -116,7 +116,7 @@ config EXTCON_PALMAS config EXTCON_PTN5150 tristate "NXP PTN5150 CC LOGIC USB EXTCON support" - depends on I2C && GPIOLIB || COMPILE_TEST + depends on I2C && (GPIOLIB || COMPILE_TEST) select REGMAP_I2C help Say Y here to enable support for USB peripheral and USB host -- cgit v1.2.3 From bb9bd814ebf04f579be466ba61fc922625508807 Mon Sep 17 00:00:00 2001 From: Lorenzo Bianconi Date: Thu, 4 Apr 2019 16:37:53 +0200 Subject: ipv6: sit: reset ip header pointer in ipip6_rcv ipip6 tunnels run iptunnel_pull_header on received skbs. This can determine the following use-after-free accessing iph pointer since the packet will be 'uncloned' running pskb_expand_head if it is a cloned gso skb (e.g if the packet has been sent though a veth device) [ 706.369655] BUG: KASAN: use-after-free in ipip6_rcv+0x1678/0x16e0 [sit] [ 706.449056] Read of size 1 at addr ffffe01b6bd855f5 by task ksoftirqd/1/= [ 706.669494] Hardware name: HPE ProLiant m400 Server/ProLiant m400 Server, BIOS U02 08/19/2016 [ 706.771839] Call trace: [ 706.801159] dump_backtrace+0x0/0x2f8 [ 706.845079] show_stack+0x24/0x30 [ 706.884833] dump_stack+0xe0/0x11c [ 706.925629] print_address_description+0x68/0x260 [ 706.982070] kasan_report+0x178/0x340 [ 707.025995] __asan_report_load1_noabort+0x30/0x40 [ 707.083481] ipip6_rcv+0x1678/0x16e0 [sit] [ 707.132623] tunnel64_rcv+0xd4/0x200 [tunnel4] [ 707.185940] ip_local_deliver_finish+0x3b8/0x988 [ 707.241338] ip_local_deliver+0x144/0x470 [ 707.289436] ip_rcv_finish+0x43c/0x14b0 [ 707.335447] ip_rcv+0x628/0x1138 [ 707.374151] __netif_receive_skb_core+0x1670/0x2600 [ 707.432680] __netif_receive_skb+0x28/0x190 [ 707.482859] process_backlog+0x1d0/0x610 [ 707.529913] net_rx_action+0x37c/0xf68 [ 707.574882] __do_softirq+0x288/0x1018 [ 707.619852] run_ksoftirqd+0x70/0xa8 [ 707.662734] smpboot_thread_fn+0x3a4/0x9e8 [ 707.711875] kthread+0x2c8/0x350 [ 707.750583] ret_from_fork+0x10/0x18 [ 707.811302] Allocated by task 16982: [ 707.854182] kasan_kmalloc.part.1+0x40/0x108 [ 707.905405] kasan_kmalloc+0xb4/0xc8 [ 707.948291] kasan_slab_alloc+0x14/0x20 [ 707.994309] __kmalloc_node_track_caller+0x158/0x5e0 [ 708.053902] __kmalloc_reserve.isra.8+0x54/0xe0 [ 708.108280] __alloc_skb+0xd8/0x400 [ 708.150139] sk_stream_alloc_skb+0xa4/0x638 [ 708.200346] tcp_sendmsg_locked+0x818/0x2b90 [ 708.251581] tcp_sendmsg+0x40/0x60 [ 708.292376] inet_sendmsg+0xf0/0x520 [ 708.335259] sock_sendmsg+0xac/0xf8 [ 708.377096] sock_write_iter+0x1c0/0x2c0 [ 708.424154] new_sync_write+0x358/0x4a8 [ 708.470162] __vfs_write+0xc4/0xf8 [ 708.510950] vfs_write+0x12c/0x3d0 [ 708.551739] ksys_write+0xcc/0x178 [ 708.592533] __arm64_sys_write+0x70/0xa0 [ 708.639593] el0_svc_handler+0x13c/0x298 [ 708.686646] el0_svc+0x8/0xc [ 708.739019] Freed by task 17: [ 708.774597] __kasan_slab_free+0x114/0x228 [ 708.823736] kasan_slab_free+0x10/0x18 [ 708.868703] kfree+0x100/0x3d8 [ 708.905320] skb_free_head+0x7c/0x98 [ 708.948204] skb_release_data+0x320/0x490 [ 708.996301] pskb_expand_head+0x60c/0x970 [ 709.044399] __iptunnel_pull_header+0x3b8/0x5d0 [ 709.098770] ipip6_rcv+0x41c/0x16e0 [sit] [ 709.146873] tunnel64_rcv+0xd4/0x200 [tunnel4] [ 709.200195] ip_local_deliver_finish+0x3b8/0x988 [ 709.255596] ip_local_deliver+0x144/0x470 [ 709.303692] ip_rcv_finish+0x43c/0x14b0 [ 709.349705] ip_rcv+0x628/0x1138 [ 709.388413] __netif_receive_skb_core+0x1670/0x2600 [ 709.446943] __netif_receive_skb+0x28/0x190 [ 709.497120] process_backlog+0x1d0/0x610 [ 709.544169] net_rx_action+0x37c/0xf68 [ 709.589131] __do_softirq+0x288/0x1018 [ 709.651938] The buggy address belongs to the object at ffffe01b6bd85580 which belongs to the cache kmalloc-1024 of size 1024 [ 709.804356] The buggy address is located 117 bytes inside of 1024-byte region [ffffe01b6bd85580, ffffe01b6bd85980) [ 709.946340] The buggy address belongs to the page: [ 710.003824] page:ffff7ff806daf600 count:1 mapcount:0 mapping:ffffe01c4001f600 index:0x0 [ 710.099914] flags: 0xfffff8000000100(slab) [ 710.149059] raw: 0fffff8000000100 dead000000000100 dead000000000200 ffffe01c4001f600 [ 710.242011] raw: 0000000000000000 0000000000380038 00000001ffffffff 0000000000000000 [ 710.334966] page dumped because: kasan: bad access detected Fix it resetting iph pointer after iptunnel_pull_header Fixes: a09a4c8dd1ec ("tunnels: Remove encapsulation offloads on decap") Tested-by: Jianlin Shi Signed-off-by: Lorenzo Bianconi Signed-off-by: David S. Miller --- net/ipv6/sit.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c index 07e21a82ce4c..b2109b74857d 100644 --- a/net/ipv6/sit.c +++ b/net/ipv6/sit.c @@ -669,6 +669,10 @@ static int ipip6_rcv(struct sk_buff *skb) !net_eq(tunnel->net, dev_net(tunnel->dev)))) goto out; + /* skb can be uncloned in iptunnel_pull_header, so + * old iph is no longer valid + */ + iph = (const struct iphdr *)skb_mac_header(skb); err = IP_ECN_decapsulate(iph, skb); if (unlikely(err)) { if (log_ecn_error) -- cgit v1.2.3 From bbd669a868bba591ffd38b7bc75a7b361bb54b04 Mon Sep 17 00:00:00 2001 From: Thomas Falcon Date: Thu, 4 Apr 2019 18:58:26 -0500 Subject: ibmvnic: Fix completion structure initialization Fix device initialization completion handling for vNIC adapters. Initialize the completion structure on probe and reinitialize when needed. This also fixes a race condition during kdump where the driver can attempt to access the completion struct before it is initialized: Unable to handle kernel paging request for data at address 0x00000000 Faulting instruction address: 0xc0000000081acbe0 Oops: Kernel access of bad area, sig: 11 [#1] LE SMP NR_CPUS=2048 NUMA pSeries Modules linked in: ibmvnic(+) ibmveth sunrpc overlay squashfs loop CPU: 19 PID: 301 Comm: systemd-udevd Not tainted 4.18.0-64.el8.ppc64le #1 NIP: c0000000081acbe0 LR: c0000000081ad964 CTR: c0000000081ad900 REGS: c000000027f3f990 TRAP: 0300 Not tainted (4.18.0-64.el8.ppc64le) MSR: 800000010280b033 CR: 28228288 XER: 00000006 CFAR: c000000008008934 DAR: 0000000000000000 DSISR: 40000000 IRQMASK: 1 GPR00: c0000000081ad964 c000000027f3fc10 c0000000095b5800 c0000000221b4e58 GPR04: 0000000000000003 0000000000000001 000049a086918581 00000000000000d4 GPR08: 0000000000000007 0000000000000000 ffffffffffffffe8 d0000000014dde28 GPR12: c0000000081ad900 c000000009a00c00 0000000000000001 0000000000000100 GPR16: 0000000000000038 0000000000000007 c0000000095e2230 0000000000000006 GPR20: 0000000000400140 0000000000000001 c00000000910c880 0000000000000000 GPR24: 0000000000000000 0000000000000006 0000000000000000 0000000000000003 GPR28: 0000000000000001 0000000000000001 c0000000221b4e60 c0000000221b4e58 NIP [c0000000081acbe0] __wake_up_locked+0x50/0x100 LR [c0000000081ad964] complete+0x64/0xa0 Call Trace: [c000000027f3fc10] [c000000027f3fc60] 0xc000000027f3fc60 (unreliable) [c000000027f3fc60] [c0000000081ad964] complete+0x64/0xa0 [c000000027f3fca0] [d0000000014dad58] ibmvnic_handle_crq+0xce0/0x1160 [ibmvnic] [c000000027f3fd50] [d0000000014db270] ibmvnic_tasklet+0x98/0x130 [ibmvnic] [c000000027f3fda0] [c00000000813f334] tasklet_action_common.isra.3+0xc4/0x1a0 [c000000027f3fe00] [c000000008cd13f4] __do_softirq+0x164/0x400 [c000000027f3fef0] [c00000000813ed64] irq_exit+0x184/0x1c0 [c000000027f3ff20] [c0000000080188e8] __do_irq+0xb8/0x210 [c000000027f3ff90] [c00000000802d0a4] call_do_irq+0x14/0x24 [c000000026a5b010] [c000000008018adc] do_IRQ+0x9c/0x130 [c000000026a5b060] [c000000008008ce4] hardware_interrupt_common+0x114/0x120 Signed-off-by: Thomas Falcon Signed-off-by: David S. Miller --- drivers/net/ethernet/ibm/ibmvnic.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c index 5ecbb1adcf3b..51cfe95f3e24 100644 --- a/drivers/net/ethernet/ibm/ibmvnic.c +++ b/drivers/net/ethernet/ibm/ibmvnic.c @@ -1885,6 +1885,7 @@ static int do_hard_reset(struct ibmvnic_adapter *adapter, */ adapter->state = VNIC_PROBED; + reinit_completion(&adapter->init_done); rc = init_crq_queue(adapter); if (rc) { netdev_err(adapter->netdev, @@ -4625,7 +4626,7 @@ static int ibmvnic_reset_init(struct ibmvnic_adapter *adapter) old_num_rx_queues = adapter->req_rx_queues; old_num_tx_queues = adapter->req_tx_queues; - init_completion(&adapter->init_done); + reinit_completion(&adapter->init_done); adapter->init_done_rc = 0; ibmvnic_send_crq_init(adapter); if (!wait_for_completion_timeout(&adapter->init_done, timeout)) { @@ -4680,7 +4681,6 @@ static int ibmvnic_init(struct ibmvnic_adapter *adapter) adapter->from_passive_init = false; - init_completion(&adapter->init_done); adapter->init_done_rc = 0; ibmvnic_send_crq_init(adapter); if (!wait_for_completion_timeout(&adapter->init_done, timeout)) { @@ -4759,6 +4759,7 @@ static int ibmvnic_probe(struct vio_dev *dev, const struct vio_device_id *id) INIT_WORK(&adapter->ibmvnic_reset, __ibmvnic_reset); INIT_LIST_HEAD(&adapter->rwi_list); spin_lock_init(&adapter->rwi_lock); + init_completion(&adapter->init_done); adapter->resetting = false; adapter->mac_change_pending = false; -- cgit v1.2.3 From ecae26fae15abb7d433557afbd15467ce1c444f5 Mon Sep 17 00:00:00 2001 From: Max Filippov Date: Thu, 4 Apr 2019 18:42:05 -0700 Subject: xtensa: fix format string warning in init_pmd Use %lu instead of %zu to fix the following warning introduced with recent memblock refactoring: xtensa/mm/mmu.c:36:9: warning: format '%zu' expects argument of type 'size_t', but argument 3 has type 'long unsigned int Signed-off-by: Max Filippov --- arch/xtensa/mm/mmu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/xtensa/mm/mmu.c b/arch/xtensa/mm/mmu.c index 2fb7d1172228..03678c4afc39 100644 --- a/arch/xtensa/mm/mmu.c +++ b/arch/xtensa/mm/mmu.c @@ -33,7 +33,7 @@ static void * __init init_pmd(unsigned long vaddr, unsigned long n_pages) pte = memblock_alloc_low(n_pages * sizeof(pte_t), PAGE_SIZE); if (!pte) - panic("%s: Failed to allocate %zu bytes align=%lx\n", + panic("%s: Failed to allocate %lu bytes align=%lx\n", __func__, n_pages * sizeof(pte_t), PAGE_SIZE); for (i = 0; i < n_pages; ++i) -- cgit v1.2.3 From 47c4cc08cb5b34e93ab337b924c5ede77ca3c936 Mon Sep 17 00:00:00 2001 From: Charles Keepax Date: Thu, 4 Apr 2019 17:27:20 +0100 Subject: ASoC: cs35l35: Disable regulators on driver removal The chips main power supplies VA and VP are enabled during probe but then never disabled, this will cause warnings from the regulator framework on driver removal. Fix this by adding a remove callback and disabling the supplies, whilst doing so follow best practice and put the chip back into reset as well. Signed-off-by: Charles Keepax Signed-off-by: Mark Brown --- sound/soc/codecs/cs35l35.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/sound/soc/codecs/cs35l35.c b/sound/soc/codecs/cs35l35.c index 9f4a59871cee..c71696146c5e 100644 --- a/sound/soc/codecs/cs35l35.c +++ b/sound/soc/codecs/cs35l35.c @@ -1635,6 +1635,16 @@ err: return ret; } +static int cs35l35_i2c_remove(struct i2c_client *i2c_client) +{ + struct cs35l35_private *cs35l35 = i2c_get_clientdata(i2c_client); + + regulator_bulk_disable(cs35l35->num_supplies, cs35l35->supplies); + gpiod_set_value_cansleep(cs35l35->reset_gpio, 0); + + return 0; +} + static const struct of_device_id cs35l35_of_match[] = { {.compatible = "cirrus,cs35l35"}, {}, @@ -1655,6 +1665,7 @@ static struct i2c_driver cs35l35_i2c_driver = { }, .id_table = cs35l35_id, .probe = cs35l35_i2c_probe, + .remove = cs35l35_i2c_remove, }; module_i2c_driver(cs35l35_i2c_driver); -- cgit v1.2.3 From 2e05ddd2c9f8000751d52fcf35b8318da46026bc Mon Sep 17 00:00:00 2001 From: Ranjani Sridharan Date: Thu, 4 Apr 2019 17:30:39 -0700 Subject: ASoC: intel: skylake: add remove() callback for component driver Topology is not unloaded in the core during unregister_component() anymore. So, add the remove() callback that will unload the topology. Signed-off-by: Ranjani Sridharan Signed-off-by: Mark Brown --- sound/soc/intel/skylake/skl-pcm.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/sound/soc/intel/skylake/skl-pcm.c b/sound/soc/intel/skylake/skl-pcm.c index 56099db8f86d..57031b6d4d45 100644 --- a/sound/soc/intel/skylake/skl-pcm.c +++ b/sound/soc/intel/skylake/skl-pcm.c @@ -1462,9 +1462,16 @@ static int skl_platform_soc_probe(struct snd_soc_component *component) return 0; } +static void skl_pcm_remove(struct snd_soc_component *component) +{ + /* remove topology */ + snd_soc_tplg_component_remove(component, SND_SOC_TPLG_INDEX_ALL); +} + static const struct snd_soc_component_driver skl_component = { .name = "pcm", .probe = skl_platform_soc_probe, + .remove = skl_pcm_remove, .ops = &skl_platform_ops, .pcm_new = skl_pcm_new, .pcm_free = skl_pcm_free, -- cgit v1.2.3 From ad94dc3a7eb5fa6ff469dbcf401c44b14ad50595 Mon Sep 17 00:00:00 2001 From: Andrea Righi Date: Wed, 3 Apr 2019 07:26:36 +0200 Subject: xen: use struct_size() helper in kzalloc() struct privcmd_buf_vma_private has a zero-sized array at the end (pages), use the new struct_size() helper to determine the proper allocation size and avoid potential type mistakes. Signed-off-by: Andrea Righi Reviewed-by: Juergen Gross Signed-off-by: Juergen Gross --- drivers/xen/privcmd-buf.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/xen/privcmd-buf.c b/drivers/xen/privcmd-buf.c index de01a6d0059d..a1c61e351d3f 100644 --- a/drivers/xen/privcmd-buf.c +++ b/drivers/xen/privcmd-buf.c @@ -140,8 +140,7 @@ static int privcmd_buf_mmap(struct file *file, struct vm_area_struct *vma) if (!(vma->vm_flags & VM_SHARED)) return -EINVAL; - vma_priv = kzalloc(sizeof(*vma_priv) + count * sizeof(void *), - GFP_KERNEL); + vma_priv = kzalloc(struct_size(vma_priv, pages, count), GFP_KERNEL); if (!vma_priv) return -ENOMEM; -- cgit v1.2.3 From 42d8644bd77dd2d747e004e367cb0c895a606f39 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Thu, 4 Apr 2019 18:12:17 +0300 Subject: xen: Prevent buffer overflow in privcmd ioctl The "call" variable comes from the user in privcmd_ioctl_hypercall(). It's an offset into the hypercall_page[] which has (PAGE_SIZE / 32) elements. We need to put an upper bound on it to prevent an out of bounds access. Cc: stable@vger.kernel.org Fixes: 1246ae0bb992 ("xen: add variable hypercall caller") Signed-off-by: Dan Carpenter Reviewed-by: Boris Ostrovsky Signed-off-by: Juergen Gross --- arch/x86/include/asm/xen/hypercall.h | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/x86/include/asm/xen/hypercall.h b/arch/x86/include/asm/xen/hypercall.h index de6f0d59a24f..2863c2026655 100644 --- a/arch/x86/include/asm/xen/hypercall.h +++ b/arch/x86/include/asm/xen/hypercall.h @@ -206,6 +206,9 @@ xen_single_call(unsigned int call, __HYPERCALL_DECLS; __HYPERCALL_5ARG(a1, a2, a3, a4, a5); + if (call >= PAGE_SIZE / sizeof(hypercall_page[0])) + return -EINVAL; + asm volatile(CALL_NOSPEC : __HYPERCALL_5PARAM : [thunk_target] "a" (&hypercall_page[call]) -- cgit v1.2.3 From 3df1af984b76bc50cdbedbdd69d3f69192269cfe Mon Sep 17 00:00:00 2001 From: Neil Armstrong Date: Thu, 4 Apr 2019 16:43:42 +0200 Subject: Revert "Documentation/gpu/meson: Remove link to meson_canvas.c" This reverts commit a3f98bb22cbfaaf67717e156f79e2bfeb42d4cac. Patch "Documentation/gpu/meson: Remove link to meson_canvas.c" was incorrectly applied on the wrong branch not containing the fixed commit 2bf6b5b0e374 ("drm/meson: exclusively use the canvas provider module") Acked-by: Sean Paul Signed-off-by: Neil Armstrong Signed-off-by: Maxime Ripard Link: https://patchwork.freedesktop.org/patch/msgid/20190404144342.15238-1-narmstrong@baylibre.com --- Documentation/gpu/meson.rst | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/Documentation/gpu/meson.rst b/Documentation/gpu/meson.rst index b9e2f9aa3bd8..479f6f51a13b 100644 --- a/Documentation/gpu/meson.rst +++ b/Documentation/gpu/meson.rst @@ -42,6 +42,12 @@ Video Encoder .. kernel-doc:: drivers/gpu/drm/meson/meson_venc.c :doc: Video Encoder +Video Canvas Management +======================= + +.. kernel-doc:: drivers/gpu/drm/meson/meson_canvas.c + :doc: Canvas + Video Clocks ============ -- cgit v1.2.3 From cd9063757a227cf31ebf5391ccda2bf583b0806e Mon Sep 17 00:00:00 2001 From: Jernej Skrabec Date: Sun, 24 Mar 2019 20:06:09 +0100 Subject: drm/sun4i: DW HDMI: Lower max. supported rate for H6 Currently resolutions with pixel clock higher than 340 MHz don't work with H6 HDMI controller. They just produce a blank screen. Limit maximum pixel clock rate to 340 MHz until scrambling is supported. Cc: stable@vger.kernel.org # 5.0 Fixes: 40bb9d3147b2 ("drm/sun4i: Add support for H6 DW HDMI controller") Signed-off-by: Jernej Skrabec Signed-off-by: Maxime Ripard Link: https://patchwork.freedesktop.org/patch/msgid/20190324190609.32721-1-jernej.skrabec@siol.net --- drivers/gpu/drm/sun4i/sun8i_dw_hdmi.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/sun4i/sun8i_dw_hdmi.c b/drivers/gpu/drm/sun4i/sun8i_dw_hdmi.c index dc47720c99ba..39d8509d96a0 100644 --- a/drivers/gpu/drm/sun4i/sun8i_dw_hdmi.c +++ b/drivers/gpu/drm/sun4i/sun8i_dw_hdmi.c @@ -48,8 +48,13 @@ static enum drm_mode_status sun8i_dw_hdmi_mode_valid_h6(struct drm_connector *connector, const struct drm_display_mode *mode) { - /* This is max for HDMI 2.0b (4K@60Hz) */ - if (mode->clock > 594000) + /* + * Controller support maximum of 594 MHz, which correlates to + * 4K@60Hz 4:4:4 or RGB. However, for frequencies greater than + * 340 MHz scrambling has to be enabled. Because scrambling is + * not yet implemented, just limit to 340 MHz for now. + */ + if (mode->clock > 340000) return MODE_CLOCK_HIGH; return MODE_OK; -- cgit v1.2.3 From 4fa5ecda2bf96be7464eb406df8aba9d89260227 Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Thu, 4 Apr 2019 12:17:35 -0500 Subject: objtool: Add rewind_stack_do_exit() to the noreturn list This fixes the following warning seen on GCC 7.3: arch/x86/kernel/dumpstack.o: warning: objtool: oops_end() falls through to next function show_regs() Reported-by: kbuild test robot Signed-off-by: Josh Poimboeuf Signed-off-by: Thomas Gleixner Cc: Peter Zijlstra Link: https://lkml.kernel.org/r/3418ebf5a5a9f6ed7e80954c741c0b904b67b5dc.1554398240.git.jpoimboe@redhat.com --- tools/objtool/check.c | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/objtool/check.c b/tools/objtool/check.c index 5dde107083c6..479196aeb409 100644 --- a/tools/objtool/check.c +++ b/tools/objtool/check.c @@ -165,6 +165,7 @@ static int __dead_end_function(struct objtool_file *file, struct symbol *func, "fortify_panic", "usercopy_abort", "machine_real_restart", + "rewind_stack_do_exit", }; if (func->bind == STB_WEAK) -- cgit v1.2.3 From 212ac181c158c09038c474ba68068be49caecebb Mon Sep 17 00:00:00 2001 From: Zubin Mithra Date: Thu, 4 Apr 2019 14:33:55 -0700 Subject: ALSA: seq: Fix OOB-reads from strlcpy When ioctl calls are made with non-null-terminated userspace strings, strlcpy causes an OOB-read from within strlen. Fix by changing to use strscpy instead. Signed-off-by: Zubin Mithra Reviewed-by: Guenter Roeck Cc: Signed-off-by: Takashi Iwai --- sound/core/seq/seq_clientmgr.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/sound/core/seq/seq_clientmgr.c b/sound/core/seq/seq_clientmgr.c index 7d4640d1fe9f..38e7deab6384 100644 --- a/sound/core/seq/seq_clientmgr.c +++ b/sound/core/seq/seq_clientmgr.c @@ -1252,7 +1252,7 @@ static int snd_seq_ioctl_set_client_info(struct snd_seq_client *client, /* fill the info fields */ if (client_info->name[0]) - strlcpy(client->name, client_info->name, sizeof(client->name)); + strscpy(client->name, client_info->name, sizeof(client->name)); client->filter = client_info->filter; client->event_lost = client_info->event_lost; @@ -1530,7 +1530,7 @@ static int snd_seq_ioctl_create_queue(struct snd_seq_client *client, void *arg) /* set queue name */ if (!info->name[0]) snprintf(info->name, sizeof(info->name), "Queue-%d", q->queue); - strlcpy(q->name, info->name, sizeof(q->name)); + strscpy(q->name, info->name, sizeof(q->name)); snd_use_lock_free(&q->use_lock); return 0; @@ -1592,7 +1592,7 @@ static int snd_seq_ioctl_set_queue_info(struct snd_seq_client *client, queuefree(q); return -EPERM; } - strlcpy(q->name, info->name, sizeof(q->name)); + strscpy(q->name, info->name, sizeof(q->name)); queuefree(q); return 0; -- cgit v1.2.3 From 95c5c618fa4349b2ba13aebeabf71911208dfc5e Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Fri, 29 Mar 2019 09:21:37 +0300 Subject: irqchip/irq-ls1x: Missing error code in ls1x_intc_of_init() Currently, when irq_domain_add_linear() fails, the error code does not get set so it returns zero which is wrong. Fix it by setting the appropriate error code. Fixes: 9e543e22e204 ("irqchip: Add driver for Loongson-1 interrupt controller") Signed-off-by: Dan Carpenter Signed-off-by: Thomas Gleixner Reviewed-by: Mukesh Ojha Cc: Marc Zyngier Cc: Jiaxun Yang Cc: Jason Cooper Cc: kernel-janitors@vger.kernel.org Link: https://lkml.kernel.org/r/20190329062136.GQ32613@kadam --- drivers/irqchip/irq-ls1x.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/irqchip/irq-ls1x.c b/drivers/irqchip/irq-ls1x.c index 86b72fbd3b45..353111a10413 100644 --- a/drivers/irqchip/irq-ls1x.c +++ b/drivers/irqchip/irq-ls1x.c @@ -130,6 +130,7 @@ static int __init ls1x_intc_of_init(struct device_node *node, NULL); if (!priv->domain) { pr_err("ls1x-irq: cannot add IRQ domain\n"); + err = -ENOMEM; goto out_iounmap; } -- cgit v1.2.3 From e8458e7afa855317b14915d7b86ab3caceea7eb6 Mon Sep 17 00:00:00 2001 From: Kefeng Wang Date: Thu, 4 Apr 2019 15:45:12 +0800 Subject: genirq: Initialize request_mutex if CONFIG_SPARSE_IRQ=n When CONFIG_SPARSE_IRQ is disable, the request_mutex in struct irq_desc is not initialized which causes malfunction. Fixes: 9114014cf4e6 ("genirq: Add mutex to irq desc to serialize request/free_irq()") Signed-off-by: Kefeng Wang Signed-off-by: Thomas Gleixner Reviewed-by: Mukesh Ojha Cc: Marc Zyngier Cc: Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/20190404074512.145533-1-wangkefeng.wang@huawei.com --- kernel/irq/irqdesc.c | 1 + 1 file changed, 1 insertion(+) diff --git a/kernel/irq/irqdesc.c b/kernel/irq/irqdesc.c index 13539e12cd80..9f8a709337cf 100644 --- a/kernel/irq/irqdesc.c +++ b/kernel/irq/irqdesc.c @@ -558,6 +558,7 @@ int __init early_irq_init(void) alloc_masks(&desc[i], node); raw_spin_lock_init(&desc[i].lock); lockdep_set_class(&desc[i].lock, &irq_desc_lock_class); + mutex_init(&desc[i].request_mutex); desc_set_defaults(i, &desc[i], node, NULL, NULL); } return arch_early_irq_init(); -- cgit v1.2.3 From b35f549df1d7520d37ba1e6d4a8d4df6bd52d136 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Red Hat)" Date: Mon, 7 Nov 2016 16:26:37 -0500 Subject: syscalls: Remove start and number from syscall_get_arguments() args At Linux Plumbers, Andy Lutomirski approached me and pointed out that the function call syscall_get_arguments() implemented in x86 was horribly written and not optimized for the standard case of passing in 0 and 6 for the starting index and the number of system calls to get. When looking at all the users of this function, I discovered that all instances pass in only 0 and 6 for these arguments. Instead of having this function handle different cases that are never used, simply rewrite it to return the first 6 arguments of a system call. This should help out the performance of tracing system calls by ptrace, ftrace and perf. Link: http://lkml.kernel.org/r/20161107213233.754809394@goodmis.org Cc: Oleg Nesterov Cc: Kees Cook Cc: Andy Lutomirski Cc: Dominik Brodowski Cc: Dave Martin Cc: "Dmitry V. Levin" Cc: x86@kernel.org Cc: linux-snps-arc@lists.infradead.org Cc: linux-kernel@vger.kernel.org Cc: linux-arm-kernel@lists.infradead.org Cc: linux-c6x-dev@linux-c6x.org Cc: uclinux-h8-devel@lists.sourceforge.jp Cc: linux-hexagon@vger.kernel.org Cc: linux-ia64@vger.kernel.org Cc: linux-mips@vger.kernel.org Cc: nios2-dev@lists.rocketboards.org Cc: openrisc@lists.librecores.org Cc: linux-parisc@vger.kernel.org Cc: linuxppc-dev@lists.ozlabs.org Cc: linux-riscv@lists.infradead.org Cc: linux-s390@vger.kernel.org Cc: linux-sh@vger.kernel.org Cc: sparclinux@vger.kernel.org Cc: linux-um@lists.infradead.org Cc: linux-xtensa@linux-xtensa.org Cc: linux-arch@vger.kernel.org Acked-by: Paul Burton # MIPS parts Acked-by: Max Filippov # For xtensa changes Acked-by: Will Deacon # For the arm64 bits Reviewed-by: Thomas Gleixner # for x86 Reviewed-by: Dmitry V. Levin Reported-by: Andy Lutomirski Signed-off-by: Steven Rostedt (VMware) --- arch/arc/include/asm/syscall.h | 7 ++-- arch/arm/include/asm/syscall.h | 23 ++--------- arch/arm64/include/asm/syscall.h | 22 ++--------- arch/c6x/include/asm/syscall.h | 41 ++++---------------- arch/csky/include/asm/syscall.h | 14 ++----- arch/h8300/include/asm/syscall.h | 34 ++++------------ arch/hexagon/include/asm/syscall.h | 4 +- arch/ia64/include/asm/syscall.h | 5 +-- arch/microblaze/include/asm/syscall.h | 4 +- arch/mips/include/asm/syscall.h | 3 +- arch/mips/kernel/ptrace.c | 2 +- arch/nds32/include/asm/syscall.h | 33 +++------------- arch/nios2/include/asm/syscall.h | 42 ++++---------------- arch/openrisc/include/asm/syscall.h | 6 +-- arch/parisc/include/asm/syscall.h | 30 ++++---------- arch/powerpc/include/asm/syscall.h | 8 ++-- arch/riscv/include/asm/syscall.h | 13 ++----- arch/s390/include/asm/syscall.h | 17 +++----- arch/sh/include/asm/syscall_32.h | 26 +++---------- arch/sh/include/asm/syscall_64.h | 4 +- arch/sparc/include/asm/syscall.h | 4 +- arch/um/include/asm/syscall-generic.h | 39 +++---------------- arch/x86/include/asm/syscall.h | 73 ++++++++--------------------------- arch/xtensa/include/asm/syscall.h | 16 ++------ include/asm-generic/syscall.h | 11 ++---- include/trace/events/syscalls.h | 2 +- kernel/seccomp.c | 2 +- kernel/trace/trace_syscalls.c | 4 +- lib/syscall.c | 2 +- 29 files changed, 113 insertions(+), 378 deletions(-) diff --git a/arch/arc/include/asm/syscall.h b/arch/arc/include/asm/syscall.h index 29de09804306..c7a4201ed62b 100644 --- a/arch/arc/include/asm/syscall.h +++ b/arch/arc/include/asm/syscall.h @@ -55,12 +55,11 @@ syscall_set_return_value(struct task_struct *task, struct pt_regs *regs, */ static inline void syscall_get_arguments(struct task_struct *task, struct pt_regs *regs, - unsigned int i, unsigned int n, unsigned long *args) + unsigned long *args) { unsigned long *inside_ptregs = &(regs->r0); - inside_ptregs -= i; - - BUG_ON((i + n) > 6); + unsigned int n = 6; + unsigned int i = 0; while (n--) { args[i++] = (*inside_ptregs); diff --git a/arch/arm/include/asm/syscall.h b/arch/arm/include/asm/syscall.h index 06dea6bce293..db969a2972ae 100644 --- a/arch/arm/include/asm/syscall.h +++ b/arch/arm/include/asm/syscall.h @@ -55,29 +55,12 @@ static inline void syscall_set_return_value(struct task_struct *task, static inline void syscall_get_arguments(struct task_struct *task, struct pt_regs *regs, - unsigned int i, unsigned int n, unsigned long *args) { - if (n == 0) - return; - - if (i + n > SYSCALL_MAX_ARGS) { - unsigned long *args_bad = args + SYSCALL_MAX_ARGS - i; - unsigned int n_bad = n + i - SYSCALL_MAX_ARGS; - pr_warn("%s called with max args %d, handling only %d\n", - __func__, i + n, SYSCALL_MAX_ARGS); - memset(args_bad, 0, n_bad * sizeof(args[0])); - n = SYSCALL_MAX_ARGS - i; - } - - if (i == 0) { - args[0] = regs->ARM_ORIG_r0; - args++; - i++; - n--; - } + args[0] = regs->ARM_ORIG_r0; + args++; - memcpy(args, ®s->ARM_r0 + i, n * sizeof(args[0])); + memcpy(args, ®s->ARM_r0 + 1, 5 * sizeof(args[0])); } static inline void syscall_set_arguments(struct task_struct *task, diff --git a/arch/arm64/include/asm/syscall.h b/arch/arm64/include/asm/syscall.h index ad8be16a39c9..55b2dab21023 100644 --- a/arch/arm64/include/asm/syscall.h +++ b/arch/arm64/include/asm/syscall.h @@ -65,28 +65,12 @@ static inline void syscall_set_return_value(struct task_struct *task, static inline void syscall_get_arguments(struct task_struct *task, struct pt_regs *regs, - unsigned int i, unsigned int n, unsigned long *args) { - if (n == 0) - return; - - if (i + n > SYSCALL_MAX_ARGS) { - unsigned long *args_bad = args + SYSCALL_MAX_ARGS - i; - unsigned int n_bad = n + i - SYSCALL_MAX_ARGS; - pr_warning("%s called with max args %d, handling only %d\n", - __func__, i + n, SYSCALL_MAX_ARGS); - memset(args_bad, 0, n_bad * sizeof(args[0])); - } - - if (i == 0) { - args[0] = regs->orig_x0; - args++; - i++; - n--; - } + args[0] = regs->orig_x0; + args++; - memcpy(args, ®s->regs[i], n * sizeof(args[0])); + memcpy(args, ®s->regs[1], 5 * sizeof(args[0])); } static inline void syscall_set_arguments(struct task_struct *task, diff --git a/arch/c6x/include/asm/syscall.h b/arch/c6x/include/asm/syscall.h index ae2be315ee9c..06db3251926b 100644 --- a/arch/c6x/include/asm/syscall.h +++ b/arch/c6x/include/asm/syscall.h @@ -46,40 +46,15 @@ static inline void syscall_set_return_value(struct task_struct *task, } static inline void syscall_get_arguments(struct task_struct *task, - struct pt_regs *regs, unsigned int i, - unsigned int n, unsigned long *args) + struct pt_regs *regs, + unsigned long *args) { - switch (i) { - case 0: - if (!n--) - break; - *args++ = regs->a4; - case 1: - if (!n--) - break; - *args++ = regs->b4; - case 2: - if (!n--) - break; - *args++ = regs->a6; - case 3: - if (!n--) - break; - *args++ = regs->b6; - case 4: - if (!n--) - break; - *args++ = regs->a8; - case 5: - if (!n--) - break; - *args++ = regs->b8; - case 6: - if (!n--) - break; - default: - BUG(); - } + *args++ = regs->a4; + *args++ = regs->b4; + *args++ = regs->a6; + *args++ = regs->b6; + *args++ = regs->a8; + *args = regs->b8; } static inline void syscall_set_arguments(struct task_struct *task, diff --git a/arch/csky/include/asm/syscall.h b/arch/csky/include/asm/syscall.h index 9a9cd81e66c1..c691fe92edc6 100644 --- a/arch/csky/include/asm/syscall.h +++ b/arch/csky/include/asm/syscall.h @@ -43,17 +43,11 @@ syscall_set_return_value(struct task_struct *task, struct pt_regs *regs, static inline void syscall_get_arguments(struct task_struct *task, struct pt_regs *regs, - unsigned int i, unsigned int n, unsigned long *args) + unsigned long *args) { - BUG_ON(i + n > 6); - if (i == 0) { - args[0] = regs->orig_a0; - args++; - n--; - } else { - i--; - } - memcpy(args, ®s->a1 + i, n * sizeof(args[0])); + args[0] = regs->orig_a0; + args++; + memcpy(args, ®s->a1, 5 * sizeof(args[0])); } static inline void diff --git a/arch/h8300/include/asm/syscall.h b/arch/h8300/include/asm/syscall.h index 924990401237..ddd483c6ca95 100644 --- a/arch/h8300/include/asm/syscall.h +++ b/arch/h8300/include/asm/syscall.h @@ -17,34 +17,14 @@ syscall_get_nr(struct task_struct *task, struct pt_regs *regs) static inline void syscall_get_arguments(struct task_struct *task, struct pt_regs *regs, - unsigned int i, unsigned int n, unsigned long *args) + unsigned long *args) { - BUG_ON(i + n > 6); - - while (n > 0) { - switch (i) { - case 0: - *args++ = regs->er1; - break; - case 1: - *args++ = regs->er2; - break; - case 2: - *args++ = regs->er3; - break; - case 3: - *args++ = regs->er4; - break; - case 4: - *args++ = regs->er5; - break; - case 5: - *args++ = regs->er6; - break; - } - i++; - n--; - } + *args++ = regs->er1; + *args++ = regs->er2; + *args++ = regs->er3; + *args++ = regs->er4; + *args++ = regs->er5; + *args = regs->er6; } diff --git a/arch/hexagon/include/asm/syscall.h b/arch/hexagon/include/asm/syscall.h index 4af9c7b6f13a..ae3a1e24fabd 100644 --- a/arch/hexagon/include/asm/syscall.h +++ b/arch/hexagon/include/asm/syscall.h @@ -37,10 +37,8 @@ static inline long syscall_get_nr(struct task_struct *task, static inline void syscall_get_arguments(struct task_struct *task, struct pt_regs *regs, - unsigned int i, unsigned int n, unsigned long *args) { - BUG_ON(i + n > 6); - memcpy(args, &(®s->r00)[i], n * sizeof(args[0])); + memcpy(args, &(®s->r00)[0], 6 * sizeof(args[0])); } #endif diff --git a/arch/ia64/include/asm/syscall.h b/arch/ia64/include/asm/syscall.h index 1d0b875fec44..8204c1ff70ce 100644 --- a/arch/ia64/include/asm/syscall.h +++ b/arch/ia64/include/asm/syscall.h @@ -63,12 +63,9 @@ extern void ia64_syscall_get_set_arguments(struct task_struct *task, unsigned long *args, int rw); static inline void syscall_get_arguments(struct task_struct *task, struct pt_regs *regs, - unsigned int i, unsigned int n, unsigned long *args) { - BUG_ON(i + n > 6); - - ia64_syscall_get_set_arguments(task, regs, i, n, args, 0); + ia64_syscall_get_set_arguments(task, regs, 0, 6, args, 0); } static inline void syscall_set_arguments(struct task_struct *task, diff --git a/arch/microblaze/include/asm/syscall.h b/arch/microblaze/include/asm/syscall.h index 220decd605a4..4b23e44e5c3c 100644 --- a/arch/microblaze/include/asm/syscall.h +++ b/arch/microblaze/include/asm/syscall.h @@ -82,9 +82,11 @@ static inline void microblaze_set_syscall_arg(struct pt_regs *regs, static inline void syscall_get_arguments(struct task_struct *task, struct pt_regs *regs, - unsigned int i, unsigned int n, unsigned long *args) { + unsigned int i = 0; + unsigned int n = 6; + while (n--) *args++ = microblaze_get_syscall_arg(regs, i++); } diff --git a/arch/mips/include/asm/syscall.h b/arch/mips/include/asm/syscall.h index 6cf8ffb5367e..a2b4748655df 100644 --- a/arch/mips/include/asm/syscall.h +++ b/arch/mips/include/asm/syscall.h @@ -116,9 +116,10 @@ static inline void syscall_set_return_value(struct task_struct *task, static inline void syscall_get_arguments(struct task_struct *task, struct pt_regs *regs, - unsigned int i, unsigned int n, unsigned long *args) { + unsigned int i = 0; + unsigned int n = 6; int ret; /* O32 ABI syscall() */ diff --git a/arch/mips/kernel/ptrace.c b/arch/mips/kernel/ptrace.c index 0057c910bc2f..3a62f80958e1 100644 --- a/arch/mips/kernel/ptrace.c +++ b/arch/mips/kernel/ptrace.c @@ -1419,7 +1419,7 @@ asmlinkage long syscall_trace_enter(struct pt_regs *regs, long syscall) sd.nr = syscall; sd.arch = syscall_get_arch(); - syscall_get_arguments(current, regs, 0, 6, args); + syscall_get_arguments(current, regs, args); for (i = 0; i < 6; i++) sd.args[i] = args[i]; sd.instruction_pointer = KSTK_EIP(current); diff --git a/arch/nds32/include/asm/syscall.h b/arch/nds32/include/asm/syscall.h index f7e5e86765fe..89a6ec8731d8 100644 --- a/arch/nds32/include/asm/syscall.h +++ b/arch/nds32/include/asm/syscall.h @@ -108,42 +108,21 @@ void syscall_set_return_value(struct task_struct *task, struct pt_regs *regs, * syscall_get_arguments - extract system call parameter values * @task: task of interest, must be blocked * @regs: task_pt_regs() of @task - * @i: argument index [0,5] - * @n: number of arguments; n+i must be [1,6]. * @args: array filled with argument values * - * Fetches @n arguments to the system call starting with the @i'th argument - * (from 0 through 5). Argument @i is stored in @args[0], and so on. - * An arch inline version is probably optimal when @i and @n are constants. + * Fetches 6 arguments to the system call (from 0 through 5). The first + * argument is stored in @args[0], and so on. * * It's only valid to call this when @task is stopped for tracing on * entry to a system call, due to %TIF_SYSCALL_TRACE or %TIF_SYSCALL_AUDIT. - * It's invalid to call this with @i + @n > 6; we only support system calls - * taking up to 6 arguments. */ #define SYSCALL_MAX_ARGS 6 void syscall_get_arguments(struct task_struct *task, struct pt_regs *regs, - unsigned int i, unsigned int n, unsigned long *args) + unsigned long *args) { - if (n == 0) - return; - if (i + n > SYSCALL_MAX_ARGS) { - unsigned long *args_bad = args + SYSCALL_MAX_ARGS - i; - unsigned int n_bad = n + i - SYSCALL_MAX_ARGS; - pr_warning("%s called with max args %d, handling only %d\n", - __func__, i + n, SYSCALL_MAX_ARGS); - memset(args_bad, 0, n_bad * sizeof(args[0])); - memset(args_bad, 0, n_bad * sizeof(args[0])); - } - - if (i == 0) { - args[0] = regs->orig_r0; - args++; - i++; - n--; - } - - memcpy(args, ®s->uregs[0] + i, n * sizeof(args[0])); + args[0] = regs->orig_r0; + args++; + memcpy(args, ®s->uregs[0] + 1, 5 * sizeof(args[0])); } /** diff --git a/arch/nios2/include/asm/syscall.h b/arch/nios2/include/asm/syscall.h index 9de220854c4a..792bd449d839 100644 --- a/arch/nios2/include/asm/syscall.h +++ b/arch/nios2/include/asm/syscall.h @@ -58,42 +58,14 @@ static inline void syscall_set_return_value(struct task_struct *task, } static inline void syscall_get_arguments(struct task_struct *task, - struct pt_regs *regs, unsigned int i, unsigned int n, - unsigned long *args) + struct pt_regs *regs, unsigned long *args) { - BUG_ON(i + n > 6); - - switch (i) { - case 0: - if (!n--) - break; - *args++ = regs->r4; - case 1: - if (!n--) - break; - *args++ = regs->r5; - case 2: - if (!n--) - break; - *args++ = regs->r6; - case 3: - if (!n--) - break; - *args++ = regs->r7; - case 4: - if (!n--) - break; - *args++ = regs->r8; - case 5: - if (!n--) - break; - *args++ = regs->r9; - case 6: - if (!n--) - break; - default: - BUG(); - } + *args++ = regs->r4; + *args++ = regs->r5; + *args++ = regs->r6; + *args++ = regs->r7; + *args++ = regs->r8; + *args = regs->r9; } static inline void syscall_set_arguments(struct task_struct *task, diff --git a/arch/openrisc/include/asm/syscall.h b/arch/openrisc/include/asm/syscall.h index 2db9f1cf0694..72607860cd55 100644 --- a/arch/openrisc/include/asm/syscall.h +++ b/arch/openrisc/include/asm/syscall.h @@ -56,11 +56,9 @@ syscall_set_return_value(struct task_struct *task, struct pt_regs *regs, static inline void syscall_get_arguments(struct task_struct *task, struct pt_regs *regs, - unsigned int i, unsigned int n, unsigned long *args) + unsigned long *args) { - BUG_ON(i + n > 6); - - memcpy(args, ®s->gpr[3 + i], n * sizeof(args[0])); + memcpy(args, ®s->gpr[3], 6 * sizeof(args[0])); } static inline void diff --git a/arch/parisc/include/asm/syscall.h b/arch/parisc/include/asm/syscall.h index 8bff1a58c97f..62a6d477fae0 100644 --- a/arch/parisc/include/asm/syscall.h +++ b/arch/parisc/include/asm/syscall.h @@ -18,29 +18,15 @@ static inline long syscall_get_nr(struct task_struct *tsk, } static inline void syscall_get_arguments(struct task_struct *tsk, - struct pt_regs *regs, unsigned int i, - unsigned int n, unsigned long *args) + struct pt_regs *regs, + unsigned long *args) { - BUG_ON(i); - - switch (n) { - case 6: - args[5] = regs->gr[21]; - case 5: - args[4] = regs->gr[22]; - case 4: - args[3] = regs->gr[23]; - case 3: - args[2] = regs->gr[24]; - case 2: - args[1] = regs->gr[25]; - case 1: - args[0] = regs->gr[26]; - case 0: - break; - default: - BUG(); - } + args[5] = regs->gr[21]; + args[4] = regs->gr[22]; + args[3] = regs->gr[23]; + args[2] = regs->gr[24]; + args[1] = regs->gr[25]; + args[0] = regs->gr[26]; } static inline long syscall_get_return_value(struct task_struct *task, diff --git a/arch/powerpc/include/asm/syscall.h b/arch/powerpc/include/asm/syscall.h index 1a0e7a8b1c81..5c9b9dc82b7e 100644 --- a/arch/powerpc/include/asm/syscall.h +++ b/arch/powerpc/include/asm/syscall.h @@ -65,22 +65,20 @@ static inline void syscall_set_return_value(struct task_struct *task, static inline void syscall_get_arguments(struct task_struct *task, struct pt_regs *regs, - unsigned int i, unsigned int n, unsigned long *args) { unsigned long val, mask = -1UL; - - BUG_ON(i + n > 6); + unsigned int n = 6; #ifdef CONFIG_COMPAT if (test_tsk_thread_flag(task, TIF_32BIT)) mask = 0xffffffff; #endif while (n--) { - if (n == 0 && i == 0) + if (n == 0) val = regs->orig_gpr3; else - val = regs->gpr[3 + i + n]; + val = regs->gpr[3 + n]; args[n] = val & mask; } diff --git a/arch/riscv/include/asm/syscall.h b/arch/riscv/include/asm/syscall.h index 6ea9e1804233..6adca1804be1 100644 --- a/arch/riscv/include/asm/syscall.h +++ b/arch/riscv/include/asm/syscall.h @@ -72,18 +72,11 @@ static inline void syscall_set_return_value(struct task_struct *task, static inline void syscall_get_arguments(struct task_struct *task, struct pt_regs *regs, - unsigned int i, unsigned int n, unsigned long *args) { - BUG_ON(i + n > 6); - if (i == 0) { - args[0] = regs->orig_a0; - args++; - n--; - } else { - i--; - } - memcpy(args, ®s->a1 + i, n * sizeof(args[0])); + args[0] = regs->orig_a0; + args++; + memcpy(args, ®s->a1, 5 * sizeof(args[0])); } static inline void syscall_set_arguments(struct task_struct *task, diff --git a/arch/s390/include/asm/syscall.h b/arch/s390/include/asm/syscall.h index 96f9a9151fde..ee0b1f6aa36d 100644 --- a/arch/s390/include/asm/syscall.h +++ b/arch/s390/include/asm/syscall.h @@ -56,27 +56,20 @@ static inline void syscall_set_return_value(struct task_struct *task, static inline void syscall_get_arguments(struct task_struct *task, struct pt_regs *regs, - unsigned int i, unsigned int n, unsigned long *args) { unsigned long mask = -1UL; + unsigned int n = 6; - /* - * No arguments for this syscall, there's nothing to do. - */ - if (!n) - return; - - BUG_ON(i + n > 6); #ifdef CONFIG_COMPAT if (test_tsk_thread_flag(task, TIF_31BIT)) mask = 0xffffffff; #endif while (n-- > 0) - if (i + n > 0) - args[n] = regs->gprs[2 + i + n] & mask; - if (i == 0) - args[0] = regs->orig_gpr2 & mask; + if (n > 0) + args[n] = regs->gprs[2 + n] & mask; + + args[0] = regs->orig_gpr2 & mask; } static inline void syscall_set_arguments(struct task_struct *task, diff --git a/arch/sh/include/asm/syscall_32.h b/arch/sh/include/asm/syscall_32.h index 6e118799831c..2bf1199a0595 100644 --- a/arch/sh/include/asm/syscall_32.h +++ b/arch/sh/include/asm/syscall_32.h @@ -48,30 +48,16 @@ static inline void syscall_set_return_value(struct task_struct *task, static inline void syscall_get_arguments(struct task_struct *task, struct pt_regs *regs, - unsigned int i, unsigned int n, unsigned long *args) { - /* - * Do this simply for now. If we need to start supporting - * fetching arguments from arbitrary indices, this will need some - * extra logic. Presently there are no in-tree users that depend - * on this behaviour. - */ - BUG_ON(i); /* Argument pattern is: R4, R5, R6, R7, R0, R1 */ - switch (n) { - case 6: args[5] = regs->regs[1]; - case 5: args[4] = regs->regs[0]; - case 4: args[3] = regs->regs[7]; - case 3: args[2] = regs->regs[6]; - case 2: args[1] = regs->regs[5]; - case 1: args[0] = regs->regs[4]; - case 0: - break; - default: - BUG(); - } + args[5] = regs->regs[1]; + args[4] = regs->regs[0]; + args[3] = regs->regs[7]; + args[2] = regs->regs[6]; + args[1] = regs->regs[5]; + args[0] = regs->regs[4]; } static inline void syscall_set_arguments(struct task_struct *task, diff --git a/arch/sh/include/asm/syscall_64.h b/arch/sh/include/asm/syscall_64.h index 43882580c7f9..4e8f6460c703 100644 --- a/arch/sh/include/asm/syscall_64.h +++ b/arch/sh/include/asm/syscall_64.h @@ -47,11 +47,9 @@ static inline void syscall_set_return_value(struct task_struct *task, static inline void syscall_get_arguments(struct task_struct *task, struct pt_regs *regs, - unsigned int i, unsigned int n, unsigned long *args) { - BUG_ON(i + n > 6); - memcpy(args, ®s->regs[2 + i], n * sizeof(args[0])); + memcpy(args, ®s->regs[2], 6 * sizeof(args[0])); } static inline void syscall_set_arguments(struct task_struct *task, diff --git a/arch/sparc/include/asm/syscall.h b/arch/sparc/include/asm/syscall.h index 053989e3f6a6..872dfee852d6 100644 --- a/arch/sparc/include/asm/syscall.h +++ b/arch/sparc/include/asm/syscall.h @@ -96,11 +96,11 @@ static inline void syscall_set_return_value(struct task_struct *task, static inline void syscall_get_arguments(struct task_struct *task, struct pt_regs *regs, - unsigned int i, unsigned int n, unsigned long *args) { int zero_extend = 0; unsigned int j; + unsigned int n = 6; #ifdef CONFIG_SPARC64 if (test_tsk_thread_flag(task, TIF_32BIT)) @@ -108,7 +108,7 @@ static inline void syscall_get_arguments(struct task_struct *task, #endif for (j = 0; j < n; j++) { - unsigned long val = regs->u_regs[UREG_I0 + i + j]; + unsigned long val = regs->u_regs[UREG_I0 + j]; if (zero_extend) args[j] = (u32) val; diff --git a/arch/um/include/asm/syscall-generic.h b/arch/um/include/asm/syscall-generic.h index 9fb9cf8cd39a..25d00acd1322 100644 --- a/arch/um/include/asm/syscall-generic.h +++ b/arch/um/include/asm/syscall-generic.h @@ -53,43 +53,16 @@ static inline void syscall_set_return_value(struct task_struct *task, static inline void syscall_get_arguments(struct task_struct *task, struct pt_regs *regs, - unsigned int i, unsigned int n, unsigned long *args) { const struct uml_pt_regs *r = ®s->regs; - switch (i) { - case 0: - if (!n--) - break; - *args++ = UPT_SYSCALL_ARG1(r); - case 1: - if (!n--) - break; - *args++ = UPT_SYSCALL_ARG2(r); - case 2: - if (!n--) - break; - *args++ = UPT_SYSCALL_ARG3(r); - case 3: - if (!n--) - break; - *args++ = UPT_SYSCALL_ARG4(r); - case 4: - if (!n--) - break; - *args++ = UPT_SYSCALL_ARG5(r); - case 5: - if (!n--) - break; - *args++ = UPT_SYSCALL_ARG6(r); - case 6: - if (!n--) - break; - default: - BUG(); - break; - } + *args++ = UPT_SYSCALL_ARG1(r); + *args++ = UPT_SYSCALL_ARG2(r); + *args++ = UPT_SYSCALL_ARG3(r); + *args++ = UPT_SYSCALL_ARG4(r); + *args++ = UPT_SYSCALL_ARG5(r); + *args = UPT_SYSCALL_ARG6(r); } static inline void syscall_set_arguments(struct task_struct *task, diff --git a/arch/x86/include/asm/syscall.h b/arch/x86/include/asm/syscall.h index d653139857af..8dbb5c379450 100644 --- a/arch/x86/include/asm/syscall.h +++ b/arch/x86/include/asm/syscall.h @@ -91,11 +91,9 @@ static inline void syscall_set_return_value(struct task_struct *task, static inline void syscall_get_arguments(struct task_struct *task, struct pt_regs *regs, - unsigned int i, unsigned int n, unsigned long *args) { - BUG_ON(i + n > 6); - memcpy(args, ®s->bx + i, n * sizeof(args[0])); + memcpy(args, ®s->bx, 6 * sizeof(args[0])); } static inline void syscall_set_arguments(struct task_struct *task, @@ -116,63 +114,26 @@ static inline int syscall_get_arch(void) static inline void syscall_get_arguments(struct task_struct *task, struct pt_regs *regs, - unsigned int i, unsigned int n, unsigned long *args) { # ifdef CONFIG_IA32_EMULATION - if (task->thread_info.status & TS_COMPAT) - switch (i) { - case 0: - if (!n--) break; - *args++ = regs->bx; - case 1: - if (!n--) break; - *args++ = regs->cx; - case 2: - if (!n--) break; - *args++ = regs->dx; - case 3: - if (!n--) break; - *args++ = regs->si; - case 4: - if (!n--) break; - *args++ = regs->di; - case 5: - if (!n--) break; - *args++ = regs->bp; - case 6: - if (!n--) break; - default: - BUG(); - break; - } - else + if (task->thread_info.status & TS_COMPAT) { + *args++ = regs->bx; + *args++ = regs->cx; + *args++ = regs->dx; + *args++ = regs->si; + *args++ = regs->di; + *args = regs->bp; + } else # endif - switch (i) { - case 0: - if (!n--) break; - *args++ = regs->di; - case 1: - if (!n--) break; - *args++ = regs->si; - case 2: - if (!n--) break; - *args++ = regs->dx; - case 3: - if (!n--) break; - *args++ = regs->r10; - case 4: - if (!n--) break; - *args++ = regs->r8; - case 5: - if (!n--) break; - *args++ = regs->r9; - case 6: - if (!n--) break; - default: - BUG(); - break; - } + { + *args++ = regs->di; + *args++ = regs->si; + *args++ = regs->dx; + *args++ = regs->r10; + *args++ = regs->r8; + *args = regs->r9; + } } static inline void syscall_set_arguments(struct task_struct *task, diff --git a/arch/xtensa/include/asm/syscall.h b/arch/xtensa/include/asm/syscall.h index a168bf81c7f4..1504ce9a233a 100644 --- a/arch/xtensa/include/asm/syscall.h +++ b/arch/xtensa/include/asm/syscall.h @@ -59,23 +59,13 @@ static inline void syscall_set_return_value(struct task_struct *task, static inline void syscall_get_arguments(struct task_struct *task, struct pt_regs *regs, - unsigned int i, unsigned int n, unsigned long *args) { static const unsigned int reg[] = XTENSA_SYSCALL_ARGUMENT_REGS; - unsigned int j; - - if (n == 0) - return; - - WARN_ON_ONCE(i + n > SYSCALL_MAX_ARGS); + unsigned int i; - for (j = 0; j < n; ++j) { - if (i + j < SYSCALL_MAX_ARGS) - args[j] = regs->areg[reg[i + j]]; - else - args[j] = 0; - } + for (i = 0; i < 6; ++i) + args[i] = regs->areg[reg[i]]; } static inline void syscall_set_arguments(struct task_struct *task, diff --git a/include/asm-generic/syscall.h b/include/asm-generic/syscall.h index 0c938a4354f6..269e9412ef42 100644 --- a/include/asm-generic/syscall.h +++ b/include/asm-generic/syscall.h @@ -105,21 +105,16 @@ void syscall_set_return_value(struct task_struct *task, struct pt_regs *regs, * syscall_get_arguments - extract system call parameter values * @task: task of interest, must be blocked * @regs: task_pt_regs() of @task - * @i: argument index [0,5] - * @n: number of arguments; n+i must be [1,6]. * @args: array filled with argument values * - * Fetches @n arguments to the system call starting with the @i'th argument - * (from 0 through 5). Argument @i is stored in @args[0], and so on. - * An arch inline version is probably optimal when @i and @n are constants. + * Fetches 6 arguments to the system call. First argument is stored in +* @args[0], and so on. * * It's only valid to call this when @task is stopped for tracing on * entry to a system call, due to %TIF_SYSCALL_TRACE or %TIF_SYSCALL_AUDIT. - * It's invalid to call this with @i + @n > 6; we only support system calls - * taking up to 6 arguments. */ void syscall_get_arguments(struct task_struct *task, struct pt_regs *regs, - unsigned int i, unsigned int n, unsigned long *args); + unsigned long *args); /** * syscall_set_arguments - change system call parameter value diff --git a/include/trace/events/syscalls.h b/include/trace/events/syscalls.h index 44a3259ed4a5..b6e0cbc2c71f 100644 --- a/include/trace/events/syscalls.h +++ b/include/trace/events/syscalls.h @@ -28,7 +28,7 @@ TRACE_EVENT_FN(sys_enter, TP_fast_assign( __entry->id = id; - syscall_get_arguments(current, regs, 0, 6, __entry->args); + syscall_get_arguments(current, regs, __entry->args); ), TP_printk("NR %ld (%lx, %lx, %lx, %lx, %lx, %lx)", diff --git a/kernel/seccomp.c b/kernel/seccomp.c index 54a0347ca812..df27e499956a 100644 --- a/kernel/seccomp.c +++ b/kernel/seccomp.c @@ -149,7 +149,7 @@ static void populate_seccomp_data(struct seccomp_data *sd) sd->nr = syscall_get_nr(task, regs); sd->arch = syscall_get_arch(); - syscall_get_arguments(task, regs, 0, 6, args); + syscall_get_arguments(task, regs, args); sd->args[0] = args[0]; sd->args[1] = args[1]; sd->args[2] = args[2]; diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c index e9f5bbbad6d9..fa8fbff736d6 100644 --- a/kernel/trace/trace_syscalls.c +++ b/kernel/trace/trace_syscalls.c @@ -348,7 +348,7 @@ static void ftrace_syscall_enter(void *data, struct pt_regs *regs, long id) entry = ring_buffer_event_data(event); entry->nr = syscall_nr; - syscall_get_arguments(current, regs, 0, 6, args); + syscall_get_arguments(current, regs, args); memcpy(entry->args, args, sizeof(unsigned long) * sys_data->nb_args); event_trigger_unlock_commit(trace_file, buffer, event, entry, @@ -616,7 +616,7 @@ static void perf_syscall_enter(void *ignore, struct pt_regs *regs, long id) return; rec->nr = syscall_nr; - syscall_get_arguments(current, regs, 0, 6, args); + syscall_get_arguments(current, regs, args); memcpy(&rec->args, args, sizeof(unsigned long) * sys_data->nb_args); if ((valid_prog_array && diff --git a/lib/syscall.c b/lib/syscall.c index e8467e17b9a2..fb328e7ccb08 100644 --- a/lib/syscall.c +++ b/lib/syscall.c @@ -27,7 +27,7 @@ static int collect_syscall(struct task_struct *target, struct syscall_info *info info->data.nr = syscall_get_nr(target, regs); if (info->data.nr != -1L) - syscall_get_arguments(target, regs, 0, 6, + syscall_get_arguments(target, regs, (unsigned long *)&info->data.args[0]); put_task_stack(target); -- cgit v1.2.3 From 32d92586629a8b3637a3c9361709818e25f327ad Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Wed, 27 Mar 2019 20:07:31 -0400 Subject: syscalls: Remove start and number from syscall_set_arguments() args After removing the start and count arguments of syscall_get_arguments() it seems reasonable to remove them from syscall_set_arguments(). Note, as of today, there are no users of syscall_set_arguments(). But we are told that there will be soon. But for now, at least make it consistent with syscall_get_arguments(). Link: http://lkml.kernel.org/r/20190327222014.GA32540@altlinux.org Cc: Oleg Nesterov Cc: Kees Cook Cc: Andy Lutomirski Cc: Dominik Brodowski Cc: Dave Martin Cc: "Dmitry V. Levin" Cc: x86@kernel.org Cc: linux-snps-arc@lists.infradead.org Cc: linux-kernel@vger.kernel.org Cc: linux-arm-kernel@lists.infradead.org Cc: linux-c6x-dev@linux-c6x.org Cc: uclinux-h8-devel@lists.sourceforge.jp Cc: linux-hexagon@vger.kernel.org Cc: linux-ia64@vger.kernel.org Cc: linux-mips@vger.kernel.org Cc: nios2-dev@lists.rocketboards.org Cc: openrisc@lists.librecores.org Cc: linux-parisc@vger.kernel.org Cc: linuxppc-dev@lists.ozlabs.org Cc: linux-riscv@lists.infradead.org Cc: linux-s390@vger.kernel.org Cc: linux-sh@vger.kernel.org Cc: sparclinux@vger.kernel.org Cc: linux-um@lists.infradead.org Cc: linux-xtensa@linux-xtensa.org Cc: linux-arch@vger.kernel.org Acked-by: Max Filippov # For xtensa changes Acked-by: Will Deacon # For the arm64 bits Reviewed-by: Thomas Gleixner # for x86 Reviewed-by: Dmitry V. Levin Signed-off-by: Steven Rostedt (VMware) --- arch/arm/include/asm/syscall.h | 22 ++--------- arch/arm64/include/asm/syscall.h | 22 ++--------- arch/c6x/include/asm/syscall.h | 38 +++---------------- arch/csky/include/asm/syscall.h | 14 ++----- arch/ia64/include/asm/syscall.h | 10 ++--- arch/ia64/kernel/ptrace.c | 7 ++-- arch/microblaze/include/asm/syscall.h | 4 +- arch/nds32/include/asm/syscall.h | 29 +++------------ arch/nios2/include/asm/syscall.h | 42 ++++----------------- arch/openrisc/include/asm/syscall.h | 6 +-- arch/powerpc/include/asm/syscall.h | 7 +--- arch/riscv/include/asm/syscall.h | 13 ++----- arch/s390/include/asm/syscall.h | 11 +++--- arch/sh/include/asm/syscall_32.h | 21 +++-------- arch/sh/include/asm/syscall_64.h | 4 +- arch/sparc/include/asm/syscall.h | 7 ++-- arch/um/include/asm/syscall-generic.h | 39 +++----------------- arch/x86/include/asm/syscall.h | 69 ++++++++--------------------------- arch/xtensa/include/asm/syscall.h | 17 ++------- include/asm-generic/syscall.h | 10 +---- 20 files changed, 88 insertions(+), 304 deletions(-) diff --git a/arch/arm/include/asm/syscall.h b/arch/arm/include/asm/syscall.h index db969a2972ae..080ce70cab12 100644 --- a/arch/arm/include/asm/syscall.h +++ b/arch/arm/include/asm/syscall.h @@ -65,26 +65,12 @@ static inline void syscall_get_arguments(struct task_struct *task, static inline void syscall_set_arguments(struct task_struct *task, struct pt_regs *regs, - unsigned int i, unsigned int n, const unsigned long *args) { - if (n == 0) - return; - - if (i + n > SYSCALL_MAX_ARGS) { - pr_warn("%s called with max args %d, handling only %d\n", - __func__, i + n, SYSCALL_MAX_ARGS); - n = SYSCALL_MAX_ARGS - i; - } - - if (i == 0) { - regs->ARM_ORIG_r0 = args[0]; - args++; - i++; - n--; - } - - memcpy(®s->ARM_r0 + i, args, n * sizeof(args[0])); + regs->ARM_ORIG_r0 = args[0]; + args++; + + memcpy(®s->ARM_r0 + 1, args, 5 * sizeof(args[0])); } static inline int syscall_get_arch(void) diff --git a/arch/arm64/include/asm/syscall.h b/arch/arm64/include/asm/syscall.h index 55b2dab21023..a179df3674a1 100644 --- a/arch/arm64/include/asm/syscall.h +++ b/arch/arm64/include/asm/syscall.h @@ -75,26 +75,12 @@ static inline void syscall_get_arguments(struct task_struct *task, static inline void syscall_set_arguments(struct task_struct *task, struct pt_regs *regs, - unsigned int i, unsigned int n, const unsigned long *args) { - if (n == 0) - return; - - if (i + n > SYSCALL_MAX_ARGS) { - pr_warning("%s called with max args %d, handling only %d\n", - __func__, i + n, SYSCALL_MAX_ARGS); - n = SYSCALL_MAX_ARGS - i; - } - - if (i == 0) { - regs->orig_x0 = args[0]; - args++; - i++; - n--; - } - - memcpy(®s->regs[i], args, n * sizeof(args[0])); + regs->orig_x0 = args[0]; + args++; + + memcpy(®s->regs[1], args, 5 * sizeof(args[0])); } /* diff --git a/arch/c6x/include/asm/syscall.h b/arch/c6x/include/asm/syscall.h index 06db3251926b..15ba8599858e 100644 --- a/arch/c6x/include/asm/syscall.h +++ b/arch/c6x/include/asm/syscall.h @@ -59,40 +59,14 @@ static inline void syscall_get_arguments(struct task_struct *task, static inline void syscall_set_arguments(struct task_struct *task, struct pt_regs *regs, - unsigned int i, unsigned int n, const unsigned long *args) { - switch (i) { - case 0: - if (!n--) - break; - regs->a4 = *args++; - case 1: - if (!n--) - break; - regs->b4 = *args++; - case 2: - if (!n--) - break; - regs->a6 = *args++; - case 3: - if (!n--) - break; - regs->b6 = *args++; - case 4: - if (!n--) - break; - regs->a8 = *args++; - case 5: - if (!n--) - break; - regs->a9 = *args++; - case 6: - if (!n) - break; - default: - BUG(); - } + regs->a4 = *args++; + regs->b4 = *args++; + regs->a6 = *args++; + regs->b6 = *args++; + regs->a8 = *args++; + regs->a9 = *args; } #endif /* __ASM_C6X_SYSCALLS_H */ diff --git a/arch/csky/include/asm/syscall.h b/arch/csky/include/asm/syscall.h index c691fe92edc6..bda0a446c63e 100644 --- a/arch/csky/include/asm/syscall.h +++ b/arch/csky/include/asm/syscall.h @@ -52,17 +52,11 @@ syscall_get_arguments(struct task_struct *task, struct pt_regs *regs, static inline void syscall_set_arguments(struct task_struct *task, struct pt_regs *regs, - unsigned int i, unsigned int n, const unsigned long *args) + const unsigned long *args) { - BUG_ON(i + n > 6); - if (i == 0) { - regs->orig_a0 = args[0]; - args++; - n--; - } else { - i--; - } - memcpy(®s->a1 + i, args, n * sizeof(regs->a1)); + regs->orig_a0 = args[0]; + args++; + memcpy(®s->a1, args, 5 * sizeof(regs->a1)); } static inline int diff --git a/arch/ia64/include/asm/syscall.h b/arch/ia64/include/asm/syscall.h index 8204c1ff70ce..0d9e7fab4a79 100644 --- a/arch/ia64/include/asm/syscall.h +++ b/arch/ia64/include/asm/syscall.h @@ -59,23 +59,19 @@ static inline void syscall_set_return_value(struct task_struct *task, } extern void ia64_syscall_get_set_arguments(struct task_struct *task, - struct pt_regs *regs, unsigned int i, unsigned int n, - unsigned long *args, int rw); + struct pt_regs *regs, unsigned long *args, int rw); static inline void syscall_get_arguments(struct task_struct *task, struct pt_regs *regs, unsigned long *args) { - ia64_syscall_get_set_arguments(task, regs, 0, 6, args, 0); + ia64_syscall_get_set_arguments(task, regs, args, 0); } static inline void syscall_set_arguments(struct task_struct *task, struct pt_regs *regs, - unsigned int i, unsigned int n, unsigned long *args) { - BUG_ON(i + n > 6); - - ia64_syscall_get_set_arguments(task, regs, i, n, args, 1); + ia64_syscall_get_set_arguments(task, regs, args, 1); } static inline int syscall_get_arch(void) diff --git a/arch/ia64/kernel/ptrace.c b/arch/ia64/kernel/ptrace.c index 6d50ede0ed69..bf9c24d9ce84 100644 --- a/arch/ia64/kernel/ptrace.c +++ b/arch/ia64/kernel/ptrace.c @@ -2179,12 +2179,11 @@ static void syscall_get_set_args_cb(struct unw_frame_info *info, void *data) } void ia64_syscall_get_set_arguments(struct task_struct *task, - struct pt_regs *regs, unsigned int i, unsigned int n, - unsigned long *args, int rw) + struct pt_regs *regs, unsigned long *args, int rw) { struct syscall_get_set_args data = { - .i = i, - .n = n, + .i = 0, + .n = 6, .args = args, .regs = regs, .rw = rw, diff --git a/arch/microblaze/include/asm/syscall.h b/arch/microblaze/include/asm/syscall.h index 4b23e44e5c3c..833d3a53dab3 100644 --- a/arch/microblaze/include/asm/syscall.h +++ b/arch/microblaze/include/asm/syscall.h @@ -93,9 +93,11 @@ static inline void syscall_get_arguments(struct task_struct *task, static inline void syscall_set_arguments(struct task_struct *task, struct pt_regs *regs, - unsigned int i, unsigned int n, const unsigned long *args) { + unsigned int i = 0; + unsigned int n = 6; + while (n--) microblaze_set_syscall_arg(regs, i++, *args++); } diff --git a/arch/nds32/include/asm/syscall.h b/arch/nds32/include/asm/syscall.h index 89a6ec8731d8..671ebd357496 100644 --- a/arch/nds32/include/asm/syscall.h +++ b/arch/nds32/include/asm/syscall.h @@ -129,39 +129,20 @@ void syscall_get_arguments(struct task_struct *task, struct pt_regs *regs, * syscall_set_arguments - change system call parameter value * @task: task of interest, must be in system call entry tracing * @regs: task_pt_regs() of @task - * @i: argument index [0,5] - * @n: number of arguments; n+i must be [1,6]. * @args: array of argument values to store * - * Changes @n arguments to the system call starting with the @i'th argument. - * Argument @i gets value @args[0], and so on. - * An arch inline version is probably optimal when @i and @n are constants. + * Changes 6 arguments to the system call. The first argument gets value + * @args[0], and so on. * * It's only valid to call this when @task is stopped for tracing on * entry to a system call, due to %TIF_SYSCALL_TRACE or %TIF_SYSCALL_AUDIT. - * It's invalid to call this with @i + @n > 6; we only support system calls - * taking up to 6 arguments. */ void syscall_set_arguments(struct task_struct *task, struct pt_regs *regs, - unsigned int i, unsigned int n, const unsigned long *args) { - if (n == 0) - return; - - if (i + n > SYSCALL_MAX_ARGS) { - pr_warn("%s called with max args %d, handling only %d\n", - __func__, i + n, SYSCALL_MAX_ARGS); - n = SYSCALL_MAX_ARGS - i; - } - - if (i == 0) { - regs->orig_r0 = args[0]; - args++; - i++; - n--; - } + regs->orig_r0 = args[0]; + args++; - memcpy(®s->uregs[0] + i, args, n * sizeof(args[0])); + memcpy(®s->uregs[0] + 1, args, 5 * sizeof(args[0])); } #endif /* _ASM_NDS32_SYSCALL_H */ diff --git a/arch/nios2/include/asm/syscall.h b/arch/nios2/include/asm/syscall.h index 792bd449d839..d7624ed06efb 100644 --- a/arch/nios2/include/asm/syscall.h +++ b/arch/nios2/include/asm/syscall.h @@ -69,42 +69,14 @@ static inline void syscall_get_arguments(struct task_struct *task, } static inline void syscall_set_arguments(struct task_struct *task, - struct pt_regs *regs, unsigned int i, unsigned int n, - const unsigned long *args) + struct pt_regs *regs, const unsigned long *args) { - BUG_ON(i + n > 6); - - switch (i) { - case 0: - if (!n--) - break; - regs->r4 = *args++; - case 1: - if (!n--) - break; - regs->r5 = *args++; - case 2: - if (!n--) - break; - regs->r6 = *args++; - case 3: - if (!n--) - break; - regs->r7 = *args++; - case 4: - if (!n--) - break; - regs->r8 = *args++; - case 5: - if (!n--) - break; - regs->r9 = *args++; - case 6: - if (!n) - break; - default: - BUG(); - } + regs->r4 = *args++; + regs->r5 = *args++; + regs->r6 = *args++; + regs->r7 = *args++; + regs->r8 = *args++; + regs->r9 = *args; } #endif diff --git a/arch/openrisc/include/asm/syscall.h b/arch/openrisc/include/asm/syscall.h index 72607860cd55..b4ff07c1baed 100644 --- a/arch/openrisc/include/asm/syscall.h +++ b/arch/openrisc/include/asm/syscall.h @@ -63,11 +63,9 @@ syscall_get_arguments(struct task_struct *task, struct pt_regs *regs, static inline void syscall_set_arguments(struct task_struct *task, struct pt_regs *regs, - unsigned int i, unsigned int n, const unsigned long *args) + const unsigned long *args) { - BUG_ON(i + n > 6); - - memcpy(®s->gpr[3 + i], args, n * sizeof(args[0])); + memcpy(®s->gpr[3], args, 6 * sizeof(args[0])); } static inline int syscall_get_arch(void) diff --git a/arch/powerpc/include/asm/syscall.h b/arch/powerpc/include/asm/syscall.h index 5c9b9dc82b7e..1243045bad2d 100644 --- a/arch/powerpc/include/asm/syscall.h +++ b/arch/powerpc/include/asm/syscall.h @@ -86,15 +86,12 @@ static inline void syscall_get_arguments(struct task_struct *task, static inline void syscall_set_arguments(struct task_struct *task, struct pt_regs *regs, - unsigned int i, unsigned int n, const unsigned long *args) { - BUG_ON(i + n > 6); - memcpy(®s->gpr[3 + i], args, n * sizeof(args[0])); + memcpy(®s->gpr[3], args, 6 * sizeof(args[0])); /* Also copy the first argument into orig_gpr3 */ - if (i == 0 && n > 0) - regs->orig_gpr3 = args[0]; + regs->orig_gpr3 = args[0]; } static inline int syscall_get_arch(void) diff --git a/arch/riscv/include/asm/syscall.h b/arch/riscv/include/asm/syscall.h index 6adca1804be1..a3d5273ded7c 100644 --- a/arch/riscv/include/asm/syscall.h +++ b/arch/riscv/include/asm/syscall.h @@ -81,18 +81,11 @@ static inline void syscall_get_arguments(struct task_struct *task, static inline void syscall_set_arguments(struct task_struct *task, struct pt_regs *regs, - unsigned int i, unsigned int n, const unsigned long *args) { - BUG_ON(i + n > 6); - if (i == 0) { - regs->orig_a0 = args[0]; - args++; - n--; - } else { - i--; - } - memcpy(®s->a1 + i, args, n * sizeof(regs->a1)); + regs->orig_a0 = args[0]; + args++; + memcpy(®s->a1, args, 5 * sizeof(regs->a1)); } static inline int syscall_get_arch(void) diff --git a/arch/s390/include/asm/syscall.h b/arch/s390/include/asm/syscall.h index ee0b1f6aa36d..59c3e91f2cdb 100644 --- a/arch/s390/include/asm/syscall.h +++ b/arch/s390/include/asm/syscall.h @@ -74,15 +74,14 @@ static inline void syscall_get_arguments(struct task_struct *task, static inline void syscall_set_arguments(struct task_struct *task, struct pt_regs *regs, - unsigned int i, unsigned int n, const unsigned long *args) { - BUG_ON(i + n > 6); + unsigned int n = 6; + while (n-- > 0) - if (i + n > 0) - regs->gprs[2 + i + n] = args[n]; - if (i == 0) - regs->orig_gpr2 = args[0]; + if (n > 0) + regs->gprs[2 + n] = args[n]; + regs->orig_gpr2 = args[0]; } static inline int syscall_get_arch(void) diff --git a/arch/sh/include/asm/syscall_32.h b/arch/sh/include/asm/syscall_32.h index 2bf1199a0595..8c9d7e5e5dcc 100644 --- a/arch/sh/include/asm/syscall_32.h +++ b/arch/sh/include/asm/syscall_32.h @@ -62,23 +62,14 @@ static inline void syscall_get_arguments(struct task_struct *task, static inline void syscall_set_arguments(struct task_struct *task, struct pt_regs *regs, - unsigned int i, unsigned int n, const unsigned long *args) { - /* Same note as above applies */ - BUG_ON(i); - - switch (n) { - case 6: regs->regs[1] = args[5]; - case 5: regs->regs[0] = args[4]; - case 4: regs->regs[7] = args[3]; - case 3: regs->regs[6] = args[2]; - case 2: regs->regs[5] = args[1]; - case 1: regs->regs[4] = args[0]; - break; - default: - BUG(); - } + regs->regs[1] = args[5]; + regs->regs[0] = args[4]; + regs->regs[7] = args[3]; + regs->regs[6] = args[2]; + regs->regs[5] = args[1]; + regs->regs[4] = args[0]; } static inline int syscall_get_arch(void) diff --git a/arch/sh/include/asm/syscall_64.h b/arch/sh/include/asm/syscall_64.h index 4e8f6460c703..22fad97da066 100644 --- a/arch/sh/include/asm/syscall_64.h +++ b/arch/sh/include/asm/syscall_64.h @@ -54,11 +54,9 @@ static inline void syscall_get_arguments(struct task_struct *task, static inline void syscall_set_arguments(struct task_struct *task, struct pt_regs *regs, - unsigned int i, unsigned int n, const unsigned long *args) { - BUG_ON(i + n > 6); - memcpy(®s->regs[2 + i], args, n * sizeof(args[0])); + memcpy(®s->regs[2], args, 6 * sizeof(args[0])); } static inline int syscall_get_arch(void) diff --git a/arch/sparc/include/asm/syscall.h b/arch/sparc/include/asm/syscall.h index 872dfee852d6..4d075434e816 100644 --- a/arch/sparc/include/asm/syscall.h +++ b/arch/sparc/include/asm/syscall.h @@ -119,13 +119,12 @@ static inline void syscall_get_arguments(struct task_struct *task, static inline void syscall_set_arguments(struct task_struct *task, struct pt_regs *regs, - unsigned int i, unsigned int n, const unsigned long *args) { - unsigned int j; + unsigned int i; - for (j = 0; j < n; j++) - regs->u_regs[UREG_I0 + i + j] = args[j]; + for (i = 0; i < 6; i++) + regs->u_regs[UREG_I0 + i] = args[i]; } static inline int syscall_get_arch(void) diff --git a/arch/um/include/asm/syscall-generic.h b/arch/um/include/asm/syscall-generic.h index 25d00acd1322..98e50c50c12e 100644 --- a/arch/um/include/asm/syscall-generic.h +++ b/arch/um/include/asm/syscall-generic.h @@ -67,43 +67,16 @@ static inline void syscall_get_arguments(struct task_struct *task, static inline void syscall_set_arguments(struct task_struct *task, struct pt_regs *regs, - unsigned int i, unsigned int n, const unsigned long *args) { struct uml_pt_regs *r = ®s->regs; - switch (i) { - case 0: - if (!n--) - break; - UPT_SYSCALL_ARG1(r) = *args++; - case 1: - if (!n--) - break; - UPT_SYSCALL_ARG2(r) = *args++; - case 2: - if (!n--) - break; - UPT_SYSCALL_ARG3(r) = *args++; - case 3: - if (!n--) - break; - UPT_SYSCALL_ARG4(r) = *args++; - case 4: - if (!n--) - break; - UPT_SYSCALL_ARG5(r) = *args++; - case 5: - if (!n--) - break; - UPT_SYSCALL_ARG6(r) = *args++; - case 6: - if (!n--) - break; - default: - BUG(); - break; - } + UPT_SYSCALL_ARG1(r) = *args++; + UPT_SYSCALL_ARG2(r) = *args++; + UPT_SYSCALL_ARG3(r) = *args++; + UPT_SYSCALL_ARG4(r) = *args++; + UPT_SYSCALL_ARG5(r) = *args++; + UPT_SYSCALL_ARG6(r) = *args; } /* See arch/x86/um/asm/syscall.h for syscall_get_arch() definition. */ diff --git a/arch/x86/include/asm/syscall.h b/arch/x86/include/asm/syscall.h index 8dbb5c379450..4c305471ec33 100644 --- a/arch/x86/include/asm/syscall.h +++ b/arch/x86/include/asm/syscall.h @@ -138,63 +138,26 @@ static inline void syscall_get_arguments(struct task_struct *task, static inline void syscall_set_arguments(struct task_struct *task, struct pt_regs *regs, - unsigned int i, unsigned int n, const unsigned long *args) { # ifdef CONFIG_IA32_EMULATION - if (task->thread_info.status & TS_COMPAT) - switch (i) { - case 0: - if (!n--) break; - regs->bx = *args++; - case 1: - if (!n--) break; - regs->cx = *args++; - case 2: - if (!n--) break; - regs->dx = *args++; - case 3: - if (!n--) break; - regs->si = *args++; - case 4: - if (!n--) break; - regs->di = *args++; - case 5: - if (!n--) break; - regs->bp = *args++; - case 6: - if (!n--) break; - default: - BUG(); - break; - } - else + if (task->thread_info.status & TS_COMPAT) { + regs->bx = *args++; + regs->cx = *args++; + regs->dx = *args++; + regs->si = *args++; + regs->di = *args++; + regs->bp = *args; + } else # endif - switch (i) { - case 0: - if (!n--) break; - regs->di = *args++; - case 1: - if (!n--) break; - regs->si = *args++; - case 2: - if (!n--) break; - regs->dx = *args++; - case 3: - if (!n--) break; - regs->r10 = *args++; - case 4: - if (!n--) break; - regs->r8 = *args++; - case 5: - if (!n--) break; - regs->r9 = *args++; - case 6: - if (!n--) break; - default: - BUG(); - break; - } + { + regs->di = *args++; + regs->si = *args++; + regs->dx = *args++; + regs->r10 = *args++; + regs->r8 = *args++; + regs->r9 = *args; + } } static inline int syscall_get_arch(void) diff --git a/arch/xtensa/include/asm/syscall.h b/arch/xtensa/include/asm/syscall.h index 1504ce9a233a..91dc06d58060 100644 --- a/arch/xtensa/include/asm/syscall.h +++ b/arch/xtensa/include/asm/syscall.h @@ -70,24 +70,13 @@ static inline void syscall_get_arguments(struct task_struct *task, static inline void syscall_set_arguments(struct task_struct *task, struct pt_regs *regs, - unsigned int i, unsigned int n, const unsigned long *args) { static const unsigned int reg[] = XTENSA_SYSCALL_ARGUMENT_REGS; - unsigned int j; - - if (n == 0) - return; - - if (WARN_ON_ONCE(i + n > SYSCALL_MAX_ARGS)) { - if (i < SYSCALL_MAX_ARGS) - n = SYSCALL_MAX_ARGS - i; - else - return; - } + unsigned int i; - for (j = 0; j < n; ++j) - regs->areg[reg[i + j]] = args[j]; + for (i = 0; i < 6; ++i) + regs->areg[reg[i]] = args[i]; } asmlinkage long xtensa_rt_sigreturn(struct pt_regs*); diff --git a/include/asm-generic/syscall.h b/include/asm-generic/syscall.h index 269e9412ef42..b88239e9efe4 100644 --- a/include/asm-generic/syscall.h +++ b/include/asm-generic/syscall.h @@ -120,21 +120,15 @@ void syscall_get_arguments(struct task_struct *task, struct pt_regs *regs, * syscall_set_arguments - change system call parameter value * @task: task of interest, must be in system call entry tracing * @regs: task_pt_regs() of @task - * @i: argument index [0,5] - * @n: number of arguments; n+i must be [1,6]. * @args: array of argument values to store * - * Changes @n arguments to the system call starting with the @i'th argument. - * Argument @i gets value @args[0], and so on. - * An arch inline version is probably optimal when @i and @n are constants. + * Changes 6 arguments to the system call. + * The first argument gets value @args[0], and so on. * * It's only valid to call this when @task is stopped for tracing on * entry to a system call, due to %TIF_SYSCALL_TRACE or %TIF_SYSCALL_AUDIT. - * It's invalid to call this with @i + @n > 6; we only support system calls - * taking up to 6 arguments. */ void syscall_set_arguments(struct task_struct *task, struct pt_regs *regs, - unsigned int i, unsigned int n, const unsigned long *args); /** -- cgit v1.2.3 From f0d1762554014ce0ae347b9f0d088f2c157c8c72 Mon Sep 17 00:00:00 2001 From: YueHaibing Date: Fri, 5 Apr 2019 10:14:58 +0800 Subject: paride/pcd: Fix potential NULL pointer dereference and mem leak Syzkaller report this: pcd: pcd version 1.07, major 46, nice 0 pcd0: Autoprobe failed pcd: No CD-ROM drive found kasan: CONFIG_KASAN_INLINE enabled kasan: GPF could be caused by NULL-ptr deref or user memory access general protection fault: 0000 [#1] SMP KASAN PTI CPU: 1 PID: 4525 Comm: syz-executor.0 Not tainted 5.1.0-rc3+ #8 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.10.2-1ubuntu1 04/01/2014 RIP: 0010:pcd_init+0x95c/0x1000 [pcd] Code: c4 ab f7 48 89 d8 48 c1 e8 03 80 3c 28 00 74 08 48 89 df e8 56 a3 da f7 4c 8b 23 49 8d bc 24 80 05 00 00 48 89 f8 48 c1 e8 03 <80> 3c 28 00 74 05 e8 39 a3 da f7 49 8b bc 24 80 05 00 00 e8 cc b2 RSP: 0018:ffff8881e84df880 EFLAGS: 00010202 RAX: 00000000000000b0 RBX: ffffffffc155a088 RCX: ffffffffc1508935 RDX: 0000000000040000 RSI: ffffc900014f0000 RDI: 0000000000000580 RBP: dffffc0000000000 R08: ffffed103ee658b8 R09: ffffed103ee658b8 R10: 0000000000000001 R11: ffffed103ee658b7 R12: 0000000000000000 R13: ffffffffc155a778 R14: ffffffffc155a4a8 R15: 0000000000000003 FS: 00007fe71bee3700(0000) GS:ffff8881f7300000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 000055a7334441a8 CR3: 00000001e9674003 CR4: 00000000007606e0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 PKRU: 55555554 Call Trace: ? 0xffffffffc1508000 ? 0xffffffffc1508000 do_one_initcall+0xbc/0x47d init/main.c:901 do_init_module+0x1b5/0x547 kernel/module.c:3456 load_module+0x6405/0x8c10 kernel/module.c:3804 __do_sys_finit_module+0x162/0x190 kernel/module.c:3898 do_syscall_64+0x9f/0x450 arch/x86/entry/common.c:290 entry_SYSCALL_64_after_hwframe+0x49/0xbe RIP: 0033:0x462e99 Code: f7 d8 64 89 02 b8 ff ff ff ff c3 66 0f 1f 44 00 00 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 c7 c1 bc ff ff ff f7 d8 64 89 01 48 RSP: 002b:00007fe71bee2c58 EFLAGS: 00000246 ORIG_RAX: 0000000000000139 RAX: ffffffffffffffda RBX: 000000000073bf00 RCX: 0000000000462e99 RDX: 0000000000000000 RSI: 0000000020000180 RDI: 0000000000000003 RBP: 00007fe71bee2c70 R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000246 R12: 00007fe71bee36bc R13: 00000000004bcefa R14: 00000000006f6fb0 R15: 0000000000000004 Modules linked in: pcd(+) paride solos_pci atm ts_fsm rtc_mt6397 mac80211 nhc_mobility nhc_udp nhc_ipv6 nhc_hop nhc_dest nhc_fragment nhc_routing 6lowpan rtc_cros_ec memconsole intel_xhci_usb_role_switch roles rtc_wm8350 usbcore industrialio_triggered_buffer kfifo_buf industrialio asc7621 dm_era dm_persistent_data dm_bufio dm_mod tpm gnss_ubx gnss_serial serdev gnss max2165 cpufreq_dt hid_penmount hid menf21bmc_wdt rc_core n_tracesink ide_gd_mod cdns_csi2tx v4l2_fwnode videodev media pinctrl_lewisburg pinctrl_intel iptable_security iptable_raw iptable_mangle iptable_nat nf_nat nf_conntrack nf_defrag_ipv6 nf_defrag_ipv4 iptable_filter bpfilter ip6_vti ip_vti ip_gre ipip sit tunnel4 ip_tunnel hsr veth netdevsim vxcan batman_adv cfg80211 rfkill chnl_net caif nlmon dummy team bonding vcan bridge stp llc ip6_gre gre ip6_tunnel tunnel6 tun joydev mousedev ppdev kvm_intel kvm irqbypass crct10dif_pclmul crc32_pclmul crc32c_intel ghash_clmulni_intel aesni_intel aes_x86_64 crypto_simd ide_pci_generic piix input_leds cryptd glue_helper psmouse ide_core intel_agp serio_raw intel_gtt ata_generic i2c_piix4 agpgart pata_acpi parport_pc parport floppy rtc_cmos sch_fq_codel ip_tables x_tables sha1_ssse3 sha1_generic ipv6 [last unloaded: bmc150_magn] Dumping ftrace buffer: (ftrace buffer empty) ---[ end trace d873691c3cd69f56 ]--- If alloc_disk fails in pcd_init_units, cd->disk will be NULL, however in pcd_detect and pcd_exit, it's not check this before free.It may result a NULL pointer dereference. Also when register_blkdev failed, blk_cleanup_queue() and blk_mq_free_tag_set() should be called to free resources. Reported-by: Hulk Robot Fixes: 81b74ac68c28 ("paride/pcd: cleanup queues when detection fails") Signed-off-by: YueHaibing Signed-off-by: Jens Axboe --- drivers/block/paride/pcd.c | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/drivers/block/paride/pcd.c b/drivers/block/paride/pcd.c index 377a694dc228..6d415b20fb70 100644 --- a/drivers/block/paride/pcd.c +++ b/drivers/block/paride/pcd.c @@ -314,6 +314,7 @@ static void pcd_init_units(void) disk->queue = blk_mq_init_sq_queue(&cd->tag_set, &pcd_mq_ops, 1, BLK_MQ_F_SHOULD_MERGE); if (IS_ERR(disk->queue)) { + put_disk(disk); disk->queue = NULL; continue; } @@ -750,6 +751,8 @@ static int pcd_detect(void) printk("%s: No CD-ROM drive found\n", name); for (unit = 0, cd = pcd; unit < PCD_UNITS; unit++, cd++) { + if (!cd->disk) + continue; blk_cleanup_queue(cd->disk->queue); cd->disk->queue = NULL; blk_mq_free_tag_set(&cd->tag_set); @@ -1010,8 +1013,14 @@ static int __init pcd_init(void) pcd_probe_capabilities(); if (register_blkdev(major, name)) { - for (unit = 0, cd = pcd; unit < PCD_UNITS; unit++, cd++) + for (unit = 0, cd = pcd; unit < PCD_UNITS; unit++, cd++) { + if (!cd->disk) + continue; + + blk_cleanup_queue(cd->disk->queue); + blk_mq_free_tag_set(&cd->tag_set); put_disk(cd->disk); + } return -EBUSY; } @@ -1032,6 +1041,9 @@ static void __exit pcd_exit(void) int unit; for (unit = 0, cd = pcd; unit < PCD_UNITS; unit++, cd++) { + if (!cd->disk) + continue; + if (cd->present) { del_gendisk(cd->disk); pi_release(cd->pi); -- cgit v1.2.3 From fd9c40f64c514bdc585a21e2e33fa5f83ca8811b Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Thu, 4 Apr 2019 10:08:43 -0700 Subject: block: Revert v5.0 blk_mq_request_issue_directly() changes blk_mq_try_issue_directly() can return BLK_STS*_RESOURCE for requests that have been queued. If that happens when blk_mq_try_issue_directly() is called by the dm-mpath driver then dm-mpath will try to resubmit a request that is already queued and a kernel crash follows. Since it is nontrivial to fix blk_mq_request_issue_directly(), revert the blk_mq_request_issue_directly() changes that went into kernel v5.0. This patch reverts the following commits: * d6a51a97c0b2 ("blk-mq: replace and kill blk_mq_request_issue_directly") # v5.0. * 5b7a6f128aad ("blk-mq: issue directly with bypass 'false' in blk_mq_sched_insert_requests") # v5.0. * 7f556a44e61d ("blk-mq: refactor the code of issue request directly") # v5.0. Cc: Christoph Hellwig Cc: Ming Lei Cc: Jianchao Wang Cc: Hannes Reinecke Cc: Johannes Thumshirn Cc: James Smart Cc: Dongli Zhang Cc: Laurence Oberman Cc: Reported-by: Laurence Oberman Tested-by: Laurence Oberman Fixes: 7f556a44e61d ("blk-mq: refactor the code of issue request directly") # v5.0. Signed-off-by: Bart Van Assche Signed-off-by: Jens Axboe --- block/blk-core.c | 4 +- block/blk-mq-sched.c | 8 ++-- block/blk-mq.c | 122 ++++++++++++++++++++++++++------------------------- block/blk-mq.h | 6 +-- 4 files changed, 71 insertions(+), 69 deletions(-) diff --git a/block/blk-core.c b/block/blk-core.c index 4673ebe42255..a55389ba8779 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -1245,8 +1245,6 @@ static int blk_cloned_rq_check_limits(struct request_queue *q, */ blk_status_t blk_insert_cloned_request(struct request_queue *q, struct request *rq) { - blk_qc_t unused; - if (blk_cloned_rq_check_limits(q, rq)) return BLK_STS_IOERR; @@ -1262,7 +1260,7 @@ blk_status_t blk_insert_cloned_request(struct request_queue *q, struct request * * bypass a potential scheduler on the bottom device for * insert. */ - return blk_mq_try_issue_directly(rq->mq_hctx, rq, &unused, true, true); + return blk_mq_request_issue_directly(rq, true); } EXPORT_SYMBOL_GPL(blk_insert_cloned_request); diff --git a/block/blk-mq-sched.c b/block/blk-mq-sched.c index 40905539afed..aa6bc5c02643 100644 --- a/block/blk-mq-sched.c +++ b/block/blk-mq-sched.c @@ -423,10 +423,12 @@ void blk_mq_sched_insert_requests(struct blk_mq_hw_ctx *hctx, * busy in case of 'none' scheduler, and this way may save * us one extra enqueue & dequeue to sw queue. */ - if (!hctx->dispatch_busy && !e && !run_queue_async) + if (!hctx->dispatch_busy && !e && !run_queue_async) { blk_mq_try_issue_list_directly(hctx, list); - else - blk_mq_insert_requests(hctx, ctx, list); + if (list_empty(list)) + return; + } + blk_mq_insert_requests(hctx, ctx, list); } blk_mq_run_hw_queue(hctx, run_queue_async); diff --git a/block/blk-mq.c b/block/blk-mq.c index ac61bcc67a89..a9354835cf51 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -1801,74 +1801,76 @@ static blk_status_t __blk_mq_issue_directly(struct blk_mq_hw_ctx *hctx, return ret; } -blk_status_t blk_mq_try_issue_directly(struct blk_mq_hw_ctx *hctx, +static blk_status_t __blk_mq_try_issue_directly(struct blk_mq_hw_ctx *hctx, struct request *rq, blk_qc_t *cookie, - bool bypass, bool last) + bool bypass_insert, bool last) { struct request_queue *q = rq->q; bool run_queue = true; - blk_status_t ret = BLK_STS_RESOURCE; - int srcu_idx; - bool force = false; - hctx_lock(hctx, &srcu_idx); /* - * hctx_lock is needed before checking quiesced flag. + * RCU or SRCU read lock is needed before checking quiesced flag. * - * When queue is stopped or quiesced, ignore 'bypass', insert - * and return BLK_STS_OK to caller, and avoid driver to try to - * dispatch again. + * When queue is stopped or quiesced, ignore 'bypass_insert' from + * blk_mq_request_issue_directly(), and return BLK_STS_OK to caller, + * and avoid driver to try to dispatch again. */ - if (unlikely(blk_mq_hctx_stopped(hctx) || blk_queue_quiesced(q))) { + if (blk_mq_hctx_stopped(hctx) || blk_queue_quiesced(q)) { run_queue = false; - bypass = false; - goto out_unlock; + bypass_insert = false; + goto insert; } - if (unlikely(q->elevator && !bypass)) - goto out_unlock; + if (q->elevator && !bypass_insert) + goto insert; if (!blk_mq_get_dispatch_budget(hctx)) - goto out_unlock; + goto insert; if (!blk_mq_get_driver_tag(rq)) { blk_mq_put_dispatch_budget(hctx); - goto out_unlock; + goto insert; } - /* - * Always add a request that has been through - *.queue_rq() to the hardware dispatch list. - */ - force = true; - ret = __blk_mq_issue_directly(hctx, rq, cookie, last); -out_unlock: + return __blk_mq_issue_directly(hctx, rq, cookie, last); +insert: + if (bypass_insert) + return BLK_STS_RESOURCE; + + blk_mq_request_bypass_insert(rq, run_queue); + return BLK_STS_OK; +} + +static void blk_mq_try_issue_directly(struct blk_mq_hw_ctx *hctx, + struct request *rq, blk_qc_t *cookie) +{ + blk_status_t ret; + int srcu_idx; + + might_sleep_if(hctx->flags & BLK_MQ_F_BLOCKING); + + hctx_lock(hctx, &srcu_idx); + + ret = __blk_mq_try_issue_directly(hctx, rq, cookie, false, true); + if (ret == BLK_STS_RESOURCE || ret == BLK_STS_DEV_RESOURCE) + blk_mq_request_bypass_insert(rq, true); + else if (ret != BLK_STS_OK) + blk_mq_end_request(rq, ret); + + hctx_unlock(hctx, srcu_idx); +} + +blk_status_t blk_mq_request_issue_directly(struct request *rq, bool last) +{ + blk_status_t ret; + int srcu_idx; + blk_qc_t unused_cookie; + struct blk_mq_hw_ctx *hctx = rq->mq_hctx; + + hctx_lock(hctx, &srcu_idx); + ret = __blk_mq_try_issue_directly(hctx, rq, &unused_cookie, true, last); hctx_unlock(hctx, srcu_idx); - switch (ret) { - case BLK_STS_OK: - break; - case BLK_STS_DEV_RESOURCE: - case BLK_STS_RESOURCE: - if (force) { - blk_mq_request_bypass_insert(rq, run_queue); - /* - * We have to return BLK_STS_OK for the DM - * to avoid livelock. Otherwise, we return - * the real result to indicate whether the - * request is direct-issued successfully. - */ - ret = bypass ? BLK_STS_OK : ret; - } else if (!bypass) { - blk_mq_sched_insert_request(rq, false, - run_queue, false); - } - break; - default: - if (!bypass) - blk_mq_end_request(rq, ret); - break; - } return ret; } @@ -1876,20 +1878,22 @@ out_unlock: void blk_mq_try_issue_list_directly(struct blk_mq_hw_ctx *hctx, struct list_head *list) { - blk_qc_t unused; - blk_status_t ret = BLK_STS_OK; - while (!list_empty(list)) { + blk_status_t ret; struct request *rq = list_first_entry(list, struct request, queuelist); list_del_init(&rq->queuelist); - if (ret == BLK_STS_OK) - ret = blk_mq_try_issue_directly(hctx, rq, &unused, - false, + ret = blk_mq_request_issue_directly(rq, list_empty(list)); + if (ret != BLK_STS_OK) { + if (ret == BLK_STS_RESOURCE || + ret == BLK_STS_DEV_RESOURCE) { + blk_mq_request_bypass_insert(rq, list_empty(list)); - else - blk_mq_sched_insert_request(rq, false, true, false); + break; + } + blk_mq_end_request(rq, ret); + } } /* @@ -1897,7 +1901,7 @@ void blk_mq_try_issue_list_directly(struct blk_mq_hw_ctx *hctx, * the driver there was more coming, but that turned out to * be a lie. */ - if (ret != BLK_STS_OK && hctx->queue->mq_ops->commit_rqs) + if (!list_empty(list) && hctx->queue->mq_ops->commit_rqs) hctx->queue->mq_ops->commit_rqs(hctx); } @@ -2012,13 +2016,13 @@ static blk_qc_t blk_mq_make_request(struct request_queue *q, struct bio *bio) data.hctx = same_queue_rq->mq_hctx; trace_block_unplug(q, 1, true); blk_mq_try_issue_directly(data.hctx, same_queue_rq, - &cookie, false, true); + &cookie); } } else if ((q->nr_hw_queues > 1 && is_sync) || (!q->elevator && !data.hctx->dispatch_busy)) { blk_mq_put_ctx(data.ctx); blk_mq_bio_to_request(rq, bio); - blk_mq_try_issue_directly(data.hctx, rq, &cookie, false, true); + blk_mq_try_issue_directly(data.hctx, rq, &cookie); } else { blk_mq_put_ctx(data.ctx); blk_mq_bio_to_request(rq, bio); diff --git a/block/blk-mq.h b/block/blk-mq.h index d704fc7766f4..423ea88ab6fb 100644 --- a/block/blk-mq.h +++ b/block/blk-mq.h @@ -70,10 +70,8 @@ void blk_mq_request_bypass_insert(struct request *rq, bool run_queue); void blk_mq_insert_requests(struct blk_mq_hw_ctx *hctx, struct blk_mq_ctx *ctx, struct list_head *list); -blk_status_t blk_mq_try_issue_directly(struct blk_mq_hw_ctx *hctx, - struct request *rq, - blk_qc_t *cookie, - bool bypass, bool last); +/* Used by blk_insert_cloned_request() to issue request directly */ +blk_status_t blk_mq_request_issue_directly(struct request *rq, bool last); void blk_mq_try_issue_list_directly(struct blk_mq_hw_ctx *hctx, struct list_head *list); -- cgit v1.2.3 From 325aa19598e410672175ed50982f902d4e3f31c5 Mon Sep 17 00:00:00 2001 From: Stephen Boyd Date: Mon, 25 Mar 2019 11:10:26 -0700 Subject: genirq: Respect IRQCHIP_SKIP_SET_WAKE in irq_chip_set_wake_parent() If a child irqchip calls irq_chip_set_wake_parent() but its parent irqchip has the IRQCHIP_SKIP_SET_WAKE flag set an error is returned. This is inconsistent behaviour vs. set_irq_wake_real() which returns 0 when the irqchip has the IRQCHIP_SKIP_SET_WAKE flag set. It doesn't attempt to walk the chain of parents and set irq wake on any chips that don't have the flag set either. If the intent is to call the .irq_set_wake() callback of the parent irqchip, then we expect irqchip implementations to omit the IRQCHIP_SKIP_SET_WAKE flag and implement an .irq_set_wake() function that calls irq_chip_set_wake_parent(). The problem has been observed on a Qualcomm sdm845 device where set wake fails on any GPIO interrupts after applying work in progress wakeup irq patches to the GPIO driver. The chain of chips looks like this: QCOM GPIO -> QCOM PDC (SKIP) -> ARM GIC (SKIP) The GPIO controllers parent is the QCOM PDC irqchip which in turn has ARM GIC as parent. The QCOM PDC irqchip has the IRQCHIP_SKIP_SET_WAKE flag set, and so does the grandparent ARM GIC. The GPIO driver doesn't know if the parent needs to set wake or not, so it unconditionally calls irq_chip_set_wake_parent() causing this function to return a failure because the parent irqchip (PDC) doesn't have the .irq_set_wake() callback set. Returning 0 instead makes everything work and irqs from the GPIO controller can be configured for wakeup. Make it consistent by returning 0 (success) from irq_chip_set_wake_parent() when a parent chip has IRQCHIP_SKIP_SET_WAKE set. [ tglx: Massaged changelog ] Fixes: 08b55e2a9208e ("genirq: Add irqchip_set_wake_parent") Signed-off-by: Stephen Boyd Signed-off-by: Thomas Gleixner Acked-by: Marc Zyngier Cc: linux-arm-kernel@lists.infradead.org Cc: linux-gpio@vger.kernel.org Cc: Lina Iyer Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/20190325181026.247796-1-swboyd@chromium.org --- kernel/irq/chip.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/kernel/irq/chip.c b/kernel/irq/chip.c index 3faef4a77f71..51128bea3846 100644 --- a/kernel/irq/chip.c +++ b/kernel/irq/chip.c @@ -1449,6 +1449,10 @@ int irq_chip_set_vcpu_affinity_parent(struct irq_data *data, void *vcpu_info) int irq_chip_set_wake_parent(struct irq_data *data, unsigned int on) { data = data->parent_data; + + if (data->chip->flags & IRQCHIP_SKIP_SET_WAKE) + return 0; + if (data->chip->irq_set_wake) return data->chip->irq_set_wake(data, on); -- cgit v1.2.3 From c7084edc3f6d67750f50d4183134c4fb5712a5c8 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Fri, 5 Apr 2019 15:39:26 +0200 Subject: tty: mark Siemens R3964 line discipline as BROKEN The n_r3964 line discipline driver was written in a different time, when SMP machines were rare, and users were trusted to do the right thing. Since then, the world has moved on but not this code, it has stayed rooted in the past with its lovely hand-crafted list structures and loads of "interesting" race conditions all over the place. After attempting to clean up most of the issues, I just gave up and am now marking the driver as BROKEN so that hopefully someone who has this hardware will show up out of the woodwork (I know you are out there!) and will help with debugging a raft of changes that I had laying around for the code, but was too afraid to commit as odds are they would break things. Many thanks to Jann and Linus for pointing out the initial problems in this codebase, as well as many reviews of my attempts to fix the issues. It was a case of whack-a-mole, and as you can see, the mole won. Reported-by: Jann Horn Signed-off-by: Greg Kroah-Hartman Signed-off-by: Linus Torvalds --- drivers/char/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/char/Kconfig b/drivers/char/Kconfig index 72866a004f07..466ebd84ad17 100644 --- a/drivers/char/Kconfig +++ b/drivers/char/Kconfig @@ -348,7 +348,7 @@ config XILINX_HWICAP config R3964 tristate "Siemens R3964 line discipline" - depends on TTY + depends on TTY && BROKEN ---help--- This driver allows synchronous communication with devices using the Siemens R3964 packet protocol. Unless you are dealing with special -- cgit v1.2.3 From ede885ecb2cdf8a8dd5367702e3d964ec846a2d5 Mon Sep 17 00:00:00 2001 From: David Rientjes Date: Tue, 19 Mar 2019 15:19:56 -0700 Subject: kvm: svm: fix potential get_num_contig_pages overflow get_num_contig_pages() could potentially overflow int so make its type consistent with its usage. Reported-by: Cfir Cohen Cc: stable@vger.kernel.org Signed-off-by: David Rientjes Signed-off-by: Paolo Bonzini --- arch/x86/kvm/svm.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 426039285fd1..947c35a1e545 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -6422,11 +6422,11 @@ e_free: return ret; } -static int get_num_contig_pages(int idx, struct page **inpages, - unsigned long npages) +static unsigned long get_num_contig_pages(unsigned long idx, + struct page **inpages, unsigned long npages) { unsigned long paddr, next_paddr; - int i = idx + 1, pages = 1; + unsigned long i = idx + 1, pages = 1; /* find the number of contiguous pages starting from idx */ paddr = __sme_page_pa(inpages[idx]); @@ -6445,12 +6445,12 @@ static int get_num_contig_pages(int idx, struct page **inpages, static int sev_launch_update_data(struct kvm *kvm, struct kvm_sev_cmd *argp) { - unsigned long vaddr, vaddr_end, next_vaddr, npages, size; + unsigned long vaddr, vaddr_end, next_vaddr, npages, pages, size, i; struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info; struct kvm_sev_launch_update_data params; struct sev_data_launch_update_data *data; struct page **inpages; - int i, ret, pages; + int ret; if (!sev_guest(kvm)) return -ENOTTY; -- cgit v1.2.3 From b86bc2858b389255cd44555ce4b1e427b2b770c0 Mon Sep 17 00:00:00 2001 From: David Rientjes Date: Mon, 25 Mar 2019 11:47:31 -0700 Subject: KVM: SVM: prevent DBG_DECRYPT and DBG_ENCRYPT overflow This ensures that the address and length provided to DBG_DECRYPT and DBG_ENCRYPT do not cause an overflow. At the same time, pass the actual number of pages pinned in memory to sev_unpin_memory() as a cleanup. Reported-by: Cfir Cohen Signed-off-by: David Rientjes Signed-off-by: Paolo Bonzini --- arch/x86/kvm/svm.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 947c35a1e545..e0a791c3d4fc 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -6799,7 +6799,8 @@ static int sev_dbg_crypt(struct kvm *kvm, struct kvm_sev_cmd *argp, bool dec) struct page **src_p, **dst_p; struct kvm_sev_dbg debug; unsigned long n; - int ret, size; + unsigned int size; + int ret; if (!sev_guest(kvm)) return -ENOTTY; @@ -6807,6 +6808,11 @@ static int sev_dbg_crypt(struct kvm *kvm, struct kvm_sev_cmd *argp, bool dec) if (copy_from_user(&debug, (void __user *)(uintptr_t)argp->data, sizeof(debug))) return -EFAULT; + if (!debug.len || debug.src_uaddr + debug.len < debug.src_uaddr) + return -EINVAL; + if (!debug.dst_uaddr) + return -EINVAL; + vaddr = debug.src_uaddr; size = debug.len; vaddr_end = vaddr + size; @@ -6857,8 +6863,8 @@ static int sev_dbg_crypt(struct kvm *kvm, struct kvm_sev_cmd *argp, bool dec) dst_vaddr, len, &argp->error); - sev_unpin_memory(kvm, src_p, 1); - sev_unpin_memory(kvm, dst_p, 1); + sev_unpin_memory(kvm, src_p, n); + sev_unpin_memory(kvm, dst_p, n); if (ret) goto err; -- cgit v1.2.3 From acff78477b9b4f26ecdf65733a4ed77fe837e9dc Mon Sep 17 00:00:00 2001 From: Marc Orr Date: Mon, 1 Apr 2019 23:55:59 -0700 Subject: KVM: x86: nVMX: close leak of L0's x2APIC MSRs (CVE-2019-3887) The nested_vmx_prepare_msr_bitmap() function doesn't directly guard the x2APIC MSR intercepts with the "virtualize x2APIC mode" MSR. As a result, we discovered the potential for a buggy or malicious L1 to get access to L0's x2APIC MSRs, via an L2, as follows. 1. L1 executes WRMSR(IA32_SPEC_CTRL, 1). This causes the spec_ctrl variable, in nested_vmx_prepare_msr_bitmap() to become true. 2. L1 disables "virtualize x2APIC mode" in VMCS12. 3. L1 enables "APIC-register virtualization" in VMCS12. Now, KVM will set VMCS02's x2APIC MSR intercepts from VMCS12, and then set "virtualize x2APIC mode" to 0 in VMCS02. Oops. This patch closes the leak by explicitly guarding VMCS02's x2APIC MSR intercepts with VMCS12's "virtualize x2APIC mode" control. The scenario outlined above and fix prescribed here, were verified with a related patch in kvm-unit-tests titled "Add leak scenario to virt_x2apic_mode_test". Note, it looks like this issue may have been introduced inadvertently during a merge---see 15303ba5d1cd. Signed-off-by: Marc Orr Reviewed-by: Jim Mattson Signed-off-by: Paolo Bonzini --- arch/x86/kvm/vmx/nested.c | 72 +++++++++++++++++++++++++++++------------------ 1 file changed, 44 insertions(+), 28 deletions(-) diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c index 153e539c29c9..897d70e3d291 100644 --- a/arch/x86/kvm/vmx/nested.c +++ b/arch/x86/kvm/vmx/nested.c @@ -500,6 +500,17 @@ static void nested_vmx_disable_intercept_for_msr(unsigned long *msr_bitmap_l1, } } +static inline void enable_x2apic_msr_intercepts(unsigned long *msr_bitmap) { + int msr; + + for (msr = 0x800; msr <= 0x8ff; msr += BITS_PER_LONG) { + unsigned word = msr / BITS_PER_LONG; + + msr_bitmap[word] = ~0; + msr_bitmap[word + (0x800 / sizeof(long))] = ~0; + } +} + /* * Merge L0's and L1's MSR bitmap, return false to indicate that * we do not use the hardware. @@ -541,39 +552,44 @@ static inline bool nested_vmx_prepare_msr_bitmap(struct kvm_vcpu *vcpu, return false; msr_bitmap_l1 = (unsigned long *)kmap(page); - if (nested_cpu_has_apic_reg_virt(vmcs12)) { - /* - * L0 need not intercept reads for MSRs between 0x800 and 0x8ff, it - * just lets the processor take the value from the virtual-APIC page; - * take those 256 bits directly from the L1 bitmap. - */ - for (msr = 0x800; msr <= 0x8ff; msr += BITS_PER_LONG) { - unsigned word = msr / BITS_PER_LONG; - msr_bitmap_l0[word] = msr_bitmap_l1[word]; - msr_bitmap_l0[word + (0x800 / sizeof(long))] = ~0; - } - } else { - for (msr = 0x800; msr <= 0x8ff; msr += BITS_PER_LONG) { - unsigned word = msr / BITS_PER_LONG; - msr_bitmap_l0[word] = ~0; - msr_bitmap_l0[word + (0x800 / sizeof(long))] = ~0; - } - } - nested_vmx_disable_intercept_for_msr( - msr_bitmap_l1, msr_bitmap_l0, - X2APIC_MSR(APIC_TASKPRI), - MSR_TYPE_W); + /* + * To keep the control flow simple, pay eight 8-byte writes (sixteen + * 4-byte writes on 32-bit systems) up front to enable intercepts for + * the x2APIC MSR range and selectively disable them below. + */ + enable_x2apic_msr_intercepts(msr_bitmap_l0); + + if (nested_cpu_has_virt_x2apic_mode(vmcs12)) { + if (nested_cpu_has_apic_reg_virt(vmcs12)) { + /* + * L0 need not intercept reads for MSRs between 0x800 + * and 0x8ff, it just lets the processor take the value + * from the virtual-APIC page; take those 256 bits + * directly from the L1 bitmap. + */ + for (msr = 0x800; msr <= 0x8ff; msr += BITS_PER_LONG) { + unsigned word = msr / BITS_PER_LONG; + + msr_bitmap_l0[word] = msr_bitmap_l1[word]; + } + } - if (nested_cpu_has_vid(vmcs12)) { - nested_vmx_disable_intercept_for_msr( - msr_bitmap_l1, msr_bitmap_l0, - X2APIC_MSR(APIC_EOI), - MSR_TYPE_W); nested_vmx_disable_intercept_for_msr( msr_bitmap_l1, msr_bitmap_l0, - X2APIC_MSR(APIC_SELF_IPI), + X2APIC_MSR(APIC_TASKPRI), MSR_TYPE_W); + + if (nested_cpu_has_vid(vmcs12)) { + nested_vmx_disable_intercept_for_msr( + msr_bitmap_l1, msr_bitmap_l0, + X2APIC_MSR(APIC_EOI), + MSR_TYPE_W); + nested_vmx_disable_intercept_for_msr( + msr_bitmap_l1, msr_bitmap_l0, + X2APIC_MSR(APIC_SELF_IPI), + MSR_TYPE_W); + } } if (spec_ctrl) -- cgit v1.2.3 From c73f4c998e1fd4249b9edfa39e23f4fda2b9b041 Mon Sep 17 00:00:00 2001 From: Marc Orr Date: Mon, 1 Apr 2019 23:56:00 -0700 Subject: KVM: x86: nVMX: fix x2APIC VTPR read intercept Referring to the "VIRTUALIZING MSR-BASED APIC ACCESSES" chapter of the SDM, when "virtualize x2APIC mode" is 1 and "APIC-register virtualization" is 0, a RDMSR of 808H should return the VTPR from the virtual APIC page. However, for nested, KVM currently fails to disable the read intercept for this MSR. This means that a RDMSR exit takes precedence over "virtualize x2APIC mode", and KVM passes through L1's TPR to L2, instead of sourcing the value from L2's virtual APIC page. This patch fixes the issue by disabling the read intercept, in VMCS02, for the VTPR when "APIC-register virtualization" is 0. The issue described above and fix prescribed here, were verified with a related patch in kvm-unit-tests titled "Test VMX's virtualize x2APIC mode w/ nested". Signed-off-by: Marc Orr Reviewed-by: Jim Mattson Fixes: c992384bde84f ("KVM: vmx: speed up MSR bitmap merge") Signed-off-by: Paolo Bonzini --- arch/x86/kvm/vmx/nested.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c index 897d70e3d291..7ec9bb1dd723 100644 --- a/arch/x86/kvm/vmx/nested.c +++ b/arch/x86/kvm/vmx/nested.c @@ -578,7 +578,7 @@ static inline bool nested_vmx_prepare_msr_bitmap(struct kvm_vcpu *vcpu, nested_vmx_disable_intercept_for_msr( msr_bitmap_l1, msr_bitmap_l0, X2APIC_MSR(APIC_TASKPRI), - MSR_TYPE_W); + MSR_TYPE_R | MSR_TYPE_W); if (nested_cpu_has_vid(vmcs12)) { nested_vmx_disable_intercept_for_msr( -- cgit v1.2.3 From 9cde402a59770a0669d895399c13407f63d7d209 Mon Sep 17 00:00:00 2001 From: Andre Przywara Date: Fri, 5 Apr 2019 16:20:47 +0100 Subject: PCI: Add function 1 DMA alias quirk for Marvell 9170 SATA controller There is a Marvell 88SE9170 PCIe SATA controller I found on a board here. Some quick testing with the ARM SMMU enabled reveals that it suffers from the same requester ID mixup problems as the other Marvell chips listed already. Add the PCI vendor/device ID to the list of chips which need the workaround. Signed-off-by: Andre Przywara Signed-off-by: Bjorn Helgaas CC: stable@vger.kernel.org --- drivers/pci/quirks.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c index a59ad09ce911..a077f67fe1da 100644 --- a/drivers/pci/quirks.c +++ b/drivers/pci/quirks.c @@ -3877,6 +3877,8 @@ DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_MARVELL_EXT, 0x9128, /* https://bugzilla.kernel.org/show_bug.cgi?id=42679#c14 */ DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_MARVELL_EXT, 0x9130, quirk_dma_func1_alias); +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_MARVELL_EXT, 0x9170, + quirk_dma_func1_alias); /* https://bugzilla.kernel.org/show_bug.cgi?id=42679#c47 + c57 */ DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_MARVELL_EXT, 0x9172, quirk_dma_func1_alias); -- cgit v1.2.3 From 4ed319c6ac08e9a28fca7ac188181ac122f4de84 Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Fri, 5 Apr 2019 15:26:39 -0400 Subject: dm integrity: fix deadlock with overlapping I/O dm-integrity will deadlock if overlapping I/O is issued to it, the bug was introduced by commit 724376a04d1a ("dm integrity: implement fair range locks"). Users rarely use overlapping I/O so this bug went undetected until now. Fix this bug by correcting, likely cut-n-paste, typos in ranges_overlap() and also remove a flawed ranges_overlap() check in remove_range_unlocked(). This condition could leave unprocessed bios hanging on wait_list forever. Cc: stable@vger.kernel.org # v4.19+ Fixes: 724376a04d1a ("dm integrity: implement fair range locks") Signed-off-by: Mikulas Patocka Signed-off-by: Mike Snitzer --- drivers/md/dm-integrity.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/md/dm-integrity.c b/drivers/md/dm-integrity.c index 94b865c5ce71..7c678f50aaa3 100644 --- a/drivers/md/dm-integrity.c +++ b/drivers/md/dm-integrity.c @@ -913,7 +913,7 @@ static void copy_from_journal(struct dm_integrity_c *ic, unsigned section, unsig static bool ranges_overlap(struct dm_integrity_range *range1, struct dm_integrity_range *range2) { return range1->logical_sector < range2->logical_sector + range2->n_sectors && - range2->logical_sector + range2->n_sectors > range2->logical_sector; + range1->logical_sector + range1->n_sectors > range2->logical_sector; } static bool add_new_range(struct dm_integrity_c *ic, struct dm_integrity_range *new_range, bool check_waiting) @@ -959,8 +959,6 @@ static void remove_range_unlocked(struct dm_integrity_c *ic, struct dm_integrity struct dm_integrity_range *last_range = list_first_entry(&ic->wait_list, struct dm_integrity_range, wait_entry); struct task_struct *last_range_task; - if (!ranges_overlap(range, last_range)) - break; last_range_task = last_range->task; list_del(&last_range->wait_entry); if (!add_new_range(ic, last_range, false)) { -- cgit v1.2.3 From d58431eacb226222430940134d97bfd72f292fcd Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Fri, 5 Apr 2019 11:34:40 +1100 Subject: sunrpc: don't mark uninitialised items as VALID. A recent commit added a call to cache_fresh_locked() when an expired item was found. The call sets the CACHE_VALID flag, so it is important that the item actually is valid. There are two ways it could be valid: 1/ If ->update has been called to fill in relevant content 2/ if CACHE_NEGATIVE is set, to say that content doesn't exist. An expired item that is waiting for an update will be neither. Setting CACHE_VALID will mean that a subsequent call to cache_put() will be likely to dereference uninitialised pointers. So we must make sure the item is valid, and we already have code to do that in try_to_negate_entry(). This takes the hash lock and so cannot be used directly, so take out the two lines that we need and use them. Now cache_fresh_locked() is certain to be called only on a valid item. Cc: stable@kernel.org # 2.6.35 Fixes: 4ecd55ea0742 ("sunrpc: fix cache_head leak due to queued request") Signed-off-by: NeilBrown Signed-off-by: J. Bruce Fields --- net/sunrpc/cache.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c index 12bb23b8e0c5..261131dfa1f1 100644 --- a/net/sunrpc/cache.c +++ b/net/sunrpc/cache.c @@ -54,6 +54,7 @@ static void cache_init(struct cache_head *h, struct cache_detail *detail) h->last_refresh = now; } +static inline int cache_is_valid(struct cache_head *h); static void cache_fresh_locked(struct cache_head *head, time_t expiry, struct cache_detail *detail); static void cache_fresh_unlocked(struct cache_head *head, @@ -105,6 +106,8 @@ static struct cache_head *sunrpc_cache_add_entry(struct cache_detail *detail, if (cache_is_expired(detail, tmp)) { hlist_del_init_rcu(&tmp->cache_list); detail->entries --; + if (cache_is_valid(tmp) == -EAGAIN) + set_bit(CACHE_NEGATIVE, &tmp->flags); cache_fresh_locked(tmp, 0, detail); freeme = tmp; break; -- cgit v1.2.3 From 3c86794ac0e6582eea7733619d58ea150198502f Mon Sep 17 00:00:00 2001 From: Murphy Zhou Date: Thu, 4 Apr 2019 14:57:11 +0800 Subject: nfsd/nfsd3_proc_readdir: fix buffer count and page pointers After this commit f875a79 nfsd: allow nfsv3 readdir request to be larger. nfsv3 readdir request size can be larger than PAGE_SIZE. So if the directory been read is large enough, we can use multiple pages in rq_respages. Update buffer count and page pointers like we do in readdirplus to make this happen. Now listing a directory within 3000 files will panic because we are counting in a wrong way and would write on random page. Fixes: f875a79 "nfsd: allow nfsv3 readdir request to be larger" Signed-off-by: Murphy Zhou Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs3proc.c | 17 +++++++++++++++-- fs/nfsd/nfs3xdr.c | 11 +++++++++-- 2 files changed, 24 insertions(+), 4 deletions(-) diff --git a/fs/nfsd/nfs3proc.c b/fs/nfsd/nfs3proc.c index 8f933e84cec1..9bc32af4e2da 100644 --- a/fs/nfsd/nfs3proc.c +++ b/fs/nfsd/nfs3proc.c @@ -442,7 +442,9 @@ nfsd3_proc_readdir(struct svc_rqst *rqstp) struct nfsd3_readdirargs *argp = rqstp->rq_argp; struct nfsd3_readdirres *resp = rqstp->rq_resp; __be32 nfserr; - int count; + int count = 0; + struct page **p; + caddr_t page_addr = NULL; dprintk("nfsd: READDIR(3) %s %d bytes at %d\n", SVCFH_fmt(&argp->fh), @@ -462,7 +464,18 @@ nfsd3_proc_readdir(struct svc_rqst *rqstp) nfserr = nfsd_readdir(rqstp, &resp->fh, (loff_t*) &argp->cookie, &resp->common, nfs3svc_encode_entry); memcpy(resp->verf, argp->verf, 8); - resp->count = resp->buffer - argp->buffer; + count = 0; + for (p = rqstp->rq_respages + 1; p < rqstp->rq_next_page; p++) { + page_addr = page_address(*p); + + if (((caddr_t)resp->buffer >= page_addr) && + ((caddr_t)resp->buffer < page_addr + PAGE_SIZE)) { + count += (caddr_t)resp->buffer - page_addr; + break; + } + count += PAGE_SIZE; + } + resp->count = count >> 2; if (resp->offset) { loff_t offset = argp->cookie; diff --git a/fs/nfsd/nfs3xdr.c b/fs/nfsd/nfs3xdr.c index 93fea246f676..8d789124ed3c 100644 --- a/fs/nfsd/nfs3xdr.c +++ b/fs/nfsd/nfs3xdr.c @@ -573,6 +573,7 @@ int nfs3svc_decode_readdirargs(struct svc_rqst *rqstp, __be32 *p) { struct nfsd3_readdirargs *args = rqstp->rq_argp; + int len; u32 max_blocksize = svc_max_payload(rqstp); p = decode_fh(p, &args->fh); @@ -582,8 +583,14 @@ nfs3svc_decode_readdirargs(struct svc_rqst *rqstp, __be32 *p) args->verf = p; p += 2; args->dircount = ~0; args->count = ntohl(*p++); - args->count = min_t(u32, args->count, max_blocksize); - args->buffer = page_address(*(rqstp->rq_next_page++)); + len = args->count = min_t(u32, args->count, max_blocksize); + + while (len > 0) { + struct page *p = *(rqstp->rq_next_page++); + if (!args->buffer) + args->buffer = page_address(p); + len -= PAGE_SIZE; + } return xdr_argsize_check(rqstp, p); } -- cgit v1.2.3 From 5f074f3e192f10c9fade898b9b3b8812e3d83342 Mon Sep 17 00:00:00 2001 From: Nick Desaulniers Date: Fri, 5 Apr 2019 18:38:45 -0700 Subject: lib/string.c: implement a basic bcmp A recent optimization in Clang (r355672) lowers comparisons of the return value of memcmp against zero to comparisons of the return value of bcmp against zero. This helps some platforms that implement bcmp more efficiently than memcmp. glibc simply aliases bcmp to memcmp, but an optimized implementation is in the works. This results in linkage failures for all targets with Clang due to the undefined symbol. For now, just implement bcmp as a tailcail to memcmp to unbreak the build. This routine can be further optimized in the future. Other ideas discussed: * A weak alias was discussed, but breaks for architectures that define their own implementations of memcmp since aliases to declarations are not permitted (only definitions). Arch-specific memcmp implementations typically declare memcmp in C headers, but implement them in assembly. * -ffreestanding also is used sporadically throughout the kernel. * -fno-builtin-bcmp doesn't work when doing LTO. Link: https://bugs.llvm.org/show_bug.cgi?id=41035 Link: https://code.woboq.org/userspace/glibc/string/memcmp.c.html#bcmp Link: https://github.com/llvm/llvm-project/commit/8e16d73346f8091461319a7dfc4ddd18eedcff13 Link: https://github.com/ClangBuiltLinux/linux/issues/416 Link: http://lkml.kernel.org/r/20190313211335.165605-1-ndesaulniers@google.com Signed-off-by: Nick Desaulniers Reported-by: Nathan Chancellor Reported-by: Adhemerval Zanella Suggested-by: Arnd Bergmann Suggested-by: James Y Knight Suggested-by: Masahiro Yamada Suggested-by: Nathan Chancellor Suggested-by: Rasmus Villemoes Acked-by: Steven Rostedt (VMware) Reviewed-by: Nathan Chancellor Tested-by: Nathan Chancellor Reviewed-by: Masahiro Yamada Reviewed-by: Andy Shevchenko Cc: David Laight Cc: Rasmus Villemoes Cc: Namhyung Kim Cc: Greg Kroah-Hartman Cc: Alexander Shishkin Cc: Dan Williams Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/string.h | 3 +++ lib/string.c | 20 ++++++++++++++++++++ 2 files changed, 23 insertions(+) diff --git a/include/linux/string.h b/include/linux/string.h index 7927b875f80c..6ab0a6fa512e 100644 --- a/include/linux/string.h +++ b/include/linux/string.h @@ -150,6 +150,9 @@ extern void * memscan(void *,int,__kernel_size_t); #ifndef __HAVE_ARCH_MEMCMP extern int memcmp(const void *,const void *,__kernel_size_t); #endif +#ifndef __HAVE_ARCH_BCMP +extern int bcmp(const void *,const void *,__kernel_size_t); +#endif #ifndef __HAVE_ARCH_MEMCHR extern void * memchr(const void *,int,__kernel_size_t); #endif diff --git a/lib/string.c b/lib/string.c index 38e4ca08e757..3ab861c1a857 100644 --- a/lib/string.c +++ b/lib/string.c @@ -866,6 +866,26 @@ __visible int memcmp(const void *cs, const void *ct, size_t count) EXPORT_SYMBOL(memcmp); #endif +#ifndef __HAVE_ARCH_BCMP +/** + * bcmp - returns 0 if and only if the buffers have identical contents. + * @a: pointer to first buffer. + * @b: pointer to second buffer. + * @len: size of buffers. + * + * The sign or magnitude of a non-zero return value has no particular + * meaning, and architectures may implement their own more efficient bcmp(). So + * while this particular implementation is a simple (tail) call to memcmp, do + * not rely on anything but whether the return value is zero or non-zero. + */ +#undef bcmp +int bcmp(const void *a, const void *b, size_t len) +{ + return memcmp(a, b, len); +} +EXPORT_SYMBOL(bcmp); +#endif + #ifndef __HAVE_ARCH_MEMSCAN /** * memscan - Find a character in an area of memory. -- cgit v1.2.3 From 298a32b132087550d3fa80641ca58323c5dfd4d9 Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Fri, 5 Apr 2019 18:38:49 -0700 Subject: kmemleak: powerpc: skip scanning holes in the .bss section Commit 2d4f567103ff ("KVM: PPC: Introduce kvm_tmp framework") adds kvm_tmp[] into the .bss section and then free the rest of unused spaces back to the page allocator. kernel_init kvm_guest_init kvm_free_tmp free_reserved_area free_unref_page free_unref_page_prepare With DEBUG_PAGEALLOC=y, it will unmap those pages from kernel. As the result, kmemleak scan will trigger a panic when it scans the .bss section with unmapped pages. This patch creates dedicated kmemleak objects for the .data, .bss and potentially .data..ro_after_init sections to allow partial freeing via the kmemleak_free_part() in the powerpc kvm_free_tmp() function. Link: http://lkml.kernel.org/r/20190321171917.62049-1-catalin.marinas@arm.com Signed-off-by: Catalin Marinas Reported-by: Qian Cai Acked-by: Michael Ellerman (powerpc) Tested-by: Qian Cai Cc: Paul Mackerras Cc: Benjamin Herrenschmidt Cc: Avi Kivity Cc: Paolo Bonzini Cc: Radim Krcmar Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/powerpc/kernel/kvm.c | 7 +++++++ mm/kmemleak.c | 16 +++++++++++----- 2 files changed, 18 insertions(+), 5 deletions(-) diff --git a/arch/powerpc/kernel/kvm.c b/arch/powerpc/kernel/kvm.c index 683b5b3805bd..cd381e2291df 100644 --- a/arch/powerpc/kernel/kvm.c +++ b/arch/powerpc/kernel/kvm.c @@ -22,6 +22,7 @@ #include #include #include +#include #include #include #include @@ -712,6 +713,12 @@ static void kvm_use_magic_page(void) static __init void kvm_free_tmp(void) { + /* + * Inform kmemleak about the hole in the .bss section since the + * corresponding pages will be unmapped with DEBUG_PAGEALLOC=y. + */ + kmemleak_free_part(&kvm_tmp[kvm_tmp_index], + ARRAY_SIZE(kvm_tmp) - kvm_tmp_index); free_reserved_area(&kvm_tmp[kvm_tmp_index], &kvm_tmp[ARRAY_SIZE(kvm_tmp)], -1, NULL); } diff --git a/mm/kmemleak.c b/mm/kmemleak.c index 707fa5579f66..6c318f5ac234 100644 --- a/mm/kmemleak.c +++ b/mm/kmemleak.c @@ -1529,11 +1529,6 @@ static void kmemleak_scan(void) } rcu_read_unlock(); - /* data/bss scanning */ - scan_large_block(_sdata, _edata); - scan_large_block(__bss_start, __bss_stop); - scan_large_block(__start_ro_after_init, __end_ro_after_init); - #ifdef CONFIG_SMP /* per-cpu sections scanning */ for_each_possible_cpu(i) @@ -2071,6 +2066,17 @@ void __init kmemleak_init(void) } local_irq_restore(flags); + /* register the data/bss sections */ + create_object((unsigned long)_sdata, _edata - _sdata, + KMEMLEAK_GREY, GFP_ATOMIC); + create_object((unsigned long)__bss_start, __bss_stop - __bss_start, + KMEMLEAK_GREY, GFP_ATOMIC); + /* only register .data..ro_after_init if not within .data */ + if (__start_ro_after_init < _sdata || __end_ro_after_init > _edata) + create_object((unsigned long)__start_ro_after_init, + __end_ro_after_init - __start_ro_after_init, + KMEMLEAK_GREY, GFP_ATOMIC); + /* * This is the point where tracking allocations is safe. Automatic * scanning is started during the late initcall. Add the early logged -- cgit v1.2.3 From 6147e136ff5071609b54f18982dea87706288e21 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Fri, 5 Apr 2019 18:38:53 -0700 Subject: include/linux/bitrev.h: fix constant bitrev clang points out with hundreds of warnings that the bitrev macros have a problem with constant input: drivers/hwmon/sht15.c:187:11: error: variable '__x' is uninitialized when used within its own initialization [-Werror,-Wuninitialized] u8 crc = bitrev8(data->val_status & 0x0F); ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ include/linux/bitrev.h:102:21: note: expanded from macro 'bitrev8' __constant_bitrev8(__x) : \ ~~~~~~~~~~~~~~~~~~~^~~~ include/linux/bitrev.h:67:11: note: expanded from macro '__constant_bitrev8' u8 __x = x; \ ~~~ ^ Both the bitrev and the __constant_bitrev macros use an internal variable named __x, which goes horribly wrong when passing one to the other. The obvious fix is to rename one of the variables, so this adds an extra '_'. It seems we got away with this because - there are only a few drivers using bitrev macros - usually there are no constant arguments to those - when they are constant, they tend to be either 0 or (unsigned)-1 (drivers/isdn/i4l/isdnhdlc.o, drivers/iio/amplifiers/ad8366.c) and give the correct result by pure chance. In fact, the only driver that I could find that gets different results with this is drivers/net/wan/slic_ds26522.c, which in turn is a driver for fairly rare hardware (adding the maintainer to Cc for testing). Link: http://lkml.kernel.org/r/20190322140503.123580-1-arnd@arndb.de Fixes: 556d2f055bf6 ("ARM: 8187/1: add CONFIG_HAVE_ARCH_BITREVERSE to support rbit instruction") Signed-off-by: Arnd Bergmann Reviewed-by: Nick Desaulniers Cc: Zhao Qiang Cc: Yalin Wang Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/bitrev.h | 46 +++++++++++++++++++++++----------------------- 1 file changed, 23 insertions(+), 23 deletions(-) diff --git a/include/linux/bitrev.h b/include/linux/bitrev.h index 50fb0dee23e8..d35b8ec1c485 100644 --- a/include/linux/bitrev.h +++ b/include/linux/bitrev.h @@ -34,41 +34,41 @@ static inline u32 __bitrev32(u32 x) #define __constant_bitrev32(x) \ ({ \ - u32 __x = x; \ - __x = (__x >> 16) | (__x << 16); \ - __x = ((__x & (u32)0xFF00FF00UL) >> 8) | ((__x & (u32)0x00FF00FFUL) << 8); \ - __x = ((__x & (u32)0xF0F0F0F0UL) >> 4) | ((__x & (u32)0x0F0F0F0FUL) << 4); \ - __x = ((__x & (u32)0xCCCCCCCCUL) >> 2) | ((__x & (u32)0x33333333UL) << 2); \ - __x = ((__x & (u32)0xAAAAAAAAUL) >> 1) | ((__x & (u32)0x55555555UL) << 1); \ - __x; \ + u32 ___x = x; \ + ___x = (___x >> 16) | (___x << 16); \ + ___x = ((___x & (u32)0xFF00FF00UL) >> 8) | ((___x & (u32)0x00FF00FFUL) << 8); \ + ___x = ((___x & (u32)0xF0F0F0F0UL) >> 4) | ((___x & (u32)0x0F0F0F0FUL) << 4); \ + ___x = ((___x & (u32)0xCCCCCCCCUL) >> 2) | ((___x & (u32)0x33333333UL) << 2); \ + ___x = ((___x & (u32)0xAAAAAAAAUL) >> 1) | ((___x & (u32)0x55555555UL) << 1); \ + ___x; \ }) #define __constant_bitrev16(x) \ ({ \ - u16 __x = x; \ - __x = (__x >> 8) | (__x << 8); \ - __x = ((__x & (u16)0xF0F0U) >> 4) | ((__x & (u16)0x0F0FU) << 4); \ - __x = ((__x & (u16)0xCCCCU) >> 2) | ((__x & (u16)0x3333U) << 2); \ - __x = ((__x & (u16)0xAAAAU) >> 1) | ((__x & (u16)0x5555U) << 1); \ - __x; \ + u16 ___x = x; \ + ___x = (___x >> 8) | (___x << 8); \ + ___x = ((___x & (u16)0xF0F0U) >> 4) | ((___x & (u16)0x0F0FU) << 4); \ + ___x = ((___x & (u16)0xCCCCU) >> 2) | ((___x & (u16)0x3333U) << 2); \ + ___x = ((___x & (u16)0xAAAAU) >> 1) | ((___x & (u16)0x5555U) << 1); \ + ___x; \ }) #define __constant_bitrev8x4(x) \ ({ \ - u32 __x = x; \ - __x = ((__x & (u32)0xF0F0F0F0UL) >> 4) | ((__x & (u32)0x0F0F0F0FUL) << 4); \ - __x = ((__x & (u32)0xCCCCCCCCUL) >> 2) | ((__x & (u32)0x33333333UL) << 2); \ - __x = ((__x & (u32)0xAAAAAAAAUL) >> 1) | ((__x & (u32)0x55555555UL) << 1); \ - __x; \ + u32 ___x = x; \ + ___x = ((___x & (u32)0xF0F0F0F0UL) >> 4) | ((___x & (u32)0x0F0F0F0FUL) << 4); \ + ___x = ((___x & (u32)0xCCCCCCCCUL) >> 2) | ((___x & (u32)0x33333333UL) << 2); \ + ___x = ((___x & (u32)0xAAAAAAAAUL) >> 1) | ((___x & (u32)0x55555555UL) << 1); \ + ___x; \ }) #define __constant_bitrev8(x) \ ({ \ - u8 __x = x; \ - __x = (__x >> 4) | (__x << 4); \ - __x = ((__x & (u8)0xCCU) >> 2) | ((__x & (u8)0x33U) << 2); \ - __x = ((__x & (u8)0xAAU) >> 1) | ((__x & (u8)0x55U) << 1); \ - __x; \ + u8 ___x = x; \ + ___x = (___x >> 4) | (___x << 4); \ + ___x = ((___x & (u8)0xCCU) >> 2) | ((___x & (u8)0x33U) << 2); \ + ___x = ((___x & (u8)0xAAU) >> 1) | ((___x & (u8)0x55U) << 1); \ + ___x; \ }) #define bitrev32(x) \ -- cgit v1.2.3 From b11ed18efa8f3dc58b259b812588317b765b1cfc Mon Sep 17 00:00:00 2001 From: Dave Rodgman Date: Fri, 5 Apr 2019 18:38:58 -0700 Subject: lib/lzo: fix bugs for very short or empty input For very short input data (0 - 1 bytes), lzo-rle was not behaving correctly. Fix this behaviour and update documentation accordingly. For zero-length input, lzo v0 outputs an end-of-stream marker only, which was misinterpreted by lzo-rle as a bitstream version number. Ensure bitstream versions > 0 require a minimum stream length of 5. Also fixes a bug in handling the tail for very short inputs when a bitstream version is present. Link: http://lkml.kernel.org/r/20190326165857.34613-1-dave.rodgman@arm.com Signed-off-by: Dave Rodgman Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/lzo.txt | 8 +++++--- lib/lzo/lzo1x_compress.c | 9 ++++++--- lib/lzo/lzo1x_decompress_safe.c | 4 +--- 3 files changed, 12 insertions(+), 9 deletions(-) diff --git a/Documentation/lzo.txt b/Documentation/lzo.txt index f79934225d8d..ca983328976b 100644 --- a/Documentation/lzo.txt +++ b/Documentation/lzo.txt @@ -102,9 +102,11 @@ Byte sequences dictionary which is empty, and that it will always be invalid at this place. - 17 : bitstream version. If the first byte is 17, the next byte - gives the bitstream version (version 1 only). If the first byte - is not 17, the bitstream version is 0. + 17 : bitstream version. If the first byte is 17, and compressed + stream length is at least 5 bytes (length of shortest possible + versioned bitstream), the next byte gives the bitstream version + (version 1 only). + Otherwise, the bitstream version is 0. 18..21 : copy 0..3 literals state = (byte - 17) = 0..3 [ copy literals ] diff --git a/lib/lzo/lzo1x_compress.c b/lib/lzo/lzo1x_compress.c index 4525fb094844..a8ede77afe0d 100644 --- a/lib/lzo/lzo1x_compress.c +++ b/lib/lzo/lzo1x_compress.c @@ -291,13 +291,14 @@ int lzogeneric1x_1_compress(const unsigned char *in, size_t in_len, { const unsigned char *ip = in; unsigned char *op = out; + unsigned char *data_start; size_t l = in_len; size_t t = 0; signed char state_offset = -2; unsigned int m4_max_offset; - // LZO v0 will never write 17 as first byte, - // so this is used to version the bitstream + // LZO v0 will never write 17 as first byte (except for zero-length + // input), so this is used to version the bitstream if (bitstream_version > 0) { *op++ = 17; *op++ = bitstream_version; @@ -306,6 +307,8 @@ int lzogeneric1x_1_compress(const unsigned char *in, size_t in_len, m4_max_offset = M4_MAX_OFFSET_V0; } + data_start = op; + while (l > 20) { size_t ll = l <= (m4_max_offset + 1) ? l : (m4_max_offset + 1); uintptr_t ll_end = (uintptr_t) ip + ll; @@ -324,7 +327,7 @@ int lzogeneric1x_1_compress(const unsigned char *in, size_t in_len, if (t > 0) { const unsigned char *ii = in + in_len - t; - if (op == out && t <= 238) { + if (op == data_start && t <= 238) { *op++ = (17 + t); } else if (t <= 3) { op[state_offset] |= t; diff --git a/lib/lzo/lzo1x_decompress_safe.c b/lib/lzo/lzo1x_decompress_safe.c index 6d2600ea3b55..9e07e9ef1aad 100644 --- a/lib/lzo/lzo1x_decompress_safe.c +++ b/lib/lzo/lzo1x_decompress_safe.c @@ -54,11 +54,9 @@ int lzo1x_decompress_safe(const unsigned char *in, size_t in_len, if (unlikely(in_len < 3)) goto input_overrun; - if (likely(*ip == 17)) { + if (likely(in_len >= 5) && likely(*ip == 17)) { bitstream_version = ip[1]; ip += 2; - if (unlikely(in_len < 5)) - goto input_overrun; } else { bitstream_version = 0; } -- cgit v1.2.3 From fcae96ff96538f66e7acd5d4e0f2e7516ff8cbd0 Mon Sep 17 00:00:00 2001 From: Jann Horn Date: Fri, 5 Apr 2019 18:39:01 -0700 Subject: mm: fix vm_fault_t cast in VM_FAULT_GET_HINDEX() Symmetrically to VM_FAULT_SET_HINDEX(), we need a force-cast in VM_FAULT_GET_HINDEX() to tell sparse that this is intentional. Sparse complains about the current code when building a kernel with CONFIG_MEMORY_FAILURE: arch/x86/mm/fault.c:1058:53: warning: restricted vm_fault_t degrades to integer Link: http://lkml.kernel.org/r/20190327204117.35215-1-jannh@google.com Fixes: 3d3539018d2c ("mm: create the new vm_fault_t type") Signed-off-by: Jann Horn Reviewed-by: Andrew Morton Cc: Souptick Joarder Cc: Matthew Wilcox Cc: Vlastimil Babka Cc: "Kirill A. Shutemov" Cc: Rik van Riel Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mm_types.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 7eade9132f02..4ef4bbe78a1d 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -671,7 +671,7 @@ enum vm_fault_reason { /* Encode hstate index for a hwpoisoned large page */ #define VM_FAULT_SET_HINDEX(x) ((__force vm_fault_t)((x) << 16)) -#define VM_FAULT_GET_HINDEX(x) (((x) >> 16) & 0xf) +#define VM_FAULT_GET_HINDEX(x) (((__force unsigned int)(x) >> 16) & 0xf) #define VM_FAULT_ERROR (VM_FAULT_OOM | VM_FAULT_SIGBUS | \ VM_FAULT_SIGSEGV | VM_FAULT_HWPOISON | \ -- cgit v1.2.3 From 58b6e5e8f1addd44583d61b0a03c0f5519527e35 Mon Sep 17 00:00:00 2001 From: Mike Kravetz Date: Fri, 5 Apr 2019 18:39:06 -0700 Subject: hugetlbfs: fix memory leak for resv_map When mknod is used to create a block special file in hugetlbfs, it will allocate an inode and kmalloc a 'struct resv_map' via resv_map_alloc(). inode->i_mapping->private_data will point the newly allocated resv_map. However, when the device special file is opened bd_acquire() will set inode->i_mapping to bd_inode->i_mapping. Thus the pointer to the allocated resv_map is lost and the structure is leaked. Programs to reproduce: mount -t hugetlbfs nodev hugetlbfs mknod hugetlbfs/dev b 0 0 exec 30<> hugetlbfs/dev umount hugetlbfs/ resv_map structures are only needed for inodes which can have associated page allocations. To fix the leak, only allocate resv_map for those inodes which could possibly be associated with page allocations. Link: http://lkml.kernel.org/r/20190401213101.16476-1-mike.kravetz@oracle.com Signed-off-by: Mike Kravetz Reviewed-by: Andrew Morton Reported-by: Yufen Yu Suggested-by: Yufen Yu Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/hugetlbfs/inode.c | 20 ++++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c index ec32fece5e1e..9285dd4f4b1c 100644 --- a/fs/hugetlbfs/inode.c +++ b/fs/hugetlbfs/inode.c @@ -755,11 +755,17 @@ static struct inode *hugetlbfs_get_inode(struct super_block *sb, umode_t mode, dev_t dev) { struct inode *inode; - struct resv_map *resv_map; + struct resv_map *resv_map = NULL; - resv_map = resv_map_alloc(); - if (!resv_map) - return NULL; + /* + * Reserve maps are only needed for inodes that can have associated + * page allocations. + */ + if (S_ISREG(mode) || S_ISLNK(mode)) { + resv_map = resv_map_alloc(); + if (!resv_map) + return NULL; + } inode = new_inode(sb); if (inode) { @@ -794,8 +800,10 @@ static struct inode *hugetlbfs_get_inode(struct super_block *sb, break; } lockdep_annotate_inode_mutex_key(inode); - } else - kref_put(&resv_map->refs, resv_map_release); + } else { + if (resv_map) + kref_put(&resv_map->refs, resv_map_release); + } return inode; } -- cgit v1.2.3 From c6f3c5ee40c10bb65725047a220570f718507001 Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Fri, 5 Apr 2019 18:39:10 -0700 Subject: mm/huge_memory.c: fix modifying of page protection by insert_pfn_pmd() With some architectures like ppc64, set_pmd_at() cannot cope with a situation where there is already some (different) valid entry present. Use pmdp_set_access_flags() instead to modify the pfn which is built to deal with modifying existing PMD entries. This is similar to commit cae85cb8add3 ("mm/memory.c: fix modifying of page protection by insert_pfn()") We also do similar update w.r.t insert_pfn_pud eventhough ppc64 don't support pud pfn entries now. Without this patch we also see the below message in kernel log "BUG: non-zero pgtables_bytes on freeing mm:" Link: http://lkml.kernel.org/r/20190402115125.18803-1-aneesh.kumar@linux.ibm.com Signed-off-by: Aneesh Kumar K.V Reported-by: Chandan Rajendra Reviewed-by: Jan Kara Cc: Dan Williams Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/huge_memory.c | 36 ++++++++++++++++++++++++++++++++++++ 1 file changed, 36 insertions(+) diff --git a/mm/huge_memory.c b/mm/huge_memory.c index 404acdcd0455..165ea46bf149 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -755,6 +755,21 @@ static void insert_pfn_pmd(struct vm_area_struct *vma, unsigned long addr, spinlock_t *ptl; ptl = pmd_lock(mm, pmd); + if (!pmd_none(*pmd)) { + if (write) { + if (pmd_pfn(*pmd) != pfn_t_to_pfn(pfn)) { + WARN_ON_ONCE(!is_huge_zero_pmd(*pmd)); + goto out_unlock; + } + entry = pmd_mkyoung(*pmd); + entry = maybe_pmd_mkwrite(pmd_mkdirty(entry), vma); + if (pmdp_set_access_flags(vma, addr, pmd, entry, 1)) + update_mmu_cache_pmd(vma, addr, pmd); + } + + goto out_unlock; + } + entry = pmd_mkhuge(pfn_t_pmd(pfn, prot)); if (pfn_t_devmap(pfn)) entry = pmd_mkdevmap(entry); @@ -766,11 +781,16 @@ static void insert_pfn_pmd(struct vm_area_struct *vma, unsigned long addr, if (pgtable) { pgtable_trans_huge_deposit(mm, pmd, pgtable); mm_inc_nr_ptes(mm); + pgtable = NULL; } set_pmd_at(mm, addr, pmd, entry); update_mmu_cache_pmd(vma, addr, pmd); + +out_unlock: spin_unlock(ptl); + if (pgtable) + pte_free(mm, pgtable); } vm_fault_t vmf_insert_pfn_pmd(struct vm_area_struct *vma, unsigned long addr, @@ -821,6 +841,20 @@ static void insert_pfn_pud(struct vm_area_struct *vma, unsigned long addr, spinlock_t *ptl; ptl = pud_lock(mm, pud); + if (!pud_none(*pud)) { + if (write) { + if (pud_pfn(*pud) != pfn_t_to_pfn(pfn)) { + WARN_ON_ONCE(!is_huge_zero_pud(*pud)); + goto out_unlock; + } + entry = pud_mkyoung(*pud); + entry = maybe_pud_mkwrite(pud_mkdirty(entry), vma); + if (pudp_set_access_flags(vma, addr, pud, entry, 1)) + update_mmu_cache_pud(vma, addr, pud); + } + goto out_unlock; + } + entry = pud_mkhuge(pfn_t_pud(pfn, prot)); if (pfn_t_devmap(pfn)) entry = pud_mkdevmap(entry); @@ -830,6 +864,8 @@ static void insert_pfn_pud(struct vm_area_struct *vma, unsigned long addr, } set_pud_at(mm, addr, pud, entry); update_mmu_cache_pud(vma, addr, pud); + +out_unlock: spin_unlock(ptl); } -- cgit v1.2.3 From be87ab0afd680ac35486d16c0963c56d9be1d8a0 Mon Sep 17 00:00:00 2001 From: Waiman Long Date: Fri, 5 Apr 2019 18:39:14 -0700 Subject: psi: clarify the units used in pressure files The output of the PSI files show a bunch of numbers with no unit. The psi.txt documentation file also does not indicate what units are used. One can only find out by looking at the source code. The units are percentage for the averages and useconds for the total. Make the information easier to find by documenting the units in psi.txt. Link: http://lkml.kernel.org/r/20190402193810.3450-1-longman@redhat.com Signed-off-by: Waiman Long Acked-by: Johannes Weiner Cc: "Peter Zijlstra (Intel)" Cc: Tejun Heo Cc: Jonathan Corbet Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/accounting/psi.txt | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/Documentation/accounting/psi.txt b/Documentation/accounting/psi.txt index b8ca28b60215..7e71c9c1d8e9 100644 --- a/Documentation/accounting/psi.txt +++ b/Documentation/accounting/psi.txt @@ -56,12 +56,12 @@ situation from a state where some tasks are stalled but the CPU is still doing productive work. As such, time spent in this subset of the stall state is tracked separately and exported in the "full" averages. -The ratios are tracked as recent trends over ten, sixty, and three -hundred second windows, which gives insight into short term events as -well as medium and long term trends. The total absolute stall time is -tracked and exported as well, to allow detection of latency spikes -which wouldn't necessarily make a dent in the time averages, or to -average trends over custom time frames. +The ratios (in %) are tracked as recent trends over ten, sixty, and +three hundred second windows, which gives insight into short term events +as well as medium and long term trends. The total absolute stall time +(in us) is tracked and exported as well, to allow detection of latency +spikes which wouldn't necessarily make a dent in the time averages, +or to average trends over custom time frames. Cgroup2 interface ================= -- cgit v1.2.3 From 0b3d6e6f2dd0a7b697b1aa8c167265908940624b Mon Sep 17 00:00:00 2001 From: Greg Thelen Date: Fri, 5 Apr 2019 18:39:18 -0700 Subject: mm: writeback: use exact memcg dirty counts Since commit a983b5ebee57 ("mm: memcontrol: fix excessive complexity in memory.stat reporting") memcg dirty and writeback counters are managed as: 1) per-memcg per-cpu values in range of [-32..32] 2) per-memcg atomic counter When a per-cpu counter cannot fit in [-32..32] it's flushed to the atomic. Stat readers only check the atomic. Thus readers such as balance_dirty_pages() may see a nontrivial error margin: 32 pages per cpu. Assuming 100 cpus: 4k x86 page_size: 13 MiB error per memcg 64k ppc page_size: 200 MiB error per memcg Considering that dirty+writeback are used together for some decisions the errors double. This inaccuracy can lead to undeserved oom kills. One nasty case is when all per-cpu counters hold positive values offsetting an atomic negative value (i.e. per_cpu[*]=32, atomic=n_cpu*-32). balance_dirty_pages() only consults the atomic and does not consider throttling the next n_cpu*32 dirty pages. If the file_lru is in the 13..200 MiB range then there's absolutely no dirty throttling, which burdens vmscan with only dirty+writeback pages thus resorting to oom kill. It could be argued that tiny containers are not supported, but it's more subtle. It's the amount the space available for file lru that matters. If a container has memory.max-200MiB of non reclaimable memory, then it will also suffer such oom kills on a 100 cpu machine. The following test reliably ooms without this patch. This patch avoids oom kills. $ cat test mount -t cgroup2 none /dev/cgroup cd /dev/cgroup echo +io +memory > cgroup.subtree_control mkdir test cd test echo 10M > memory.max (echo $BASHPID > cgroup.procs && exec /memcg-writeback-stress /foo) (echo $BASHPID > cgroup.procs && exec dd if=/dev/zero of=/foo bs=2M count=100) $ cat memcg-writeback-stress.c /* * Dirty pages from all but one cpu. * Clean pages from the non dirtying cpu. * This is to stress per cpu counter imbalance. * On a 100 cpu machine: * - per memcg per cpu dirty count is 32 pages for each of 99 cpus * - per memcg atomic is -99*32 pages * - thus the complete dirty limit: sum of all counters 0 * - balance_dirty_pages() only sees atomic count -99*32 pages, which * it max()s to 0. * - So a workload can dirty -99*32 pages before balance_dirty_pages() * cares. */ #define _GNU_SOURCE #include #include #include #include #include #include #include #include #include static char *buf; static int bufSize; static void set_affinity(int cpu) { cpu_set_t affinity; CPU_ZERO(&affinity); CPU_SET(cpu, &affinity); if (sched_setaffinity(0, sizeof(affinity), &affinity)) err(1, "sched_setaffinity"); } static void dirty_on(int output_fd, int cpu) { int i, wrote; set_affinity(cpu); for (i = 0; i < 32; i++) { for (wrote = 0; wrote < bufSize; ) { int ret = write(output_fd, buf+wrote, bufSize-wrote); if (ret == -1) err(1, "write"); wrote += ret; } } } int main(int argc, char **argv) { int cpu, flush_cpu = 1, output_fd; const char *output; if (argc != 2) errx(1, "usage: output_file"); output = argv[1]; bufSize = getpagesize(); buf = malloc(getpagesize()); if (buf == NULL) errx(1, "malloc failed"); output_fd = open(output, O_CREAT|O_RDWR); if (output_fd == -1) err(1, "open(%s)", output); for (cpu = 0; cpu < get_nprocs(); cpu++) { if (cpu != flush_cpu) dirty_on(output_fd, cpu); } set_affinity(flush_cpu); if (fsync(output_fd)) err(1, "fsync(%s)", output); if (close(output_fd)) err(1, "close(%s)", output); free(buf); } Make balance_dirty_pages() and wb_over_bg_thresh() work harder to collect exact per memcg counters. This avoids the aforementioned oom kills. This does not affect the overhead of memory.stat, which still reads the single atomic counter. Why not use percpu_counter? memcg already handles cpus going offline, so no need for that overhead from percpu_counter. And the percpu_counter spinlocks are more heavyweight than is required. It probably also makes sense to use exact dirty and writeback counters in memcg oom reports. But that is saved for later. Link: http://lkml.kernel.org/r/20190329174609.164344-1-gthelen@google.com Signed-off-by: Greg Thelen Reviewed-by: Roman Gushchin Acked-by: Johannes Weiner Cc: Michal Hocko Cc: Vladimir Davydov Cc: Tejun Heo Cc: [4.16+] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/memcontrol.h | 5 ++++- mm/memcontrol.c | 20 ++++++++++++++++++-- 2 files changed, 22 insertions(+), 3 deletions(-) diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index 1f3d880b7ca1..dbb6118370c1 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -566,7 +566,10 @@ struct mem_cgroup *lock_page_memcg(struct page *page); void __unlock_page_memcg(struct mem_cgroup *memcg); void unlock_page_memcg(struct page *page); -/* idx can be of type enum memcg_stat_item or node_stat_item */ +/* + * idx can be of type enum memcg_stat_item or node_stat_item. + * Keep in sync with memcg_exact_page_state(). + */ static inline unsigned long memcg_page_state(struct mem_cgroup *memcg, int idx) { diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 532e0e2a4817..81a0d3914ec9 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -3882,6 +3882,22 @@ struct wb_domain *mem_cgroup_wb_domain(struct bdi_writeback *wb) return &memcg->cgwb_domain; } +/* + * idx can be of type enum memcg_stat_item or node_stat_item. + * Keep in sync with memcg_exact_page(). + */ +static unsigned long memcg_exact_page_state(struct mem_cgroup *memcg, int idx) +{ + long x = atomic_long_read(&memcg->stat[idx]); + int cpu; + + for_each_online_cpu(cpu) + x += per_cpu_ptr(memcg->stat_cpu, cpu)->count[idx]; + if (x < 0) + x = 0; + return x; +} + /** * mem_cgroup_wb_stats - retrieve writeback related stats from its memcg * @wb: bdi_writeback in question @@ -3907,10 +3923,10 @@ void mem_cgroup_wb_stats(struct bdi_writeback *wb, unsigned long *pfilepages, struct mem_cgroup *memcg = mem_cgroup_from_css(wb->memcg_css); struct mem_cgroup *parent; - *pdirty = memcg_page_state(memcg, NR_FILE_DIRTY); + *pdirty = memcg_exact_page_state(memcg, NR_FILE_DIRTY); /* this should eventually include NR_UNSTABLE_NFS */ - *pwriteback = memcg_page_state(memcg, NR_WRITEBACK); + *pwriteback = memcg_exact_page_state(memcg, NR_WRITEBACK); *pfilepages = mem_cgroup_nr_lru_pages(memcg, (1 << LRU_INACTIVE_FILE) | (1 << LRU_ACTIVE_FILE)); *pheadroom = PAGE_COUNTER_MAX; -- cgit v1.2.3 From 166dbd930c99f640fa8a9beead7b9f5f5b016fa0 Mon Sep 17 00:00:00 2001 From: Tomer Maimon Date: Fri, 5 Apr 2019 18:39:22 -0700 Subject: MAINTAINERS: fix bad pattern in ARM/NUVOTON NPCM In the process of upstreaming architecture support for ARM/NUVOTON NPCM include/dt-bindings/clock/nuvoton,npcm7xx-clks.h was renamed include/dt-bindings/clock/nuvoton,npcm7xx-clock.h without updating MAINTAINERS. This updates the MAINTAINERS pattern to match the new name of this file. Link: http://lkml.kernel.org/r/20190328235752.334462-1-tmaimon77@gmail.com Fixes: 6a498e06ba22 ("MAINTAINERS: Add entry for the Nuvoton NPCM architecture") Signed-off-by: Brendan Higgins Signed-off-by: Tomer Maimon Reported-by: Joe Perches Reviewed-by: Benjamin Fair Cc: Avi Fishman Cc: Mukesh Ojha Cc: Nancy Yuen Cc: Patrick Venture Cc: Tali Perry Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- MAINTAINERS | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index 6771bd784f5f..63bfdaf13f28 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -1900,7 +1900,7 @@ L: openbmc@lists.ozlabs.org (moderated for non-subscribers) S: Supported F: arch/arm/mach-npcm/ F: arch/arm/boot/dts/nuvoton-npcm* -F: include/dt-bindings/clock/nuvoton,npcm7xx-clks.h +F: include/dt-bindings/clock/nuvoton,npcm7xx-clock.h F: drivers/*/*npcm* F: Documentation/devicetree/bindings/*/*npcm* F: Documentation/devicetree/bindings/*/*/*npcm* -- cgit v1.2.3 From 803cfadcb6c5518ebb5a9a398d56b9418ad65585 Mon Sep 17 00:00:00 2001 From: Tomer Maimon Date: Fri, 5 Apr 2019 18:39:26 -0700 Subject: MAINTAINERS: add maintainer and replacing reviewer ARM/NUVOTON NPCM Add Tali Perry as Nuvoton NPCM maintainer, replace Brendan Higgins Nuvoton NPCM reviewer with Benjamin Fair. Link: http://lkml.kernel.org/r/20190328235752.334462-2-tmaimon77@gmail.com Signed-off-by: Tomer Maimon Reviewed-by: Brendan Higgins Reviewed-by: Benjamin Fair Reviewed-by: Mukesh Ojha Cc: Joe Perches Cc: Avi Fishman Cc: Patrick Venture Cc: Nancy Yuen Cc: Tali Perry Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- MAINTAINERS | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index 63bfdaf13f28..2359e12e4c41 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -1893,9 +1893,10 @@ T: git git://git.kernel.org/pub/scm/linux/kernel/git/linusw/linux-nomadik.git ARM/NUVOTON NPCM ARCHITECTURE M: Avi Fishman M: Tomer Maimon +M: Tali Perry R: Patrick Venture R: Nancy Yuen -R: Brendan Higgins +R: Benjamin Fair L: openbmc@lists.ozlabs.org (moderated for non-subscribers) S: Supported F: arch/arm/mach-npcm/ -- cgit v1.2.3 From acaf892ecbf5be7710ae05a61fd43c668f68ad95 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Fri, 5 Apr 2019 18:39:30 -0700 Subject: sh: fix multiple function definition build errors Many of the sh CPU-types have their own plat_irq_setup() and arch_init_clk_ops() functions, so these same (empty) functions in arch/sh/boards/of-generic.c are not needed and cause build errors. If there is some case where these empty functions are needed, they can be retained by marking them as "__weak" while at the same time making builds that do not need them succeed. Fixes these build errors: arch/sh/boards/of-generic.o: In function `plat_irq_setup': (.init.text+0x134): multiple definition of `plat_irq_setup' arch/sh/kernel/cpu/sh2/setup-sh7619.o:(.init.text+0x30): first defined here arch/sh/boards/of-generic.o: In function `arch_init_clk_ops': (.init.text+0x118): multiple definition of `arch_init_clk_ops' arch/sh/kernel/cpu/sh2/clock-sh7619.o:(.init.text+0x0): first defined here Link: http://lkml.kernel.org/r/9ee4e0c5-f100-86a2-bd4d-1d3287ceab31@infradead.org Signed-off-by: Randy Dunlap Reported-by: kbuild test robot Cc: Takashi Iwai Cc: Yoshinori Sato Cc: Rich Felker Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/sh/boards/of-generic.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/sh/boards/of-generic.c b/arch/sh/boards/of-generic.c index 958f46da3a79..d91065e81a4e 100644 --- a/arch/sh/boards/of-generic.c +++ b/arch/sh/boards/of-generic.c @@ -164,10 +164,10 @@ static struct sh_machine_vector __initmv sh_of_generic_mv = { struct sh_clk_ops; -void __init arch_init_clk_ops(struct sh_clk_ops **ops, int idx) +void __init __weak arch_init_clk_ops(struct sh_clk_ops **ops, int idx) { } -void __init plat_irq_setup(void) +void __init __weak plat_irq_setup(void) { } -- cgit v1.2.3 From e91455217d8c7b128c158432869f6e697283f3ec Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Fri, 5 Apr 2019 18:39:34 -0700 Subject: mm/util.c: fix strndup_user() comment The kerneldoc misdescribes strndup_user()'s return value. Cc: Dan Carpenter Cc: Timur Tabi Cc: Mihai Caraman Cc: Kumar Gala Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/util.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/util.c b/mm/util.c index d559bde497a9..43a2984bccaa 100644 --- a/mm/util.c +++ b/mm/util.c @@ -204,7 +204,7 @@ EXPORT_SYMBOL(vmemdup_user); * @s: The string to duplicate * @n: Maximum number of bytes to copy, including the trailing NUL. * - * Return: newly allocated copy of @s or %NULL in case of error + * Return: newly allocated copy of @s or an ERR_PTR() in case of error */ char *strndup_user(const char __user *s, long n) { -- cgit v1.2.3 From 9002b21465fa4d829edfc94a5a441005cffaa972 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Fri, 5 Apr 2019 18:39:38 -0700 Subject: kernel/sysctl.c: fix out-of-bounds access when setting file-max Commit 32a5ad9c2285 ("sysctl: handle overflow for file-max") hooked up min/max values for the file-max sysctl parameter via the .extra1 and .extra2 fields in the corresponding struct ctl_table entry. Unfortunately, the minimum value points at the global 'zero' variable, which is an int. This results in a KASAN splat when accessed as a long by proc_doulongvec_minmax on 64-bit architectures: | BUG: KASAN: global-out-of-bounds in __do_proc_doulongvec_minmax+0x5d8/0x6a0 | Read of size 8 at addr ffff2000133d1c20 by task systemd/1 | | CPU: 0 PID: 1 Comm: systemd Not tainted 5.1.0-rc3-00012-g40b114779944 #2 | Hardware name: linux,dummy-virt (DT) | Call trace: | dump_backtrace+0x0/0x228 | show_stack+0x14/0x20 | dump_stack+0xe8/0x124 | print_address_description+0x60/0x258 | kasan_report+0x140/0x1a0 | __asan_report_load8_noabort+0x18/0x20 | __do_proc_doulongvec_minmax+0x5d8/0x6a0 | proc_doulongvec_minmax+0x4c/0x78 | proc_sys_call_handler.isra.19+0x144/0x1d8 | proc_sys_write+0x34/0x58 | __vfs_write+0x54/0xe8 | vfs_write+0x124/0x3c0 | ksys_write+0xbc/0x168 | __arm64_sys_write+0x68/0x98 | el0_svc_common+0x100/0x258 | el0_svc_handler+0x48/0xc0 | el0_svc+0x8/0xc | | The buggy address belongs to the variable: | zero+0x0/0x40 | | Memory state around the buggy address: | ffff2000133d1b00: 00 00 00 00 00 00 00 00 fa fa fa fa 04 fa fa fa | ffff2000133d1b80: fa fa fa fa 04 fa fa fa fa fa fa fa 04 fa fa fa | >ffff2000133d1c00: fa fa fa fa 04 fa fa fa fa fa fa fa 00 00 00 00 | ^ | ffff2000133d1c80: fa fa fa fa 00 fa fa fa fa fa fa fa 00 00 00 00 | ffff2000133d1d00: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 Fix the splat by introducing a unsigned long 'zero_ul' and using that instead. Link: http://lkml.kernel.org/r/20190403153409.17307-1-will.deacon@arm.com Fixes: 32a5ad9c2285 ("sysctl: handle overflow for file-max") Signed-off-by: Will Deacon Acked-by: Christian Brauner Cc: Kees Cook Cc: Alexey Dobriyan Cc: Matteo Croce Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/sysctl.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/kernel/sysctl.c b/kernel/sysctl.c index e5da394d1ca3..c9ec050bcf46 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -128,6 +128,7 @@ static int zero; static int __maybe_unused one = 1; static int __maybe_unused two = 2; static int __maybe_unused four = 4; +static unsigned long zero_ul; static unsigned long one_ul = 1; static unsigned long long_max = LONG_MAX; static int one_hundred = 100; @@ -1750,7 +1751,7 @@ static struct ctl_table fs_table[] = { .maxlen = sizeof(files_stat.max_files), .mode = 0644, .proc_handler = proc_doulongvec_minmax, - .extra1 = &zero, + .extra1 = &zero_ul, .extra2 = &long_max, }, { -- cgit v1.2.3 From 5b77e95dd7790ff6c8fbf1cd8d0104ebed818a03 Mon Sep 17 00:00:00 2001 From: Alexander Potapenko Date: Tue, 2 Apr 2019 13:28:13 +0200 Subject: x86/asm: Use stricter assembly constraints in bitops There's a number of problems with how arch/x86/include/asm/bitops.h is currently using assembly constraints for the memory region bitops are modifying: 1) Use memory clobber in bitops that touch arbitrary memory Certain bit operations that read/write bits take a base pointer and an arbitrarily large offset to address the bit relative to that base. Inline assembly constraints aren't expressive enough to tell the compiler that the assembly directive is going to touch a specific memory location of unknown size, therefore we have to use the "memory" clobber to indicate that the assembly is going to access memory locations other than those listed in the inputs/outputs. To indicate that BTR/BTS instructions don't necessarily touch the first sizeof(long) bytes of the argument, we also move the address to assembly inputs. This particular change leads to size increase of 124 kernel functions in a defconfig build. For some of them the diff is in NOP operations, other end up re-reading values from memory and may potentially slow down the execution. But without these clobbers the compiler is free to cache the contents of the bitmaps and use them as if they weren't changed by the inline assembly. 2) Use byte-sized arguments for operations touching single bytes. Passing a long value to ANDB/ORB/XORB instructions makes the compiler treat sizeof(long) bytes as being clobbered, which isn't the case. This may theoretically lead to worse code in the case of heavy optimization. Practical impact: I've built a defconfig kernel and looked through some of the functions generated by GCC 7.3.0 with and without this clobber, and didn't spot any miscompilations. However there is a (trivial) theoretical case where this code leads to miscompilation: https://lkml.org/lkml/2019/3/28/393 using just GCC 8.3.0 with -O2. It isn't hard to imagine someone writes such a function in the kernel someday. So the primary motivation is to fix an existing misuse of the asm directive, which happens to work in certain configurations now, but isn't guaranteed to work under different circumstances. [ --mingo: Added -stable tag because defconfig only builds a fraction of the kernel and the trivial testcase looks normal enough to be used in existing or in-development code. ] Signed-off-by: Alexander Potapenko Cc: Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: Dmitry Vyukov Cc: H. Peter Anvin Cc: James Y Knight Cc: Linus Torvalds Cc: Paul E. McKenney Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/20190402112813.193378-1-glider@google.com [ Edited the changelog, tidied up one of the defines. ] Signed-off-by: Ingo Molnar --- arch/x86/include/asm/bitops.h | 41 ++++++++++++++++++----------------------- 1 file changed, 18 insertions(+), 23 deletions(-) diff --git a/arch/x86/include/asm/bitops.h b/arch/x86/include/asm/bitops.h index d153d570bb04..8e790ec219a5 100644 --- a/arch/x86/include/asm/bitops.h +++ b/arch/x86/include/asm/bitops.h @@ -36,16 +36,17 @@ * bit 0 is the LSB of addr; bit 32 is the LSB of (addr+1). */ -#define BITOP_ADDR(x) "+m" (*(volatile long *) (x)) +#define RLONG_ADDR(x) "m" (*(volatile long *) (x)) +#define WBYTE_ADDR(x) "+m" (*(volatile char *) (x)) -#define ADDR BITOP_ADDR(addr) +#define ADDR RLONG_ADDR(addr) /* * We do the locked ops that don't return the old value as * a mask operation on a byte. */ #define IS_IMMEDIATE(nr) (__builtin_constant_p(nr)) -#define CONST_MASK_ADDR(nr, addr) BITOP_ADDR((void *)(addr) + ((nr)>>3)) +#define CONST_MASK_ADDR(nr, addr) WBYTE_ADDR((void *)(addr) + ((nr)>>3)) #define CONST_MASK(nr) (1 << ((nr) & 7)) /** @@ -73,7 +74,7 @@ set_bit(long nr, volatile unsigned long *addr) : "memory"); } else { asm volatile(LOCK_PREFIX __ASM_SIZE(bts) " %1,%0" - : BITOP_ADDR(addr) : "Ir" (nr) : "memory"); + : : RLONG_ADDR(addr), "Ir" (nr) : "memory"); } } @@ -88,7 +89,7 @@ set_bit(long nr, volatile unsigned long *addr) */ static __always_inline void __set_bit(long nr, volatile unsigned long *addr) { - asm volatile(__ASM_SIZE(bts) " %1,%0" : ADDR : "Ir" (nr) : "memory"); + asm volatile(__ASM_SIZE(bts) " %1,%0" : : ADDR, "Ir" (nr) : "memory"); } /** @@ -110,8 +111,7 @@ clear_bit(long nr, volatile unsigned long *addr) : "iq" ((u8)~CONST_MASK(nr))); } else { asm volatile(LOCK_PREFIX __ASM_SIZE(btr) " %1,%0" - : BITOP_ADDR(addr) - : "Ir" (nr)); + : : RLONG_ADDR(addr), "Ir" (nr) : "memory"); } } @@ -131,7 +131,7 @@ static __always_inline void clear_bit_unlock(long nr, volatile unsigned long *ad static __always_inline void __clear_bit(long nr, volatile unsigned long *addr) { - asm volatile(__ASM_SIZE(btr) " %1,%0" : ADDR : "Ir" (nr)); + asm volatile(__ASM_SIZE(btr) " %1,%0" : : ADDR, "Ir" (nr) : "memory"); } static __always_inline bool clear_bit_unlock_is_negative_byte(long nr, volatile unsigned long *addr) @@ -139,7 +139,7 @@ static __always_inline bool clear_bit_unlock_is_negative_byte(long nr, volatile bool negative; asm volatile(LOCK_PREFIX "andb %2,%1" CC_SET(s) - : CC_OUT(s) (negative), ADDR + : CC_OUT(s) (negative), WBYTE_ADDR(addr) : "ir" ((char) ~(1 << nr)) : "memory"); return negative; } @@ -155,13 +155,9 @@ static __always_inline bool clear_bit_unlock_is_negative_byte(long nr, volatile * __clear_bit() is non-atomic and implies release semantics before the memory * operation. It can be used for an unlock if no other CPUs can concurrently * modify other bits in the word. - * - * No memory barrier is required here, because x86 cannot reorder stores past - * older loads. Same principle as spin_unlock. */ static __always_inline void __clear_bit_unlock(long nr, volatile unsigned long *addr) { - barrier(); __clear_bit(nr, addr); } @@ -176,7 +172,7 @@ static __always_inline void __clear_bit_unlock(long nr, volatile unsigned long * */ static __always_inline void __change_bit(long nr, volatile unsigned long *addr) { - asm volatile(__ASM_SIZE(btc) " %1,%0" : ADDR : "Ir" (nr)); + asm volatile(__ASM_SIZE(btc) " %1,%0" : : ADDR, "Ir" (nr) : "memory"); } /** @@ -196,8 +192,7 @@ static __always_inline void change_bit(long nr, volatile unsigned long *addr) : "iq" ((u8)CONST_MASK(nr))); } else { asm volatile(LOCK_PREFIX __ASM_SIZE(btc) " %1,%0" - : BITOP_ADDR(addr) - : "Ir" (nr)); + : : RLONG_ADDR(addr), "Ir" (nr) : "memory"); } } @@ -242,8 +237,8 @@ static __always_inline bool __test_and_set_bit(long nr, volatile unsigned long * asm(__ASM_SIZE(bts) " %2,%1" CC_SET(c) - : CC_OUT(c) (oldbit), ADDR - : "Ir" (nr)); + : CC_OUT(c) (oldbit) + : ADDR, "Ir" (nr) : "memory"); return oldbit; } @@ -282,8 +277,8 @@ static __always_inline bool __test_and_clear_bit(long nr, volatile unsigned long asm volatile(__ASM_SIZE(btr) " %2,%1" CC_SET(c) - : CC_OUT(c) (oldbit), ADDR - : "Ir" (nr)); + : CC_OUT(c) (oldbit) + : ADDR, "Ir" (nr) : "memory"); return oldbit; } @@ -294,8 +289,8 @@ static __always_inline bool __test_and_change_bit(long nr, volatile unsigned lon asm volatile(__ASM_SIZE(btc) " %2,%1" CC_SET(c) - : CC_OUT(c) (oldbit), ADDR - : "Ir" (nr) : "memory"); + : CC_OUT(c) (oldbit) + : ADDR, "Ir" (nr) : "memory"); return oldbit; } @@ -326,7 +321,7 @@ static __always_inline bool variable_test_bit(long nr, volatile const unsigned l asm volatile(__ASM_SIZE(bt) " %2,%1" CC_SET(c) : CC_OUT(c) (oldbit) - : "m" (*(unsigned long *)addr), "Ir" (nr)); + : "m" (*(unsigned long *)addr), "Ir" (nr) : "memory"); return oldbit; } -- cgit v1.2.3 From 3ace6891ce8bb9e1267358cb58f93b4fd8b72b69 Mon Sep 17 00:00:00 2001 From: Laurentiu Tudor Date: Mon, 1 Apr 2019 13:14:37 +0300 Subject: i2c: imx: don't leak the i2c adapter on error MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Make sure to free the i2c adapter on the error exit path. Signed-off-by: Laurentiu Tudor Reviewed-by: Mukesh Ojha Reviewed-by: Uwe Kleine-König Fixes: e1ab9a468e3b ("i2c: imx: improve the error handling in i2c_imx_dma_request()") Signed-off-by: Wolfram Sang --- drivers/i2c/busses/i2c-imx.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/i2c/busses/i2c-imx.c b/drivers/i2c/busses/i2c-imx.c index 42fed40198a0..c0c3043b5d61 100644 --- a/drivers/i2c/busses/i2c-imx.c +++ b/drivers/i2c/busses/i2c-imx.c @@ -1169,11 +1169,13 @@ static int i2c_imx_probe(struct platform_device *pdev) /* Init DMA config if supported */ ret = i2c_imx_dma_request(i2c_imx, phy_addr); if (ret < 0) - goto clk_notifier_unregister; + goto del_adapter; dev_info(&i2c_imx->adapter.dev, "IMX I2C adapter registered\n"); return 0; /* Return OK */ +del_adapter: + i2c_del_adapter(&i2c_imx->adapter); clk_notifier_unregister: clk_notifier_unregister(i2c_imx->clk, &i2c_imx->clk_change_nb); rpm_disable: -- cgit v1.2.3 From 7ff684a683d777c4956fce93e60accbab2bd7696 Mon Sep 17 00:00:00 2001 From: John Pittman Date: Fri, 5 Apr 2019 17:42:45 -0400 Subject: null_blk: prevent crash from bad home_node value At module load, if the selected home_node value is greater than the available numa nodes, the system will crash in __alloc_pages_nodemask() due to a bad paging request. Prevent this user error crash by detecting the bad value, logging an error, and setting g_home_node back to the default of NUMA_NO_NODE. Signed-off-by: John Pittman Signed-off-by: Jens Axboe --- drivers/block/null_blk_main.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/block/null_blk_main.c b/drivers/block/null_blk_main.c index 417a9f15c116..d7ac09c092f2 100644 --- a/drivers/block/null_blk_main.c +++ b/drivers/block/null_blk_main.c @@ -1748,6 +1748,11 @@ static int __init null_init(void) return -EINVAL; } + if (g_home_node != NUMA_NO_NODE && g_home_node >= nr_online_nodes) { + pr_err("null_blk: invalid home_node value\n"); + g_home_node = NUMA_NO_NODE; + } + if (g_queue_mode == NULL_Q_RQ) { pr_err("null_blk: legacy IO path no longer available\n"); return -EINVAL; -- cgit v1.2.3 From 47b16820c490149c2923e8474048f2c6e7557cab Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Tue, 19 Feb 2019 08:49:56 -0800 Subject: xsysace: Fix error handling in ace_setup If xace hardware reports a bad version number, the error handling code in ace_setup() calls put_disk(), followed by queue cleanup. However, since the disk data structure has the queue pointer set, put_disk() also cleans and releases the queue. This results in blk_cleanup_queue() accessing an already released data structure, which in turn may result in a crash such as the following. [ 10.681671] BUG: Kernel NULL pointer dereference at 0x00000040 [ 10.681826] Faulting instruction address: 0xc0431480 [ 10.682072] Oops: Kernel access of bad area, sig: 11 [#1] [ 10.682251] BE PAGE_SIZE=4K PREEMPT Xilinx Virtex440 [ 10.682387] Modules linked in: [ 10.682528] CPU: 0 PID: 1 Comm: swapper Tainted: G W 5.0.0-rc6-next-20190218+ #2 [ 10.682733] NIP: c0431480 LR: c043147c CTR: c0422ad8 [ 10.682863] REGS: cf82fbe0 TRAP: 0300 Tainted: G W (5.0.0-rc6-next-20190218+) [ 10.683065] MSR: 00029000 CR: 22000222 XER: 00000000 [ 10.683236] DEAR: 00000040 ESR: 00000000 [ 10.683236] GPR00: c043147c cf82fc90 cf82ccc0 00000000 00000000 00000000 00000002 00000000 [ 10.683236] GPR08: 00000000 00000000 c04310bc 00000000 22000222 00000000 c0002c54 00000000 [ 10.683236] GPR16: 00000000 00000001 c09aa39c c09021b0 c09021dc 00000007 c0a68c08 00000000 [ 10.683236] GPR24: 00000001 ced6d400 ced6dcf0 c0815d9c 00000000 00000000 00000000 cedf0800 [ 10.684331] NIP [c0431480] blk_mq_run_hw_queue+0x28/0x114 [ 10.684473] LR [c043147c] blk_mq_run_hw_queue+0x24/0x114 [ 10.684602] Call Trace: [ 10.684671] [cf82fc90] [c043147c] blk_mq_run_hw_queue+0x24/0x114 (unreliable) [ 10.684854] [cf82fcc0] [c04315bc] blk_mq_run_hw_queues+0x50/0x7c [ 10.685002] [cf82fce0] [c0422b24] blk_set_queue_dying+0x30/0x68 [ 10.685154] [cf82fcf0] [c0423ec0] blk_cleanup_queue+0x34/0x14c [ 10.685306] [cf82fd10] [c054d73c] ace_probe+0x3dc/0x508 [ 10.685445] [cf82fd50] [c052d740] platform_drv_probe+0x4c/0xb8 [ 10.685592] [cf82fd70] [c052abb0] really_probe+0x20c/0x32c [ 10.685728] [cf82fda0] [c052ae58] driver_probe_device+0x68/0x464 [ 10.685877] [cf82fdc0] [c052b500] device_driver_attach+0xb4/0xe4 [ 10.686024] [cf82fde0] [c052b5dc] __driver_attach+0xac/0xfc [ 10.686161] [cf82fe00] [c0528428] bus_for_each_dev+0x80/0xc0 [ 10.686314] [cf82fe30] [c0529b3c] bus_add_driver+0x144/0x234 [ 10.686457] [cf82fe50] [c052c46c] driver_register+0x88/0x15c [ 10.686610] [cf82fe60] [c09de288] ace_init+0x4c/0xac [ 10.686742] [cf82fe80] [c0002730] do_one_initcall+0xac/0x330 [ 10.686888] [cf82fee0] [c09aafd0] kernel_init_freeable+0x34c/0x478 [ 10.687043] [cf82ff30] [c0002c6c] kernel_init+0x18/0x114 [ 10.687188] [cf82ff40] [c000f2f0] ret_from_kernel_thread+0x14/0x1c [ 10.687349] Instruction dump: [ 10.687435] 3863ffd4 4bfffd70 9421ffd0 7c0802a6 93c10028 7c9e2378 93e1002c 38810008 [ 10.687637] 7c7f1b78 90010034 4bfffc25 813f008c <81290040> 75290100 4182002c 80810008 [ 10.688056] ---[ end trace 13c9ff51d41b9d40 ]--- Fix the problem by setting the disk queue pointer to NULL before calling put_disk(). A more comprehensive fix might be to rearrange the code to check the hardware version before initializing data structures, but I don't know if this would have undesirable side effects, and it would increase the complexity of backporting the fix to older kernels. Fixes: 74489a91dd43a ("Add support for Xilinx SystemACE CompactFlash interface") Acked-by: Michal Simek Signed-off-by: Guenter Roeck Signed-off-by: Jens Axboe --- drivers/block/xsysace.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/block/xsysace.c b/drivers/block/xsysace.c index 87ccef4bd69e..32a21b8d1d85 100644 --- a/drivers/block/xsysace.c +++ b/drivers/block/xsysace.c @@ -1090,6 +1090,8 @@ static int ace_setup(struct ace_device *ace) return 0; err_read: + /* prevent double queue cleanup */ + ace->gd->queue = NULL; put_disk(ace->gd); err_alloc_disk: blk_cleanup_queue(ace->queue); -- cgit v1.2.3 From 10dce8af34226d90fa56746a934f8da5dcdba3df Mon Sep 17 00:00:00 2001 From: Kirill Smelkov Date: Tue, 26 Mar 2019 22:20:43 +0000 Subject: fs: stream_open - opener for stream-like files so that read and write can run simultaneously without deadlock Commit 9c225f2655e3 ("vfs: atomic f_pos accesses as per POSIX") added locking for file.f_pos access and in particular made concurrent read and write not possible - now both those functions take f_pos lock for the whole run, and so if e.g. a read is blocked waiting for data, write will deadlock waiting for that read to complete. This caused regression for stream-like files where previously read and write could run simultaneously, but after that patch could not do so anymore. See e.g. commit 581d21a2d02a ("xenbus: fix deadlock on writes to /proc/xen/xenbus") which fixes such regression for particular case of /proc/xen/xenbus. The patch that added f_pos lock in 2014 did so to guarantee POSIX thread safety for read/write/lseek and added the locking to file descriptors of all regular files. In 2014 that thread-safety problem was not new as it was already discussed earlier in 2006. However even though 2006'th version of Linus's patch was adding f_pos locking "only for files that are marked seekable with FMODE_LSEEK (thus avoiding the stream-like objects like pipes and sockets)", the 2014 version - the one that actually made it into the tree as 9c225f2655e3 - is doing so irregardless of whether a file is seekable or not. See https://lore.kernel.org/lkml/53022DB1.4070805@gmail.com/ https://lwn.net/Articles/180387 https://lwn.net/Articles/180396 for historic context. The reason that it did so is, probably, that there are many files that are marked non-seekable, but e.g. their read implementation actually depends on knowing current position to correctly handle the read. Some examples: kernel/power/user.c snapshot_read fs/debugfs/file.c u32_array_read fs/fuse/control.c fuse_conn_waiting_read + ... drivers/hwmon/asus_atk0110.c atk_debugfs_ggrp_read arch/s390/hypfs/inode.c hypfs_read_iter ... Despite that, many nonseekable_open users implement read and write with pure stream semantics - they don't depend on passed ppos at all. And for those cases where read could wait for something inside, it creates a situation similar to xenbus - the write could be never made to go until read is done, and read is waiting for some, potentially external, event, for potentially unbounded time -> deadlock. Besides xenbus, there are 14 such places in the kernel that I've found with semantic patch (see below): drivers/xen/evtchn.c:667:8-24: ERROR: evtchn_fops: .read() can deadlock .write() drivers/isdn/capi/capi.c:963:8-24: ERROR: capi_fops: .read() can deadlock .write() drivers/input/evdev.c:527:1-17: ERROR: evdev_fops: .read() can deadlock .write() drivers/char/pcmcia/cm4000_cs.c:1685:7-23: ERROR: cm4000_fops: .read() can deadlock .write() net/rfkill/core.c:1146:8-24: ERROR: rfkill_fops: .read() can deadlock .write() drivers/s390/char/fs3270.c:488:1-17: ERROR: fs3270_fops: .read() can deadlock .write() drivers/usb/misc/ldusb.c:310:1-17: ERROR: ld_usb_fops: .read() can deadlock .write() drivers/hid/uhid.c:635:1-17: ERROR: uhid_fops: .read() can deadlock .write() net/batman-adv/icmp_socket.c:80:1-17: ERROR: batadv_fops: .read() can deadlock .write() drivers/media/rc/lirc_dev.c:198:1-17: ERROR: lirc_fops: .read() can deadlock .write() drivers/leds/uleds.c:77:1-17: ERROR: uleds_fops: .read() can deadlock .write() drivers/input/misc/uinput.c:400:1-17: ERROR: uinput_fops: .read() can deadlock .write() drivers/infiniband/core/user_mad.c:985:7-23: ERROR: umad_fops: .read() can deadlock .write() drivers/gnss/core.c:45:1-17: ERROR: gnss_fops: .read() can deadlock .write() In addition to the cases above another regression caused by f_pos locking is that now FUSE filesystems that implement open with FOPEN_NONSEEKABLE flag, can no longer implement bidirectional stream-like files - for the same reason as above e.g. read can deadlock write locking on file.f_pos in the kernel. FUSE's FOPEN_NONSEEKABLE was added in 2008 in a7c1b990f715 ("fuse: implement nonseekable open") to support OSSPD. OSSPD implements /dev/dsp in userspace with FOPEN_NONSEEKABLE flag, with corresponding read and write routines not depending on current position at all, and with both read and write being potentially blocking operations: See https://github.com/libfuse/osspd https://lwn.net/Articles/308445 https://github.com/libfuse/osspd/blob/14a9cff0/osspd.c#L1406 https://github.com/libfuse/osspd/blob/14a9cff0/osspd.c#L1438-L1477 https://github.com/libfuse/osspd/blob/14a9cff0/osspd.c#L1479-L1510 Corresponding libfuse example/test also describes FOPEN_NONSEEKABLE as "somewhat pipe-like files ..." with read handler not using offset. However that test implements only read without write and cannot exercise the deadlock scenario: https://github.com/libfuse/libfuse/blob/fuse-3.4.2-3-ga1bff7d/example/poll.c#L124-L131 https://github.com/libfuse/libfuse/blob/fuse-3.4.2-3-ga1bff7d/example/poll.c#L146-L163 https://github.com/libfuse/libfuse/blob/fuse-3.4.2-3-ga1bff7d/example/poll.c#L209-L216 I've actually hit the read vs write deadlock for real while implementing my FUSE filesystem where there is /head/watch file, for which open creates separate bidirectional socket-like stream in between filesystem and its user with both read and write being later performed simultaneously. And there it is semantically not easy to split the stream into two separate read-only and write-only channels: https://lab.nexedi.com/kirr/wendelin.core/blob/f13aa600/wcfs/wcfs.go#L88-169 Let's fix this regression. The plan is: 1. We can't change nonseekable_open to include &~FMODE_ATOMIC_POS - doing so would break many in-kernel nonseekable_open users which actually use ppos in read/write handlers. 2. Add stream_open() to kernel to open stream-like non-seekable file descriptors. Read and write on such file descriptors would never use nor change ppos. And with that property on stream-like files read and write will be running without taking f_pos lock - i.e. read and write could be running simultaneously. 3. With semantic patch search and convert to stream_open all in-kernel nonseekable_open users for which read and write actually do not depend on ppos and where there is no other methods in file_operations which assume @offset access. 4. Add FOPEN_STREAM to fs/fuse/ and open in-kernel file-descriptors via steam_open if that bit is present in filesystem open reply. It was tempting to change fs/fuse/ open handler to use stream_open instead of nonseekable_open on just FOPEN_NONSEEKABLE flags, but grepping through Debian codesearch shows users of FOPEN_NONSEEKABLE, and in particular GVFS which actually uses offset in its read and write handlers https://codesearch.debian.net/search?q=-%3Enonseekable+%3D https://gitlab.gnome.org/GNOME/gvfs/blob/1.40.0-6-gcbc54396/client/gvfsfusedaemon.c#L1080 https://gitlab.gnome.org/GNOME/gvfs/blob/1.40.0-6-gcbc54396/client/gvfsfusedaemon.c#L1247-1346 https://gitlab.gnome.org/GNOME/gvfs/blob/1.40.0-6-gcbc54396/client/gvfsfusedaemon.c#L1399-1481 so if we would do such a change it will break a real user. 5. Add stream_open and FOPEN_STREAM handling to stable kernels starting from v3.14+ (the kernel where 9c225f2655 first appeared). This will allow to patch OSSPD and other FUSE filesystems that provide stream-like files to return FOPEN_STREAM | FOPEN_NONSEEKABLE in their open handler and this way avoid the deadlock on all kernel versions. This should work because fs/fuse/ ignores unknown open flags returned from a filesystem and so passing FOPEN_STREAM to a kernel that is not aware of this flag cannot hurt. In turn the kernel that is not aware of FOPEN_STREAM will be < v3.14 where just FOPEN_NONSEEKABLE is sufficient to implement streams without read vs write deadlock. This patch adds stream_open, converts /proc/xen/xenbus to it and adds semantic patch to automatically locate in-kernel places that are either required to be converted due to read vs write deadlock, or that are just safe to be converted because read and write do not use ppos and there are no other funky methods in file_operations. Regarding semantic patch I've verified each generated change manually - that it is correct to convert - and each other nonseekable_open instance left - that it is either not correct to convert there, or that it is not converted due to current stream_open.cocci limitations. The script also does not convert files that should be valid to convert, but that currently have .llseek = noop_llseek or generic_file_llseek for unknown reason despite file being opened with nonseekable_open (e.g. drivers/input/mousedev.c) Cc: Michael Kerrisk Cc: Yongzhi Pan Cc: Jonathan Corbet Cc: David Vrabel Cc: Juergen Gross Cc: Miklos Szeredi Cc: Tejun Heo Cc: Kirill Tkhai Cc: Arnd Bergmann Cc: Christoph Hellwig Cc: Greg Kroah-Hartman Cc: Julia Lawall Cc: Nikolaus Rath Cc: Han-Wen Nienhuys Signed-off-by: Kirill Smelkov Signed-off-by: Linus Torvalds --- drivers/xen/xenbus/xenbus_dev_frontend.c | 4 +- fs/open.c | 18 ++ fs/read_write.c | 5 +- include/linux/fs.h | 4 + scripts/coccinelle/api/stream_open.cocci | 363 +++++++++++++++++++++++++++++++ 5 files changed, 389 insertions(+), 5 deletions(-) create mode 100644 scripts/coccinelle/api/stream_open.cocci diff --git a/drivers/xen/xenbus/xenbus_dev_frontend.c b/drivers/xen/xenbus/xenbus_dev_frontend.c index c3e201025ef0..0782ff3c2273 100644 --- a/drivers/xen/xenbus/xenbus_dev_frontend.c +++ b/drivers/xen/xenbus/xenbus_dev_frontend.c @@ -622,9 +622,7 @@ static int xenbus_file_open(struct inode *inode, struct file *filp) if (xen_store_evtchn == 0) return -ENOENT; - nonseekable_open(inode, filp); - - filp->f_mode &= ~FMODE_ATOMIC_POS; /* cdev-style semantics */ + stream_open(inode, filp); u = kzalloc(sizeof(*u), GFP_KERNEL); if (u == NULL) diff --git a/fs/open.c b/fs/open.c index f1c2f855fd43..a00350018a47 100644 --- a/fs/open.c +++ b/fs/open.c @@ -1215,3 +1215,21 @@ int nonseekable_open(struct inode *inode, struct file *filp) } EXPORT_SYMBOL(nonseekable_open); + +/* + * stream_open is used by subsystems that want stream-like file descriptors. + * Such file descriptors are not seekable and don't have notion of position + * (file.f_pos is always 0). Contrary to file descriptors of other regular + * files, .read() and .write() can run simultaneously. + * + * stream_open never fails and is marked to return int so that it could be + * directly used as file_operations.open . + */ +int stream_open(struct inode *inode, struct file *filp) +{ + filp->f_mode &= ~(FMODE_LSEEK | FMODE_PREAD | FMODE_PWRITE | FMODE_ATOMIC_POS); + filp->f_mode |= FMODE_STREAM; + return 0; +} + +EXPORT_SYMBOL(stream_open); diff --git a/fs/read_write.c b/fs/read_write.c index 177ccc3d405a..61b43ad7608e 100644 --- a/fs/read_write.c +++ b/fs/read_write.c @@ -560,12 +560,13 @@ ssize_t vfs_write(struct file *file, const char __user *buf, size_t count, loff_ static inline loff_t file_pos_read(struct file *file) { - return file->f_pos; + return file->f_mode & FMODE_STREAM ? 0 : file->f_pos; } static inline void file_pos_write(struct file *file, loff_t pos) { - file->f_pos = pos; + if ((file->f_mode & FMODE_STREAM) == 0) + file->f_pos = pos; } ssize_t ksys_read(unsigned int fd, char __user *buf, size_t count) diff --git a/include/linux/fs.h b/include/linux/fs.h index 8b42df09b04c..dd28e7679089 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -158,6 +158,9 @@ typedef int (dio_iodone_t)(struct kiocb *iocb, loff_t offset, #define FMODE_OPENED ((__force fmode_t)0x80000) #define FMODE_CREATED ((__force fmode_t)0x100000) +/* File is stream-like */ +#define FMODE_STREAM ((__force fmode_t)0x200000) + /* File was opened by fanotify and shouldn't generate fanotify events */ #define FMODE_NONOTIFY ((__force fmode_t)0x4000000) @@ -3074,6 +3077,7 @@ extern loff_t no_seek_end_llseek_size(struct file *, loff_t, int, loff_t); extern loff_t no_seek_end_llseek(struct file *, loff_t, int); extern int generic_file_open(struct inode * inode, struct file * filp); extern int nonseekable_open(struct inode * inode, struct file * filp); +extern int stream_open(struct inode * inode, struct file * filp); #ifdef CONFIG_BLOCK typedef void (dio_submit_t)(struct bio *bio, struct inode *inode, diff --git a/scripts/coccinelle/api/stream_open.cocci b/scripts/coccinelle/api/stream_open.cocci new file mode 100644 index 000000000000..350145da7669 --- /dev/null +++ b/scripts/coccinelle/api/stream_open.cocci @@ -0,0 +1,363 @@ +// SPDX-License-Identifier: GPL-2.0 +// Author: Kirill Smelkov (kirr@nexedi.com) +// +// Search for stream-like files that are using nonseekable_open and convert +// them to stream_open. A stream-like file is a file that does not use ppos in +// its read and write. Rationale for the conversion is to avoid deadlock in +// between read and write. + +virtual report +virtual patch +virtual explain // explain decisions in the patch (SPFLAGS="-D explain") + +// stream-like reader & writer - ones that do not depend on f_pos. +@ stream_reader @ +identifier readstream, ppos; +identifier f, buf, len; +type loff_t; +@@ + ssize_t readstream(struct file *f, char *buf, size_t len, loff_t *ppos) + { + ... when != ppos + } + +@ stream_writer @ +identifier writestream, ppos; +identifier f, buf, len; +type loff_t; +@@ + ssize_t writestream(struct file *f, const char *buf, size_t len, loff_t *ppos) + { + ... when != ppos + } + + +// a function that blocks +@ blocks @ +identifier block_f; +identifier wait_event =~ "^wait_event_.*"; +@@ + block_f(...) { + ... when exists + wait_event(...) + ... when exists + } + +// stream_reader that can block inside. +// +// XXX wait_* can be called not directly from current function (e.g. func -> f -> g -> wait()) +// XXX currently reader_blocks supports only direct and 1-level indirect cases. +@ reader_blocks_direct @ +identifier stream_reader.readstream; +identifier wait_event =~ "^wait_event_.*"; +@@ + readstream(...) + { + ... when exists + wait_event(...) + ... when exists + } + +@ reader_blocks_1 @ +identifier stream_reader.readstream; +identifier blocks.block_f; +@@ + readstream(...) + { + ... when exists + block_f(...) + ... when exists + } + +@ reader_blocks depends on reader_blocks_direct || reader_blocks_1 @ +identifier stream_reader.readstream; +@@ + readstream(...) { + ... + } + + +// file_operations + whether they have _any_ .read, .write, .llseek ... at all. +// +// XXX add support for file_operations xxx[N] = ... (sound/core/pcm_native.c) +@ fops0 @ +identifier fops; +@@ + struct file_operations fops = { + ... + }; + +@ has_read @ +identifier fops0.fops; +identifier read_f; +@@ + struct file_operations fops = { + .read = read_f, + }; + +@ has_read_iter @ +identifier fops0.fops; +identifier read_iter_f; +@@ + struct file_operations fops = { + .read_iter = read_iter_f, + }; + +@ has_write @ +identifier fops0.fops; +identifier write_f; +@@ + struct file_operations fops = { + .write = write_f, + }; + +@ has_write_iter @ +identifier fops0.fops; +identifier write_iter_f; +@@ + struct file_operations fops = { + .write_iter = write_iter_f, + }; + +@ has_llseek @ +identifier fops0.fops; +identifier llseek_f; +@@ + struct file_operations fops = { + .llseek = llseek_f, + }; + +@ has_no_llseek @ +identifier fops0.fops; +@@ + struct file_operations fops = { + .llseek = no_llseek, + }; + +@ has_mmap @ +identifier fops0.fops; +identifier mmap_f; +@@ + struct file_operations fops = { + .mmap = mmap_f, + }; + +@ has_copy_file_range @ +identifier fops0.fops; +identifier copy_file_range_f; +@@ + struct file_operations fops = { + .copy_file_range = copy_file_range_f, + }; + +@ has_remap_file_range @ +identifier fops0.fops; +identifier remap_file_range_f; +@@ + struct file_operations fops = { + .remap_file_range = remap_file_range_f, + }; + +@ has_splice_read @ +identifier fops0.fops; +identifier splice_read_f; +@@ + struct file_operations fops = { + .splice_read = splice_read_f, + }; + +@ has_splice_write @ +identifier fops0.fops; +identifier splice_write_f; +@@ + struct file_operations fops = { + .splice_write = splice_write_f, + }; + + +// file_operations that is candidate for stream_open conversion - it does not +// use mmap and other methods that assume @offset access to file. +// +// XXX for simplicity require no .{read/write}_iter and no .splice_{read/write} for now. +// XXX maybe_steam.fops cannot be used in other rules - it gives "bad rule maybe_stream or bad variable fops". +@ maybe_stream depends on (!has_llseek || has_no_llseek) && !has_mmap && !has_copy_file_range && !has_remap_file_range && !has_read_iter && !has_write_iter && !has_splice_read && !has_splice_write @ +identifier fops0.fops; +@@ + struct file_operations fops = { + }; + + +// ---- conversions ---- + +// XXX .open = nonseekable_open -> .open = stream_open +// XXX .open = func -> openfunc -> nonseekable_open + +// read & write +// +// if both are used in the same file_operations together with an opener - +// under that conditions we can use stream_open instead of nonseekable_open. +@ fops_rw depends on maybe_stream @ +identifier fops0.fops, openfunc; +identifier stream_reader.readstream; +identifier stream_writer.writestream; +@@ + struct file_operations fops = { + .open = openfunc, + .read = readstream, + .write = writestream, + }; + +@ report_rw depends on report @ +identifier fops_rw.openfunc; +position p1; +@@ + openfunc(...) { + <... + nonseekable_open@p1 + ...> + } + +@ script:python depends on report && reader_blocks @ +fops << fops0.fops; +p << report_rw.p1; +@@ +coccilib.report.print_report(p[0], + "ERROR: %s: .read() can deadlock .write(); change nonseekable_open -> stream_open to fix." % (fops,)) + +@ script:python depends on report && !reader_blocks @ +fops << fops0.fops; +p << report_rw.p1; +@@ +coccilib.report.print_report(p[0], + "WARNING: %s: .read() and .write() have stream semantic; safe to change nonseekable_open -> stream_open." % (fops,)) + + +@ explain_rw_deadlocked depends on explain && reader_blocks @ +identifier fops_rw.openfunc; +@@ + openfunc(...) { + <... +- nonseekable_open ++ nonseekable_open /* read & write (was deadlock) */ + ...> + } + + +@ explain_rw_nodeadlock depends on explain && !reader_blocks @ +identifier fops_rw.openfunc; +@@ + openfunc(...) { + <... +- nonseekable_open ++ nonseekable_open /* read & write (no direct deadlock) */ + ...> + } + +@ patch_rw depends on patch @ +identifier fops_rw.openfunc; +@@ + openfunc(...) { + <... +- nonseekable_open ++ stream_open + ...> + } + + +// read, but not write +@ fops_r depends on maybe_stream && !has_write @ +identifier fops0.fops, openfunc; +identifier stream_reader.readstream; +@@ + struct file_operations fops = { + .open = openfunc, + .read = readstream, + }; + +@ report_r depends on report @ +identifier fops_r.openfunc; +position p1; +@@ + openfunc(...) { + <... + nonseekable_open@p1 + ...> + } + +@ script:python depends on report @ +fops << fops0.fops; +p << report_r.p1; +@@ +coccilib.report.print_report(p[0], + "WARNING: %s: .read() has stream semantic; safe to change nonseekable_open -> stream_open." % (fops,)) + +@ explain_r depends on explain @ +identifier fops_r.openfunc; +@@ + openfunc(...) { + <... +- nonseekable_open ++ nonseekable_open /* read only */ + ...> + } + +@ patch_r depends on patch @ +identifier fops_r.openfunc; +@@ + openfunc(...) { + <... +- nonseekable_open ++ stream_open + ...> + } + + +// write, but not read +@ fops_w depends on maybe_stream && !has_read @ +identifier fops0.fops, openfunc; +identifier stream_writer.writestream; +@@ + struct file_operations fops = { + .open = openfunc, + .write = writestream, + }; + +@ report_w depends on report @ +identifier fops_w.openfunc; +position p1; +@@ + openfunc(...) { + <... + nonseekable_open@p1 + ...> + } + +@ script:python depends on report @ +fops << fops0.fops; +p << report_w.p1; +@@ +coccilib.report.print_report(p[0], + "WARNING: %s: .write() has stream semantic; safe to change nonseekable_open -> stream_open." % (fops,)) + +@ explain_w depends on explain @ +identifier fops_w.openfunc; +@@ + openfunc(...) { + <... +- nonseekable_open ++ nonseekable_open /* write only */ + ...> + } + +@ patch_w depends on patch @ +identifier fops_w.openfunc; +@@ + openfunc(...) { + <... +- nonseekable_open ++ stream_open + ...> + } + + +// no read, no write - don't change anything -- cgit v1.2.3 From c2f8d7cb32cd95e3005bed58ce02afa686b9f357 Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Mon, 18 Mar 2019 22:56:15 +0100 Subject: Revert: parisc: Use F_EXTEND() macro in iosapic code Revert parts of commit 97d7e2e3fd8a ("parisc: Use F_EXTEND() macro in iosapic code"). It breaks booting the 32-bit kernel on some machines. Reported-by: Sven Schnelle Tested-by: Sven Schnelle Fixes: 97d7e2e3fd8a ("parisc: Use F_EXTEND() macro in iosapic code") Signed-off-by: Helge Deller --- drivers/parisc/iosapic.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/parisc/iosapic.c b/drivers/parisc/iosapic.c index 1be571c20062..6bad04cbb1d3 100644 --- a/drivers/parisc/iosapic.c +++ b/drivers/parisc/iosapic.c @@ -157,8 +157,12 @@ #define DBG_IRT(x...) #endif +#ifdef CONFIG_64BIT +#define COMPARE_IRTE_ADDR(irte, hpa) ((irte)->dest_iosapic_addr == (hpa)) +#else #define COMPARE_IRTE_ADDR(irte, hpa) \ - ((irte)->dest_iosapic_addr == F_EXTEND(hpa)) + ((irte)->dest_iosapic_addr == ((hpa) | 0xffffffff00000000ULL)) +#endif #define IOSAPIC_REG_SELECT 0x00 #define IOSAPIC_REG_WINDOW 0x10 -- cgit v1.2.3 From 45efd871bf0a47648f119d1b41467f70484de5bc Mon Sep 17 00:00:00 2001 From: Sven Schnelle Date: Thu, 4 Apr 2019 18:16:03 +0200 Subject: parisc: regs_return_value() should return gpr28 While working on kretprobes for PA-RISC I was wondering while the kprobes sanity test always fails on kretprobes. This is caused by returning gpr20 instead of gpr28. Signed-off-by: Sven Schnelle Signed-off-by: Helge Deller Cc: stable@vger.kernel.org # 4.14+ --- arch/parisc/include/asm/ptrace.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/parisc/include/asm/ptrace.h b/arch/parisc/include/asm/ptrace.h index 2a27b275ab09..4a87b3d600c6 100644 --- a/arch/parisc/include/asm/ptrace.h +++ b/arch/parisc/include/asm/ptrace.h @@ -22,7 +22,7 @@ unsigned long profile_pc(struct pt_regs *); static inline unsigned long regs_return_value(struct pt_regs *regs) { - return regs->gr[20]; + return regs->gr[28]; } static inline void instruction_pointer_set(struct pt_regs *regs, -- cgit v1.2.3 From f324fa58327791b2696628b31480e7e21c745706 Mon Sep 17 00:00:00 2001 From: Sven Schnelle Date: Thu, 4 Apr 2019 18:16:04 +0200 Subject: parisc: also set iaoq_b in instruction_pointer_set() When setting the instruction pointer on PA-RISC we also need to set the back of the instruction queue to the new offset, otherwise we will execute on instruction from the new location, and jumping back to the old location stored in iaoq_b. Signed-off-by: Sven Schnelle Signed-off-by: Helge Deller Fixes: 75ebedf1d263 ("parisc: Add HAVE_REGS_AND_STACK_ACCESS_API feature") Cc: stable@vger.kernel.org # 4.19+ --- arch/parisc/include/asm/ptrace.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/parisc/include/asm/ptrace.h b/arch/parisc/include/asm/ptrace.h index 4a87b3d600c6..9ff033d261ab 100644 --- a/arch/parisc/include/asm/ptrace.h +++ b/arch/parisc/include/asm/ptrace.h @@ -28,7 +28,8 @@ static inline unsigned long regs_return_value(struct pt_regs *regs) static inline void instruction_pointer_set(struct pt_regs *regs, unsigned long val) { - regs->iaoq[0] = val; + regs->iaoq[0] = val; + regs->iaoq[1] = val + 4; } /* Query offset/name of register from its name/offset */ -- cgit v1.2.3 From d006e95b5561f708d0385e9677ffe2c46f2ae345 Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Tue, 2 Apr 2019 12:13:27 +0200 Subject: parisc: Detect QEMU earlier in boot process While adding LASI support to QEMU, I noticed that the QEMU detection in the kernel happens much too late. For example, when a LASI chip is found by the kernel, it registers the LASI LED driver as well. But when we run on QEMU it makes sense to avoid spending unnecessary CPU cycles, so we need to access the running_on_QEMU flag earlier than before. This patch now makes the QEMU detection the fist task of the Linux kernel by moving it to where the kernel enters the C-coding. Fixes: 310d82784fb4 ("parisc: qemu idle sleep support") Signed-off-by: Helge Deller Cc: stable@vger.kernel.org # v4.14+ --- arch/parisc/kernel/process.c | 6 ------ arch/parisc/kernel/setup.c | 3 +++ 2 files changed, 3 insertions(+), 6 deletions(-) diff --git a/arch/parisc/kernel/process.c b/arch/parisc/kernel/process.c index eb39e7e380d7..841db71958cd 100644 --- a/arch/parisc/kernel/process.c +++ b/arch/parisc/kernel/process.c @@ -210,12 +210,6 @@ void __cpuidle arch_cpu_idle(void) static int __init parisc_idle_init(void) { - const char *marker; - - /* check QEMU/SeaBIOS marker in PAGE0 */ - marker = (char *) &PAGE0->pad0; - running_on_qemu = (memcmp(marker, "SeaBIOS", 8) == 0); - if (!running_on_qemu) cpu_idle_poll_ctrl(1); diff --git a/arch/parisc/kernel/setup.c b/arch/parisc/kernel/setup.c index 15dd9e21be7e..d908058d05c1 100644 --- a/arch/parisc/kernel/setup.c +++ b/arch/parisc/kernel/setup.c @@ -397,6 +397,9 @@ void __init start_parisc(void) int ret, cpunum; struct pdc_coproc_cfg coproc_cfg; + /* check QEMU/SeaBIOS marker in PAGE0 */ + running_on_qemu = (memcmp(&PAGE0->pad0, "SeaBIOS", 8) == 0); + cpunum = smp_processor_id(); init_cpu_topology(); -- cgit v1.2.3 From d7ee81ad09f072eab1681877fc71ec05f9c1ae92 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Wed, 3 Apr 2019 10:12:48 +0300 Subject: NFC: nci: Add some bounds checking in nci_hci_cmd_received() This is similar to commit 674d9de02aa7 ("NFC: Fix possible memory corruption when handling SHDLC I-Frame commands"). I'm not totally sure, but I think that commit description may have overstated the danger. I was under the impression that this data came from the firmware? If you can't trust your networking firmware, then you're already in trouble. Anyway, these days we add bounds checking where ever we can and we call it kernel hardening. Better safe than sorry. Fixes: 11f54f228643 ("NFC: nci: Add HCI over NCI protocol support") Signed-off-by: Dan Carpenter Signed-off-by: David S. Miller --- net/nfc/nci/hci.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/net/nfc/nci/hci.c b/net/nfc/nci/hci.c index ddfc52ac1f9b..c0d323b58e73 100644 --- a/net/nfc/nci/hci.c +++ b/net/nfc/nci/hci.c @@ -312,6 +312,10 @@ static void nci_hci_cmd_received(struct nci_dev *ndev, u8 pipe, create_info = (struct nci_hci_create_pipe_resp *)skb->data; dest_gate = create_info->dest_gate; new_pipe = create_info->pipe; + if (new_pipe >= NCI_HCI_MAX_PIPES) { + status = NCI_HCI_ANY_E_NOK; + goto exit; + } /* Save the new created pipe and bind with local gate, * the description for skb->data[3] is destination gate id @@ -336,6 +340,10 @@ static void nci_hci_cmd_received(struct nci_dev *ndev, u8 pipe, goto exit; } delete_info = (struct nci_hci_delete_pipe_noti *)skb->data; + if (delete_info->pipe >= NCI_HCI_MAX_PIPES) { + status = NCI_HCI_ANY_E_NOK; + goto exit; + } ndev->hci_dev->pipes[delete_info->pipe].gate = NCI_HCI_INVALID_GATE; -- cgit v1.2.3 From 6491d698396fd5da4941980a35ca7c162a672016 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Wed, 3 Apr 2019 10:13:51 +0300 Subject: nfc: nci: Potential off by one in ->pipes[] array This is similar to commit e285d5bfb7e9 ("NFC: Fix the number of pipes") where we changed NFC_HCI_MAX_PIPES from 127 to 128. As the comment next to the define explains, the pipe identifier is 7 bits long. The highest possible pipe is 127, but the number of possible pipes is 128. As the code is now, then there is potential for an out of bounds array access: net/nfc/nci/hci.c:297 nci_hci_cmd_received() warn: array off by one? 'ndev->hci_dev->pipes[pipe]' '0-127 == 127' Fixes: 11f54f228643 ("NFC: nci: Add HCI over NCI protocol support") Signed-off-by: Dan Carpenter Signed-off-by: David S. Miller --- include/net/nfc/nci_core.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/net/nfc/nci_core.h b/include/net/nfc/nci_core.h index 87499b6b35d6..df5c69db68af 100644 --- a/include/net/nfc/nci_core.h +++ b/include/net/nfc/nci_core.h @@ -166,7 +166,7 @@ struct nci_conn_info { * According to specification 102 622 chapter 4.4 Pipes, * the pipe identifier is 7 bits long. */ -#define NCI_HCI_MAX_PIPES 127 +#define NCI_HCI_MAX_PIPES 128 struct nci_hci_gate { u8 gate; -- cgit v1.2.3 From dd9a994fc68d196a052b73747e3366c57d14a09e Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Thu, 4 Apr 2019 12:20:05 +0000 Subject: powerpc/vdso32: fix CLOCK_MONOTONIC on PPC64 Commit b5b4453e7912 ("powerpc/vdso64: Fix CLOCK_MONOTONIC inconsistencies across Y2038") changed the type of wtom_clock_sec to s64 on PPC64. Therefore, VDSO32 needs to read it with a 4 bytes shift in order to retrieve the lower part of it. Fixes: b5b4453e7912 ("powerpc/vdso64: Fix CLOCK_MONOTONIC inconsistencies across Y2038") Reported-by: Christian Zigotzky Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/vdso32/gettimeofday.S | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/kernel/vdso32/gettimeofday.S b/arch/powerpc/kernel/vdso32/gettimeofday.S index 1e0bc5955a40..afd516b572f8 100644 --- a/arch/powerpc/kernel/vdso32/gettimeofday.S +++ b/arch/powerpc/kernel/vdso32/gettimeofday.S @@ -98,7 +98,7 @@ V_FUNCTION_BEGIN(__kernel_clock_gettime) * can be used, r7 contains NSEC_PER_SEC. */ - lwz r5,WTOM_CLOCK_SEC(r9) + lwz r5,(WTOM_CLOCK_SEC+LOPART)(r9) lwz r6,WTOM_CLOCK_NSEC(r9) /* We now have our offset in r5,r6. We create a fake dependency -- cgit v1.2.3 From 9dc6488e84b0f64df17672271664752488cd6a25 Mon Sep 17 00:00:00 2001 From: Li RongQing Date: Thu, 4 Apr 2019 10:58:01 +0800 Subject: libnvdimm/pmem: fix a possible OOB access when read and write pmem If offset is not zero and length is bigger than PAGE_SIZE, this will cause to out of boundary access to a page memory Fixes: 98cc093cba1e ("block, THP: make block_device_operations.rw_page support THP") Co-developed-by: Liang ZhiCheng Signed-off-by: Liang ZhiCheng Signed-off-by: Li RongQing Reviewed-by: Ira Weiny Reviewed-by: Jeff Moyer Signed-off-by: Dan Williams --- drivers/nvdimm/pmem.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/nvdimm/pmem.c b/drivers/nvdimm/pmem.c index bc2f700feef8..0279eb1da3ef 100644 --- a/drivers/nvdimm/pmem.c +++ b/drivers/nvdimm/pmem.c @@ -113,13 +113,13 @@ static void write_pmem(void *pmem_addr, struct page *page, while (len) { mem = kmap_atomic(page); - chunk = min_t(unsigned int, len, PAGE_SIZE); + chunk = min_t(unsigned int, len, PAGE_SIZE - off); memcpy_flushcache(pmem_addr, mem + off, chunk); kunmap_atomic(mem); len -= chunk; off = 0; page++; - pmem_addr += PAGE_SIZE; + pmem_addr += chunk; } } @@ -132,7 +132,7 @@ static blk_status_t read_pmem(struct page *page, unsigned int off, while (len) { mem = kmap_atomic(page); - chunk = min_t(unsigned int, len, PAGE_SIZE); + chunk = min_t(unsigned int, len, PAGE_SIZE - off); rem = memcpy_mcsafe(mem + off, pmem_addr, chunk); kunmap_atomic(mem); if (rem) @@ -140,7 +140,7 @@ static blk_status_t read_pmem(struct page *page, unsigned int off, len -= chunk; off = 0; page++; - pmem_addr += PAGE_SIZE; + pmem_addr += chunk; } return BLK_STS_OK; } -- cgit v1.2.3 From ac0722f23ff5bc1b15e268564a4d56d35cd4a1b5 Mon Sep 17 00:00:00 2001 From: Maxime Ripard Date: Mon, 18 Mar 2019 11:05:21 +0100 Subject: dt-bindings: cpu: Fix JSON schema Commit fd73403a4862 ("dt-bindings: arm: Add SMP enable-method for Milbeaut") added support for a new cpu enable-method, but did so using tabulations to ident. This is however invalid in the syntax, and resulted in a failure when trying to use that schemas for validation. Use spaces instead of tabs to indent to fix this. Fixes: fd73403a4862 ("dt-bindings: arm: Add SMP enable-method for Milbeaut") Signed-off-by: Maxime Ripard Reviewed-by: Rob Herring Acked-by: Sugaya Taichi Signed-off-by: Olof Johansson --- Documentation/devicetree/bindings/arm/cpus.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/devicetree/bindings/arm/cpus.yaml b/Documentation/devicetree/bindings/arm/cpus.yaml index 365dcf384d73..82dd7582e945 100644 --- a/Documentation/devicetree/bindings/arm/cpus.yaml +++ b/Documentation/devicetree/bindings/arm/cpus.yaml @@ -228,7 +228,7 @@ patternProperties: - renesas,r9a06g032-smp - rockchip,rk3036-smp - rockchip,rk3066-smp - - socionext,milbeaut-m10v-smp + - socionext,milbeaut-m10v-smp - ste,dbx500-smp cpu-release-addr: -- cgit v1.2.3 From fbe8758f931ff5468aaeb4b304fc3edb70c908d6 Mon Sep 17 00:00:00 2001 From: Olof Johansson Date: Sun, 7 Apr 2019 15:18:41 -0700 Subject: Revert "ARM: dts: nomadik: Fix polarity of SPI CS" This reverts commit fa9463564e77067df81b0b8dec91adbbbc47bfb4. Per Linus Walleij: Dear ARM SoC maintainers, can you please revert this patch. It was the wrong solution to the wrong problem, and I must have acted in stress. Andrey fixed the real bug in a proper way in these commits: commit e5545c94e43b8f6599ffc01df8d1aedf18ee912a "gpio: of: Check propname before applying "cs-gpios" quirks" commit 7ce40277bf848391705011ba37eac2e377cbd9e6 "gpio: of: Check for "spi-cs-high" in child instead of parent node" Signed-off-by: Olof Johansson --- arch/arm/boot/dts/ste-nomadik-nhk15.dts | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/arch/arm/boot/dts/ste-nomadik-nhk15.dts b/arch/arm/boot/dts/ste-nomadik-nhk15.dts index f2f6558a00f1..04066f9cb8a3 100644 --- a/arch/arm/boot/dts/ste-nomadik-nhk15.dts +++ b/arch/arm/boot/dts/ste-nomadik-nhk15.dts @@ -213,13 +213,12 @@ gpio-sck = <&gpio0 5 GPIO_ACTIVE_HIGH>; gpio-mosi = <&gpio0 4 GPIO_ACTIVE_HIGH>; /* - * This chipselect is active high. Just setting the flags - * to GPIO_ACTIVE_HIGH is not enough for the SPI DT bindings, - * it will be ignored, only the special "spi-cs-high" flag - * really counts. + * It's not actually active high, but the frameworks assume + * the polarity of the passed-in GPIO is "normal" (active + * high) then actively drives the line low to select the + * chip. */ cs-gpios = <&gpio0 6 GPIO_ACTIVE_HIGH>; - spi-cs-high; num-chipselects = <1>; /* -- cgit v1.2.3 From cd92d74d67c811dc22544430b9ac3029f5bd64c5 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 25 Mar 2019 16:50:42 +0100 Subject: ARM: orion: don't use using 64-bit DMA masks clang warns about statically defined DMA masks from the DMA_BIT_MASK macro with length 64: arch/arm/plat-orion/common.c:625:29: error: shift count >= width of type [-Werror,-Wshift-count-overflow] .coherent_dma_mask = DMA_BIT_MASK(64), ^~~~~~~~~~~~~~~~ include/linux/dma-mapping.h:141:54: note: expanded from macro 'DMA_BIT_MASK' #define DMA_BIT_MASK(n) (((n) == 64) ? ~0ULL : ((1ULL<<(n))-1)) The ones in orion shouldn't really be 64 bit masks, so changing them to what the driver can support avoids the warning. Signed-off-by: Arnd Bergmann Signed-off-by: Olof Johansson --- arch/arm/plat-orion/common.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm/plat-orion/common.c b/arch/arm/plat-orion/common.c index a6c81ce00f52..8647cb80a93b 100644 --- a/arch/arm/plat-orion/common.c +++ b/arch/arm/plat-orion/common.c @@ -622,7 +622,7 @@ static struct platform_device orion_xor0_shared = { .resource = orion_xor0_shared_resources, .dev = { .dma_mask = &orion_xor_dmamask, - .coherent_dma_mask = DMA_BIT_MASK(64), + .coherent_dma_mask = DMA_BIT_MASK(32), .platform_data = &orion_xor0_pdata, }, }; @@ -683,7 +683,7 @@ static struct platform_device orion_xor1_shared = { .resource = orion_xor1_shared_resources, .dev = { .dma_mask = &orion_xor_dmamask, - .coherent_dma_mask = DMA_BIT_MASK(64), + .coherent_dma_mask = DMA_BIT_MASK(32), .platform_data = &orion_xor1_pdata, }, }; -- cgit v1.2.3 From 2125801ccce19249708ca3245d48998e70569ab8 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 25 Mar 2019 16:50:43 +0100 Subject: ARM: iop: don't use using 64-bit DMA masks clang warns about statically defined DMA masks from the DMA_BIT_MASK macro with length 64: arch/arm/mach-iop13xx/setup.c:303:35: error: shift count >= width of type [-Werror,-Wshift-count-overflow] static u64 iop13xx_adma_dmamask = DMA_BIT_MASK(64); ^~~~~~~~~~~~~~~~ include/linux/dma-mapping.h:141:54: note: expanded from macro 'DMA_BIT_MASK' #define DMA_BIT_MASK(n) (((n) == 64) ? ~0ULL : ((1ULL<<(n))-1)) ^ ~~~ The ones in iop shouldn't really be 64 bit masks, so changing them to what the driver can support avoids the warning. Signed-off-by: Arnd Bergmann Signed-off-by: Olof Johansson --- arch/arm/mach-iop13xx/setup.c | 8 ++++---- arch/arm/mach-iop13xx/tpmi.c | 10 +++++----- arch/arm/plat-iop/adma.c | 6 +++--- 3 files changed, 12 insertions(+), 12 deletions(-) diff --git a/arch/arm/mach-iop13xx/setup.c b/arch/arm/mach-iop13xx/setup.c index 53c316f7301e..fe4932fda01d 100644 --- a/arch/arm/mach-iop13xx/setup.c +++ b/arch/arm/mach-iop13xx/setup.c @@ -300,7 +300,7 @@ static struct resource iop13xx_adma_2_resources[] = { } }; -static u64 iop13xx_adma_dmamask = DMA_BIT_MASK(64); +static u64 iop13xx_adma_dmamask = DMA_BIT_MASK(32); static struct iop_adma_platform_data iop13xx_adma_0_data = { .hw_id = 0, .pool_size = PAGE_SIZE, @@ -324,7 +324,7 @@ static struct platform_device iop13xx_adma_0_channel = { .resource = iop13xx_adma_0_resources, .dev = { .dma_mask = &iop13xx_adma_dmamask, - .coherent_dma_mask = DMA_BIT_MASK(64), + .coherent_dma_mask = DMA_BIT_MASK(32), .platform_data = (void *) &iop13xx_adma_0_data, }, }; @@ -336,7 +336,7 @@ static struct platform_device iop13xx_adma_1_channel = { .resource = iop13xx_adma_1_resources, .dev = { .dma_mask = &iop13xx_adma_dmamask, - .coherent_dma_mask = DMA_BIT_MASK(64), + .coherent_dma_mask = DMA_BIT_MASK(32), .platform_data = (void *) &iop13xx_adma_1_data, }, }; @@ -348,7 +348,7 @@ static struct platform_device iop13xx_adma_2_channel = { .resource = iop13xx_adma_2_resources, .dev = { .dma_mask = &iop13xx_adma_dmamask, - .coherent_dma_mask = DMA_BIT_MASK(64), + .coherent_dma_mask = DMA_BIT_MASK(32), .platform_data = (void *) &iop13xx_adma_2_data, }, }; diff --git a/arch/arm/mach-iop13xx/tpmi.c b/arch/arm/mach-iop13xx/tpmi.c index db511ec2b1df..116feb6b261e 100644 --- a/arch/arm/mach-iop13xx/tpmi.c +++ b/arch/arm/mach-iop13xx/tpmi.c @@ -152,7 +152,7 @@ static struct resource iop13xx_tpmi_3_resources[] = { } }; -u64 iop13xx_tpmi_mask = DMA_BIT_MASK(64); +u64 iop13xx_tpmi_mask = DMA_BIT_MASK(32); static struct platform_device iop13xx_tpmi_0_device = { .name = "iop-tpmi", .id = 0, @@ -160,7 +160,7 @@ static struct platform_device iop13xx_tpmi_0_device = { .resource = iop13xx_tpmi_0_resources, .dev = { .dma_mask = &iop13xx_tpmi_mask, - .coherent_dma_mask = DMA_BIT_MASK(64), + .coherent_dma_mask = DMA_BIT_MASK(32), }, }; @@ -171,7 +171,7 @@ static struct platform_device iop13xx_tpmi_1_device = { .resource = iop13xx_tpmi_1_resources, .dev = { .dma_mask = &iop13xx_tpmi_mask, - .coherent_dma_mask = DMA_BIT_MASK(64), + .coherent_dma_mask = DMA_BIT_MASK(32), }, }; @@ -182,7 +182,7 @@ static struct platform_device iop13xx_tpmi_2_device = { .resource = iop13xx_tpmi_2_resources, .dev = { .dma_mask = &iop13xx_tpmi_mask, - .coherent_dma_mask = DMA_BIT_MASK(64), + .coherent_dma_mask = DMA_BIT_MASK(32), }, }; @@ -193,7 +193,7 @@ static struct platform_device iop13xx_tpmi_3_device = { .resource = iop13xx_tpmi_3_resources, .dev = { .dma_mask = &iop13xx_tpmi_mask, - .coherent_dma_mask = DMA_BIT_MASK(64), + .coherent_dma_mask = DMA_BIT_MASK(32), }, }; diff --git a/arch/arm/plat-iop/adma.c b/arch/arm/plat-iop/adma.c index a4d1f8de3b5b..d9612221e484 100644 --- a/arch/arm/plat-iop/adma.c +++ b/arch/arm/plat-iop/adma.c @@ -143,7 +143,7 @@ struct platform_device iop3xx_dma_0_channel = { .resource = iop3xx_dma_0_resources, .dev = { .dma_mask = &iop3xx_adma_dmamask, - .coherent_dma_mask = DMA_BIT_MASK(64), + .coherent_dma_mask = DMA_BIT_MASK(32), .platform_data = (void *) &iop3xx_dma_0_data, }, }; @@ -155,7 +155,7 @@ struct platform_device iop3xx_dma_1_channel = { .resource = iop3xx_dma_1_resources, .dev = { .dma_mask = &iop3xx_adma_dmamask, - .coherent_dma_mask = DMA_BIT_MASK(64), + .coherent_dma_mask = DMA_BIT_MASK(32), .platform_data = (void *) &iop3xx_dma_1_data, }, }; @@ -167,7 +167,7 @@ struct platform_device iop3xx_aau_channel = { .resource = iop3xx_aau_resources, .dev = { .dma_mask = &iop3xx_adma_dmamask, - .coherent_dma_mask = DMA_BIT_MASK(64), + .coherent_dma_mask = DMA_BIT_MASK(32), .platform_data = (void *) &iop3xx_aau_data, }, }; -- cgit v1.2.3 From 9a8f32038a74cb800e9649afbf4b3dba2b7d6539 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 13 Mar 2019 22:19:16 +0100 Subject: ARM: milbeaut: fix build with !CONFIG_HOTPLUG_CPU When HOTPLUG_CPU is disabled, some fields in the smp operations are not available or needed: arch/arm/mach-milbeaut/platsmp.c:90:3: error: field designator 'cpu_die' does not refer to any field in type 'struct smp_operations' .cpu_die = m10v_cpu_die, ^ arch/arm/mach-milbeaut/platsmp.c:91:3: error: field designator 'cpu_kill' does not refer to any field in type 'struct smp_operations' .cpu_kill = m10v_cpu_kill, ^ Hide them in an #ifdef like the other platforms do. Fixes: 9fb29c734f9e ("ARM: milbeaut: Add basic support for Milbeaut m10v SoC") Signed-off-by: Arnd Bergmann Signed-off-by: Olof Johansson --- arch/arm/mach-milbeaut/platsmp.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/arch/arm/mach-milbeaut/platsmp.c b/arch/arm/mach-milbeaut/platsmp.c index 591543c81399..3ea880f5fcb7 100644 --- a/arch/arm/mach-milbeaut/platsmp.c +++ b/arch/arm/mach-milbeaut/platsmp.c @@ -65,6 +65,7 @@ static void m10v_smp_init(unsigned int max_cpus) writel(KERNEL_UNBOOT_FLAG, m10v_smp_base + cpu * 4); } +#ifdef CONFIG_HOTPLUG_CPU static void m10v_cpu_die(unsigned int l_cpu) { gic_cpu_if_down(0); @@ -83,12 +84,15 @@ static int m10v_cpu_kill(unsigned int l_cpu) return 1; } +#endif static struct smp_operations m10v_smp_ops __initdata = { .smp_prepare_cpus = m10v_smp_init, .smp_boot_secondary = m10v_boot_secondary, +#ifdef CONFIG_HOTPLUG_CPU .cpu_die = m10v_cpu_die, .cpu_kill = m10v_cpu_kill, +#endif }; CPU_METHOD_OF_DECLARE(m10v_smp, "socionext,milbeaut-m10v-smp", &m10v_smp_ops); -- cgit v1.2.3 From 15ade5d2e7775667cf191cf2f94327a4889f8b9d Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 7 Apr 2019 14:09:59 -1000 Subject: Linux 5.1-rc4 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 026fbc450906..15c8251d4d5e 100644 --- a/Makefile +++ b/Makefile @@ -2,7 +2,7 @@ VERSION = 5 PATCHLEVEL = 1 SUBLEVEL = 0 -EXTRAVERSION = -rc3 +EXTRAVERSION = -rc4 NAME = Shy Crocodile # *DOCUMENTATION* -- cgit v1.2.3 From b959ecf8f953701a19970e5db7e427c05143f303 Mon Sep 17 00:00:00 2001 From: Nicolas Dichtel Date: Fri, 5 Apr 2019 14:20:24 +0200 Subject: selftests: add a tc matchall test case This is a follow up of the commit 0db6f8befc32 ("net/sched: fix ->get helper of the matchall cls"). To test it: $ cd tools/testing/selftests/tc-testing $ ln -s ../plugin-lib/nsPlugin.py plugins/20-nsPlugin.py $ ./tdc.py -n -e 2638 Signed-off-by: Nicolas Dichtel Signed-off-by: David S. Miller --- .../selftests/tc-testing/tc-tests/filters/tests.json | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/tools/testing/selftests/tc-testing/tc-tests/filters/tests.json b/tools/testing/selftests/tc-testing/tc-tests/filters/tests.json index 99a5ffca1088..2d096b2abf2c 100644 --- a/tools/testing/selftests/tc-testing/tc-tests/filters/tests.json +++ b/tools/testing/selftests/tc-testing/tc-tests/filters/tests.json @@ -18,6 +18,26 @@ "$TC qdisc del dev $DEV1 ingress" ] }, + { + "id": "2638", + "name": "Add matchall and try to get it", + "category": [ + "filter", + "matchall" + ], + "setup": [ + "$TC qdisc add dev $DEV1 clsact", + "$TC filter add dev $DEV1 protocol all pref 1 ingress handle 0x1234 matchall action ok" + ], + "cmdUnderTest": "$TC filter get dev $DEV1 protocol all pref 1 ingress handle 0x1234 matchall", + "expExitCode": "0", + "verifyCmd": "$TC filter show dev $DEV1 ingress", + "matchPattern": "filter protocol all pref 1 matchall chain 0 handle 0x1234", + "matchCount": "1", + "teardown": [ + "$TC qdisc del dev $DEV1 clsact" + ] + }, { "id": "d052", "name": "Add 1M filters with the same action", -- cgit v1.2.3 From 196a66275520ffc27513c56ecc06a2d9450fd12f Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 4 Apr 2019 08:14:25 +0100 Subject: drm/i915/gvt: Annotate iomem usage Fix the sparse warning for blithely using iomem with normal memcpy: drivers/gpu/drm/i915/gvt/kvmgt.c:916:21: warning: incorrect type in assignment (different address spaces) drivers/gpu/drm/i915/gvt/kvmgt.c:916:21: expected void *aperture_va drivers/gpu/drm/i915/gvt/kvmgt.c:916:21: got void [noderef] * drivers/gpu/drm/i915/gvt/kvmgt.c:927:26: warning: incorrect type in argument 1 (different address spaces) drivers/gpu/drm/i915/gvt/kvmgt.c:927:26: expected void [noderef] *vaddr drivers/gpu/drm/i915/gvt/kvmgt.c:927:26: got void *aperture_va Fixes: d480b28a41a6 ("drm/i915/gvt: Fix aperture read/write emulation when enable x-no-mmap=on") Reviewed-by: Zhenyu Wang Signed-off-by: Chris Wilson Cc: Zhenyu Wang Cc: Changbin Du Cc: Zhi Wang Signed-off-by: Zhenyu Wang --- drivers/gpu/drm/i915/gvt/kvmgt.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/gvt/kvmgt.c b/drivers/gpu/drm/i915/gvt/kvmgt.c index d5fcc447d22f..a68addf95c23 100644 --- a/drivers/gpu/drm/i915/gvt/kvmgt.c +++ b/drivers/gpu/drm/i915/gvt/kvmgt.c @@ -905,7 +905,7 @@ static inline bool intel_vgpu_in_aperture(struct intel_vgpu *vgpu, u64 off) static int intel_vgpu_aperture_rw(struct intel_vgpu *vgpu, u64 off, void *buf, unsigned long count, bool is_write) { - void *aperture_va; + void __iomem *aperture_va; if (!intel_vgpu_in_aperture(vgpu, off) || !intel_vgpu_in_aperture(vgpu, off + count)) { @@ -920,9 +920,9 @@ static int intel_vgpu_aperture_rw(struct intel_vgpu *vgpu, u64 off, return -EIO; if (is_write) - memcpy(aperture_va + offset_in_page(off), buf, count); + memcpy_toio(aperture_va + offset_in_page(off), buf, count); else - memcpy(buf, aperture_va + offset_in_page(off), count); + memcpy_fromio(buf, aperture_va + offset_in_page(off), count); io_mapping_unmap(aperture_va); -- cgit v1.2.3 From 968a85b19d0a79dd8ed85f39e23eacd34b503e72 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 4 Apr 2019 08:30:56 +0100 Subject: drm/i915/gvt: Prevent use-after-free in ppgtt_free_all_spt() ppgtt_free_all_spt() iterates the radixtree as it is deleting it, forgoing all protection against the leaves being freed in the process (leaving the iter pointing into the void). A minimal fix seems to be to use the available post_shadow_list to decompose the tree into a list prior to destroying the radixtree. Alerted by the sparse warnings: drivers/gpu/drm/i915/gvt/gtt.c:757:9: warning: incorrect type in assignment (different address spaces) drivers/gpu/drm/i915/gvt/gtt.c:757:9: expected void **slot drivers/gpu/drm/i915/gvt/gtt.c:757:9: got void [noderef] ** drivers/gpu/drm/i915/gvt/gtt.c:757:9: warning: incorrect type in assignment (different address spaces) drivers/gpu/drm/i915/gvt/gtt.c:757:9: expected void **slot drivers/gpu/drm/i915/gvt/gtt.c:757:9: got void [noderef] ** drivers/gpu/drm/i915/gvt/gtt.c:758:45: warning: incorrect type in argument 1 (different address spaces) drivers/gpu/drm/i915/gvt/gtt.c:758:45: expected void [noderef] **slot drivers/gpu/drm/i915/gvt/gtt.c:758:45: got void **slot drivers/gpu/drm/i915/gvt/gtt.c:757:9: warning: incorrect type in argument 1 (different address spaces) drivers/gpu/drm/i915/gvt/gtt.c:757:9: expected void [noderef] **slot drivers/gpu/drm/i915/gvt/gtt.c:757:9: got void **slot drivers/gpu/drm/i915/gvt/gtt.c:757:9: warning: incorrect type in assignment (different address spaces) drivers/gpu/drm/i915/gvt/gtt.c:757:9: expected void **slot drivers/gpu/drm/i915/gvt/gtt.c:757:9: got void [noderef] ** This would also have been loudly warning if run through CI for the invalid RCU dereferences. Fixes: b6c126a39345 ("drm/i915/gvt: Manage shadow pages with radix tree") Reviewed-by: Zhenyu Wang Signed-off-by: Chris Wilson Cc: Changbin Du Cc: Zhenyu Wang Cc: Zhi Wang Signed-off-by: Zhenyu Wang --- drivers/gpu/drm/i915/gvt/gtt.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/gvt/gtt.c b/drivers/gpu/drm/i915/gvt/gtt.c index cf133ef03873..9814773882ec 100644 --- a/drivers/gpu/drm/i915/gvt/gtt.c +++ b/drivers/gpu/drm/i915/gvt/gtt.c @@ -750,14 +750,20 @@ static void ppgtt_free_spt(struct intel_vgpu_ppgtt_spt *spt) static void ppgtt_free_all_spt(struct intel_vgpu *vgpu) { - struct intel_vgpu_ppgtt_spt *spt; + struct intel_vgpu_ppgtt_spt *spt, *spn; struct radix_tree_iter iter; - void **slot; + LIST_HEAD(all_spt); + void __rcu **slot; + rcu_read_lock(); radix_tree_for_each_slot(slot, &vgpu->gtt.spt_tree, &iter, 0) { spt = radix_tree_deref_slot(slot); - ppgtt_free_spt(spt); + list_move(&spt->post_shadow_list, &all_spt); } + rcu_read_unlock(); + + list_for_each_entry_safe(spt, spn, &all_spt, post_shadow_list) + ppgtt_free_spt(spt); } static int ppgtt_handle_guest_write_page_table_bytes( -- cgit v1.2.3 From fcf88917dd435c6a4cb2830cb086ee58605a1d85 Mon Sep 17 00:00:00 2001 From: Qian Cai Date: Sat, 6 Apr 2019 18:59:01 -0400 Subject: slab: fix a crash by reading /proc/slab_allocators The commit 510ded33e075 ("slab: implement slab_root_caches list") changes the name of the list node within "struct kmem_cache" from "list" to "root_caches_node", but leaks_show() still use the "list" which causes a crash when reading /proc/slab_allocators. You need to have CONFIG_SLAB=y and CONFIG_MEMCG=y to see the problem, because without MEMCG all slab caches are root caches, and the "list" node happens to be the right one. Fixes: 510ded33e075 ("slab: implement slab_root_caches list") Signed-off-by: Qian Cai Reviewed-by: Tobin C. Harding Cc: Tejun Heo Cc: Andrew Morton Signed-off-by: Linus Torvalds --- mm/slab.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/mm/slab.c b/mm/slab.c index 329bfe67f2ca..47a380a486ee 100644 --- a/mm/slab.c +++ b/mm/slab.c @@ -4308,7 +4308,8 @@ static void show_symbol(struct seq_file *m, unsigned long address) static int leaks_show(struct seq_file *m, void *p) { - struct kmem_cache *cachep = list_entry(p, struct kmem_cache, list); + struct kmem_cache *cachep = list_entry(p, struct kmem_cache, + root_caches_node); struct page *page; struct kmem_cache_node *n; const char *name; -- cgit v1.2.3 From 5055376a3b44c4021de8830c9157f086a97731df Mon Sep 17 00:00:00 2001 From: Miaohe Lin Date: Mon, 8 Apr 2019 10:04:20 +0800 Subject: net: vrf: Fix ping failed when vrf mtu is set to 0 When the mtu of a vrf device is set to 0, it would cause ping failed. So I think we should limit vrf mtu in a reasonable range to solve this problem. I set dev->min_mtu to IPV6_MIN_MTU, so it will works for both ipv4 and ipv6. And if dev->max_mtu still be 0 can be confusing, so I set dev->max_mtu to ETH_MAX_MTU. Here is the reproduce step: 1.Config vrf interface and set mtu to 0: 3: enp4s0: mtu 1500 qdisc fq_codel master vrf1 state UP mode DEFAULT group default qlen 1000 link/ether 52:54:00:9e:dd:c1 brd ff:ff:ff:ff:ff:ff 2.Ping peer: 3: enp4s0: mtu 1500 qdisc fq_codel master vrf1 state UP group default qlen 1000 link/ether 52:54:00:9e:dd:c1 brd ff:ff:ff:ff:ff:ff inet 10.0.0.1/16 scope global enp4s0 valid_lft forever preferred_lft forever connect: Network is unreachable 3.Set mtu to default value, ping works: PING 10.0.0.2 (10.0.0.2) 56(84) bytes of data. 64 bytes from 10.0.0.2: icmp_seq=1 ttl=64 time=1.88 ms Fixes: ad49bc6361ca2 ("net: vrf: remove MTU limits for vrf device") Signed-off-by: Miaohe Lin Reviewed-by: David Ahern Signed-off-by: David S. Miller --- drivers/net/vrf.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/net/vrf.c b/drivers/net/vrf.c index 6d1a1abbed27..cd15c32b2e43 100644 --- a/drivers/net/vrf.c +++ b/drivers/net/vrf.c @@ -1275,8 +1275,12 @@ static void vrf_setup(struct net_device *dev) dev->priv_flags |= IFF_NO_QUEUE; dev->priv_flags |= IFF_NO_RX_HANDLER; - dev->min_mtu = 0; - dev->max_mtu = 0; + /* VRF devices do not care about MTU, but if the MTU is set + * too low then the ipv4 and ipv6 protocols are disabled + * which breaks networking. + */ + dev->min_mtu = IPV6_MIN_MTU; + dev->max_mtu = ETH_MAX_MTU; } static int vrf_validate(struct nlattr *tb[], struct nlattr *data[], -- cgit v1.2.3 From 9b39b013037fbfa8d4b999345d9e904d8a336fc2 Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Fri, 5 Apr 2019 13:17:13 +1000 Subject: drm/udl: add a release method and delay modeset teardown If we unplug a udl device, the usb callback with deinit the mode_config struct, however userspace will still have an open file descriptor and a framebuffer on that device. When userspace closes the fd, we'll oops because it'll try and look stuff up in the object idr which we've destroyed. This punts destroying the mode objects until release time instead. Cc: stable@vger.kernel.org Reviewed-by: Daniel Vetter Signed-off-by: Dave Airlie Link: https://patchwork.freedesktop.org/patch/msgid/20190405031715.5959-2-airlied@gmail.com --- drivers/gpu/drm/udl/udl_drv.c | 1 + drivers/gpu/drm/udl/udl_drv.h | 1 + drivers/gpu/drm/udl/udl_main.c | 8 +++++++- 3 files changed, 9 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/udl/udl_drv.c b/drivers/gpu/drm/udl/udl_drv.c index 22cd2d13e272..ff47f890e6ad 100644 --- a/drivers/gpu/drm/udl/udl_drv.c +++ b/drivers/gpu/drm/udl/udl_drv.c @@ -52,6 +52,7 @@ static struct drm_driver driver = { .driver_features = DRIVER_MODESET | DRIVER_GEM | DRIVER_PRIME, .load = udl_driver_load, .unload = udl_driver_unload, + .release = udl_driver_release, /* gem hooks */ .gem_free_object_unlocked = udl_gem_free_object, diff --git a/drivers/gpu/drm/udl/udl_drv.h b/drivers/gpu/drm/udl/udl_drv.h index e9e9b1ff678e..4ae67d882eae 100644 --- a/drivers/gpu/drm/udl/udl_drv.h +++ b/drivers/gpu/drm/udl/udl_drv.h @@ -104,6 +104,7 @@ void udl_urb_completion(struct urb *urb); int udl_driver_load(struct drm_device *dev, unsigned long flags); void udl_driver_unload(struct drm_device *dev); +void udl_driver_release(struct drm_device *dev); int udl_fbdev_init(struct drm_device *dev); void udl_fbdev_cleanup(struct drm_device *dev); diff --git a/drivers/gpu/drm/udl/udl_main.c b/drivers/gpu/drm/udl/udl_main.c index 9086d0d1b880..1f8ef34ade24 100644 --- a/drivers/gpu/drm/udl/udl_main.c +++ b/drivers/gpu/drm/udl/udl_main.c @@ -379,6 +379,12 @@ void udl_driver_unload(struct drm_device *dev) udl_free_urb_list(dev); udl_fbdev_cleanup(dev); - udl_modeset_cleanup(dev); kfree(udl); } + +void udl_driver_release(struct drm_device *dev) +{ + udl_modeset_cleanup(dev); + drm_dev_fini(dev); + kfree(dev); +} -- cgit v1.2.3 From 54f8844e3f6cf898450a6c85f70fa997f0aa72b9 Mon Sep 17 00:00:00 2001 From: Ranjani Sridharan Date: Thu, 4 Apr 2019 19:48:33 -0700 Subject: ASoC: topology: Use the correct dobj to free enum control values and texts The control values and texts of the enum kcontrol associated with a widget need to be freed when the widget is removed. However, both struct snd_soc_dapm_widget and struct soc_enum contain a dobj member, which resulted in a confusion. The existing code generates a null pointer dereference by attempting to free the values and texts from the dobj which belongs to the widget instead of the dobj belonging to the enum kcontrol. The suggested fix is to use the correct dobj member (se->dobj) of the enum kcontrol. Signed-off-by: Ranjani Sridharan Signed-off-by: Mark Brown --- sound/soc/soc-topology.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/sound/soc/soc-topology.c b/sound/soc/soc-topology.c index 25fca7055464..96852d250619 100644 --- a/sound/soc/soc-topology.c +++ b/sound/soc/soc-topology.c @@ -482,10 +482,11 @@ static void remove_widget(struct snd_soc_component *comp, snd_ctl_remove(card, kcontrol); - kfree(dobj->control.dvalues); + /* free enum kcontrol's dvalues and dtexts */ + kfree(se->dobj.control.dvalues); for (j = 0; j < se->items; j++) - kfree(dobj->control.dtexts[j]); - kfree(dobj->control.dtexts); + kfree(se->dobj.control.dtexts[j]); + kfree(se->dobj.control.dtexts); kfree(se); kfree(w->kcontrol_news[i].name); -- cgit v1.2.3 From 17d3069ccf06970e2db3f7cbf4335f207524279e Mon Sep 17 00:00:00 2001 From: Arnaud Pouliquen Date: Fri, 5 Apr 2019 11:19:11 +0200 Subject: ASoC: stm32: fix sai driver name initialisation This patch fixes the sai driver structure overwriting which results in a cpu dai name equal NULL. Fixes: 3e086ed ("ASoC: stm32: add SAI driver") Signed-off-by: Arnaud Pouliquen Signed-off-by: Mark Brown --- sound/soc/stm/stm32_sai_sub.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/soc/stm/stm32_sai_sub.c b/sound/soc/stm/stm32_sai_sub.c index 55d802f51c15..83d8a7ac56f4 100644 --- a/sound/soc/stm/stm32_sai_sub.c +++ b/sound/soc/stm/stm32_sai_sub.c @@ -1419,7 +1419,6 @@ static int stm32_sai_sub_dais_init(struct platform_device *pdev, if (!sai->cpu_dai_drv) return -ENOMEM; - sai->cpu_dai_drv->name = dev_name(&pdev->dev); if (STM_SAI_IS_PLAYBACK(sai)) { memcpy(sai->cpu_dai_drv, &stm32_sai_playback_dai, sizeof(stm32_sai_playback_dai)); @@ -1429,6 +1428,7 @@ static int stm32_sai_sub_dais_init(struct platform_device *pdev, sizeof(stm32_sai_capture_dai)); sai->cpu_dai_drv->capture.stream_name = sai->cpu_dai_drv->name; } + sai->cpu_dai_drv->name = dev_name(&pdev->dev); return 0; } -- cgit v1.2.3 From 678cce4019d746da6c680c48ba9e6d417803e127 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Sun, 31 Mar 2019 13:04:11 -0700 Subject: crypto: x86/poly1305 - fix overflow during partial reduction The x86_64 implementation of Poly1305 produces the wrong result on some inputs because poly1305_4block_avx2() incorrectly assumes that when partially reducing the accumulator, the bits carried from limb 'd4' to limb 'h0' fit in a 32-bit integer. This is true for poly1305-generic which processes only one block at a time. However, it's not true for the AVX2 implementation, which processes 4 blocks at a time and therefore can produce intermediate limbs about 4x larger. Fix it by making the relevant calculations use 64-bit arithmetic rather than 32-bit. Note that most of the carries already used 64-bit arithmetic, but the d4 -> h0 carry was different for some reason. To be safe I also made the same change to the corresponding SSE2 code, though that only operates on 1 or 2 blocks at a time. I don't think it's really needed for poly1305_block_sse2(), but it doesn't hurt because it's already x86_64 code. It *might* be needed for poly1305_2block_sse2(), but overflows aren't easy to reproduce there. This bug was originally detected by my patches that improve testmgr to fuzz algorithms against their generic implementation. But also add a test vector which reproduces it directly (in the AVX2 case). Fixes: b1ccc8f4b631 ("crypto: poly1305 - Add a four block AVX2 variant for x86_64") Fixes: c70f4abef07a ("crypto: poly1305 - Add a SSE2 SIMD variant for x86_64") Cc: # v4.3+ Cc: Martin Willi Cc: Jason A. Donenfeld Signed-off-by: Eric Biggers Reviewed-by: Martin Willi Signed-off-by: Herbert Xu --- arch/x86/crypto/poly1305-avx2-x86_64.S | 14 +++++++---- arch/x86/crypto/poly1305-sse2-x86_64.S | 22 ++++++++++------- crypto/testmgr.h | 44 +++++++++++++++++++++++++++++++++- 3 files changed, 67 insertions(+), 13 deletions(-) diff --git a/arch/x86/crypto/poly1305-avx2-x86_64.S b/arch/x86/crypto/poly1305-avx2-x86_64.S index 3b6e70d085da..8457cdd47f75 100644 --- a/arch/x86/crypto/poly1305-avx2-x86_64.S +++ b/arch/x86/crypto/poly1305-avx2-x86_64.S @@ -323,6 +323,12 @@ ENTRY(poly1305_4block_avx2) vpaddq t2,t1,t1 vmovq t1x,d4 + # Now do a partial reduction mod (2^130)-5, carrying h0 -> h1 -> h2 -> + # h3 -> h4 -> h0 -> h1 to get h0,h2,h3,h4 < 2^26 and h1 < 2^26 + a small + # amount. Careful: we must not assume the carry bits 'd0 >> 26', + # 'd1 >> 26', 'd2 >> 26', 'd3 >> 26', and '(d4 >> 26) * 5' fit in 32-bit + # integers. It's true in a single-block implementation, but not here. + # d1 += d0 >> 26 mov d0,%rax shr $26,%rax @@ -361,16 +367,16 @@ ENTRY(poly1305_4block_avx2) # h0 += (d4 >> 26) * 5 mov d4,%rax shr $26,%rax - lea (%eax,%eax,4),%eax - add %eax,%ebx + lea (%rax,%rax,4),%rax + add %rax,%rbx # h4 = d4 & 0x3ffffff mov d4,%rax and $0x3ffffff,%eax mov %eax,h4 # h1 += h0 >> 26 - mov %ebx,%eax - shr $26,%eax + mov %rbx,%rax + shr $26,%rax add %eax,h1 # h0 = h0 & 0x3ffffff andl $0x3ffffff,%ebx diff --git a/arch/x86/crypto/poly1305-sse2-x86_64.S b/arch/x86/crypto/poly1305-sse2-x86_64.S index e6add74d78a5..6f0be7a86964 100644 --- a/arch/x86/crypto/poly1305-sse2-x86_64.S +++ b/arch/x86/crypto/poly1305-sse2-x86_64.S @@ -253,16 +253,16 @@ ENTRY(poly1305_block_sse2) # h0 += (d4 >> 26) * 5 mov d4,%rax shr $26,%rax - lea (%eax,%eax,4),%eax - add %eax,%ebx + lea (%rax,%rax,4),%rax + add %rax,%rbx # h4 = d4 & 0x3ffffff mov d4,%rax and $0x3ffffff,%eax mov %eax,h4 # h1 += h0 >> 26 - mov %ebx,%eax - shr $26,%eax + mov %rbx,%rax + shr $26,%rax add %eax,h1 # h0 = h0 & 0x3ffffff andl $0x3ffffff,%ebx @@ -524,6 +524,12 @@ ENTRY(poly1305_2block_sse2) paddq t2,t1 movq t1,d4 + # Now do a partial reduction mod (2^130)-5, carrying h0 -> h1 -> h2 -> + # h3 -> h4 -> h0 -> h1 to get h0,h2,h3,h4 < 2^26 and h1 < 2^26 + a small + # amount. Careful: we must not assume the carry bits 'd0 >> 26', + # 'd1 >> 26', 'd2 >> 26', 'd3 >> 26', and '(d4 >> 26) * 5' fit in 32-bit + # integers. It's true in a single-block implementation, but not here. + # d1 += d0 >> 26 mov d0,%rax shr $26,%rax @@ -562,16 +568,16 @@ ENTRY(poly1305_2block_sse2) # h0 += (d4 >> 26) * 5 mov d4,%rax shr $26,%rax - lea (%eax,%eax,4),%eax - add %eax,%ebx + lea (%rax,%rax,4),%rax + add %rax,%rbx # h4 = d4 & 0x3ffffff mov d4,%rax and $0x3ffffff,%eax mov %eax,h4 # h1 += h0 >> 26 - mov %ebx,%eax - shr $26,%eax + mov %rbx,%rax + shr $26,%rax add %eax,h1 # h0 = h0 & 0x3ffffff andl $0x3ffffff,%ebx diff --git a/crypto/testmgr.h b/crypto/testmgr.h index f267633cf13a..d18a37629f05 100644 --- a/crypto/testmgr.h +++ b/crypto/testmgr.h @@ -5634,7 +5634,49 @@ static const struct hash_testvec poly1305_tv_template[] = { .psize = 80, .digest = "\x13\x00\x00\x00\x00\x00\x00\x00" "\x00\x00\x00\x00\x00\x00\x00\x00", - }, + }, { /* Regression test for overflow in AVX2 implementation */ + .plaintext = "\xff\xff\xff\xff\xff\xff\xff\xff" + "\xff\xff\xff\xff\xff\xff\xff\xff" + "\xff\xff\xff\xff\xff\xff\xff\xff" + "\xff\xff\xff\xff\xff\xff\xff\xff" + "\xff\xff\xff\xff\xff\xff\xff\xff" + "\xff\xff\xff\xff\xff\xff\xff\xff" + "\xff\xff\xff\xff\xff\xff\xff\xff" + "\xff\xff\xff\xff\xff\xff\xff\xff" + "\xff\xff\xff\xff\xff\xff\xff\xff" + "\xff\xff\xff\xff\xff\xff\xff\xff" + "\xff\xff\xff\xff\xff\xff\xff\xff" + "\xff\xff\xff\xff\xff\xff\xff\xff" + "\xff\xff\xff\xff\xff\xff\xff\xff" + "\xff\xff\xff\xff\xff\xff\xff\xff" + "\xff\xff\xff\xff\xff\xff\xff\xff" + "\xff\xff\xff\xff\xff\xff\xff\xff" + "\xff\xff\xff\xff\xff\xff\xff\xff" + "\xff\xff\xff\xff\xff\xff\xff\xff" + "\xff\xff\xff\xff\xff\xff\xff\xff" + "\xff\xff\xff\xff\xff\xff\xff\xff" + "\xff\xff\xff\xff\xff\xff\xff\xff" + "\xff\xff\xff\xff\xff\xff\xff\xff" + "\xff\xff\xff\xff\xff\xff\xff\xff" + "\xff\xff\xff\xff\xff\xff\xff\xff" + "\xff\xff\xff\xff\xff\xff\xff\xff" + "\xff\xff\xff\xff\xff\xff\xff\xff" + "\xff\xff\xff\xff\xff\xff\xff\xff" + "\xff\xff\xff\xff\xff\xff\xff\xff" + "\xff\xff\xff\xff\xff\xff\xff\xff" + "\xff\xff\xff\xff\xff\xff\xff\xff" + "\xff\xff\xff\xff\xff\xff\xff\xff" + "\xff\xff\xff\xff\xff\xff\xff\xff" + "\xff\xff\xff\xff\xff\xff\xff\xff" + "\xff\xff\xff\xff\xff\xff\xff\xff" + "\xff\xff\xff\xff\xff\xff\xff\xff" + "\xff\xff\xff\xff\xff\xff\xff\xff" + "\xff\xff\xff\xff\xff\xff\xff\xff" + "\xff\xff\xff\xff", + .psize = 300, + .digest = "\xfb\x5e\x96\xd8\x61\xd5\xc7\xc8" + "\x78\xe5\x87\xcc\x2d\x5a\x22\xe1", + } }; /* NHPoly1305 test vectors from https://github.com/google/adiantum */ -- cgit v1.2.3 From b4ed6b51f356224c6c71540ed94087f7f09b84af Mon Sep 17 00:00:00 2001 From: Ranjani Sridharan Date: Fri, 5 Apr 2019 09:57:08 -0700 Subject: ASoC: core: conditionally increase module refcount on component open Recently, for Intel platforms the "ignore_module_refcount" field was introduced for the component driver. In order to avoid a deadlock preventing the PCI modules from being removed even when the card was idle, the refcounts were not incremented for the device driver module during component probe. However, this change introduced a nasty side effect: the device driver module can be unloaded while a pcm stream is open. This patch proposes to change the field to be renamed as "module_get_upon_open". When this field is set, the module refcount should be incremented on pcm open amd decremented upon pcm close. This will enable modules to be removed when no PCM playback/capture happens and prevent removal when the component is actually in use. Also, align with the skylake component driver with the new name. Fixes: b450b878('ASoC: core: don't increase component module refcount unconditionally' Signed-off-by: Ranjani Sridharan Acked-by: Pierre-Louis Bossart Signed-off-by: Mark Brown --- include/sound/soc.h | 9 +++++++-- sound/soc/intel/skylake/skl-pcm.c | 2 +- sound/soc/soc-core.c | 4 ++-- 3 files changed, 10 insertions(+), 5 deletions(-) diff --git a/include/sound/soc.h b/include/sound/soc.h index 1e2be35ed36f..482b4ea87c3c 100644 --- a/include/sound/soc.h +++ b/include/sound/soc.h @@ -802,8 +802,13 @@ struct snd_soc_component_driver { int probe_order; int remove_order; - /* signal if the module handling the component cannot be removed */ - unsigned int ignore_module_refcount:1; + /* + * signal if the module handling the component should not be removed + * if a pcm is open. Setting this would prevent the module + * refcount being incremented in probe() but allow it be incremented + * when a pcm is opened and decremented when it is closed. + */ + unsigned int module_get_upon_open:1; /* bits */ unsigned int idle_bias_on:1; diff --git a/sound/soc/intel/skylake/skl-pcm.c b/sound/soc/intel/skylake/skl-pcm.c index 57031b6d4d45..9735e2412251 100644 --- a/sound/soc/intel/skylake/skl-pcm.c +++ b/sound/soc/intel/skylake/skl-pcm.c @@ -1475,7 +1475,7 @@ static const struct snd_soc_component_driver skl_component = { .ops = &skl_platform_ops, .pcm_new = skl_pcm_new, .pcm_free = skl_pcm_free, - .ignore_module_refcount = 1, /* do not increase the refcount in core */ + .module_get_upon_open = 1, /* increment refcount when a pcm is opened */ }; int skl_platform_register(struct device *dev) diff --git a/sound/soc/soc-core.c b/sound/soc/soc-core.c index d88757659729..46e3ab0fced4 100644 --- a/sound/soc/soc-core.c +++ b/sound/soc/soc-core.c @@ -947,7 +947,7 @@ static void soc_cleanup_component(struct snd_soc_component *component) snd_soc_dapm_free(snd_soc_component_get_dapm(component)); soc_cleanup_component_debugfs(component); component->card = NULL; - if (!component->driver->ignore_module_refcount) + if (!component->driver->module_get_upon_open) module_put(component->dev->driver->owner); } @@ -1381,7 +1381,7 @@ static int soc_probe_component(struct snd_soc_card *card, return 0; } - if (!component->driver->ignore_module_refcount && + if (!component->driver->module_get_upon_open && !try_module_get(component->dev->driver->owner)) return -ENODEV; -- cgit v1.2.3 From 52034add758e268c39110f33d46e2a9492e82aef Mon Sep 17 00:00:00 2001 From: Ranjani Sridharan Date: Fri, 5 Apr 2019 09:57:09 -0700 Subject: ASoC: pcm: update module refcount if module_get_upon_open is set Setting the module_get_upon_open field for component driver prevents the module refcount from being incremented during component probe(). This could lead to the module being allowed to be unloaded when a pcm stream is open. So, if this field is set, the module's refcount should be incremented during pcm open to prevent module removal when the component is in use. And, the refcount should be decremented upon pcm close. Signed-off-by: Ranjani Sridharan Acked-by: Pierre-Louis Bossart Signed-off-by: Mark Brown --- sound/soc/soc-pcm.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/sound/soc/soc-pcm.c b/sound/soc/soc-pcm.c index 2d5d5cac4ba6..d21247546f7f 100644 --- a/sound/soc/soc-pcm.c +++ b/sound/soc/soc-pcm.c @@ -15,6 +15,7 @@ #include #include #include +#include #include #include #include @@ -463,6 +464,9 @@ static int soc_pcm_components_close(struct snd_pcm_substream *substream, continue; component->driver->ops->close(substream); + + if (component->driver->module_get_upon_open) + module_put(component->dev->driver->owner); } return 0; @@ -513,6 +517,10 @@ static int soc_pcm_open(struct snd_pcm_substream *substream) !component->driver->ops->open) continue; + if (component->driver->module_get_upon_open && + !try_module_get(component->dev->driver->owner)) + return -ENODEV; + ret = component->driver->ops->open(substream); if (ret < 0) { dev_err(component->dev, -- cgit v1.2.3 From 1a07a94b47b1f528f39c3e6187b5eaf02efe44ea Mon Sep 17 00:00:00 2001 From: Ondrej Jirman Date: Sat, 6 Apr 2019 01:30:48 +0200 Subject: drm/sun4i: tcon top: Fix NULL/invalid pointer dereference in sun8i_tcon_top_un/bind There are two problems here: 1. Not all clk_data->hws[] need to be initialized, depending on various configured quirks. This leads to NULL ptr deref in clk_hw_unregister_gate() in sun8i_tcon_top_unbind() 2. If there is error when registering the clk_data->hws[], err_unregister_gates error path will try to unregister IS_ERR()=true (invalid) pointer. For problem (1) I have this stack trace: Unable to handle kernel NULL pointer dereference at virtual address 0000000000000008 Call trace: clk_hw_unregister+0x8/0x18 clk_hw_unregister_gate+0x14/0x28 sun8i_tcon_top_unbind+0x2c/0x60 component_unbind.isra.4+0x2c/0x50 component_bind_all+0x1d4/0x230 sun4i_drv_bind+0xc4/0x1a0 try_to_bring_up_master+0x164/0x1c0 __component_add+0xa0/0x168 component_add+0x10/0x18 sun8i_dw_hdmi_probe+0x18/0x20 platform_drv_probe+0x3c/0x70 really_probe+0xcc/0x278 driver_probe_device+0x34/0xa8 Problem (2) was identified by head scratching. Signed-off-by: Ondrej Jirman Signed-off-by: Maxime Ripard Link: https://patchwork.freedesktop.org/patch/msgid/20190405233048.3823-1-megous@megous.com --- drivers/gpu/drm/sun4i/sun8i_tcon_top.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/sun4i/sun8i_tcon_top.c b/drivers/gpu/drm/sun4i/sun8i_tcon_top.c index fc36e0c10a37..b1e7c76e9c17 100644 --- a/drivers/gpu/drm/sun4i/sun8i_tcon_top.c +++ b/drivers/gpu/drm/sun4i/sun8i_tcon_top.c @@ -227,7 +227,7 @@ static int sun8i_tcon_top_bind(struct device *dev, struct device *master, err_unregister_gates: for (i = 0; i < CLK_NUM; i++) - if (clk_data->hws[i]) + if (!IS_ERR_OR_NULL(clk_data->hws[i])) clk_hw_unregister_gate(clk_data->hws[i]); clk_disable_unprepare(tcon_top->bus); err_assert_reset: @@ -245,7 +245,8 @@ static void sun8i_tcon_top_unbind(struct device *dev, struct device *master, of_clk_del_provider(dev->of_node); for (i = 0; i < CLK_NUM; i++) - clk_hw_unregister_gate(clk_data->hws[i]); + if (clk_data->hws[i]) + clk_hw_unregister_gate(clk_data->hws[i]); clk_disable_unprepare(tcon_top->bus); reset_control_assert(tcon_top->rst); -- cgit v1.2.3 From a58d7525b8014115d57fd30186a84f6d30783f2c Mon Sep 17 00:00:00 2001 From: Stanislaw Gruszka Date: Tue, 12 Mar 2019 10:51:40 +0100 Subject: cfg80211: add ratelimited variants of err and warn wiphy_{err,warn}_ratelimited will be used by rt2x00 Signed-off-by: Stanislaw Gruszka Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index bb307a11ee63..13bfeb712d36 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -7183,6 +7183,11 @@ void cfg80211_pmsr_complete(struct wireless_dev *wdev, #define wiphy_info(wiphy, format, args...) \ dev_info(&(wiphy)->dev, format, ##args) +#define wiphy_err_ratelimited(wiphy, format, args...) \ + dev_err_ratelimited(&(wiphy)->dev, format, ##args) +#define wiphy_warn_ratelimited(wiphy, format, args...) \ + dev_warn_ratelimited(&(wiphy)->dev, format, ##args) + #define wiphy_debug(wiphy, format, args...) \ wiphy_printk(KERN_DEBUG, wiphy, format, ##args) -- cgit v1.2.3 From 45fcef8b727b6f171bc5443e8153181a367d7a15 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 29 Mar 2019 08:56:22 +0100 Subject: mac80211_hwsim: calculate if_combination.max_interfaces If we just set this to 2048, and have multiple limits you can select from, the total number might run over and cause a warning in cfg80211. This doesn't make sense, so we just calculate the total max_interfaces now. Reported-by: syzbot+8f91bd563bbff230d0ee@syzkaller.appspotmail.com Fixes: 99e3a44bac37 ("mac80211_hwsim: allow setting iftype support") Signed-off-by: Johannes Berg --- drivers/net/wireless/mac80211_hwsim.c | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) diff --git a/drivers/net/wireless/mac80211_hwsim.c b/drivers/net/wireless/mac80211_hwsim.c index 0838af04d681..524eb5805995 100644 --- a/drivers/net/wireless/mac80211_hwsim.c +++ b/drivers/net/wireless/mac80211_hwsim.c @@ -2644,7 +2644,7 @@ static int mac80211_hwsim_new_radio(struct genl_info *info, enum nl80211_band band; const struct ieee80211_ops *ops = &mac80211_hwsim_ops; struct net *net; - int idx; + int idx, i; int n_limits = 0; if (WARN_ON(param->channels > 1 && !param->use_chanctx)) @@ -2768,12 +2768,23 @@ static int mac80211_hwsim_new_radio(struct genl_info *info, goto failed_hw; } + data->if_combination.max_interfaces = 0; + for (i = 0; i < n_limits; i++) + data->if_combination.max_interfaces += + data->if_limits[i].max; + data->if_combination.n_limits = n_limits; - data->if_combination.max_interfaces = 2048; data->if_combination.limits = data->if_limits; - hw->wiphy->iface_combinations = &data->if_combination; - hw->wiphy->n_iface_combinations = 1; + /* + * If we actually were asked to support combinations, + * advertise them - if there's only a single thing like + * only IBSS then don't advertise it as combinations. + */ + if (data->if_combination.max_interfaces > 1) { + hw->wiphy->iface_combinations = &data->if_combination; + hw->wiphy->n_iface_combinations = 1; + } if (param->ciphers) { memcpy(data->ciphers, param->ciphers, -- cgit v1.2.3 From 2b4a66980217332d91ab1785e1750857d6d52bc8 Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Mon, 18 Mar 2019 12:00:58 +0100 Subject: mac80211: make ieee80211_schedule_txq schedule empty TXQs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Currently there is no way for the driver to signal to mac80211 that it should schedule a TXQ even if there are no packets on the mac80211 part of that queue. This is problematic if the driver has an internal retry queue to deal with software A-MPDU retry. This patch changes the behavior of ieee80211_schedule_txq to always schedule the queue, as its only user (ath9k) seems to expect such behavior already: it calls this function on tx status and on powersave wakeup whenever its internal retry queue is not empty. Also add an extra argument to ieee80211_return_txq to get the same behavior. This fixes an issue on ath9k where tx queues with packets to retry (and no new packets in mac80211) would not get serviced. Fixes: 89cea7493a346 ("ath9k: Switch to mac80211 TXQ scheduling and airtime APIs") Signed-off-by: Felix Fietkau Acked-by: Toke Høiland-Jørgensen Signed-off-by: Johannes Berg --- drivers/net/wireless/ath/ath10k/htt_rx.c | 2 +- drivers/net/wireless/ath/ath10k/mac.c | 4 ++-- drivers/net/wireless/ath/ath9k/xmit.c | 5 ++++- include/net/mac80211.h | 24 ++++++++++++++++++++---- net/mac80211/tx.c | 10 ++++++---- 5 files changed, 33 insertions(+), 12 deletions(-) diff --git a/drivers/net/wireless/ath/ath10k/htt_rx.c b/drivers/net/wireless/ath/ath10k/htt_rx.c index a20ea270d519..1acc622d2183 100644 --- a/drivers/net/wireless/ath/ath10k/htt_rx.c +++ b/drivers/net/wireless/ath/ath10k/htt_rx.c @@ -2728,7 +2728,7 @@ static void ath10k_htt_rx_tx_fetch_ind(struct ath10k *ar, struct sk_buff *skb) num_msdus++; num_bytes += ret; } - ieee80211_return_txq(hw, txq); + ieee80211_return_txq(hw, txq, false); ieee80211_txq_schedule_end(hw, txq->ac); record->num_msdus = cpu_to_le16(num_msdus); diff --git a/drivers/net/wireless/ath/ath10k/mac.c b/drivers/net/wireless/ath/ath10k/mac.c index b73c23d4ce86..41e89db244d2 100644 --- a/drivers/net/wireless/ath/ath10k/mac.c +++ b/drivers/net/wireless/ath/ath10k/mac.c @@ -4089,7 +4089,7 @@ static int ath10k_mac_schedule_txq(struct ieee80211_hw *hw, u32 ac) if (ret < 0) break; } - ieee80211_return_txq(hw, txq); + ieee80211_return_txq(hw, txq, false); ath10k_htt_tx_txq_update(hw, txq); if (ret == -EBUSY) break; @@ -4374,7 +4374,7 @@ static void ath10k_mac_op_wake_tx_queue(struct ieee80211_hw *hw, if (ret < 0) break; } - ieee80211_return_txq(hw, txq); + ieee80211_return_txq(hw, txq, false); ath10k_htt_tx_txq_update(hw, txq); out: ieee80211_txq_schedule_end(hw, ac); diff --git a/drivers/net/wireless/ath/ath9k/xmit.c b/drivers/net/wireless/ath/ath9k/xmit.c index 773d428ff1b0..b17e1ca40995 100644 --- a/drivers/net/wireless/ath/ath9k/xmit.c +++ b/drivers/net/wireless/ath/ath9k/xmit.c @@ -1938,12 +1938,15 @@ void ath_txq_schedule(struct ath_softc *sc, struct ath_txq *txq) goto out; while ((queue = ieee80211_next_txq(hw, txq->mac80211_qnum))) { + bool force; + tid = (struct ath_atx_tid *)queue->drv_priv; ret = ath_tx_sched_aggr(sc, txq, tid); ath_dbg(common, QUEUE, "ath_tx_sched_aggr returned %d\n", ret); - ieee80211_return_txq(hw, queue); + force = !skb_queue_empty(&tid->retry_q); + ieee80211_return_txq(hw, queue, force); } out: diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 616998252dc7..112dc18c658f 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -6253,26 +6253,42 @@ static inline void ieee80211_txq_schedule_end(struct ieee80211_hw *hw, u8 ac) { } +void __ieee80211_schedule_txq(struct ieee80211_hw *hw, + struct ieee80211_txq *txq, bool force); + /** * ieee80211_schedule_txq - schedule a TXQ for transmission * * @hw: pointer as obtained from ieee80211_alloc_hw() * @txq: pointer obtained from station or virtual interface * - * Schedules a TXQ for transmission if it is not already scheduled. + * Schedules a TXQ for transmission if it is not already scheduled, + * even if mac80211 does not have any packets buffered. + * + * The driver may call this function if it has buffered packets for + * this TXQ internally. */ -void ieee80211_schedule_txq(struct ieee80211_hw *hw, struct ieee80211_txq *txq); +static inline void +ieee80211_schedule_txq(struct ieee80211_hw *hw, struct ieee80211_txq *txq) +{ + __ieee80211_schedule_txq(hw, txq, true); +} /** * ieee80211_return_txq - return a TXQ previously acquired by ieee80211_next_txq() * * @hw: pointer as obtained from ieee80211_alloc_hw() * @txq: pointer obtained from station or virtual interface + * @force: schedule txq even if mac80211 does not have any buffered packets. + * + * The driver may set force=true if it has buffered packets for this TXQ + * internally. */ static inline void -ieee80211_return_txq(struct ieee80211_hw *hw, struct ieee80211_txq *txq) +ieee80211_return_txq(struct ieee80211_hw *hw, struct ieee80211_txq *txq, + bool force) { - ieee80211_schedule_txq(hw, txq); + __ieee80211_schedule_txq(hw, txq, force); } /** diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index 134a3da147c6..2e816dd67be7 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -3688,8 +3688,9 @@ out: } EXPORT_SYMBOL(ieee80211_next_txq); -void ieee80211_schedule_txq(struct ieee80211_hw *hw, - struct ieee80211_txq *txq) +void __ieee80211_schedule_txq(struct ieee80211_hw *hw, + struct ieee80211_txq *txq, + bool force) { struct ieee80211_local *local = hw_to_local(hw); struct txq_info *txqi = to_txq_info(txq); @@ -3697,7 +3698,8 @@ void ieee80211_schedule_txq(struct ieee80211_hw *hw, spin_lock_bh(&local->active_txq_lock[txq->ac]); if (list_empty(&txqi->schedule_order) && - (!skb_queue_empty(&txqi->frags) || txqi->tin.backlog_packets)) { + (force || !skb_queue_empty(&txqi->frags) || + txqi->tin.backlog_packets)) { /* If airtime accounting is active, always enqueue STAs at the * head of the list to ensure that they only get moved to the * back by the airtime DRR scheduler once they have a negative @@ -3717,7 +3719,7 @@ void ieee80211_schedule_txq(struct ieee80211_hw *hw, spin_unlock_bh(&local->active_txq_lock[txq->ac]); } -EXPORT_SYMBOL(ieee80211_schedule_txq); +EXPORT_SYMBOL(__ieee80211_schedule_txq); bool ieee80211_txq_may_transmit(struct ieee80211_hw *hw, struct ieee80211_txq *txq) -- cgit v1.2.3 From 7100e8704b61247649c50551b965e71d168df30b Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Fri, 29 Mar 2019 17:42:57 +1000 Subject: powerpc/64s/radix: Fix radix segment exception handling Commit 48e7b76957 ("powerpc/64s/hash: Convert SLB miss handlers to C") broke the radix-mode segment exception handler. In radix mode, this is exception is not an SLB miss, rather it signals that the EA is outside the range translated by any page table. The commit lost the radix feature alternate code patch, which can cause faults to some EAs to kernel BUG at arch/powerpc/mm/slb.c:639! The original radix code would send faults to slb_miss_large_addr, which would end up faulting due to slb_addr_limit being 0. This patch sends radix directly to do_bad_slb_fault, which is a bit clearer. Fixes: 48e7b7695745 ("powerpc/64s/hash: Convert SLB miss handlers to C") Cc: stable@vger.kernel.org # v4.20+ Reported-by: Anton Blanchard Signed-off-by: Nicholas Piggin Reviewed-by: Aneesh Kumar K.V Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/exceptions-64s.S | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index a5b8fbae56a0..9481a117e242 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -656,11 +656,17 @@ EXC_COMMON_BEGIN(data_access_slb_common) ld r4,PACA_EXSLB+EX_DAR(r13) std r4,_DAR(r1) addi r3,r1,STACK_FRAME_OVERHEAD +BEGIN_MMU_FTR_SECTION + /* HPT case, do SLB fault */ bl do_slb_fault cmpdi r3,0 bne- 1f b fast_exception_return 1: /* Error case */ +MMU_FTR_SECTION_ELSE + /* Radix case, access is outside page table range */ + li r3,-EFAULT +ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_TYPE_RADIX) std r3,RESULT(r1) bl save_nvgprs RECONCILE_IRQ_STATE(r10, r11) @@ -705,11 +711,17 @@ EXC_COMMON_BEGIN(instruction_access_slb_common) EXCEPTION_PROLOG_COMMON(0x480, PACA_EXSLB) ld r4,_NIP(r1) addi r3,r1,STACK_FRAME_OVERHEAD +BEGIN_MMU_FTR_SECTION + /* HPT case, do SLB fault */ bl do_slb_fault cmpdi r3,0 bne- 1f b fast_exception_return 1: /* Error case */ +MMU_FTR_SECTION_ELSE + /* Radix case, access is outside page table range */ + li r3,-EFAULT +ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_TYPE_RADIX) std r3,RESULT(r1) bl save_nvgprs RECONCILE_IRQ_STATE(r10, r11) -- cgit v1.2.3 From cae30527901d9590db0e12ace994c1d58bea87fd Mon Sep 17 00:00:00 2001 From: Hui Wang Date: Mon, 8 Apr 2019 15:58:11 +0800 Subject: ALSA: hda - Add two more machines to the power_save_blacklist Recently we set CONFIG_SND_HDA_POWER_SAVE_DEFAULT to 1 when configuring the kernel, then two machines were reported to have noise after installing the new kernel. Put them in the blacklist, the noise disappears. https://bugs.launchpad.net/bugs/1821663 Cc: Signed-off-by: Hui Wang Signed-off-by: Takashi Iwai --- sound/pci/hda/hda_intel.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/sound/pci/hda/hda_intel.c b/sound/pci/hda/hda_intel.c index ece256a3b48f..2ec91085fa3e 100644 --- a/sound/pci/hda/hda_intel.c +++ b/sound/pci/hda/hda_intel.c @@ -2142,6 +2142,8 @@ static struct snd_pci_quirk power_save_blacklist[] = { SND_PCI_QUIRK(0x8086, 0x2040, "Intel DZ77BH-55K", 0), /* https://bugzilla.kernel.org/show_bug.cgi?id=199607 */ SND_PCI_QUIRK(0x8086, 0x2057, "Intel NUC5i7RYB", 0), + /* https://bugs.launchpad.net/bugs/1821663 */ + SND_PCI_QUIRK(0x8086, 0x2064, "Intel SDP 8086:2064", 0), /* https://bugzilla.redhat.com/show_bug.cgi?id=1520902 */ SND_PCI_QUIRK(0x8086, 0x2068, "Intel NUC7i3BNB", 0), /* https://bugzilla.kernel.org/show_bug.cgi?id=198611 */ @@ -2150,6 +2152,8 @@ static struct snd_pci_quirk power_save_blacklist[] = { SND_PCI_QUIRK(0x17aa, 0x367b, "Lenovo IdeaCentre B550", 0), /* https://bugzilla.redhat.com/show_bug.cgi?id=1572975 */ SND_PCI_QUIRK(0x17aa, 0x36a7, "Lenovo C50 All in one", 0), + /* https://bugs.launchpad.net/bugs/1821663 */ + SND_PCI_QUIRK(0x1631, 0xe017, "Packard Bell NEC IMEDIA 5204", 0), {} }; #endif /* CONFIG_PM */ -- cgit v1.2.3 From 6a8aae68c87349dbbcd46eac380bc43cdb98a13b Mon Sep 17 00:00:00 2001 From: Longpeng Date: Sat, 9 Mar 2019 15:17:40 +0800 Subject: virtio_pci: fix a NULL pointer reference in vp_del_vqs If the msix_affinity_masks is alloced failed, then we'll try to free some resources in vp_free_vectors() that may access it directly. We met the following stack in our production: [ 29.296767] BUG: unable to handle kernel NULL pointer dereference at (null) [ 29.311151] IP: [] vp_free_vectors+0x6a/0x150 [virtio_pci] [ 29.324787] PGD 0 [ 29.333224] Oops: 0000 [#1] SMP [...] [ 29.425175] RIP: 0010:[] [] vp_free_vectors+0x6a/0x150 [virtio_pci] [ 29.441405] RSP: 0018:ffff9a55c2dcfa10 EFLAGS: 00010206 [ 29.453491] RAX: 0000000000000000 RBX: ffff9a55c322c400 RCX: 0000000000000000 [ 29.467488] RDX: 0000000000000000 RSI: 0000000000000000 RDI: ffff9a55c322c400 [ 29.481461] RBP: ffff9a55c2dcfa20 R08: 0000000000000000 R09: ffffc1b6806ff020 [ 29.495427] R10: 0000000000000e95 R11: 0000000000aaaaaa R12: 0000000000000000 [ 29.509414] R13: 0000000000010000 R14: ffff9a55bd2d9e98 R15: ffff9a55c322c400 [ 29.523407] FS: 00007fdcba69f8c0(0000) GS:ffff9a55c2840000(0000) knlGS:0000000000000000 [ 29.538472] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 29.551621] CR2: 0000000000000000 CR3: 000000003ce52000 CR4: 00000000003607a0 [ 29.565886] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [ 29.580055] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [ 29.594122] Call Trace: [ 29.603446] [] vp_request_msix_vectors+0xe2/0x260 [virtio_pci] [ 29.618017] [] vp_try_to_find_vqs+0x95/0x3b0 [virtio_pci] [ 29.632152] [] vp_find_vqs+0x37/0xb0 [virtio_pci] [ 29.645582] [] init_vq+0x153/0x260 [virtio_blk] [ 29.658831] [] virtblk_probe+0xe8/0x87f [virtio_blk] [...] Cc: Gonglei Signed-off-by: Longpeng Signed-off-by: Michael S. Tsirkin Reviewed-by: Gonglei --- drivers/virtio/virtio_pci_common.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/virtio/virtio_pci_common.c b/drivers/virtio/virtio_pci_common.c index d0584c040c60..7a0398bb84f7 100644 --- a/drivers/virtio/virtio_pci_common.c +++ b/drivers/virtio/virtio_pci_common.c @@ -255,9 +255,11 @@ void vp_del_vqs(struct virtio_device *vdev) for (i = 0; i < vp_dev->msix_used_vectors; ++i) free_irq(pci_irq_vector(vp_dev->pci_dev, i), vp_dev); - for (i = 0; i < vp_dev->msix_vectors; i++) - if (vp_dev->msix_affinity_masks[i]) - free_cpumask_var(vp_dev->msix_affinity_masks[i]); + if (vp_dev->msix_affinity_masks) { + for (i = 0; i < vp_dev->msix_vectors; i++) + if (vp_dev->msix_affinity_masks[i]) + free_cpumask_var(vp_dev->msix_affinity_masks[i]); + } if (vp_dev->msix_enabled) { /* Disable the vector used for configuration */ -- cgit v1.2.3 From 5e9642a2e14cd0f5be14186681f280979bb3f3cd Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Wed, 27 Mar 2019 11:37:26 -0400 Subject: MAiNTAINERS: add Paolo, Stefan for virtio blk/scsi Jason doesn't really have the time to review blk/scsi patches. Paolo and Setfan agreed to help out. Thanks guys! Signed-off-by: Michael S. Tsirkin --- MAINTAINERS | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index 2359e12e4c41..09c546312bc8 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -16509,7 +16509,7 @@ F: drivers/char/virtio_console.c F: include/linux/virtio_console.h F: include/uapi/linux/virtio_console.h -VIRTIO CORE, NET AND BLOCK DRIVERS +VIRTIO CORE AND NET DRIVERS M: "Michael S. Tsirkin" M: Jason Wang L: virtualization@lists.linux-foundation.org @@ -16524,6 +16524,19 @@ F: include/uapi/linux/virtio_*.h F: drivers/crypto/virtio/ F: mm/balloon_compaction.c +VIRTIO BLOCK AND SCSI DRIVERS +M: "Michael S. Tsirkin" +M: Jason Wang +R: Paolo Bonzini +R: Stefan Hajnoczi +L: virtualization@lists.linux-foundation.org +S: Maintained +F: drivers/block/virtio_blk.c +F: drivers/scsi/virtio_scsi.c +F: include/uapi/linux/virtio_blk.h +F: include/uapi/linux/virtio_scsi.h +F: drivers/vhost/scsi.c + VIRTIO CRYPTO DRIVER M: Gonglei L: virtualization@lists.linux-foundation.org -- cgit v1.2.3 From 1200e07f3ad4b9d976cf2fff3a0c3d9a1faecb3e Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Mon, 8 Apr 2019 19:02:38 +0800 Subject: block: don't use for-inside-for in bio_for_each_segment_all Commit 6dc4f100c175 ("block: allow bio_for_each_segment_all() to iterate over multi-page bvec") changes bio_for_each_segment_all() to use for-inside-for. This way breaks all bio_for_each_segment_all() call with error out branch via 'break', since now 'break' can only break from the inner loop. Fixes this issue by implementing bio_for_each_segment_all() via single 'for' loop, and now the logic is very similar with normal bvec iterator. Cc: Qu Wenruo Cc: linux-btrfs@vger.kernel.org Cc: linux-fsdevel@vger.kernel.org Cc: Omar Sandoval Reviewed-by: Johannes Thumshirn Reported-and-Tested-by: Qu Wenruo Fixes: 6dc4f100c175 ("block: allow bio_for_each_segment_all() to iterate over multi-page bvec") Reviewed-by: Christoph Hellwig Signed-off-by: Ming Lei Signed-off-by: Jens Axboe --- include/linux/bio.h | 20 ++++++++++++-------- include/linux/bvec.h | 14 ++++++++++---- 2 files changed, 22 insertions(+), 12 deletions(-) diff --git a/include/linux/bio.h b/include/linux/bio.h index bb6090aa165d..e584673c1881 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -120,19 +120,23 @@ static inline bool bio_full(struct bio *bio) return bio->bi_vcnt >= bio->bi_max_vecs; } -#define mp_bvec_for_each_segment(bv, bvl, i, iter_all) \ - for (bv = bvec_init_iter_all(&iter_all); \ - (iter_all.done < (bvl)->bv_len) && \ - (mp_bvec_next_segment((bvl), &iter_all), 1); \ - iter_all.done += bv->bv_len, i += 1) +static inline bool bio_next_segment(const struct bio *bio, + struct bvec_iter_all *iter) +{ + if (iter->idx >= bio->bi_vcnt) + return false; + + bvec_advance(&bio->bi_io_vec[iter->idx], iter); + return true; +} /* * drivers should _never_ use the all version - the bio may have been split * before it got to the driver and the driver won't own all of it */ -#define bio_for_each_segment_all(bvl, bio, i, iter_all) \ - for (i = 0, iter_all.idx = 0; iter_all.idx < (bio)->bi_vcnt; iter_all.idx++) \ - mp_bvec_for_each_segment(bvl, &((bio)->bi_io_vec[iter_all.idx]), i, iter_all) +#define bio_for_each_segment_all(bvl, bio, i, iter) \ + for (i = 0, bvl = bvec_init_iter_all(&iter); \ + bio_next_segment((bio), &iter); i++) static inline void bio_advance_iter(struct bio *bio, struct bvec_iter *iter, unsigned bytes) diff --git a/include/linux/bvec.h b/include/linux/bvec.h index f6275c4da13a..3bc91879e1e2 100644 --- a/include/linux/bvec.h +++ b/include/linux/bvec.h @@ -145,18 +145,18 @@ static inline bool bvec_iter_advance(const struct bio_vec *bv, static inline struct bio_vec *bvec_init_iter_all(struct bvec_iter_all *iter_all) { - iter_all->bv.bv_page = NULL; iter_all->done = 0; + iter_all->idx = 0; return &iter_all->bv; } -static inline void mp_bvec_next_segment(const struct bio_vec *bvec, - struct bvec_iter_all *iter_all) +static inline void bvec_advance(const struct bio_vec *bvec, + struct bvec_iter_all *iter_all) { struct bio_vec *bv = &iter_all->bv; - if (bv->bv_page) { + if (iter_all->done) { bv->bv_page = nth_page(bv->bv_page, 1); bv->bv_offset = 0; } else { @@ -165,6 +165,12 @@ static inline void mp_bvec_next_segment(const struct bio_vec *bvec, } bv->bv_len = min_t(unsigned int, PAGE_SIZE - bv->bv_offset, bvec->bv_len - iter_all->done); + iter_all->done += bv->bv_len; + + if (iter_all->done == bvec->bv_len) { + iter_all->idx++; + iter_all->done = 0; + } } /* -- cgit v1.2.3 From bd13b2b874eceb4677cd26eebdc5f45cc52fa400 Mon Sep 17 00:00:00 2001 From: Nicholas Kazlauskas Date: Fri, 1 Feb 2019 09:36:59 -0500 Subject: drm/amd/display: Fix negative cursor pos programming (v2) [Why] If the cursor pos passed from DM is less than the plane_state->dst_rect top left corner then the unsigned cursor pos wraps around to a large positive number since cursor pos is a u32. There was an attempt to guard against this in hubp1_cursor_set_position by checking the src_x_offset and src_y_offset and offseting the cursor hotspot within hubp1_cursor_set_position. However, the cursor position itself is still being programmed incorrectly as a large value. This manifests itself visually as the cursor disappearing or containing strange artifacts near the middle of the screen on raven. [How] Don't subtract the destination rect top left corner from the pos but add it to the hotspot instead. This happens before the pos gets passed into hubp1_cursor_set_position. This achieves the same result but avoids the subtraction wrap around. With this fix the original cursor programming logic can be used again. v2: add hunk that got dropped accidently when this patch was originally committed. (Alex) Fixes: 0921c41e1902831 ("drm/amd/display: Fix negative cursor pos programming") Signed-off-by: Nicholas Kazlauskas Reviewed-by: Charlene Liu Acked-by: Leo Li Acked-by: Murton Liu Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubp.c | 23 ++--------------------- 1 file changed, 2 insertions(+), 21 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubp.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubp.c index 683829466a44..0ba68d41b9c3 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubp.c +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubp.c @@ -1150,28 +1150,9 @@ void hubp1_cursor_set_position( REG_UPDATE(CURSOR_CONTROL, CURSOR_ENABLE, cur_en); - //account for cases where we see negative offset relative to overlay plane - if (src_x_offset < 0 && src_y_offset < 0) { - REG_SET_2(CURSOR_POSITION, 0, - CURSOR_X_POSITION, 0, - CURSOR_Y_POSITION, 0); - x_hotspot -= src_x_offset; - y_hotspot -= src_y_offset; - } else if (src_x_offset < 0) { - REG_SET_2(CURSOR_POSITION, 0, - CURSOR_X_POSITION, 0, - CURSOR_Y_POSITION, pos->y); - x_hotspot -= src_x_offset; - } else if (src_y_offset < 0) { - REG_SET_2(CURSOR_POSITION, 0, + REG_SET_2(CURSOR_POSITION, 0, CURSOR_X_POSITION, pos->x, - CURSOR_Y_POSITION, 0); - y_hotspot -= src_y_offset; - } else { - REG_SET_2(CURSOR_POSITION, 0, - CURSOR_X_POSITION, pos->x, - CURSOR_Y_POSITION, pos->y); - } + CURSOR_Y_POSITION, pos->y); REG_SET_2(CURSOR_HOT_SPOT, 0, CURSOR_HOT_SPOT_X, x_hotspot, -- cgit v1.2.3 From e1b7058ece718c0350ad2e5bfbdab17885bd9f39 Mon Sep 17 00:00:00 2001 From: Ville Syrjälä Date: Fri, 5 Apr 2019 17:13:49 +0300 Subject: drm/i915: Fix pipe_bpp readout for BXT/GLK DSI MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The only bpc information in pipe registers for BXT/GLK DSI is the PIPEMISC dither bpc. Let's try to use that to read out pipe_bpp on these platforms. However, I'm not sure if this will be correctly populated by the GOP since bspec suggests it's only needed if dithering is actually enabled. If not I guess we'll have to go one step further and extract pipe_bpp from the DSI pixel format when dithering is disabled. Cc: Hans de Goede Fixes: ca0b04db14a5 ("drm/i915/dsi: Fix pipe_bpp for handling for 6 bpc pixel-formats") References: https://bugs.freedesktop.org/show_bug.cgi?id=109516 Signed-off-by: Ville Syrjälä Reviewed-by: Imre Deak Signed-off-by: Jani Nikula Link: https://patchwork.freedesktop.org/patch/msgid/20190405141349.11950-1-ville.syrjala@linux.intel.com (cherry picked from commit 499653501baf27d26e73cb5ce744869df3400509) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/i915/vlv_dsi.c | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/drivers/gpu/drm/i915/vlv_dsi.c b/drivers/gpu/drm/i915/vlv_dsi.c index 6403728fe778..31c93c3ccd00 100644 --- a/drivers/gpu/drm/i915/vlv_dsi.c +++ b/drivers/gpu/drm/i915/vlv_dsi.c @@ -256,6 +256,28 @@ static void band_gap_reset(struct drm_i915_private *dev_priv) mutex_unlock(&dev_priv->sb_lock); } +static int bdw_get_pipemisc_bpp(struct intel_crtc *crtc) +{ + struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); + u32 tmp; + + tmp = I915_READ(PIPEMISC(crtc->pipe)); + + switch (tmp & PIPEMISC_DITHER_BPC_MASK) { + case PIPEMISC_DITHER_6_BPC: + return 18; + case PIPEMISC_DITHER_8_BPC: + return 24; + case PIPEMISC_DITHER_10_BPC: + return 30; + case PIPEMISC_DITHER_12_BPC: + return 36; + default: + MISSING_CASE(tmp); + return 0; + } +} + static int intel_dsi_compute_config(struct intel_encoder *encoder, struct intel_crtc_state *pipe_config, struct drm_connector_state *conn_state) @@ -1071,6 +1093,8 @@ static void bxt_dsi_get_pipe_config(struct intel_encoder *encoder, bpp = mipi_dsi_pixel_format_to_bpp( pixel_format_from_register_bits(fmt)); + pipe_config->pipe_bpp = bdw_get_pipemisc_bpp(crtc); + /* Enable Frame time stamo based scanline reporting */ adjusted_mode->private_flags |= I915_MODE_FLAG_GET_SCANLINE_FROM_TIMESTAMP; -- cgit v1.2.3 From 55c0c4c793b538fb438bcc72481b9dc2f79fe5a9 Mon Sep 17 00:00:00 2001 From: Eugeniy Paltsev Date: Mon, 8 Apr 2019 16:04:38 +0300 Subject: ARC: memset: fix build with L1_CACHE_SHIFT != 6 In case of 'L1_CACHE_SHIFT != 6' we define dummy assembly macroses PREALLOC_INSTR and PREFETCHW_INSTR without arguments. However we pass arguments to them in code which cause build errors. Fix that. Signed-off-by: Eugeniy Paltsev Cc: [5.0] Signed-off-by: Vineet Gupta --- arch/arc/lib/memset-archs.S | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arc/lib/memset-archs.S b/arch/arc/lib/memset-archs.S index f230bb7092fd..b3373f5c88e0 100644 --- a/arch/arc/lib/memset-archs.S +++ b/arch/arc/lib/memset-archs.S @@ -30,10 +30,10 @@ #else -.macro PREALLOC_INSTR +.macro PREALLOC_INSTR reg, off .endm -.macro PREFETCHW_INSTR +.macro PREFETCHW_INSTR reg, off .endm #endif -- cgit v1.2.3 From 5a3ae7b314a2259b1188b22b392f5eba01e443ee Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Sun, 7 Apr 2019 21:06:16 +0200 Subject: arm64/ftrace: fix inadvertent BUG() in trampoline check The ftrace trampoline code (which deals with modules loaded out of BL range of the core kernel) uses plt_entries_equal() to check whether the per-module trampoline equals a zero buffer, to decide whether the trampoline has already been initialized. This triggers a BUG() in the opcode manipulation code, since we end up checking the ADRP offset of a 0x0 opcode, which is not an ADRP instruction. So instead, add a helper to check whether a PLT is initialized, and call that from the frace code. Cc: # v5.0 Fixes: bdb85cd1d206 ("arm64/module: switch to ADRP/ADD sequences for PLT entries") Acked-by: Mark Rutland Signed-off-by: Ard Biesheuvel Signed-off-by: Will Deacon --- arch/arm64/include/asm/module.h | 5 +++++ arch/arm64/kernel/ftrace.c | 3 +-- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/arch/arm64/include/asm/module.h b/arch/arm64/include/asm/module.h index 905e1bb0e7bd..cd9f4e9d04d3 100644 --- a/arch/arm64/include/asm/module.h +++ b/arch/arm64/include/asm/module.h @@ -73,4 +73,9 @@ static inline bool is_forbidden_offset_for_adrp(void *place) struct plt_entry get_plt_entry(u64 dst, void *pc); bool plt_entries_equal(const struct plt_entry *a, const struct plt_entry *b); +static inline bool plt_entry_is_initialized(const struct plt_entry *e) +{ + return e->adrp || e->add || e->br; +} + #endif /* __ASM_MODULE_H */ diff --git a/arch/arm64/kernel/ftrace.c b/arch/arm64/kernel/ftrace.c index 8e4431a8821f..07b298120182 100644 --- a/arch/arm64/kernel/ftrace.c +++ b/arch/arm64/kernel/ftrace.c @@ -107,8 +107,7 @@ int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr) trampoline = get_plt_entry(addr, mod->arch.ftrace_trampoline); if (!plt_entries_equal(mod->arch.ftrace_trampoline, &trampoline)) { - if (!plt_entries_equal(mod->arch.ftrace_trampoline, - &(struct plt_entry){})) { + if (plt_entry_is_initialized(mod->arch.ftrace_trampoline)) { pr_err("ftrace: far branches to multiple entry points unsupported inside a single module\n"); return -EINVAL; } -- cgit v1.2.3 From ea7a5c706fa49273cf6d1d9def053ecb50db2076 Mon Sep 17 00:00:00 2001 From: Kamal Heib Date: Wed, 3 Apr 2019 16:52:54 +0300 Subject: RDMA/vmw_pvrdma: Fix memory leak on pvrdma_pci_remove Make sure to free the DSR on pvrdma_pci_remove() to avoid the memory leak. Fixes: 29c8d9eba550 ("IB: Add vmw_pvrdma driver") Signed-off-by: Kamal Heib Acked-by: Adit Ranadive Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c index 6d8b3e0de57a..ec41400fec0c 100644 --- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c +++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c @@ -1131,6 +1131,8 @@ static void pvrdma_pci_remove(struct pci_dev *pdev) pvrdma_page_dir_cleanup(dev, &dev->cq_pdir); pvrdma_page_dir_cleanup(dev, &dev->async_pdir); pvrdma_free_slots(dev); + dma_free_coherent(&pdev->dev, sizeof(*dev->dsr), dev->dsr, + dev->dsrbase); iounmap(dev->regs); kfree(dev->sgid_tbl); -- cgit v1.2.3 From 4772e03d239484f3461e33c79d721c8ea03f7416 Mon Sep 17 00:00:00 2001 From: Lijun Ou Date: Sun, 7 Apr 2019 13:23:38 +0800 Subject: RDMA/hns: Fix bug that caused srq creation to fail Due to the incorrect use of the seg and obj information, the position of the mtt is calculated incorrectly, and the free space of the page is not enough to store the entire mtt, resulting in access to the next page. This patch fixes this problem. Unable to handle kernel paging request at virtual address ffff00006e3cd000 ... Call trace: hns_roce_write_mtt+0x154/0x2f0 [hns_roce] hns_roce_buf_write_mtt+0xa8/0xd8 [hns_roce] hns_roce_create_srq+0x74c/0x808 [hns_roce] ib_create_srq+0x28/0xc8 Fixes: 0203b14c4f32 ("RDMA/hns: Unify the calculation for hem index in hip08") Signed-off-by: chenglang Signed-off-by: Lijun Ou Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hns/hns_roce_hem.c | 6 ++++-- drivers/infiniband/hw/hns/hns_roce_mr.c | 4 ++-- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/drivers/infiniband/hw/hns/hns_roce_hem.c b/drivers/infiniband/hw/hns/hns_roce_hem.c index f1fec56f3ff4..8e29dbb5b5fb 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hem.c +++ b/drivers/infiniband/hw/hns/hns_roce_hem.c @@ -792,6 +792,8 @@ void *hns_roce_table_find(struct hns_roce_dev *hr_dev, idx_offset = (obj & (table->num_obj - 1)) % obj_per_chunk; dma_offset = offset = idx_offset * table->obj_size; } else { + u32 seg_size = 64; /* 8 bytes per BA and 8 BA per segment */ + hns_roce_calc_hem_mhop(hr_dev, table, &mhop_obj, &mhop); /* mtt mhop */ i = mhop.l0_idx; @@ -803,8 +805,8 @@ void *hns_roce_table_find(struct hns_roce_dev *hr_dev, hem_idx = i; hem = table->hem[hem_idx]; - dma_offset = offset = (obj & (table->num_obj - 1)) * - table->obj_size % mhop.bt_chunk_size; + dma_offset = offset = (obj & (table->num_obj - 1)) * seg_size % + mhop.bt_chunk_size; if (mhop.hop_num == 2) dma_offset = offset = 0; } diff --git a/drivers/infiniband/hw/hns/hns_roce_mr.c b/drivers/infiniband/hw/hns/hns_roce_mr.c index b09f1cde2ff5..08be0e4eabcd 100644 --- a/drivers/infiniband/hw/hns/hns_roce_mr.c +++ b/drivers/infiniband/hw/hns/hns_roce_mr.c @@ -746,7 +746,6 @@ static int hns_roce_write_mtt_chunk(struct hns_roce_dev *hr_dev, struct hns_roce_hem_table *table; dma_addr_t dma_handle; __le64 *mtts; - u32 s = start_index * sizeof(u64); u32 bt_page_size; u32 i; @@ -780,7 +779,8 @@ static int hns_roce_write_mtt_chunk(struct hns_roce_dev *hr_dev, return -EINVAL; mtts = hns_roce_table_find(hr_dev, table, - mtt->first_seg + s / hr_dev->caps.mtt_entry_sz, + mtt->first_seg + + start_index / HNS_ROCE_MTT_ENTRY_PER_SEG, &dma_handle); if (!mtts) return -ENOMEM; -- cgit v1.2.3 From 2170a0d53bee1a6c1a4ebd042f99d85aafc6c0ea Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Mon, 11 Mar 2019 12:47:14 -0700 Subject: tools/testing/nvdimm: Retain security state after overwrite Overwrite retains the security state after completion of operation. Fix nfit_test to reflect this so that the kernel can test the behavior it is more likely to see in practice. Fixes: 926f74802cb1 ("tools/testing/nvdimm: Add overwrite support for nfit_test") Signed-off-by: Dave Jiang Signed-off-by: Dan Williams --- tools/testing/nvdimm/test/nfit.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/tools/testing/nvdimm/test/nfit.c b/tools/testing/nvdimm/test/nfit.c index cad719876ef4..85ffdcfa596b 100644 --- a/tools/testing/nvdimm/test/nfit.c +++ b/tools/testing/nvdimm/test/nfit.c @@ -146,6 +146,7 @@ static int dimm_fail_cmd_code[ARRAY_SIZE(handle)]; struct nfit_test_sec { u8 state; u8 ext_state; + u8 old_state; u8 passphrase[32]; u8 master_passphrase[32]; u64 overwrite_end_time; @@ -1100,7 +1101,7 @@ static int nd_intel_test_cmd_overwrite(struct nfit_test *t, return 0; } - memset(sec->passphrase, 0, ND_INTEL_PASSPHRASE_SIZE); + sec->old_state = sec->state; sec->state = ND_INTEL_SEC_STATE_OVERWRITE; dev_dbg(dev, "overwrite progressing.\n"); sec->overwrite_end_time = get_jiffies_64() + 5 * HZ; @@ -1122,7 +1123,8 @@ static int nd_intel_test_cmd_query_overwrite(struct nfit_test *t, if (time_is_before_jiffies64(sec->overwrite_end_time)) { sec->overwrite_end_time = 0; - sec->state = 0; + sec->state = sec->old_state; + sec->old_state = 0; sec->ext_state = ND_INTEL_SEC_ESTATE_ENABLED; dev_dbg(dev, "overwrite is complete\n"); } else -- cgit v1.2.3 From 00fb67ec6b98114a887d9ef26fc7c3e566e7f665 Mon Sep 17 00:00:00 2001 From: Yangyang Li Date: Sun, 7 Apr 2019 13:23:39 +0800 Subject: RDMA/hns: Bugfix for SCC hem free The method of hem free for SCC context is different from qp context. In the current version, if free SCC hem during the execution of qp free, there may be smmu error as below: arm-smmu-v3 arm-smmu-v3.1.auto: event 0x10 received: arm-smmu-v3 arm-smmu-v3.1.auto: 0x00007d0000000010 arm-smmu-v3 arm-smmu-v3.1.auto: 0x000012000000017c arm-smmu-v3 arm-smmu-v3.1.auto: 0x00000000000009e0 arm-smmu-v3 arm-smmu-v3.1.auto: 0x0000000000000000 As SCC context is still used by hardware after qp free, we can solve this problem by removing SCC hem free from hns_roce_qp_free. Fixes: 6a157f7d1b14 ("RDMA/hns: Add SCC context allocation support for hip08") Signed-off-by: Yangyang Li Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hns/hns_roce_qp.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/drivers/infiniband/hw/hns/hns_roce_qp.c b/drivers/infiniband/hw/hns/hns_roce_qp.c index 57c76eafef2f..66cdf625534f 100644 --- a/drivers/infiniband/hw/hns/hns_roce_qp.c +++ b/drivers/infiniband/hw/hns/hns_roce_qp.c @@ -274,9 +274,6 @@ void hns_roce_qp_free(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp) wait_for_completion(&hr_qp->free); if ((hr_qp->ibqp.qp_type) != IB_QPT_GSI) { - if (hr_dev->caps.sccc_entry_sz) - hns_roce_table_put(hr_dev, &qp_table->sccc_table, - hr_qp->qpn); if (hr_dev->caps.trrl_entry_sz) hns_roce_table_put(hr_dev, &qp_table->trrl_table, hr_qp->qpn); -- cgit v1.2.3 From e6abc8caa6deb14be2a206253f7e1c5e37e9515b Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 5 Apr 2019 08:54:37 -0700 Subject: nfsd: Don't release the callback slot unless it was actually held If there are multiple callbacks queued, waiting for the callback slot when the callback gets shut down, then they all currently end up acting as if they hold the slot, and call nfsd4_cb_sequence_done() resulting in interesting side-effects. In addition, the 'retry_nowait' path in nfsd4_cb_sequence_done() causes a loop back to nfsd4_cb_prepare() without first freeing the slot, which causes a deadlock when nfsd41_cb_get_slot() gets called a second time. This patch therefore adds a boolean to track whether or not the callback did pick up the slot, so that it can do the right thing in these 2 cases. Cc: stable@vger.kernel.org Signed-off-by: Trond Myklebust Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs4callback.c | 8 +++++++- fs/nfsd/state.h | 1 + 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c index d219159b98af..7caa3801ce72 100644 --- a/fs/nfsd/nfs4callback.c +++ b/fs/nfsd/nfs4callback.c @@ -1010,8 +1010,9 @@ static void nfsd4_cb_prepare(struct rpc_task *task, void *calldata) cb->cb_seq_status = 1; cb->cb_status = 0; if (minorversion) { - if (!nfsd41_cb_get_slot(clp, task)) + if (!cb->cb_holds_slot && !nfsd41_cb_get_slot(clp, task)) return; + cb->cb_holds_slot = true; } rpc_call_start(task); } @@ -1038,6 +1039,9 @@ static bool nfsd4_cb_sequence_done(struct rpc_task *task, struct nfsd4_callback return true; } + if (!cb->cb_holds_slot) + goto need_restart; + switch (cb->cb_seq_status) { case 0: /* @@ -1076,6 +1080,7 @@ static bool nfsd4_cb_sequence_done(struct rpc_task *task, struct nfsd4_callback cb->cb_seq_status); } + cb->cb_holds_slot = false; clear_bit(0, &clp->cl_cb_slot_busy); rpc_wake_up_next(&clp->cl_cb_waitq); dprintk("%s: freed slot, new seqid=%d\n", __func__, @@ -1283,6 +1288,7 @@ void nfsd4_init_cb(struct nfsd4_callback *cb, struct nfs4_client *clp, cb->cb_seq_status = 1; cb->cb_status = 0; cb->cb_need_restart = false; + cb->cb_holds_slot = false; } void nfsd4_run_cb(struct nfsd4_callback *cb) diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h index 396c76755b03..9d6cb246c6c5 100644 --- a/fs/nfsd/state.h +++ b/fs/nfsd/state.h @@ -70,6 +70,7 @@ struct nfsd4_callback { int cb_seq_status; int cb_status; bool cb_need_restart; + bool cb_holds_slot; }; struct nfsd4_callback_ops { -- cgit v1.2.3 From 704236672edacf353c362bab70c3d3eda7bb4a51 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Mon, 8 Apr 2019 10:48:50 -0600 Subject: tools/io_uring: remove IOCQE_FLAG_CACHEHIT This ended up not being included in the mainline version of io_uring, so drop it from the test app as well. Signed-off-by: Jens Axboe --- tools/io_uring/io_uring-bench.c | 32 ++++---------------------------- 1 file changed, 4 insertions(+), 28 deletions(-) diff --git a/tools/io_uring/io_uring-bench.c b/tools/io_uring/io_uring-bench.c index 512306a37531..0f257139b003 100644 --- a/tools/io_uring/io_uring-bench.c +++ b/tools/io_uring/io_uring-bench.c @@ -32,10 +32,6 @@ #include "liburing.h" #include "barrier.h" -#ifndef IOCQE_FLAG_CACHEHIT -#define IOCQE_FLAG_CACHEHIT (1U << 0) -#endif - #define min(a, b) ((a < b) ? (a) : (b)) struct io_sq_ring { @@ -85,7 +81,6 @@ struct submitter { unsigned long reaps; unsigned long done; unsigned long calls; - unsigned long cachehit, cachemiss; volatile int finish; __s32 *fds; @@ -270,10 +265,6 @@ static int reap_events(struct submitter *s) return -1; } } - if (cqe->flags & IOCQE_FLAG_CACHEHIT) - s->cachehit++; - else - s->cachemiss++; reaped++; head++; } while (1); @@ -489,7 +480,7 @@ static void file_depths(char *buf) int main(int argc, char *argv[]) { struct submitter *s = &submitters[0]; - unsigned long done, calls, reap, cache_hit, cache_miss; + unsigned long done, calls, reap; int err, i, flags, fd; char *fdepths; void *ret; @@ -569,44 +560,29 @@ int main(int argc, char *argv[]) pthread_create(&s->thread, NULL, submitter_fn, s); fdepths = malloc(8 * s->nr_files); - cache_hit = cache_miss = reap = calls = done = 0; + reap = calls = done = 0; do { unsigned long this_done = 0; unsigned long this_reap = 0; unsigned long this_call = 0; - unsigned long this_cache_hit = 0; - unsigned long this_cache_miss = 0; unsigned long rpc = 0, ipc = 0; - double hit = 0.0; sleep(1); this_done += s->done; this_call += s->calls; this_reap += s->reaps; - this_cache_hit += s->cachehit; - this_cache_miss += s->cachemiss; - if (this_cache_hit && this_cache_miss) { - unsigned long hits, total; - - hits = this_cache_hit - cache_hit; - total = hits + this_cache_miss - cache_miss; - hit = (double) hits / (double) total; - hit *= 100.0; - } if (this_call - calls) { rpc = (this_done - done) / (this_call - calls); ipc = (this_reap - reap) / (this_call - calls); } else rpc = ipc = -1; file_depths(fdepths); - printf("IOPS=%lu, IOS/call=%ld/%ld, inflight=%u (%s), Cachehit=%0.2f%%\n", + printf("IOPS=%lu, IOS/call=%ld/%ld, inflight=%u (%s)\n", this_done - done, rpc, ipc, s->inflight, - fdepths, hit); + fdepths); done = this_done; calls = this_call; reap = this_reap; - cache_hit = s->cachehit; - cache_miss = s->cachemiss; } while (!finish); pthread_join(s->thread, &ret); -- cgit v1.2.3 From 3ec482d15cb986bf08b923f9193eeddb3b9ca69f Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Mon, 8 Apr 2019 10:51:01 -0600 Subject: io_uring: restrict IORING_SETUP_SQPOLL to root This options spawns a kernel side thread that will poll for submissions (and completions, if IORING_SETUP_IOPOLL is set). As this allows a user to potentially use more cycles outside of the normal hierarchy, restrict the use of this feature to root. Signed-off-by: Jens Axboe --- fs/io_uring.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/fs/io_uring.c b/fs/io_uring.c index 07d6ef195d05..89aa8412b5f5 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -2245,6 +2245,10 @@ static int io_sq_offload_start(struct io_ring_ctx *ctx, goto err; if (ctx->flags & IORING_SETUP_SQPOLL) { + ret = -EPERM; + if (!capable(CAP_SYS_ADMIN)) + goto err; + if (p->flags & IORING_SETUP_SQ_AFF) { int cpu; -- cgit v1.2.3 From 1e6f5440a6814d28c32d347f338bfef68bc3e69d Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Mon, 8 Apr 2019 17:56:34 +0100 Subject: arm64: backtrace: Don't bother trying to unwind the userspace stack Calling dump_backtrace() with a pt_regs argument corresponding to userspace doesn't make any sense and our unwinder will simply print "Call trace:" before unwinding the stack looking for user frames. Rather than go through this song and dance, just return early if we're passed a user register state. Cc: Fixes: 1149aad10b1e ("arm64: Add dump_backtrace() in show_regs") Reported-by: Kefeng Wang Signed-off-by: Will Deacon --- arch/arm64/kernel/traps.c | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c index 8ad119c3f665..29755989f616 100644 --- a/arch/arm64/kernel/traps.c +++ b/arch/arm64/kernel/traps.c @@ -102,10 +102,16 @@ static void dump_instr(const char *lvl, struct pt_regs *regs) void dump_backtrace(struct pt_regs *regs, struct task_struct *tsk) { struct stackframe frame; - int skip; + int skip = 0; pr_debug("%s(regs = %p tsk = %p)\n", __func__, regs, tsk); + if (regs) { + if (user_mode(regs)) + return; + skip = 1; + } + if (!tsk) tsk = current; @@ -126,7 +132,6 @@ void dump_backtrace(struct pt_regs *regs, struct task_struct *tsk) frame.graph = 0; #endif - skip = !!regs; printk("Call trace:\n"); do { /* skip until specified stack frame */ @@ -176,15 +181,13 @@ static int __die(const char *str, int err, struct pt_regs *regs) return ret; print_modules(); - __show_regs(regs); pr_emerg("Process %.*s (pid: %d, stack limit = 0x%p)\n", TASK_COMM_LEN, tsk->comm, task_pid_nr(tsk), end_of_stack(tsk)); + show_regs(regs); - if (!user_mode(regs)) { - dump_backtrace(regs, tsk); + if (!user_mode(regs)) dump_instr(KERN_EMERG, regs); - } return ret; } -- cgit v1.2.3 From b1a6e8f9131381a92bfdacdf86ef80cca82f71d4 Mon Sep 17 00:00:00 2001 From: Stefan Schmidt Date: Mon, 8 Apr 2019 18:08:04 +0200 Subject: MAINTAINERS: ieee802154: update documentation file pattern When moving the documentation for the ieee802154 subsystem from plain text to rst the file pattern in the MAINTAINERS file got wrong. Updating it here to fix scripts using this file. Reported-by: Joe Perches Signed-off-by: Stefan Schmidt Signed-off-by: David S. Miller --- MAINTAINERS | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index 6771bd784f5f..621622138860 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -7515,7 +7515,7 @@ F: include/net/mac802154.h F: include/net/af_ieee802154.h F: include/net/cfg802154.h F: include/net/ieee802154_netdev.h -F: Documentation/networking/ieee802154.txt +F: Documentation/networking/ieee802154.rst IFE PROTOCOL M: Yotam Gigi -- cgit v1.2.3 From cf94db21905333e610e479688add629397a4b384 Mon Sep 17 00:00:00 2001 From: Cornelia Huck Date: Mon, 8 Apr 2019 14:33:22 +0200 Subject: virtio: Honour 'may_reduce_num' in vring_create_virtqueue vring_create_virtqueue() allows the caller to specify via the may_reduce_num parameter whether the vring code is allowed to allocate a smaller ring than specified. However, the split ring allocation code tries to allocate a smaller ring on allocation failure regardless of what the caller specified. This may cause trouble for e.g. virtio-pci in legacy mode, which does not support ring resizing. (The packed ring code does not resize in any case.) Let's fix this by bailing out immediately in the split ring code if the requested size cannot be allocated and may_reduce_num has not been specified. While at it, fix a typo in the usage instructions. Fixes: 2a2d1382fe9d ("virtio: Add improved queue allocation API") Cc: stable@vger.kernel.org # v4.6+ Signed-off-by: Cornelia Huck Signed-off-by: Michael S. Tsirkin Reviewed-by: Halil Pasic Reviewed-by: Jens Freimann --- drivers/virtio/virtio_ring.c | 2 ++ include/linux/virtio_ring.h | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c index 18846afb39da..5df92c308286 100644 --- a/drivers/virtio/virtio_ring.c +++ b/drivers/virtio/virtio_ring.c @@ -882,6 +882,8 @@ static struct virtqueue *vring_create_virtqueue_split( GFP_KERNEL|__GFP_NOWARN|__GFP_ZERO); if (queue) break; + if (!may_reduce_num) + return NULL; } if (!num) diff --git a/include/linux/virtio_ring.h b/include/linux/virtio_ring.h index fab02133a919..3dc70adfe5f5 100644 --- a/include/linux/virtio_ring.h +++ b/include/linux/virtio_ring.h @@ -63,7 +63,7 @@ struct virtqueue; /* * Creates a virtqueue and allocates the descriptor ring. If * may_reduce_num is set, then this may allocate a smaller ring than - * expected. The caller should query virtqueue_get_ring_size to learn + * expected. The caller should query virtqueue_get_vring_size to learn * the actual size of the ring. */ struct virtqueue *vring_create_virtqueue(unsigned int index, -- cgit v1.2.3 From b75bb8a5b755d0c7bf1ac071e4df2349a7644a1e Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Fri, 5 Apr 2019 20:46:46 +0200 Subject: r8169: disable ASPM again There's a significant number of reports that re-enabling ASPM causes different issues, ranging from decreased performance to system not booting at all. This affects only a minority of users, but the number of affected users is big enough that we better switch off ASPM again. This will hurt notebook users who are not affected by the issues, they may see decreased battery runtime w/o ASPM. With the PCI core folks is being discussed to add generic sysfs attributes to control ASPM. Once this is in place brave enough users can re-enable ASPM on their system. Fixes: a99790bf5c7f ("r8169: Reinstate ASPM Support") Signed-off-by: Heiner Kallweit Signed-off-by: David S. Miller --- drivers/net/ethernet/realtek/r8169.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/net/ethernet/realtek/r8169.c b/drivers/net/ethernet/realtek/r8169.c index 19efa88f3f02..ed651dde6ef9 100644 --- a/drivers/net/ethernet/realtek/r8169.c +++ b/drivers/net/ethernet/realtek/r8169.c @@ -28,6 +28,7 @@ #include #include #include +#include #include #include @@ -7352,6 +7353,11 @@ static int rtl_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) if (rc) return rc; + /* Disable ASPM completely as that cause random device stop working + * problems as well as full system hangs for some PCIe devices users. + */ + pci_disable_link_state(pdev, PCIE_LINK_STATE_L0S | PCIE_LINK_STATE_L1); + /* enable device (incl. PCI PM wakeup and hotplug setup) */ rc = pcim_enable_device(pdev); if (rc < 0) { -- cgit v1.2.3 From e891db1a18bf11e02533ec2386b796cfd8d60666 Mon Sep 17 00:00:00 2001 From: Jarkko Sakkinen Date: Fri, 22 Mar 2019 12:51:20 +0200 Subject: tpm: turn on TPM on suspend for TPM 1.x tpm_chip_start/stop() should be also called for TPM 1.x devices on suspend. Add that functionality back. Do not lock the chip because it is unnecessary as there are no multiple threads using it when doing the suspend. Fixes: a3fbfae82b4c ("tpm: take TPM chip power gating out of tpm_transmit()") Reported-by: Paul Zimmerman Signed-off-by: Jarkko Sakkinen Tested-by: Domenico Andreoli Signed-off-by: James Morris --- drivers/char/tpm/tpm-interface.c | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/drivers/char/tpm/tpm-interface.c b/drivers/char/tpm/tpm-interface.c index 83ece5639f86..ae1030c9b086 100644 --- a/drivers/char/tpm/tpm-interface.c +++ b/drivers/char/tpm/tpm-interface.c @@ -402,15 +402,13 @@ int tpm_pm_suspend(struct device *dev) if (chip->flags & TPM_CHIP_FLAG_ALWAYS_POWERED) return 0; - if (chip->flags & TPM_CHIP_FLAG_TPM2) { - mutex_lock(&chip->tpm_mutex); - if (!tpm_chip_start(chip)) { + if (!tpm_chip_start(chip)) { + if (chip->flags & TPM_CHIP_FLAG_TPM2) tpm2_shutdown(chip, TPM2_SU_STATE); - tpm_chip_stop(chip); - } - mutex_unlock(&chip->tpm_mutex); - } else { - rc = tpm1_pm_suspend(chip, tpm_suspend_pcr); + else + rc = tpm1_pm_suspend(chip, tpm_suspend_pcr); + + tpm_chip_stop(chip); } return rc; -- cgit v1.2.3 From 7110629263469b4664d00b38ef80a656eddf3637 Mon Sep 17 00:00:00 2001 From: Tadeusz Struk Date: Wed, 27 Mar 2019 11:32:38 -0700 Subject: tpm: fix an invalid condition in tpm_common_poll MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The poll condition should only check response_length, because reads should only be issued if there is data to read. The response_read flag only prevents double writes. The problem was that the write set the response_read to false, enqued a tpm job, and returned. Then application called poll which checked the response_read flag and returned EPOLLIN. Then the application called read, but got nothing. After all that the async_work kicked in. Added also mutex_lock around the poll check to prevent other possible race conditions. Fixes: 9488585b21bef0df12 ("tpm: add support for partial reads") Reported-by: Mantas Mikulėnas Tested-by: Mantas Mikulėnas Signed-off-by: Tadeusz Struk Reviewed-by: Jarkko Sakkinen Signed-off-by: Jarkko Sakkinen Signed-off-by: James Morris --- drivers/char/tpm/tpm-dev-common.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/drivers/char/tpm/tpm-dev-common.c b/drivers/char/tpm/tpm-dev-common.c index 8856cce5a23b..817ae09a369e 100644 --- a/drivers/char/tpm/tpm-dev-common.c +++ b/drivers/char/tpm/tpm-dev-common.c @@ -233,12 +233,19 @@ __poll_t tpm_common_poll(struct file *file, poll_table *wait) __poll_t mask = 0; poll_wait(file, &priv->async_wait, wait); + mutex_lock(&priv->buffer_mutex); - if (!priv->response_read || priv->response_length) + /* + * The response_length indicates if there is still response + * (or part of it) to be consumed. Partial reads decrease it + * by the number of bytes read, and write resets it the zero. + */ + if (priv->response_length) mask = EPOLLIN | EPOLLRDNORM; else mask = EPOLLOUT | EPOLLWRNORM; + mutex_unlock(&priv->buffer_mutex); return mask; } -- cgit v1.2.3 From c78719203fc629421a0d91d3d22240c36ae0119c Mon Sep 17 00:00:00 2001 From: Jarkko Sakkinen Date: Mon, 25 Mar 2019 16:43:10 +0200 Subject: KEYS: trusted: allow trusted.ko to initialize w/o a TPM Allow trusted.ko to initialize w/o a TPM. This commit also adds checks to the exported functions to fail when a TPM is not available. Fixes: 240730437deb ("KEYS: trusted: explicitly use tpm_chip structure...") Cc: James Morris Reported-by: Dan Williams Tested-by: Dan Williams Signed-off-by: Jarkko Sakkinen Signed-off-by: James Morris --- security/keys/trusted.c | 28 +++++++++++++++++++++++----- 1 file changed, 23 insertions(+), 5 deletions(-) diff --git a/security/keys/trusted.c b/security/keys/trusted.c index bcc9c6ead7fd..a284d790de8c 100644 --- a/security/keys/trusted.c +++ b/security/keys/trusted.c @@ -135,6 +135,9 @@ int TSS_authhmac(unsigned char *digest, const unsigned char *key, int ret; va_list argp; + if (!chip) + return -ENODEV; + sdesc = init_sdesc(hashalg); if (IS_ERR(sdesc)) { pr_info("trusted_key: can't alloc %s\n", hash_alg); @@ -196,6 +199,9 @@ int TSS_checkhmac1(unsigned char *buffer, va_list argp; int ret; + if (!chip) + return -ENODEV; + bufsize = LOAD32(buffer, TPM_SIZE_OFFSET); tag = LOAD16(buffer, 0); ordinal = command; @@ -363,6 +369,9 @@ int trusted_tpm_send(unsigned char *cmd, size_t buflen) { int rc; + if (!chip) + return -ENODEV; + dump_tpm_buf(cmd); rc = tpm_send(chip, cmd, buflen); dump_tpm_buf(cmd); @@ -429,6 +438,9 @@ int oiap(struct tpm_buf *tb, uint32_t *handle, unsigned char *nonce) { int ret; + if (!chip) + return -ENODEV; + INIT_BUF(tb); store16(tb, TPM_TAG_RQU_COMMAND); store32(tb, TPM_OIAP_SIZE); @@ -1245,9 +1257,13 @@ static int __init init_trusted(void) { int ret; + /* encrypted_keys.ko depends on successful load of this module even if + * TPM is not used. + */ chip = tpm_default_chip(); if (!chip) - return -ENOENT; + return 0; + ret = init_digests(); if (ret < 0) goto err_put; @@ -1269,10 +1285,12 @@ err_put: static void __exit cleanup_trusted(void) { - put_device(&chip->dev); - kfree(digests); - trusted_shash_release(); - unregister_key_type(&key_type_trusted); + if (chip) { + put_device(&chip->dev); + kfree(digests); + trusted_shash_release(); + unregister_key_type(&key_type_trusted); + } } late_initcall(init_trusted); -- cgit v1.2.3 From b9d0a85d6b2e76630cfd4c475ee3af4109bfd87a Mon Sep 17 00:00:00 2001 From: Yue Haibing Date: Wed, 20 Feb 2019 16:25:38 +0800 Subject: tpm: Fix the type of the return value in calc_tpm2_event_size() calc_tpm2_event_size() has an invalid signature because it returns a 'size_t' where as its signature says that it returns 'int'. Cc: Fixes: 4d23cc323cdb ("tpm: add securityfs support for TPM 2.0 firmware event log") Suggested-by: Jarkko Sakkinen Signed-off-by: Yue Haibing Reviewed-by: Jarkko Sakkinen Signed-off-by: Jarkko Sakkinen Signed-off-by: James Morris --- drivers/char/tpm/eventlog/tpm2.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/char/tpm/eventlog/tpm2.c b/drivers/char/tpm/eventlog/tpm2.c index d8b77133a83a..f824563fc28d 100644 --- a/drivers/char/tpm/eventlog/tpm2.c +++ b/drivers/char/tpm/eventlog/tpm2.c @@ -37,8 +37,8 @@ * * Returns size of the event. If it is an invalid event, returns 0. */ -static int calc_tpm2_event_size(struct tcg_pcr_event2_head *event, - struct tcg_pcr_event *event_header) +static size_t calc_tpm2_event_size(struct tcg_pcr_event2_head *event, + struct tcg_pcr_event *event_header) { struct tcg_efi_specid_event_head *efispecid; struct tcg_event_field *event_field; -- cgit v1.2.3 From be24b37e22c20cbaa891971616784dd0f35211e8 Mon Sep 17 00:00:00 2001 From: "ndesaulniers@google.com" Date: Mon, 22 Oct 2018 16:43:57 -0700 Subject: KEYS: trusted: fix -Wvarags warning Fixes the warning reported by Clang: security/keys/trusted.c:146:17: warning: passing an object that undergoes default argument promotion to 'va_start' has undefined behavior [-Wvarargs] va_start(argp, h3); ^ security/keys/trusted.c:126:37: note: parameter of type 'unsigned char' is declared here unsigned char *h2, unsigned char h3, ...) ^ Specifically, it seems that both the C90 (4.8.1.1) and C11 (7.16.1.4) standards explicitly call this out as undefined behavior: The parameter parmN is the identifier of the rightmost parameter in the variable parameter list in the function definition (the one just before the ...). If the parameter parmN is declared with ... or with a type that is not compatible with the type that results after application of the default argument promotions, the behavior is undefined. Link: https://github.com/ClangBuiltLinux/linux/issues/41 Link: https://www.eskimo.com/~scs/cclass/int/sx11c.html Suggested-by: David Laight Suggested-by: Denis Kenzior Suggested-by: James Bottomley Suggested-by: Nathan Chancellor Signed-off-by: Nick Desaulniers Reviewed-by: Nathan Chancellor Tested-by: Nathan Chancellor Reviewed-by: Jarkko Sakkinen Signed-off-by: Jarkko Sakkinen Signed-off-by: James Morris --- include/keys/trusted.h | 2 +- security/keys/trusted.c | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/include/keys/trusted.h b/include/keys/trusted.h index adbcb6817826..0071298b9b28 100644 --- a/include/keys/trusted.h +++ b/include/keys/trusted.h @@ -38,7 +38,7 @@ enum { int TSS_authhmac(unsigned char *digest, const unsigned char *key, unsigned int keylen, unsigned char *h1, - unsigned char *h2, unsigned char h3, ...); + unsigned char *h2, unsigned int h3, ...); int TSS_checkhmac1(unsigned char *buffer, const uint32_t command, const unsigned char *ononce, diff --git a/security/keys/trusted.c b/security/keys/trusted.c index a284d790de8c..efdbf17f3915 100644 --- a/security/keys/trusted.c +++ b/security/keys/trusted.c @@ -125,7 +125,7 @@ out: */ int TSS_authhmac(unsigned char *digest, const unsigned char *key, unsigned int keylen, unsigned char *h1, - unsigned char *h2, unsigned char h3, ...) + unsigned char *h2, unsigned int h3, ...) { unsigned char paramdigest[SHA1_DIGEST_SIZE]; struct sdesc *sdesc; @@ -144,7 +144,7 @@ int TSS_authhmac(unsigned char *digest, const unsigned char *key, return PTR_ERR(sdesc); } - c = h3; + c = !!h3; ret = crypto_shash_init(&sdesc->shash); if (ret < 0) goto out; -- cgit v1.2.3 From f1a0ba6cccff75d882204cae1f154f17620b3c4a Mon Sep 17 00:00:00 2001 From: Tadeusz Struk Date: Tue, 12 Feb 2019 15:42:10 -0800 Subject: selftests/tpm2: Extend tests to cover partial reads Three new tests added: 1. Send get random cmd, read header in 1st read, read the rest in second read - expect success 2. Send get random cmd, read only part of the response, send another get random command, read the response - expect success 3. Send get random cmd followed by another get random cmd, without reading the first response - expect the second cmd to fail with -EBUSY Signed-off-by: Tadeusz Struk Reviewed-by: Jarkko Sakkinen Tested-by: Jarkko Sakkinen Signed-off-by: Jarkko Sakkinen Signed-off-by: James Morris --- tools/testing/selftests/tpm2/tpm2.py | 1 + tools/testing/selftests/tpm2/tpm2_tests.py | 63 ++++++++++++++++++++++++++++++ 2 files changed, 64 insertions(+) diff --git a/tools/testing/selftests/tpm2/tpm2.py b/tools/testing/selftests/tpm2/tpm2.py index 40ea95ce2ead..6fc99ce025b5 100644 --- a/tools/testing/selftests/tpm2/tpm2.py +++ b/tools/testing/selftests/tpm2/tpm2.py @@ -22,6 +22,7 @@ TPM2_CC_UNSEAL = 0x015E TPM2_CC_FLUSH_CONTEXT = 0x0165 TPM2_CC_START_AUTH_SESSION = 0x0176 TPM2_CC_GET_CAPABILITY = 0x017A +TPM2_CC_GET_RANDOM = 0x017B TPM2_CC_PCR_READ = 0x017E TPM2_CC_POLICY_PCR = 0x017F TPM2_CC_PCR_EXTEND = 0x0182 diff --git a/tools/testing/selftests/tpm2/tpm2_tests.py b/tools/testing/selftests/tpm2/tpm2_tests.py index 3bb066fea4a0..d4973be53493 100644 --- a/tools/testing/selftests/tpm2/tpm2_tests.py +++ b/tools/testing/selftests/tpm2/tpm2_tests.py @@ -158,6 +158,69 @@ class SmokeTest(unittest.TestCase): pass self.assertEqual(rejected, True) + def test_read_partial_resp(self): + try: + fmt = '>HIIH' + cmd = struct.pack(fmt, + tpm2.TPM2_ST_NO_SESSIONS, + struct.calcsize(fmt), + tpm2.TPM2_CC_GET_RANDOM, + 0x20) + self.client.tpm.write(cmd) + hdr = self.client.tpm.read(10) + sz = struct.unpack('>I', hdr[2:6])[0] + rsp = self.client.tpm.read() + except: + pass + self.assertEqual(sz, 10 + 2 + 32) + self.assertEqual(len(rsp), 2 + 32) + + def test_read_partial_overwrite(self): + try: + fmt = '>HIIH' + cmd = struct.pack(fmt, + tpm2.TPM2_ST_NO_SESSIONS, + struct.calcsize(fmt), + tpm2.TPM2_CC_GET_RANDOM, + 0x20) + self.client.tpm.write(cmd) + # Read part of the respone + rsp1 = self.client.tpm.read(15) + + # Send a new cmd + self.client.tpm.write(cmd) + + # Read the whole respone + rsp2 = self.client.tpm.read() + except: + pass + self.assertEqual(len(rsp1), 15) + self.assertEqual(len(rsp2), 10 + 2 + 32) + + def test_send_two_cmds(self): + rejected = False + try: + fmt = '>HIIH' + cmd = struct.pack(fmt, + tpm2.TPM2_ST_NO_SESSIONS, + struct.calcsize(fmt), + tpm2.TPM2_CC_GET_RANDOM, + 0x20) + self.client.tpm.write(cmd) + + # expect the second one to raise -EBUSY error + self.client.tpm.write(cmd) + rsp = self.client.tpm.read() + + except IOError, e: + # read the response + rsp = self.client.tpm.read() + rejected = True + pass + except: + pass + self.assertEqual(rejected, True) + class SpaceTest(unittest.TestCase): def setUp(self): logging.basicConfig(filename='SpaceTest.log', level=logging.DEBUG) -- cgit v1.2.3 From 6da70580af9612accf042b37564d73e787af39b4 Mon Sep 17 00:00:00 2001 From: Tadeusz Struk Date: Tue, 12 Feb 2019 15:42:05 -0800 Subject: selftests/tpm2: Open tpm dev in unbuffered mode In order to have control over how many bytes are read or written the device needs to be opened in unbuffered mode. Signed-off-by: Tadeusz Struk Reviewed-by: Jarkko Sakkinen Tested-by: Jarkko Sakkinen Signed-off-by: Jarkko Sakkinen Signed-off-by: James Morris --- tools/testing/selftests/tpm2/tpm2.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/testing/selftests/tpm2/tpm2.py b/tools/testing/selftests/tpm2/tpm2.py index 6fc99ce025b5..828c18584624 100644 --- a/tools/testing/selftests/tpm2/tpm2.py +++ b/tools/testing/selftests/tpm2/tpm2.py @@ -358,9 +358,9 @@ class Client: self.flags = flags if (self.flags & Client.FLAG_SPACE) == 0: - self.tpm = open('/dev/tpm0', 'r+b') + self.tpm = open('/dev/tpm0', 'r+b', buffering=0) else: - self.tpm = open('/dev/tpmrm0', 'r+b') + self.tpm = open('/dev/tpmrm0', 'r+b', buffering=0) def close(self): self.tpm.close() -- cgit v1.2.3 From 492b67e28ee5f2a2522fb72e3d3bcb990e461514 Mon Sep 17 00:00:00 2001 From: Lorenzo Bianconi Date: Sat, 6 Apr 2019 17:16:52 +0200 Subject: net: ip_gre: fix possible use-after-free in erspan_rcv erspan tunnels run __iptunnel_pull_header on received skbs to remove gre and erspan headers. This can determine a possible use-after-free accessing pkt_md pointer in erspan_rcv since the packet will be 'uncloned' running pskb_expand_head if it is a cloned gso skb (e.g if the packet has been sent though a veth device). Fix it resetting pkt_md pointer after __iptunnel_pull_header Fixes: 1d7e2ed22f8d ("net: erspan: refactor existing erspan code") Signed-off-by: Lorenzo Bianconi Signed-off-by: David S. Miller --- net/ipv4/ip_gre.c | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index fd219f7bd3ea..4b0526441476 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -259,7 +259,6 @@ static int erspan_rcv(struct sk_buff *skb, struct tnl_ptk_info *tpi, struct net *net = dev_net(skb->dev); struct metadata_dst *tun_dst = NULL; struct erspan_base_hdr *ershdr; - struct erspan_metadata *pkt_md; struct ip_tunnel_net *itn; struct ip_tunnel *tunnel; const struct iphdr *iph; @@ -282,9 +281,6 @@ static int erspan_rcv(struct sk_buff *skb, struct tnl_ptk_info *tpi, if (unlikely(!pskb_may_pull(skb, len))) return PACKET_REJECT; - ershdr = (struct erspan_base_hdr *)(skb->data + gre_hdr_len); - pkt_md = (struct erspan_metadata *)(ershdr + 1); - if (__iptunnel_pull_header(skb, len, htons(ETH_P_TEB), @@ -292,8 +288,9 @@ static int erspan_rcv(struct sk_buff *skb, struct tnl_ptk_info *tpi, goto drop; if (tunnel->collect_md) { + struct erspan_metadata *pkt_md, *md; struct ip_tunnel_info *info; - struct erspan_metadata *md; + unsigned char *gh; __be64 tun_id; __be16 flags; @@ -306,6 +303,14 @@ static int erspan_rcv(struct sk_buff *skb, struct tnl_ptk_info *tpi, if (!tun_dst) return PACKET_REJECT; + /* skb can be uncloned in __iptunnel_pull_header, so + * old pkt_md is no longer valid and we need to reset + * it + */ + gh = skb_network_header(skb) + + skb_network_header_len(skb); + pkt_md = (struct erspan_metadata *)(gh + gre_hdr_len + + sizeof(*ershdr)); md = ip_tunnel_info_opts(&tun_dst->u.tun_info); md->version = ver; md2 = &md->u.md2; -- cgit v1.2.3 From 2a3cabae4536edbcb21d344e7aa8be7a584d2afb Mon Sep 17 00:00:00 2001 From: Lorenzo Bianconi Date: Sat, 6 Apr 2019 17:16:53 +0200 Subject: net: ip6_gre: fix possible use-after-free in ip6erspan_rcv erspan_v6 tunnels run __iptunnel_pull_header on received skbs to remove erspan header. This can determine a possible use-after-free accessing pkt_md pointer in ip6erspan_rcv since the packet will be 'uncloned' running pskb_expand_head if it is a cloned gso skb (e.g if the packet has been sent though a veth device). Fix it resetting pkt_md pointer after __iptunnel_pull_header Fixes: 1d7e2ed22f8d ("net: erspan: refactor existing erspan code") Signed-off-by: Lorenzo Bianconi Signed-off-by: David S. Miller --- net/ipv6/ip6_gre.c | 20 +++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c index b32c95f02128..655e46b227f9 100644 --- a/net/ipv6/ip6_gre.c +++ b/net/ipv6/ip6_gre.c @@ -525,10 +525,10 @@ static int ip6gre_rcv(struct sk_buff *skb, const struct tnl_ptk_info *tpi) } static int ip6erspan_rcv(struct sk_buff *skb, - struct tnl_ptk_info *tpi) + struct tnl_ptk_info *tpi, + int gre_hdr_len) { struct erspan_base_hdr *ershdr; - struct erspan_metadata *pkt_md; const struct ipv6hdr *ipv6h; struct erspan_md2 *md2; struct ip6_tnl *tunnel; @@ -547,18 +547,16 @@ static int ip6erspan_rcv(struct sk_buff *skb, if (unlikely(!pskb_may_pull(skb, len))) return PACKET_REJECT; - ershdr = (struct erspan_base_hdr *)skb->data; - pkt_md = (struct erspan_metadata *)(ershdr + 1); - if (__iptunnel_pull_header(skb, len, htons(ETH_P_TEB), false, false) < 0) return PACKET_REJECT; if (tunnel->parms.collect_md) { + struct erspan_metadata *pkt_md, *md; struct metadata_dst *tun_dst; struct ip_tunnel_info *info; - struct erspan_metadata *md; + unsigned char *gh; __be64 tun_id; __be16 flags; @@ -571,6 +569,14 @@ static int ip6erspan_rcv(struct sk_buff *skb, if (!tun_dst) return PACKET_REJECT; + /* skb can be uncloned in __iptunnel_pull_header, so + * old pkt_md is no longer valid and we need to reset + * it + */ + gh = skb_network_header(skb) + + skb_network_header_len(skb); + pkt_md = (struct erspan_metadata *)(gh + gre_hdr_len + + sizeof(*ershdr)); info = &tun_dst->u.tun_info; md = ip_tunnel_info_opts(info); md->version = ver; @@ -607,7 +613,7 @@ static int gre_rcv(struct sk_buff *skb) if (unlikely(tpi.proto == htons(ETH_P_ERSPAN) || tpi.proto == htons(ETH_P_ERSPAN2))) { - if (ip6erspan_rcv(skb, &tpi) == PACKET_RCVD) + if (ip6erspan_rcv(skb, &tpi, hdr_len) == PACKET_RCVD) return 0; goto out; } -- cgit v1.2.3 From afe64245af9f58267e7fa8fb76ad5650ee7ec25f Mon Sep 17 00:00:00 2001 From: Michael Zhivich Date: Mon, 8 Apr 2019 10:48:45 -0400 Subject: ethtool: avoid signed-unsigned comparison in ethtool_validate_speed() When building C++ userspace code that includes ethtool.h with "-Werror -Wall", g++ complains about signed-unsigned comparison in ethtool_validate_speed() due to definition of SPEED_UNKNOWN as -1. Explicitly cast SPEED_UNKNOWN to __u32 to match type of ethtool_validate_speed() argument. Signed-off-by: Michael Zhivich Signed-off-by: David S. Miller --- include/uapi/linux/ethtool.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/uapi/linux/ethtool.h b/include/uapi/linux/ethtool.h index 3652b239dad1..d473e5ed044c 100644 --- a/include/uapi/linux/ethtool.h +++ b/include/uapi/linux/ethtool.h @@ -1591,7 +1591,7 @@ enum ethtool_link_mode_bit_indices { static inline int ethtool_validate_speed(__u32 speed) { - return speed <= INT_MAX || speed == SPEED_UNKNOWN; + return speed <= INT_MAX || speed == (__u32)SPEED_UNKNOWN; } /* Duplex, half or full. */ -- cgit v1.2.3 From caf2c5205d82ff0d758096a69a7e0466c38d4dbb Mon Sep 17 00:00:00 2001 From: Michael Zhivich Date: Mon, 8 Apr 2019 10:48:46 -0400 Subject: broadcom: tg3: fix use of SPEED_UNKNOWN ethtool constant tg3 driver uses u16 to store SPEED_UKNOWN ethtool constant, which is defined as -1, resulting in value truncation and thus incorrect test results against SPEED_UNKNOWN. For example, the following test will print "False": u16 speed = SPEED_UNKNOWN; if (speed == SPEED_UNKNOWN) printf("True"); else printf("False"); Change storage of speed to use u32 to avoid this issue. Signed-off-by: Michael Zhivich Reviewed-by: Andrew Lunn Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/tg3.c | 8 ++++---- drivers/net/ethernet/broadcom/tg3.h | 4 ++-- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/broadcom/tg3.c b/drivers/net/ethernet/broadcom/tg3.c index 328373e0578f..060a6f386104 100644 --- a/drivers/net/ethernet/broadcom/tg3.c +++ b/drivers/net/ethernet/broadcom/tg3.c @@ -4283,7 +4283,7 @@ static void tg3_power_down(struct tg3 *tp) pci_set_power_state(tp->pdev, PCI_D3hot); } -static void tg3_aux_stat_to_speed_duplex(struct tg3 *tp, u32 val, u16 *speed, u8 *duplex) +static void tg3_aux_stat_to_speed_duplex(struct tg3 *tp, u32 val, u32 *speed, u8 *duplex) { switch (val & MII_TG3_AUX_STAT_SPDMASK) { case MII_TG3_AUX_STAT_10HALF: @@ -4787,7 +4787,7 @@ static int tg3_setup_copper_phy(struct tg3 *tp, bool force_reset) bool current_link_up; u32 bmsr, val; u32 lcl_adv, rmt_adv; - u16 current_speed; + u32 current_speed; u8 current_duplex; int i, err; @@ -5719,7 +5719,7 @@ out: static int tg3_setup_fiber_phy(struct tg3 *tp, bool force_reset) { u32 orig_pause_cfg; - u16 orig_active_speed; + u32 orig_active_speed; u8 orig_active_duplex; u32 mac_status; bool current_link_up; @@ -5823,7 +5823,7 @@ static int tg3_setup_fiber_mii_phy(struct tg3 *tp, bool force_reset) { int err = 0; u32 bmsr, bmcr; - u16 current_speed = SPEED_UNKNOWN; + u32 current_speed = SPEED_UNKNOWN; u8 current_duplex = DUPLEX_UNKNOWN; bool current_link_up = false; u32 local_adv, remote_adv, sgsr; diff --git a/drivers/net/ethernet/broadcom/tg3.h b/drivers/net/ethernet/broadcom/tg3.h index a772a33b685c..6953d0546acb 100644 --- a/drivers/net/ethernet/broadcom/tg3.h +++ b/drivers/net/ethernet/broadcom/tg3.h @@ -2873,7 +2873,7 @@ struct tg3_tx_ring_info { struct tg3_link_config { /* Describes what we're trying to get. */ u32 advertising; - u16 speed; + u32 speed; u8 duplex; u8 autoneg; u8 flowctrl; @@ -2882,7 +2882,7 @@ struct tg3_link_config { u8 active_flowctrl; u8 active_duplex; - u16 active_speed; + u32 active_speed; u32 rmt_adv; }; -- cgit v1.2.3 From d63da85a4226c4b5a28536a1f48d89eefd50a832 Mon Sep 17 00:00:00 2001 From: Michael Zhivich Date: Mon, 8 Apr 2019 10:48:47 -0400 Subject: qlogic: qlcnic: fix use of SPEED_UNKNOWN ethtool constant qlcnic driver uses u16 to store SPEED_UKNOWN ethtool constant, which is defined as -1, resulting in value truncation and thus incorrect test results against SPEED_UNKNOWN. For example, the following test will print "False": u16 speed = SPEED_UNKNOWN; if (speed == SPEED_UNKNOWN) printf("True"); else printf("False"); Change storage of speed to use u32 to avoid this issue. Signed-off-by: Michael Zhivich Reviewed-by: Andrew Lunn Signed-off-by: David S. Miller --- drivers/net/ethernet/qlogic/qlcnic/qlcnic.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic.h b/drivers/net/ethernet/qlogic/qlcnic/qlcnic.h index 0c443ea98479..374a4d4371f9 100644 --- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic.h +++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic.h @@ -497,7 +497,7 @@ struct qlcnic_hardware_context { u16 board_type; u16 supported_type; - u16 link_speed; + u32 link_speed; u16 link_duplex; u16 link_autoneg; u16 module_type; -- cgit v1.2.3 From a62520473f15750cd1432d36b377a06cd7cff8d2 Mon Sep 17 00:00:00 2001 From: Paul Thomas Date: Mon, 8 Apr 2019 15:37:54 -0400 Subject: net: macb driver, check for SKBTX_HW_TSTAMP Make sure SKBTX_HW_TSTAMP (i.e. SOF_TIMESTAMPING_TX_HARDWARE) has been enabled for this skb. It does fix the issue where normal socks that aren't expecting a timestamp will not wake up on select, but when a user does want a SOF_TIMESTAMPING_TX_HARDWARE it does work. Signed-off-by: Paul Thomas Signed-off-by: David S. Miller --- drivers/net/ethernet/cadence/macb_main.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/cadence/macb_main.c b/drivers/net/ethernet/cadence/macb_main.c index 1522aee81884..3da2795e2486 100644 --- a/drivers/net/ethernet/cadence/macb_main.c +++ b/drivers/net/ethernet/cadence/macb_main.c @@ -898,7 +898,9 @@ static void macb_tx_interrupt(struct macb_queue *queue) /* First, update TX stats if needed */ if (skb) { - if (gem_ptp_do_txstamp(queue, skb, desc) == 0) { + if (unlikely(skb_shinfo(skb)->tx_flags & + SKBTX_HW_TSTAMP) && + gem_ptp_do_txstamp(queue, skb, desc) == 0) { /* skb now belongs to timestamp buffer * and will be removed later */ -- cgit v1.2.3 From a1b0e4e684e9c300b9e759b46cb7a0147e61ddff Mon Sep 17 00:00:00 2001 From: Michael Chan Date: Mon, 8 Apr 2019 17:39:54 -0400 Subject: bnxt_en: Improve RX consumer index validity check. There is logic to check that the RX/TPA consumer index is the expected index to work around a hardware problem. However, the potentially bad consumer index is first used to index into an array to reference an entry. This can potentially crash if the bad consumer index is beyond legal range. Improve the logic to use the consumer index for dereferencing after the validity check and log an error message. Fixes: fa7e28127a5a ("bnxt_en: Add workaround to detect bad opaque in rx completion (part 2)") Signed-off-by: Michael Chan Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index 0bb9d7b3a2b6..3df847b7079f 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -1133,6 +1133,8 @@ static void bnxt_tpa_start(struct bnxt *bp, struct bnxt_rx_ring_info *rxr, tpa_info = &rxr->rx_tpa[agg_id]; if (unlikely(cons != rxr->rx_next_cons)) { + netdev_warn(bp->dev, "TPA cons %x != expected cons %x\n", + cons, rxr->rx_next_cons); bnxt_sched_reset(bp, rxr); return; } @@ -1585,15 +1587,17 @@ static int bnxt_rx_pkt(struct bnxt *bp, struct bnxt_cp_ring_info *cpr, } cons = rxcmp->rx_cmp_opaque; - rx_buf = &rxr->rx_buf_ring[cons]; - data = rx_buf->data; - data_ptr = rx_buf->data_ptr; if (unlikely(cons != rxr->rx_next_cons)) { int rc1 = bnxt_discard_rx(bp, cpr, raw_cons, rxcmp); + netdev_warn(bp->dev, "RX cons %x != expected cons %x\n", + cons, rxr->rx_next_cons); bnxt_sched_reset(bp, rxr); return rc1; } + rx_buf = &rxr->rx_buf_ring[cons]; + data = rx_buf->data; + data_ptr = rx_buf->data_ptr; prefetch(data_ptr); misc = le32_to_cpu(rxcmp->rx_cmp_misc_v1); -- cgit v1.2.3 From 8e44e96c6c8e8fb80b84a2ca11798a8554f710f2 Mon Sep 17 00:00:00 2001 From: Michael Chan Date: Mon, 8 Apr 2019 17:39:55 -0400 Subject: bnxt_en: Reset device on RX buffer errors. If the RX completion indicates RX buffers errors, the RX ring will be disabled by firmware and no packets will be received on that ring from that point on. Recover by resetting the device. Fixes: c0c050c58d84 ("bnxt_en: New Broadcom ethernet driver.") Signed-off-by: Michael Chan Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index 3df847b7079f..4c586ba4364b 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -1614,11 +1614,17 @@ static int bnxt_rx_pkt(struct bnxt *bp, struct bnxt_cp_ring_info *cpr, rx_buf->data = NULL; if (rxcmp1->rx_cmp_cfa_code_errors_v2 & RX_CMP_L2_ERRORS) { + u32 rx_err = le32_to_cpu(rxcmp1->rx_cmp_cfa_code_errors_v2); + bnxt_reuse_rx_data(rxr, cons, data); if (agg_bufs) bnxt_reuse_rx_agg_bufs(cpr, cp_cons, agg_bufs); rc = -EIO; + if (rx_err & RX_CMPL_ERRORS_BUFFER_ERROR_MASK) { + netdev_warn(bp->dev, "RX buffer error %x\n", rx_err); + bnxt_sched_reset(bp, rxr); + } goto next_rx; } -- cgit v1.2.3 From 5c2442fd78998af60e13aba506d103f7f43f8701 Mon Sep 17 00:00:00 2001 From: Varun Prakash Date: Fri, 5 Apr 2019 20:39:13 +0530 Subject: scsi: csiostor: fix missing data copy in csio_scsi_err_handler() If scsi cmd sglist is not suitable for DDP then csiostor driver uses preallocated buffers for DDP, because of this data copy is required from DDP buffer to scsi cmd sglist before calling ->scsi_done(). Signed-off-by: Varun Prakash Signed-off-by: Martin K. Petersen --- drivers/scsi/csiostor/csio_scsi.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/scsi/csiostor/csio_scsi.c b/drivers/scsi/csiostor/csio_scsi.c index 462560b2855e..469d0bc9f5fe 100644 --- a/drivers/scsi/csiostor/csio_scsi.c +++ b/drivers/scsi/csiostor/csio_scsi.c @@ -1713,8 +1713,11 @@ csio_scsi_err_handler(struct csio_hw *hw, struct csio_ioreq *req) } out: - if (req->nsge > 0) + if (req->nsge > 0) { scsi_dma_unmap(cmnd); + if (req->dcopy && (host_status == DID_OK)) + host_status = csio_scsi_copy_to_sgl(hw, req); + } cmnd->result = (((host_status) << 16) | scsi_status); cmnd->scsi_done(cmnd); -- cgit v1.2.3 From bef42cb2029c0ec8cd8c9e0545589d81913dec4e Mon Sep 17 00:00:00 2001 From: Imre Deak Date: Sun, 7 Apr 2019 15:46:55 +0300 Subject: drm/i915: Get power refs in encoder->get_power_domains() Push getting the reference for the encoders' power domains into the encoder get_power_domains() hook instead of doing this from the caller. This way the encoder can store away the corresponding wakerefs. This fixes the DSI encoder disabling, which didn't release these power references it acquired during HW state readout. Note that longtime ownership for the corresponding wakerefs can be thus acquired / released in two ways. Nevertheless there is always only one owner for them: After HW readout (booting/system resume): - encoder->get_power_domains() acquires - encoder->disable*() releases After a modeset (calling intel_atomic_commit()): - encoder->enable*() acquires - encoder->disable*() releases * can be any of the encoder enable/disable hooks. v2: - Check that the DSI io_wakerefs are unset both during encoder HW readout and enabling. (Chris) Fixes: 0e6e0be4c9523 ("drm/i915: Markup paired operations on display power domains") Cc: Vandita Kulkarni Cc: Chris Wilson Signed-off-by: Imre Deak Reviewed-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20190407124655.31536-1-imre.deak@intel.com (cherry picked from commit 3a52fb7e7953f0b13df8c05d0d74b56a66888f30) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/i915/icl_dsi.c | 40 ++++++++++++++++++------------------ drivers/gpu/drm/i915/intel_ddi.c | 17 ++++++++------- drivers/gpu/drm/i915/intel_display.c | 6 +----- drivers/gpu/drm/i915/intel_drv.h | 10 +++++---- 4 files changed, 35 insertions(+), 38 deletions(-) diff --git a/drivers/gpu/drm/i915/icl_dsi.c b/drivers/gpu/drm/i915/icl_dsi.c index 73a7bee24a66..83cd8284e807 100644 --- a/drivers/gpu/drm/i915/icl_dsi.c +++ b/drivers/gpu/drm/i915/icl_dsi.c @@ -323,6 +323,21 @@ static void gen11_dsi_program_esc_clk_div(struct intel_encoder *encoder) } } +static void get_dsi_io_power_domains(struct drm_i915_private *dev_priv, + struct intel_dsi *intel_dsi) +{ + enum port port; + + for_each_dsi_port(port, intel_dsi->ports) { + WARN_ON(intel_dsi->io_wakeref[port]); + intel_dsi->io_wakeref[port] = + intel_display_power_get(dev_priv, + port == PORT_A ? + POWER_DOMAIN_PORT_DDI_A_IO : + POWER_DOMAIN_PORT_DDI_B_IO); + } +} + static void gen11_dsi_enable_io_power(struct intel_encoder *encoder) { struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); @@ -336,13 +351,7 @@ static void gen11_dsi_enable_io_power(struct intel_encoder *encoder) I915_WRITE(ICL_DSI_IO_MODECTL(port), tmp); } - for_each_dsi_port(port, intel_dsi->ports) { - intel_dsi->io_wakeref[port] = - intel_display_power_get(dev_priv, - port == PORT_A ? - POWER_DOMAIN_PORT_DDI_A_IO : - POWER_DOMAIN_PORT_DDI_B_IO); - } + get_dsi_io_power_domains(dev_priv, intel_dsi); } static void gen11_dsi_power_up_lanes(struct intel_encoder *encoder) @@ -1218,20 +1227,11 @@ static int gen11_dsi_compute_config(struct intel_encoder *encoder, return 0; } -static u64 gen11_dsi_get_power_domains(struct intel_encoder *encoder, - struct intel_crtc_state *crtc_state) +static void gen11_dsi_get_power_domains(struct intel_encoder *encoder, + struct intel_crtc_state *crtc_state) { - struct intel_dsi *intel_dsi = enc_to_intel_dsi(&encoder->base); - u64 domains = 0; - enum port port; - - for_each_dsi_port(port, intel_dsi->ports) - if (port == PORT_A) - domains |= BIT_ULL(POWER_DOMAIN_PORT_DDI_A_IO); - else - domains |= BIT_ULL(POWER_DOMAIN_PORT_DDI_B_IO); - - return domains; + get_dsi_io_power_domains(to_i915(encoder->base.dev), + enc_to_intel_dsi(&encoder->base)); } static bool gen11_dsi_get_hw_state(struct intel_encoder *encoder, diff --git a/drivers/gpu/drm/i915/intel_ddi.c b/drivers/gpu/drm/i915/intel_ddi.c index 14d580cdefd3..deba8e90360a 100644 --- a/drivers/gpu/drm/i915/intel_ddi.c +++ b/drivers/gpu/drm/i915/intel_ddi.c @@ -2075,12 +2075,11 @@ intel_ddi_main_link_aux_domain(struct intel_digital_port *dig_port) intel_aux_power_domain(dig_port); } -static u64 intel_ddi_get_power_domains(struct intel_encoder *encoder, - struct intel_crtc_state *crtc_state) +static void intel_ddi_get_power_domains(struct intel_encoder *encoder, + struct intel_crtc_state *crtc_state) { struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); struct intel_digital_port *dig_port; - u64 domains; /* * TODO: Add support for MST encoders. Atm, the following should never @@ -2088,10 +2087,10 @@ static u64 intel_ddi_get_power_domains(struct intel_encoder *encoder, * hook. */ if (WARN_ON(intel_crtc_has_type(crtc_state, INTEL_OUTPUT_DP_MST))) - return 0; + return; dig_port = enc_to_dig_port(&encoder->base); - domains = BIT_ULL(dig_port->ddi_io_power_domain); + intel_display_power_get(dev_priv, dig_port->ddi_io_power_domain); /* * AUX power is only needed for (e)DP mode, and for HDMI mode on TC @@ -2099,15 +2098,15 @@ static u64 intel_ddi_get_power_domains(struct intel_encoder *encoder, */ if (intel_crtc_has_dp_encoder(crtc_state) || intel_port_is_tc(dev_priv, encoder->port)) - domains |= BIT_ULL(intel_ddi_main_link_aux_domain(dig_port)); + intel_display_power_get(dev_priv, + intel_ddi_main_link_aux_domain(dig_port)); /* * VDSC power is needed when DSC is enabled */ if (crtc_state->dsc_params.compression_enable) - domains |= BIT_ULL(intel_dsc_power_domain(crtc_state)); - - return domains; + intel_display_power_get(dev_priv, + intel_dsc_power_domain(crtc_state)); } void intel_ddi_enable_pipe_clock(const struct intel_crtc_state *crtc_state) diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index ccb616351bba..421aac80a838 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -15986,8 +15986,6 @@ get_encoder_power_domains(struct drm_i915_private *dev_priv) struct intel_encoder *encoder; for_each_intel_encoder(&dev_priv->drm, encoder) { - u64 get_domains; - enum intel_display_power_domain domain; struct intel_crtc_state *crtc_state; if (!encoder->get_power_domains) @@ -16001,9 +15999,7 @@ get_encoder_power_domains(struct drm_i915_private *dev_priv) continue; crtc_state = to_intel_crtc_state(encoder->base.crtc->state); - get_domains = encoder->get_power_domains(encoder, crtc_state); - for_each_power_domain(domain, get_domains) - intel_display_power_get(dev_priv, domain); + encoder->get_power_domains(encoder, crtc_state); } } diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h index 15db41394b9e..d5660ac1b0d6 100644 --- a/drivers/gpu/drm/i915/intel_drv.h +++ b/drivers/gpu/drm/i915/intel_drv.h @@ -270,10 +270,12 @@ struct intel_encoder { * be set correctly before calling this function. */ void (*get_config)(struct intel_encoder *, struct intel_crtc_state *pipe_config); - /* Returns a mask of power domains that need to be referenced as part - * of the hardware state readout code. */ - u64 (*get_power_domains)(struct intel_encoder *encoder, - struct intel_crtc_state *crtc_state); + /* + * Acquires the power domains needed for an active encoder during + * hardware state readout. + */ + void (*get_power_domains)(struct intel_encoder *encoder, + struct intel_crtc_state *crtc_state); /* * Called during system suspend after all pending requests for the * encoder are flushed (for example for DP AUX transactions) and -- cgit v1.2.3 From 2ae2c3316fb77dcf64275d011596b60104c45426 Mon Sep 17 00:00:00 2001 From: Wen Yang Date: Thu, 4 Apr 2019 00:04:09 +0800 Subject: drm/mediatek: fix possible object reference leak The call to of_parse_phandle returns a node pointer with refcount incremented thus it must be explicitly decremented after the last usage. Detected by coccinelle with the following warnings: drivers/gpu/drm/mediatek/mtk_hdmi.c:1521:2-8: ERROR: missing of_node_put; acquired a node pointer with refcount incremented on line 1509, but without a corresponding object release within this function. drivers/gpu/drm/mediatek/mtk_hdmi.c:1524:1-7: ERROR: missing of_node_put; acquired a node pointer with refcount incremented on line 1509, but without a corresponding object release within this function. Signed-off-by: Wen Yang Cc: CK Hu Cc: Philipp Zabel Cc: David Airlie Cc: Daniel Vetter Cc: Matthias Brugger Cc: dri-devel@lists.freedesktop.org Cc: linux-arm-kernel@lists.infradead.org Cc: linux-mediatek@lists.infradead.org Cc: linux-kernel@vger.kernel.org Signed-off-by: CK Hu --- drivers/gpu/drm/mediatek/mtk_hdmi.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/mediatek/mtk_hdmi.c b/drivers/gpu/drm/mediatek/mtk_hdmi.c index 543a25e5765e..e04e6c293d39 100644 --- a/drivers/gpu/drm/mediatek/mtk_hdmi.c +++ b/drivers/gpu/drm/mediatek/mtk_hdmi.c @@ -1515,6 +1515,7 @@ static int mtk_hdmi_dt_parse_pdata(struct mtk_hdmi *hdmi, of_node_put(remote); hdmi->ddc_adpt = of_find_i2c_adapter_by_node(i2c_np); + of_node_put(i2c_np); if (!hdmi->ddc_adpt) { dev_err(dev, "Failed to get ddc i2c adapter by node\n"); return -EINVAL; -- cgit v1.2.3 From c5781ffbbd4f742a58263458145fe7f0ac01d9e0 Mon Sep 17 00:00:00 2001 From: Erik Schmauss Date: Mon, 8 Apr 2019 13:42:26 -0700 Subject: ACPICA: Namespace: remove address node from global list after method termination ACPICA commit b233720031a480abd438f2e9c643080929d144c3 ASL operation_regions declare a range of addresses that it uses. In a perfect world, the range of addresses should be used exclusively by the AML interpreter. The OS can use this information to decide which drivers to load so that the AML interpreter and device drivers use different regions of memory. During table load, the address information is added to a global address range list. Each node in this list contains an address range as well as a namespace node of the operation_region. This list is deleted at ACPI shutdown. Unfortunately, ASL operation_regions can be declared inside of control methods. Although this is not recommended, modern firmware contains such code. New module level code changes unintentionally removed the functionality of adding and removing nodes to the global address range list. A few months ago, support for adding addresses has been re- implemented. However, the removal of the address range list was missed and resulted in some systems to crash due to the address list containing bogus namespace nodes from operation_regions declared in control methods. In order to fix the crash, this change removes dynamic operation_regions after control method termination. Link: https://github.com/acpica/acpica/commit/b2337200 Link: https://bugzilla.kernel.org/show_bug.cgi?id=202475 Fixes: 4abb951b73ff ("ACPICA: AML interpreter: add region addresses in global list during initialization") Reported-by: Michael J Gruber Signed-off-by: Erik Schmauss Signed-off-by: Bob Moore Cc: 4.20+ # 4.20+ Signed-off-by: Rafael J. Wysocki --- drivers/acpi/acpica/nsobject.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/acpi/acpica/nsobject.c b/drivers/acpi/acpica/nsobject.c index 8638f43cfc3d..79d86da1c892 100644 --- a/drivers/acpi/acpica/nsobject.c +++ b/drivers/acpi/acpica/nsobject.c @@ -186,6 +186,10 @@ void acpi_ns_detach_object(struct acpi_namespace_node *node) } } + if (obj_desc->common.type == ACPI_TYPE_REGION) { + acpi_ut_remove_address_range(obj_desc->region.space_id, node); + } + /* Clear the Node entry in all cases */ node->object = NULL; -- cgit v1.2.3 From 0c24613cda163dedfa229afc8eff6072e57fac8d Mon Sep 17 00:00:00 2001 From: Wangyan Wang Date: Tue, 9 Apr 2019 14:53:04 +0800 Subject: drm/mediatek: fix the rate and divder of hdmi phy for MT2701 Due to a clerical error,there is one zero less for 12800000. Fix it for 128000000 Fixes: 0fc721b2968e ("drm/mediatek: add hdmi driver for MT2701 and MT7623") Signed-off-by: Wangyan Wang Signed-off-by: CK Hu --- drivers/gpu/drm/mediatek/mtk_mt2701_hdmi_phy.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/mediatek/mtk_mt2701_hdmi_phy.c b/drivers/gpu/drm/mediatek/mtk_mt2701_hdmi_phy.c index fcc42dc6ea7f..0746fc887706 100644 --- a/drivers/gpu/drm/mediatek/mtk_mt2701_hdmi_phy.c +++ b/drivers/gpu/drm/mediatek/mtk_mt2701_hdmi_phy.c @@ -116,8 +116,8 @@ static int mtk_hdmi_pll_set_rate(struct clk_hw *hw, unsigned long rate, if (rate <= 64000000) pos_div = 3; - else if (rate <= 12800000) - pos_div = 1; + else if (rate <= 128000000) + pos_div = 2; else pos_div = 1; -- cgit v1.2.3 From 321b628e6f5a3af999f75eadd373adbcb8b4cb1f Mon Sep 17 00:00:00 2001 From: Wangyan Wang Date: Tue, 9 Apr 2019 14:53:06 +0800 Subject: drm/mediatek: make implementation of recalc_rate() for MT2701 hdmi phy Recalculate the rate of this clock, by querying hardware to make implementation of recalc_rate() to match the definition. Signed-off-by: Wangyan Wang Signed-off-by: CK Hu --- drivers/gpu/drm/mediatek/mtk_hdmi_phy.c | 8 ------ drivers/gpu/drm/mediatek/mtk_hdmi_phy.h | 2 -- drivers/gpu/drm/mediatek/mtk_mt2701_hdmi_phy.c | 38 +++++++++++++++++++++++--- drivers/gpu/drm/mediatek/mtk_mt8173_hdmi_phy.c | 8 ++++++ 4 files changed, 42 insertions(+), 14 deletions(-) diff --git a/drivers/gpu/drm/mediatek/mtk_hdmi_phy.c b/drivers/gpu/drm/mediatek/mtk_hdmi_phy.c index 4ef9c57ffd44..efc400ebbb90 100644 --- a/drivers/gpu/drm/mediatek/mtk_hdmi_phy.c +++ b/drivers/gpu/drm/mediatek/mtk_hdmi_phy.c @@ -29,14 +29,6 @@ long mtk_hdmi_pll_round_rate(struct clk_hw *hw, unsigned long rate, return rate; } -unsigned long mtk_hdmi_pll_recalc_rate(struct clk_hw *hw, - unsigned long parent_rate) -{ - struct mtk_hdmi_phy *hdmi_phy = to_mtk_hdmi_phy(hw); - - return hdmi_phy->pll_rate; -} - void mtk_hdmi_phy_clear_bits(struct mtk_hdmi_phy *hdmi_phy, u32 offset, u32 bits) { diff --git a/drivers/gpu/drm/mediatek/mtk_hdmi_phy.h b/drivers/gpu/drm/mediatek/mtk_hdmi_phy.h index f39b1fc66612..71430691ffe4 100644 --- a/drivers/gpu/drm/mediatek/mtk_hdmi_phy.h +++ b/drivers/gpu/drm/mediatek/mtk_hdmi_phy.h @@ -50,8 +50,6 @@ void mtk_hdmi_phy_mask(struct mtk_hdmi_phy *hdmi_phy, u32 offset, struct mtk_hdmi_phy *to_mtk_hdmi_phy(struct clk_hw *hw); long mtk_hdmi_pll_round_rate(struct clk_hw *hw, unsigned long rate, unsigned long *parent_rate); -unsigned long mtk_hdmi_pll_recalc_rate(struct clk_hw *hw, - unsigned long parent_rate); extern struct platform_driver mtk_hdmi_phy_driver; extern struct mtk_hdmi_phy_conf mtk_hdmi_phy_8173_conf; diff --git a/drivers/gpu/drm/mediatek/mtk_mt2701_hdmi_phy.c b/drivers/gpu/drm/mediatek/mtk_mt2701_hdmi_phy.c index 0746fc887706..feb6a7ed63d1 100644 --- a/drivers/gpu/drm/mediatek/mtk_mt2701_hdmi_phy.c +++ b/drivers/gpu/drm/mediatek/mtk_mt2701_hdmi_phy.c @@ -79,7 +79,6 @@ static int mtk_hdmi_pll_prepare(struct clk_hw *hw) mtk_hdmi_phy_set_bits(hdmi_phy, HDMI_CON0, RG_HDMITX_EN_SLDO_MASK); usleep_range(80, 100); mtk_hdmi_phy_set_bits(hdmi_phy, HDMI_CON2, RG_HDMITX_MBIAS_LPF_EN); - mtk_hdmi_phy_set_bits(hdmi_phy, HDMI_CON2, RG_HDMITX_EN_TX_POSDIV); mtk_hdmi_phy_set_bits(hdmi_phy, HDMI_CON0, RG_HDMITX_EN_SER_MASK); mtk_hdmi_phy_set_bits(hdmi_phy, HDMI_CON0, RG_HDMITX_EN_PRED_MASK); mtk_hdmi_phy_set_bits(hdmi_phy, HDMI_CON0, RG_HDMITX_EN_DRV_MASK); @@ -94,7 +93,6 @@ static void mtk_hdmi_pll_unprepare(struct clk_hw *hw) mtk_hdmi_phy_clear_bits(hdmi_phy, HDMI_CON0, RG_HDMITX_EN_DRV_MASK); mtk_hdmi_phy_clear_bits(hdmi_phy, HDMI_CON0, RG_HDMITX_EN_PRED_MASK); mtk_hdmi_phy_clear_bits(hdmi_phy, HDMI_CON0, RG_HDMITX_EN_SER_MASK); - mtk_hdmi_phy_clear_bits(hdmi_phy, HDMI_CON2, RG_HDMITX_EN_TX_POSDIV); mtk_hdmi_phy_clear_bits(hdmi_phy, HDMI_CON2, RG_HDMITX_MBIAS_LPF_EN); usleep_range(80, 100); mtk_hdmi_phy_clear_bits(hdmi_phy, HDMI_CON0, RG_HDMITX_EN_SLDO_MASK); @@ -123,6 +121,7 @@ static int mtk_hdmi_pll_set_rate(struct clk_hw *hw, unsigned long rate, mtk_hdmi_phy_set_bits(hdmi_phy, HDMI_CON6, RG_HTPLL_PREDIV_MASK); mtk_hdmi_phy_set_bits(hdmi_phy, HDMI_CON6, RG_HTPLL_POSDIV_MASK); + mtk_hdmi_phy_set_bits(hdmi_phy, HDMI_CON2, RG_HDMITX_EN_TX_POSDIV); mtk_hdmi_phy_mask(hdmi_phy, HDMI_CON6, (0x1 << RG_HTPLL_IC), RG_HTPLL_IC_MASK); mtk_hdmi_phy_mask(hdmi_phy, HDMI_CON6, (0x1 << RG_HTPLL_IR), @@ -154,6 +153,39 @@ static int mtk_hdmi_pll_set_rate(struct clk_hw *hw, unsigned long rate, return 0; } +static unsigned long mtk_hdmi_pll_recalc_rate(struct clk_hw *hw, + unsigned long parent_rate) +{ + struct mtk_hdmi_phy *hdmi_phy = to_mtk_hdmi_phy(hw); + unsigned long out_rate, val; + + val = (readl(hdmi_phy->regs + HDMI_CON6) + & RG_HTPLL_PREDIV_MASK) >> RG_HTPLL_PREDIV; + switch (val) { + case 0x00: + out_rate = parent_rate; + break; + case 0x01: + out_rate = parent_rate / 2; + break; + default: + out_rate = parent_rate / 4; + break; + } + + val = (readl(hdmi_phy->regs + HDMI_CON6) + & RG_HTPLL_FBKDIV_MASK) >> RG_HTPLL_FBKDIV; + out_rate *= (val + 1) * 2; + val = (readl(hdmi_phy->regs + HDMI_CON2) + & RG_HDMITX_TX_POSDIV_MASK); + out_rate >>= (val >> RG_HDMITX_TX_POSDIV); + + if (readl(hdmi_phy->regs + HDMI_CON2) & RG_HDMITX_EN_TX_POSDIV) + out_rate /= 5; + + return out_rate; +} + static const struct clk_ops mtk_hdmi_phy_pll_ops = { .prepare = mtk_hdmi_pll_prepare, .unprepare = mtk_hdmi_pll_unprepare, @@ -174,7 +206,6 @@ static void mtk_hdmi_phy_enable_tmds(struct mtk_hdmi_phy *hdmi_phy) mtk_hdmi_phy_set_bits(hdmi_phy, HDMI_CON0, RG_HDMITX_EN_SLDO_MASK); usleep_range(80, 100); mtk_hdmi_phy_set_bits(hdmi_phy, HDMI_CON2, RG_HDMITX_MBIAS_LPF_EN); - mtk_hdmi_phy_set_bits(hdmi_phy, HDMI_CON2, RG_HDMITX_EN_TX_POSDIV); mtk_hdmi_phy_set_bits(hdmi_phy, HDMI_CON0, RG_HDMITX_EN_SER_MASK); mtk_hdmi_phy_set_bits(hdmi_phy, HDMI_CON0, RG_HDMITX_EN_PRED_MASK); mtk_hdmi_phy_set_bits(hdmi_phy, HDMI_CON0, RG_HDMITX_EN_DRV_MASK); @@ -186,7 +217,6 @@ static void mtk_hdmi_phy_disable_tmds(struct mtk_hdmi_phy *hdmi_phy) mtk_hdmi_phy_clear_bits(hdmi_phy, HDMI_CON0, RG_HDMITX_EN_DRV_MASK); mtk_hdmi_phy_clear_bits(hdmi_phy, HDMI_CON0, RG_HDMITX_EN_PRED_MASK); mtk_hdmi_phy_clear_bits(hdmi_phy, HDMI_CON0, RG_HDMITX_EN_SER_MASK); - mtk_hdmi_phy_clear_bits(hdmi_phy, HDMI_CON2, RG_HDMITX_EN_TX_POSDIV); mtk_hdmi_phy_clear_bits(hdmi_phy, HDMI_CON2, RG_HDMITX_MBIAS_LPF_EN); usleep_range(80, 100); mtk_hdmi_phy_clear_bits(hdmi_phy, HDMI_CON0, RG_HDMITX_EN_SLDO_MASK); diff --git a/drivers/gpu/drm/mediatek/mtk_mt8173_hdmi_phy.c b/drivers/gpu/drm/mediatek/mtk_mt8173_hdmi_phy.c index ed5916b27658..83662a208491 100644 --- a/drivers/gpu/drm/mediatek/mtk_mt8173_hdmi_phy.c +++ b/drivers/gpu/drm/mediatek/mtk_mt8173_hdmi_phy.c @@ -285,6 +285,14 @@ static int mtk_hdmi_pll_set_rate(struct clk_hw *hw, unsigned long rate, return 0; } +static unsigned long mtk_hdmi_pll_recalc_rate(struct clk_hw *hw, + unsigned long parent_rate) +{ + struct mtk_hdmi_phy *hdmi_phy = to_mtk_hdmi_phy(hw); + + return hdmi_phy->pll_rate; +} + static const struct clk_ops mtk_hdmi_phy_pll_ops = { .prepare = mtk_hdmi_pll_prepare, .unprepare = mtk_hdmi_pll_unprepare, -- cgit v1.2.3 From 827abdd024207146822f66ba3ba74867135866b9 Mon Sep 17 00:00:00 2001 From: Wangyan Wang Date: Tue, 9 Apr 2019 14:53:03 +0800 Subject: drm/mediatek: remove flag CLK_SET_RATE_PARENT for MT2701 hdmi phy This is the first step to make MT2701 hdmi stable. The parent rate of hdmi phy had set by DPI driver. We should not set or change the parent rate of MT2701 hdmi phy, as a result we should remove the flags of "CLK_SET_RATE_PARENT" from the clock of MT2701 hdmi phy. Signed-off-by: Wangyan Wang Signed-off-by: CK Hu --- drivers/gpu/drm/mediatek/mtk_hdmi_phy.c | 13 +++++-------- drivers/gpu/drm/mediatek/mtk_hdmi_phy.h | 1 + drivers/gpu/drm/mediatek/mtk_mt2701_hdmi_phy.c | 1 + drivers/gpu/drm/mediatek/mtk_mt8173_hdmi_phy.c | 1 + 4 files changed, 8 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/mediatek/mtk_hdmi_phy.c b/drivers/gpu/drm/mediatek/mtk_hdmi_phy.c index efc400ebbb90..08b029772c5a 100644 --- a/drivers/gpu/drm/mediatek/mtk_hdmi_phy.c +++ b/drivers/gpu/drm/mediatek/mtk_hdmi_phy.c @@ -102,13 +102,11 @@ mtk_hdmi_phy_dev_get_ops(const struct mtk_hdmi_phy *hdmi_phy) return NULL; } -static void mtk_hdmi_phy_clk_get_ops(struct mtk_hdmi_phy *hdmi_phy, - const struct clk_ops **ops) +static void mtk_hdmi_phy_clk_get_data(struct mtk_hdmi_phy *hdmi_phy, + struct clk_init_data *clk_init) { - if (hdmi_phy && hdmi_phy->conf && hdmi_phy->conf->hdmi_phy_clk_ops) - *ops = hdmi_phy->conf->hdmi_phy_clk_ops; - else - dev_err(hdmi_phy->dev, "Failed to get clk ops of phy\n"); + clk_init->flags = hdmi_phy->conf->flags; + clk_init->ops = hdmi_phy->conf->hdmi_phy_clk_ops; } static int mtk_hdmi_phy_probe(struct platform_device *pdev) @@ -121,7 +119,6 @@ static int mtk_hdmi_phy_probe(struct platform_device *pdev) struct clk_init_data clk_init = { .num_parents = 1, .parent_names = (const char * const *)&ref_clk_name, - .flags = CLK_SET_RATE_PARENT | CLK_SET_RATE_GATE, }; struct phy *phy; @@ -159,7 +156,7 @@ static int mtk_hdmi_phy_probe(struct platform_device *pdev) hdmi_phy->dev = dev; hdmi_phy->conf = (struct mtk_hdmi_phy_conf *)of_device_get_match_data(dev); - mtk_hdmi_phy_clk_get_ops(hdmi_phy, &clk_init.ops); + mtk_hdmi_phy_clk_get_data(hdmi_phy, &clk_init); hdmi_phy->pll_hw.init = &clk_init; hdmi_phy->pll = devm_clk_register(dev, &hdmi_phy->pll_hw); if (IS_ERR(hdmi_phy->pll)) { diff --git a/drivers/gpu/drm/mediatek/mtk_hdmi_phy.h b/drivers/gpu/drm/mediatek/mtk_hdmi_phy.h index 71430691ffe4..d28b8d5ed2b4 100644 --- a/drivers/gpu/drm/mediatek/mtk_hdmi_phy.h +++ b/drivers/gpu/drm/mediatek/mtk_hdmi_phy.h @@ -21,6 +21,7 @@ struct mtk_hdmi_phy; struct mtk_hdmi_phy_conf { bool tz_disabled; + unsigned long flags; const struct clk_ops *hdmi_phy_clk_ops; void (*hdmi_phy_enable_tmds)(struct mtk_hdmi_phy *hdmi_phy); void (*hdmi_phy_disable_tmds)(struct mtk_hdmi_phy *hdmi_phy); diff --git a/drivers/gpu/drm/mediatek/mtk_mt2701_hdmi_phy.c b/drivers/gpu/drm/mediatek/mtk_mt2701_hdmi_phy.c index feb6a7ed63d1..31f3175f032b 100644 --- a/drivers/gpu/drm/mediatek/mtk_mt2701_hdmi_phy.c +++ b/drivers/gpu/drm/mediatek/mtk_mt2701_hdmi_phy.c @@ -232,6 +232,7 @@ static void mtk_hdmi_phy_disable_tmds(struct mtk_hdmi_phy *hdmi_phy) struct mtk_hdmi_phy_conf mtk_hdmi_phy_2701_conf = { .tz_disabled = true, + .flags = CLK_SET_RATE_GATE, .hdmi_phy_clk_ops = &mtk_hdmi_phy_pll_ops, .hdmi_phy_enable_tmds = mtk_hdmi_phy_enable_tmds, .hdmi_phy_disable_tmds = mtk_hdmi_phy_disable_tmds, diff --git a/drivers/gpu/drm/mediatek/mtk_mt8173_hdmi_phy.c b/drivers/gpu/drm/mediatek/mtk_mt8173_hdmi_phy.c index 83662a208491..37f9503d7643 100644 --- a/drivers/gpu/drm/mediatek/mtk_mt8173_hdmi_phy.c +++ b/drivers/gpu/drm/mediatek/mtk_mt8173_hdmi_phy.c @@ -317,6 +317,7 @@ static void mtk_hdmi_phy_disable_tmds(struct mtk_hdmi_phy *hdmi_phy) } struct mtk_hdmi_phy_conf mtk_hdmi_phy_8173_conf = { + .flags = CLK_SET_RATE_PARENT | CLK_SET_RATE_GATE, .hdmi_phy_clk_ops = &mtk_hdmi_phy_pll_ops, .hdmi_phy_enable_tmds = mtk_hdmi_phy_enable_tmds, .hdmi_phy_disable_tmds = mtk_hdmi_phy_disable_tmds, -- cgit v1.2.3 From 8eeb3946feeb00486ac0909e2309da87db8988a5 Mon Sep 17 00:00:00 2001 From: Wangyan Wang Date: Tue, 9 Apr 2019 14:53:05 +0800 Subject: drm/mediatek: using new factor for tvdpll for MT2701 hdmi phy This is the second step to make MT2701 HDMI stable. The factor depends on the divider of DPI in MT2701, therefore, we should fix this factor to the right and new one. Test: search ok Signed-off-by: Wangyan Wang Signed-off-by: CK Hu --- drivers/gpu/drm/mediatek/mtk_dpi.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/mediatek/mtk_dpi.c b/drivers/gpu/drm/mediatek/mtk_dpi.c index 22e68a100e7b..5d333138f913 100644 --- a/drivers/gpu/drm/mediatek/mtk_dpi.c +++ b/drivers/gpu/drm/mediatek/mtk_dpi.c @@ -662,13 +662,11 @@ static unsigned int mt8173_calculate_factor(int clock) static unsigned int mt2701_calculate_factor(int clock) { if (clock <= 64000) - return 16; - else if (clock <= 128000) - return 8; - else if (clock <= 256000) return 4; - else + else if (clock <= 128000) return 2; + else + return 1; } static const struct mtk_dpi_conf mt8173_conf = { -- cgit v1.2.3 From 9ee76098a1b8ae21cccac641b735ee4d3a77bccf Mon Sep 17 00:00:00 2001 From: Wangyan Wang Date: Tue, 9 Apr 2019 14:53:07 +0800 Subject: drm/mediatek: no change parent rate in round_rate() for MT2701 hdmi phy This is the third step to make MT2701 HDMI stable. We should not change the rate of parent for hdmi phy when doing round_rate for this clock. The parent clock of hdmi phy must be the same as it. We change it when doing set_rate only. Signed-off-by: Wangyan Wang Signed-off-by: CK Hu --- drivers/gpu/drm/mediatek/mtk_hdmi_phy.c | 14 -------------- drivers/gpu/drm/mediatek/mtk_hdmi_phy.h | 2 -- drivers/gpu/drm/mediatek/mtk_mt2701_hdmi_phy.c | 6 ++++++ drivers/gpu/drm/mediatek/mtk_mt8173_hdmi_phy.c | 14 ++++++++++++++ 4 files changed, 20 insertions(+), 16 deletions(-) diff --git a/drivers/gpu/drm/mediatek/mtk_hdmi_phy.c b/drivers/gpu/drm/mediatek/mtk_hdmi_phy.c index 08b029772c5a..5223498502c4 100644 --- a/drivers/gpu/drm/mediatek/mtk_hdmi_phy.c +++ b/drivers/gpu/drm/mediatek/mtk_hdmi_phy.c @@ -15,20 +15,6 @@ static const struct phy_ops mtk_hdmi_phy_dev_ops = { .owner = THIS_MODULE, }; -long mtk_hdmi_pll_round_rate(struct clk_hw *hw, unsigned long rate, - unsigned long *parent_rate) -{ - struct mtk_hdmi_phy *hdmi_phy = to_mtk_hdmi_phy(hw); - - hdmi_phy->pll_rate = rate; - if (rate <= 74250000) - *parent_rate = rate; - else - *parent_rate = rate / 2; - - return rate; -} - void mtk_hdmi_phy_clear_bits(struct mtk_hdmi_phy *hdmi_phy, u32 offset, u32 bits) { diff --git a/drivers/gpu/drm/mediatek/mtk_hdmi_phy.h b/drivers/gpu/drm/mediatek/mtk_hdmi_phy.h index d28b8d5ed2b4..2d8b3182470d 100644 --- a/drivers/gpu/drm/mediatek/mtk_hdmi_phy.h +++ b/drivers/gpu/drm/mediatek/mtk_hdmi_phy.h @@ -49,8 +49,6 @@ void mtk_hdmi_phy_set_bits(struct mtk_hdmi_phy *hdmi_phy, u32 offset, void mtk_hdmi_phy_mask(struct mtk_hdmi_phy *hdmi_phy, u32 offset, u32 val, u32 mask); struct mtk_hdmi_phy *to_mtk_hdmi_phy(struct clk_hw *hw); -long mtk_hdmi_pll_round_rate(struct clk_hw *hw, unsigned long rate, - unsigned long *parent_rate); extern struct platform_driver mtk_hdmi_phy_driver; extern struct mtk_hdmi_phy_conf mtk_hdmi_phy_8173_conf; diff --git a/drivers/gpu/drm/mediatek/mtk_mt2701_hdmi_phy.c b/drivers/gpu/drm/mediatek/mtk_mt2701_hdmi_phy.c index 31f3175f032b..d3cc4022e988 100644 --- a/drivers/gpu/drm/mediatek/mtk_mt2701_hdmi_phy.c +++ b/drivers/gpu/drm/mediatek/mtk_mt2701_hdmi_phy.c @@ -106,6 +106,12 @@ static void mtk_hdmi_pll_unprepare(struct clk_hw *hw) usleep_range(80, 100); } +static long mtk_hdmi_pll_round_rate(struct clk_hw *hw, unsigned long rate, + unsigned long *parent_rate) +{ + return rate; +} + static int mtk_hdmi_pll_set_rate(struct clk_hw *hw, unsigned long rate, unsigned long parent_rate) { diff --git a/drivers/gpu/drm/mediatek/mtk_mt8173_hdmi_phy.c b/drivers/gpu/drm/mediatek/mtk_mt8173_hdmi_phy.c index 37f9503d7643..47f8a2951682 100644 --- a/drivers/gpu/drm/mediatek/mtk_mt8173_hdmi_phy.c +++ b/drivers/gpu/drm/mediatek/mtk_mt8173_hdmi_phy.c @@ -199,6 +199,20 @@ static void mtk_hdmi_pll_unprepare(struct clk_hw *hw) usleep_range(100, 150); } +static long mtk_hdmi_pll_round_rate(struct clk_hw *hw, unsigned long rate, + unsigned long *parent_rate) +{ + struct mtk_hdmi_phy *hdmi_phy = to_mtk_hdmi_phy(hw); + + hdmi_phy->pll_rate = rate; + if (rate <= 74250000) + *parent_rate = rate; + else + *parent_rate = rate / 2; + + return rate; +} + static int mtk_hdmi_pll_set_rate(struct clk_hw *hw, unsigned long rate, unsigned long parent_rate) { -- cgit v1.2.3 From 78ad2341521d5ea96cb936244ed4c4c4ef9ec13b Mon Sep 17 00:00:00 2001 From: Alexander Wetzel Date: Sat, 9 Feb 2019 15:01:38 +0100 Subject: mac80211: Honor SW_CRYPTO_CONTROL for unicast keys in AP VLAN mode Restore SW_CRYPTO_CONTROL operation on AP_VLAN interfaces for unicast keys, the original override was intended to be done for group keys as those are treated specially by mac80211 and would always have been rejected. Now the situation is that AP_VLAN support must be enabled by the driver if it can support it (meaning it can support software crypto GTK TX). Thus, also simplify the code - if we get here with AP_VLAN and non- pairwise key, software crypto must be used (driver doesn't know about the interface) and can be used (driver must've advertised AP_VLAN if it also uses SW_CRYPTO_CONTROL). Fixes: db3bdcb9c3ff ("mac80211: allow AP_VLAN operation on crypto controlled devices") Signed-off-by: Alexander Wetzel [rewrite commit message] Signed-off-by: Johannes Berg --- net/mac80211/key.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/net/mac80211/key.c b/net/mac80211/key.c index 4700718e010f..37e372896230 100644 --- a/net/mac80211/key.c +++ b/net/mac80211/key.c @@ -167,8 +167,10 @@ static int ieee80211_key_enable_hw_accel(struct ieee80211_key *key) * The driver doesn't know anything about VLAN interfaces. * Hence, don't send GTKs for VLAN interfaces to the driver. */ - if (!(key->conf.flags & IEEE80211_KEY_FLAG_PAIRWISE)) + if (!(key->conf.flags & IEEE80211_KEY_FLAG_PAIRWISE)) { + ret = 1; goto out_unsupported; + } } ret = drv_set_key(key->local, SET_KEY, sdata, @@ -213,11 +215,8 @@ static int ieee80211_key_enable_hw_accel(struct ieee80211_key *key) /* all of these we can do in software - if driver can */ if (ret == 1) return 0; - if (ieee80211_hw_check(&key->local->hw, SW_CRYPTO_CONTROL)) { - if (sdata->vif.type == NL80211_IFTYPE_AP_VLAN) - return 0; + if (ieee80211_hw_check(&key->local->hw, SW_CRYPTO_CONTROL)) return -EINVAL; - } return 0; default: return -EINVAL; -- cgit v1.2.3 From 1b937e8faa87ccfb4b7d5b230796fa67bc8a183b Mon Sep 17 00:00:00 2001 From: Anup Patel Date: Tue, 12 Mar 2019 22:08:12 +0000 Subject: RISC-V: Add separate defconfig for 32bit systems This patch adds rv32_defconfig for 32bit systems. The only difference between rv32_defconfig and defconfig is that rv32_defconfig has CONFIG_ARCH_RV32I=y. Signed-off-by: Anup Patel Signed-off-by: Palmer Dabbelt --- arch/riscv/configs/rv32_defconfig | 84 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 84 insertions(+) create mode 100644 arch/riscv/configs/rv32_defconfig diff --git a/arch/riscv/configs/rv32_defconfig b/arch/riscv/configs/rv32_defconfig new file mode 100644 index 000000000000..1a911ed8e772 --- /dev/null +++ b/arch/riscv/configs/rv32_defconfig @@ -0,0 +1,84 @@ +CONFIG_SYSVIPC=y +CONFIG_POSIX_MQUEUE=y +CONFIG_IKCONFIG=y +CONFIG_IKCONFIG_PROC=y +CONFIG_CGROUPS=y +CONFIG_CGROUP_SCHED=y +CONFIG_CFS_BANDWIDTH=y +CONFIG_CGROUP_BPF=y +CONFIG_NAMESPACES=y +CONFIG_USER_NS=y +CONFIG_CHECKPOINT_RESTORE=y +CONFIG_BLK_DEV_INITRD=y +CONFIG_EXPERT=y +CONFIG_BPF_SYSCALL=y +CONFIG_ARCH_RV32I=y +CONFIG_SMP=y +CONFIG_MODULES=y +CONFIG_MODULE_UNLOAD=y +CONFIG_NET=y +CONFIG_PACKET=y +CONFIG_UNIX=y +CONFIG_INET=y +CONFIG_IP_MULTICAST=y +CONFIG_IP_ADVANCED_ROUTER=y +CONFIG_IP_PNP=y +CONFIG_IP_PNP_DHCP=y +CONFIG_IP_PNP_BOOTP=y +CONFIG_IP_PNP_RARP=y +CONFIG_NETLINK_DIAG=y +CONFIG_PCI=y +CONFIG_PCIEPORTBUS=y +CONFIG_PCI_HOST_GENERIC=y +CONFIG_PCIE_XILINX=y +CONFIG_DEVTMPFS=y +CONFIG_BLK_DEV_LOOP=y +CONFIG_VIRTIO_BLK=y +CONFIG_BLK_DEV_SD=y +CONFIG_BLK_DEV_SR=y +CONFIG_ATA=y +CONFIG_SATA_AHCI=y +CONFIG_SATA_AHCI_PLATFORM=y +CONFIG_NETDEVICES=y +CONFIG_VIRTIO_NET=y +CONFIG_MACB=y +CONFIG_E1000E=y +CONFIG_R8169=y +CONFIG_MICROSEMI_PHY=y +CONFIG_INPUT_MOUSEDEV=y +CONFIG_SERIAL_8250=y +CONFIG_SERIAL_8250_CONSOLE=y +CONFIG_SERIAL_OF_PLATFORM=y +CONFIG_SERIAL_EARLYCON_RISCV_SBI=y +CONFIG_HVC_RISCV_SBI=y +# CONFIG_PTP_1588_CLOCK is not set +CONFIG_DRM=y +CONFIG_DRM_RADEON=y +CONFIG_FRAMEBUFFER_CONSOLE=y +CONFIG_USB=y +CONFIG_USB_XHCI_HCD=y +CONFIG_USB_XHCI_PLATFORM=y +CONFIG_USB_EHCI_HCD=y +CONFIG_USB_EHCI_HCD_PLATFORM=y +CONFIG_USB_OHCI_HCD=y +CONFIG_USB_OHCI_HCD_PLATFORM=y +CONFIG_USB_STORAGE=y +CONFIG_USB_UAS=y +CONFIG_VIRTIO_MMIO=y +CONFIG_SIFIVE_PLIC=y +CONFIG_EXT4_FS=y +CONFIG_EXT4_FS_POSIX_ACL=y +CONFIG_AUTOFS4_FS=y +CONFIG_MSDOS_FS=y +CONFIG_VFAT_FS=y +CONFIG_TMPFS=y +CONFIG_TMPFS_POSIX_ACL=y +CONFIG_NFS_FS=y +CONFIG_NFS_V4=y +CONFIG_NFS_V4_1=y +CONFIG_NFS_V4_2=y +CONFIG_ROOT_NFS=y +CONFIG_CRYPTO_USER_API_HASH=y +CONFIG_CRYPTO_DEV_VIRTIO=y +CONFIG_PRINTK_TIME=y +# CONFIG_RCU_TRACE is not set -- cgit v1.2.3 From 31634bf5dcc418b5b2cacd954394c0c4620db6a2 Mon Sep 17 00:00:00 2001 From: Saeed Mahameed Date: Tue, 19 Mar 2019 22:09:05 -0700 Subject: net/mlx5: FPGA, tls, hold rcu read lock a bit longer To avoid use-after-free, hold the rcu read lock until we are done copying flow data into the command buffer. Fixes: ab412e1dd7db ("net/mlx5: Accel, add TLS rx offload routines") Reported-by: Eric Dumazet Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/fpga/tls.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fpga/tls.c b/drivers/net/ethernet/mellanox/mlx5/core/fpga/tls.c index 8de64e88c670..08aa7266c8c0 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fpga/tls.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fpga/tls.c @@ -217,22 +217,22 @@ int mlx5_fpga_tls_resync_rx(struct mlx5_core_dev *mdev, u32 handle, u32 seq, void *cmd; int ret; - rcu_read_lock(); - flow = idr_find(&mdev->fpga->tls->rx_idr, ntohl(handle)); - rcu_read_unlock(); - - if (!flow) { - WARN_ONCE(1, "Received NULL pointer for handle\n"); - return -EINVAL; - } - buf = kzalloc(size, GFP_ATOMIC); if (!buf) return -ENOMEM; cmd = (buf + 1); + rcu_read_lock(); + flow = idr_find(&mdev->fpga->tls->rx_idr, ntohl(handle)); + if (unlikely(!flow)) { + rcu_read_unlock(); + WARN_ONCE(1, "Received NULL pointer for handle\n"); + kfree(buf); + return -EINVAL; + } mlx5_fpga_tls_flow_to_cmd(flow, cmd); + rcu_read_unlock(); MLX5_SET(tls_cmd, cmd, swid, ntohl(handle)); MLX5_SET64(tls_cmd, cmd, tls_rcd_sn, be64_to_cpu(rcd_sn)); -- cgit v1.2.3 From df3a8344d404a810b4aadbf19b08c8232fbaa715 Mon Sep 17 00:00:00 2001 From: Saeed Mahameed Date: Tue, 19 Mar 2019 01:05:41 -0700 Subject: net/mlx5: FPGA, tls, idr remove on flow delete Flow is kfreed on mlx5_fpga_tls_del_flow but kept in the idr data structure, this is risky and can cause use-after-free, since the idr_remove is delayed until tls_send_teardown_cmd completion. Instead of delaying idr_remove, in this patch we do it on mlx5_fpga_tls_del_flow, before actually kfree(flow). Added synchronize_rcu before kfree(flow) Fixes: ab412e1dd7db ("net/mlx5: Accel, add TLS rx offload routines") Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/fpga/tls.c | 43 ++++++++-------------- 1 file changed, 15 insertions(+), 28 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fpga/tls.c b/drivers/net/ethernet/mellanox/mlx5/core/fpga/tls.c index 08aa7266c8c0..22a2ef111514 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fpga/tls.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fpga/tls.c @@ -148,14 +148,16 @@ static int mlx5_fpga_tls_alloc_swid(struct idr *idr, spinlock_t *idr_spinlock, return ret; } -static void mlx5_fpga_tls_release_swid(struct idr *idr, - spinlock_t *idr_spinlock, u32 swid) +static void *mlx5_fpga_tls_release_swid(struct idr *idr, + spinlock_t *idr_spinlock, u32 swid) { unsigned long flags; + void *ptr; spin_lock_irqsave(idr_spinlock, flags); - idr_remove(idr, swid); + ptr = idr_remove(idr, swid); spin_unlock_irqrestore(idr_spinlock, flags); + return ptr; } static void mlx_tls_kfree_complete(struct mlx5_fpga_conn *conn, @@ -165,20 +167,12 @@ static void mlx_tls_kfree_complete(struct mlx5_fpga_conn *conn, kfree(buf); } -struct mlx5_teardown_stream_context { - struct mlx5_fpga_tls_command_context cmd; - u32 swid; -}; - static void mlx5_fpga_tls_teardown_completion(struct mlx5_fpga_conn *conn, struct mlx5_fpga_device *fdev, struct mlx5_fpga_tls_command_context *cmd, struct mlx5_fpga_dma_buf *resp) { - struct mlx5_teardown_stream_context *ctx = - container_of(cmd, struct mlx5_teardown_stream_context, cmd); - if (resp) { u32 syndrome = MLX5_GET(tls_resp, resp->sg[0].data, syndrome); @@ -186,14 +180,6 @@ mlx5_fpga_tls_teardown_completion(struct mlx5_fpga_conn *conn, mlx5_fpga_err(fdev, "Teardown stream failed with syndrome = %d", syndrome); - else if (MLX5_GET(tls_cmd, cmd->buf.sg[0].data, direction_sx)) - mlx5_fpga_tls_release_swid(&fdev->tls->tx_idr, - &fdev->tls->tx_idr_spinlock, - ctx->swid); - else - mlx5_fpga_tls_release_swid(&fdev->tls->rx_idr, - &fdev->tls->rx_idr_spinlock, - ctx->swid); } mlx5_fpga_tls_put_command_ctx(cmd); } @@ -253,7 +239,7 @@ int mlx5_fpga_tls_resync_rx(struct mlx5_core_dev *mdev, u32 handle, u32 seq, static void mlx5_fpga_tls_send_teardown_cmd(struct mlx5_core_dev *mdev, void *flow, u32 swid, gfp_t flags) { - struct mlx5_teardown_stream_context *ctx; + struct mlx5_fpga_tls_command_context *ctx; struct mlx5_fpga_dma_buf *buf; void *cmd; @@ -261,7 +247,7 @@ static void mlx5_fpga_tls_send_teardown_cmd(struct mlx5_core_dev *mdev, if (!ctx) return; - buf = &ctx->cmd.buf; + buf = &ctx->buf; cmd = (ctx + 1); MLX5_SET(tls_cmd, cmd, command_type, CMD_TEARDOWN_STREAM); MLX5_SET(tls_cmd, cmd, swid, swid); @@ -272,8 +258,7 @@ static void mlx5_fpga_tls_send_teardown_cmd(struct mlx5_core_dev *mdev, buf->sg[0].data = cmd; buf->sg[0].size = MLX5_TLS_COMMAND_SIZE; - ctx->swid = swid; - mlx5_fpga_tls_cmd_send(mdev->fpga, &ctx->cmd, + mlx5_fpga_tls_cmd_send(mdev->fpga, ctx, mlx5_fpga_tls_teardown_completion); } @@ -283,13 +268,14 @@ void mlx5_fpga_tls_del_flow(struct mlx5_core_dev *mdev, u32 swid, struct mlx5_fpga_tls *tls = mdev->fpga->tls; void *flow; - rcu_read_lock(); if (direction_sx) - flow = idr_find(&tls->tx_idr, swid); + flow = mlx5_fpga_tls_release_swid(&tls->tx_idr, + &tls->tx_idr_spinlock, + swid); else - flow = idr_find(&tls->rx_idr, swid); - - rcu_read_unlock(); + flow = mlx5_fpga_tls_release_swid(&tls->rx_idr, + &tls->rx_idr_spinlock, + swid); if (!flow) { mlx5_fpga_err(mdev->fpga, "No flow information for swid %u\n", @@ -297,6 +283,7 @@ void mlx5_fpga_tls_del_flow(struct mlx5_core_dev *mdev, u32 swid, return; } + synchronize_rcu(); /* before kfree(flow) */ mlx5_fpga_tls_send_teardown_cmd(mdev, flow, swid, flags); } -- cgit v1.2.3 From 192fba79822d9612af5ccd3f8aa05c922640ee13 Mon Sep 17 00:00:00 2001 From: Eran Ben Elisha Date: Thu, 28 Mar 2019 10:00:35 +0200 Subject: net/mlx5e: Skip un-needed tx recover if interface state is down Skip recover operation if interface is in down state as TX objects are not open. This fixes a bug were the recover flow re-opened TX objects which were not opened before, leading to a possible memory leak at driver unload. Fixes: de8650a82071 ("net/mlx5e: Add tx reporter support") Signed-off-by: Eran Ben Elisha Reviewed-by: Tariq Toukan Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c index 9d38e62cdf24..a85843e4e925 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c @@ -186,12 +186,18 @@ static int mlx5e_tx_reporter_recover_from_ctx(struct mlx5e_tx_err_ctx *err_ctx) static int mlx5e_tx_reporter_recover_all(struct mlx5e_priv *priv) { - int err; + int err = 0; rtnl_lock(); mutex_lock(&priv->state_lock); + + if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) + goto out; + mlx5e_close_locked(priv->netdev); err = mlx5e_open_locked(priv->netdev); + +out: mutex_unlock(&priv->state_lock); rtnl_unlock(); -- cgit v1.2.3 From 484c1ada0bd2bdcb76f849ae77983e24320a2d1d Mon Sep 17 00:00:00 2001 From: Eran Ben Elisha Date: Thu, 28 Mar 2019 14:26:47 +0200 Subject: net/mlx5e: Use fail-safe channels reopen in tx reporter recover When requested to recover from error, the tx reporter might open new channels and close the existing ones. Use safe channels switch flow in order to guarantee opened channels at the end of the recover flow. For this purpose, define mlx5e_safe_reopen_channels function and use it within those flows. Fixes: de8650a82071 ("net/mlx5e: Add tx reporter support") Signed-off-by: Eran Ben Elisha Reviewed-by: Tariq Toukan Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en.h | 1 + drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c | 3 +-- drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 13 ++++++++++--- 3 files changed, 12 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h index 71c65cc17904..d3eaf2ceaa39 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h @@ -858,6 +858,7 @@ void mlx5e_close_channels(struct mlx5e_channels *chs); * switching channels */ typedef int (*mlx5e_fp_hw_modify)(struct mlx5e_priv *priv); +int mlx5e_safe_reopen_channels(struct mlx5e_priv *priv); int mlx5e_safe_switch_channels(struct mlx5e_priv *priv, struct mlx5e_channels *new_chs, mlx5e_fp_hw_modify hw_modify); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c index a85843e4e925..476dd97f7f2f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c @@ -194,8 +194,7 @@ static int mlx5e_tx_reporter_recover_all(struct mlx5e_priv *priv) if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) goto out; - mlx5e_close_locked(priv->netdev); - err = mlx5e_open_locked(priv->netdev); + err = mlx5e_safe_reopen_channels(priv); out: mutex_unlock(&priv->state_lock); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index b5fdbd3190d9..002e2adb3722 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -2937,6 +2937,14 @@ int mlx5e_safe_switch_channels(struct mlx5e_priv *priv, return 0; } +int mlx5e_safe_reopen_channels(struct mlx5e_priv *priv) +{ + struct mlx5e_channels new_channels = {}; + + new_channels.params = priv->channels.params; + return mlx5e_safe_switch_channels(priv, &new_channels, NULL); +} + void mlx5e_timestamp_init(struct mlx5e_priv *priv) { priv->tstamp.tx_type = HWTSTAMP_TX_OFF; @@ -4161,11 +4169,10 @@ static void mlx5e_tx_timeout_work(struct work_struct *work) if (!report_failed) goto unlock; - mlx5e_close_locked(priv->netdev); - err = mlx5e_open_locked(priv->netdev); + err = mlx5e_safe_reopen_channels(priv); if (err) netdev_err(priv->netdev, - "mlx5e_open_locked failed recovering from a tx_timeout, err(%d).\n", + "mlx5e_safe_reopen_channels failed recovering from a tx_timeout, err(%d).\n", err); unlock: -- cgit v1.2.3 From 5d0bb3bac4b9f6c22280b04545626fdfd99edc6b Mon Sep 17 00:00:00 2001 From: Saeed Mahameed Date: Thu, 21 Mar 2019 19:07:20 -0700 Subject: net/mlx5e: XDP, Avoid checksum complete when XDP prog is loaded XDP programs might change packets data contents which will make the reported skb checksum (checksum complete) invalid. When XDP programs are loaded/unloaded set/clear rx RQs MLX5E_RQ_STATE_NO_CSUM_COMPLETE flag. Fixes: 86994156c736 ("net/mlx5e: XDP fast RX drop bpf programs support") Reviewed-by: Tariq Toukan Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c | 3 ++- drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 6 +++++- drivers/net/ethernet/mellanox/mlx5/core/en_rx.c | 3 ++- 3 files changed, 9 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c index 5efce4a3ff79..76a3d01a489e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c @@ -1768,7 +1768,8 @@ static int set_pflag_rx_no_csum_complete(struct net_device *netdev, bool enable) struct mlx5e_channel *c; int i; - if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) + if (!test_bit(MLX5E_STATE_OPENED, &priv->state) || + priv->channels.params.xdp_prog) return 0; for (i = 0; i < channels->num; i++) { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 002e2adb3722..4187c43b20ad 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -951,7 +951,11 @@ static int mlx5e_open_rq(struct mlx5e_channel *c, if (params->rx_dim_enabled) __set_bit(MLX5E_RQ_STATE_AM, &c->rq.state); - if (MLX5E_GET_PFLAG(params, MLX5E_PFLAG_RX_NO_CSUM_COMPLETE)) + /* We disable csum_complete when XDP is enabled since + * XDP programs might manipulate packets which will render + * skb->checksum incorrect. + */ + if (MLX5E_GET_PFLAG(params, MLX5E_PFLAG_RX_NO_CSUM_COMPLETE) || c->xdp) __set_bit(MLX5E_RQ_STATE_NO_CSUM_COMPLETE, &c->rq.state); return 0; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c index 3dde5c7e0739..91af48344743 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c @@ -752,7 +752,8 @@ static inline void mlx5e_handle_csum(struct net_device *netdev, return; } - if (unlikely(test_bit(MLX5E_RQ_STATE_NO_CSUM_COMPLETE, &rq->state))) + /* True when explicitly set via priv flag, or XDP prog is loaded */ + if (test_bit(MLX5E_RQ_STATE_NO_CSUM_COMPLETE, &rq->state)) goto csum_unnecessary; /* CQE csum doesn't cover padding octets in short ethernet -- cgit v1.2.3 From 0aa1d18615c163f92935b806dcaff9157645233a Mon Sep 17 00:00:00 2001 From: Saeed Mahameed Date: Tue, 12 Mar 2019 00:24:52 -0700 Subject: net/mlx5e: Rx, Fixup skb checksum for packets with tail padding When an ethernet frame with ip payload is padded, the padding octets are not covered by the hardware checksum. Prior to the cited commit, skb checksum was forced to be CHECKSUM_NONE when padding is detected. After it, the kernel will try to trim the padding bytes and subtract their checksum from skb->csum. In this patch we fixup skb->csum for any ip packet with tail padding of any size, if any padding found. FCS case is just one special case of this general purpose patch, hence, it is removed. Fixes: 88078d98d1bb ("net: pskb_trim_rcsum() and CHECKSUM_COMPLETE are friends"), Cc: Eric Dumazet Reviewed-by: Tariq Toukan Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en_rx.c | 79 ++++++++++++++++++---- drivers/net/ethernet/mellanox/mlx5/core/en_stats.c | 6 ++ drivers/net/ethernet/mellanox/mlx5/core/en_stats.h | 4 ++ 3 files changed, 74 insertions(+), 15 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c index 91af48344743..0e254de23f3d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c @@ -712,17 +712,6 @@ static inline void mlx5e_enable_ecn(struct mlx5e_rq *rq, struct sk_buff *skb) rq->stats->ecn_mark += !!rc; } -static u32 mlx5e_get_fcs(const struct sk_buff *skb) -{ - const void *fcs_bytes; - u32 _fcs_bytes; - - fcs_bytes = skb_header_pointer(skb, skb->len - ETH_FCS_LEN, - ETH_FCS_LEN, &_fcs_bytes); - - return __get_unaligned_cpu32(fcs_bytes); -} - static u8 get_ip_proto(struct sk_buff *skb, int network_depth, __be16 proto) { void *ip_p = skb->data + network_depth; @@ -733,6 +722,68 @@ static u8 get_ip_proto(struct sk_buff *skb, int network_depth, __be16 proto) #define short_frame(size) ((size) <= ETH_ZLEN + ETH_FCS_LEN) +#define MAX_PADDING 8 + +static void +tail_padding_csum_slow(struct sk_buff *skb, int offset, int len, + struct mlx5e_rq_stats *stats) +{ + stats->csum_complete_tail_slow++; + skb->csum = csum_block_add(skb->csum, + skb_checksum(skb, offset, len, 0), + offset); +} + +static void +tail_padding_csum(struct sk_buff *skb, int offset, + struct mlx5e_rq_stats *stats) +{ + u8 tail_padding[MAX_PADDING]; + int len = skb->len - offset; + void *tail; + + if (unlikely(len > MAX_PADDING)) { + tail_padding_csum_slow(skb, offset, len, stats); + return; + } + + tail = skb_header_pointer(skb, offset, len, tail_padding); + if (unlikely(!tail)) { + tail_padding_csum_slow(skb, offset, len, stats); + return; + } + + stats->csum_complete_tail++; + skb->csum = csum_block_add(skb->csum, csum_partial(tail, len, 0), offset); +} + +static void +mlx5e_skb_padding_csum(struct sk_buff *skb, int network_depth, __be16 proto, + struct mlx5e_rq_stats *stats) +{ + struct ipv6hdr *ip6; + struct iphdr *ip4; + int pkt_len; + + switch (proto) { + case htons(ETH_P_IP): + ip4 = (struct iphdr *)(skb->data + network_depth); + pkt_len = network_depth + ntohs(ip4->tot_len); + break; + case htons(ETH_P_IPV6): + ip6 = (struct ipv6hdr *)(skb->data + network_depth); + pkt_len = network_depth + sizeof(*ip6) + ntohs(ip6->payload_len); + break; + default: + return; + } + + if (likely(pkt_len >= skb->len)) + return; + + tail_padding_csum(skb, pkt_len, stats); +} + static inline void mlx5e_handle_csum(struct net_device *netdev, struct mlx5_cqe64 *cqe, struct mlx5e_rq *rq, @@ -781,10 +832,8 @@ static inline void mlx5e_handle_csum(struct net_device *netdev, skb->csum = csum_partial(skb->data + ETH_HLEN, network_depth - ETH_HLEN, skb->csum); - if (unlikely(netdev->features & NETIF_F_RXFCS)) - skb->csum = csum_block_add(skb->csum, - (__force __wsum)mlx5e_get_fcs(skb), - skb->len - ETH_FCS_LEN); + + mlx5e_skb_padding_csum(skb, network_depth, proto, stats); stats->csum_complete++; return; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c index 1a78e05cbba8..b75aa8b8bf04 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c @@ -59,6 +59,8 @@ static const struct counter_desc sw_stats_desc[] = { { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_csum_unnecessary) }, { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_csum_none) }, { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_csum_complete) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_csum_complete_tail) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_csum_complete_tail_slow) }, { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_csum_unnecessary_inner) }, { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_xdp_drop) }, { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_xdp_redirect) }, @@ -151,6 +153,8 @@ static void mlx5e_grp_sw_update_stats(struct mlx5e_priv *priv) s->rx_removed_vlan_packets += rq_stats->removed_vlan_packets; s->rx_csum_none += rq_stats->csum_none; s->rx_csum_complete += rq_stats->csum_complete; + s->rx_csum_complete_tail += rq_stats->csum_complete_tail; + s->rx_csum_complete_tail_slow += rq_stats->csum_complete_tail_slow; s->rx_csum_unnecessary += rq_stats->csum_unnecessary; s->rx_csum_unnecessary_inner += rq_stats->csum_unnecessary_inner; s->rx_xdp_drop += rq_stats->xdp_drop; @@ -1190,6 +1194,8 @@ static const struct counter_desc rq_stats_desc[] = { { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, packets) }, { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, bytes) }, { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, csum_complete) }, + { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, csum_complete_tail) }, + { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, csum_complete_tail_slow) }, { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, csum_unnecessary) }, { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, csum_unnecessary_inner) }, { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, csum_none) }, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h index 4640d4f986f8..16c3b785f282 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h @@ -71,6 +71,8 @@ struct mlx5e_sw_stats { u64 rx_csum_unnecessary; u64 rx_csum_none; u64 rx_csum_complete; + u64 rx_csum_complete_tail; + u64 rx_csum_complete_tail_slow; u64 rx_csum_unnecessary_inner; u64 rx_xdp_drop; u64 rx_xdp_redirect; @@ -181,6 +183,8 @@ struct mlx5e_rq_stats { u64 packets; u64 bytes; u64 csum_complete; + u64 csum_complete_tail; + u64 csum_complete_tail_slow; u64 csum_unnecessary; u64 csum_unnecessary_inner; u64 csum_none; -- cgit v1.2.3 From 0318a7b7fcad9765931146efa7ca3a034194737c Mon Sep 17 00:00:00 2001 From: Saeed Mahameed Date: Mon, 25 Mar 2019 22:10:59 -0700 Subject: net/mlx5e: Rx, Check ip headers sanity In the two places is_last_ethertype_ip is being called, the caller will be looking inside the ip header, to be safe, add ip{4,6} header sanity check. And return true only on valid ip headers, i.e: the whole header is contained in the linear part of the skb. Note: Such situation is very rare and hard to reproduce, since mlx5e allocates a large enough headroom to contain the largest header one can imagine. Fixes: fe1dc069990c ("net/mlx5e: don't set CHECKSUM_COMPLETE on SCTP packets") Reported-by: Cong Wang Reviewed-by: Tariq Toukan Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en_rx.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c index 0e254de23f3d..36fd628188bc 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c @@ -692,7 +692,14 @@ static inline bool is_last_ethertype_ip(struct sk_buff *skb, int *network_depth, { *proto = ((struct ethhdr *)skb->data)->h_proto; *proto = __vlan_get_protocol(skb, *proto, network_depth); - return (*proto == htons(ETH_P_IP) || *proto == htons(ETH_P_IPV6)); + + if (*proto == htons(ETH_P_IP)) + return pskb_may_pull(skb, *network_depth + sizeof(struct iphdr)); + + if (*proto == htons(ETH_P_IPV6)) + return pskb_may_pull(skb, *network_depth + sizeof(struct ipv6hdr)); + + return false; } static inline void mlx5e_enable_ecn(struct mlx5e_rq *rq, struct sk_buff *skb) -- cgit v1.2.3 From 5e0060b1491b299b1706414e61ede0b02265680e Mon Sep 17 00:00:00 2001 From: Dmytro Linkin Date: Fri, 29 Mar 2019 12:50:37 +0000 Subject: net/mlx5e: Protect against non-uplink representor for encap TC encap offload is supported only for the physical uplink representor. Fail for non uplink representor. Fixes: 3e621b19b0bb ("net/mlx5e: Support TC encapsulation offloads with upper devices") Signed-off-by: Dmytro Linkin Reviewed-by: Eli Britstein Reviewed-by: Vlad Buslov Reviewed-by: Roi Dayan Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c index fa2a3c444cdc..eec07b34b4ad 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c @@ -39,6 +39,10 @@ static int get_route_and_out_devs(struct mlx5e_priv *priv, return -EOPNOTSUPP; } + if (!(mlx5e_eswitch_rep(*out_dev) && + mlx5e_is_uplink_rep(netdev_priv(*out_dev)))) + return -EOPNOTSUPP; + return 0; } -- cgit v1.2.3 From 8c8811d46d00d119ffbe039a6e52a0b504df1c2c Mon Sep 17 00:00:00 2001 From: Or Gerlitz Date: Sun, 31 Mar 2019 12:53:03 +0000 Subject: Revert "net/mlx5e: Enable reporting checksum unnecessary also for L3 packets" This reverts commit b820e6fb0978f9c2ac438c199d2bb2f35950e9c9. Prior the commit we are reverting, checksum unnecessary was only set when both the L3 OK and L4 OK bits are set on the CQE. This caused packets of IP protocols such as SCTP which are not dealt by the current HW L4 parser (hence the L4 OK bit is not set, but the L4 header type none bit is set) to go through the checksum none code, where currently we wrongly report checksum unnecessary for them, a regression. Fix this by a revert. Note that on our usual track we report checksum complete, so the revert isn't expected to have any notable performance impact. Also, when we are not on the checksum complete track, the L4 protocols for which we report checksum none are not high performance ones, we will still report checksum unnecessary for UDP/TCP. Fixes: b820e6fb0978 ("net/mlx5e: Enable reporting checksum unnecessary also for L3 packets") Signed-off-by: Or Gerlitz Reported-by: Avi Urman Reviewed-by: Tariq Toukan Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en_rx.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c index 36fd628188bc..c3b3002ff62f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c @@ -847,8 +847,7 @@ static inline void mlx5e_handle_csum(struct net_device *netdev, csum_unnecessary: if (likely((cqe->hds_ip_ext & CQE_L3_OK) && - ((cqe->hds_ip_ext & CQE_L4_OK) || - (get_cqe_l4_hdr_type(cqe) == CQE_L4_HDR_TYPE_NONE)))) { + (cqe->hds_ip_ext & CQE_L4_OK))) { skb->ip_summed = CHECKSUM_UNNECESSARY; if (cqe_is_tunneled(cqe)) { skb->csum_level = 1; -- cgit v1.2.3 From 7ee2ace9c544a0886e02b54b625e521df8692d20 Mon Sep 17 00:00:00 2001 From: Konstantin Khlebnikov Date: Fri, 31 Aug 2018 14:29:16 +0300 Subject: net/mlx5e: Switch to Toeplitz RSS hash by default Although XOR hash function can perform very well on some special use cases, to align with all drivers, mlx5 driver should use Toeplitz hash by default. Toeplitz is more stable for the general use case and it is more standard and reliable. On top of that, since XOR (MLX5_RX_HASH_FN_INVERTED_XOR8) gives only a repeated 8 bits pattern. When used for udp tunneling RSS source port manipulation it results in fixed source port, which will cause bad RSS spread. Fixes: 2be6967cdbc9 ("net/mlx5e: Support ETH_RSS_HASH_XOR") Signed-off-by: Konstantin Khlebnikov Reviewed-by: Tariq Toukan Signed-off-by: Moshe Shemesh Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 4187c43b20ad..f7eb521db580 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -4564,7 +4564,7 @@ void mlx5e_build_rss_params(struct mlx5e_rss_params *rss_params, { enum mlx5e_traffic_types tt; - rss_params->hfunc = ETH_RSS_HASH_XOR; + rss_params->hfunc = ETH_RSS_HASH_TOP; netdev_rss_key_fill(rss_params->toeplitz_hash_key, sizeof(rss_params->toeplitz_hash_key)); mlx5e_build_default_indir_rqt(rss_params->indirection_rqt, -- cgit v1.2.3 From dbe7208c6c4aec083571f2ec742870a0d0edbea3 Mon Sep 17 00:00:00 2001 From: Tony Lindgren Date: Sun, 7 Apr 2019 11:12:48 -0700 Subject: power: supply: cpcap-battery: Fix division by zero If called fast enough so samples do not increment, we can get division by zero in kernel: __div0 cpcap_battery_cc_raw_div cpcap_battery_get_property power_supply_get_property.part.1 power_supply_get_property power_supply_show_property power_supply_uevent Fixes: 874b2adbed12 ("power: supply: cpcap-battery: Add a battery driver") Signed-off-by: Tony Lindgren Acked-by: Pavel Machek Signed-off-by: Sebastian Reichel --- drivers/power/supply/cpcap-battery.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/power/supply/cpcap-battery.c b/drivers/power/supply/cpcap-battery.c index 08d5037fd052..6887870ba32c 100644 --- a/drivers/power/supply/cpcap-battery.c +++ b/drivers/power/supply/cpcap-battery.c @@ -221,6 +221,9 @@ static int cpcap_battery_cc_raw_div(struct cpcap_battery_ddata *ddata, int avg_current; u32 cc_lsb; + if (!divider) + return 0; + sample &= 0xffffff; /* 24-bits, unsigned */ offset &= 0x7ff; /* 10-bits, signed */ -- cgit v1.2.3 From 6ec4bae178d8a1e9814eb3bfdd321b0475de0468 Mon Sep 17 00:00:00 2001 From: Paul Walmsley Date: Wed, 3 Apr 2019 17:38:20 -0700 Subject: dt-bindings: clock: sifive: add FU540-C000 PRCI clock constants Add preprocessor macros for the important PRCI output clocks that are needed by both the FU540 PRCI driver and DT data. Details are available in the FU540 manual in Chapter 7 of https://static.dev.sifive.com/FU540-C000-v1.0.pdf Signed-off-by: Paul Walmsley Reviewed-by: Rob Herring Signed-off-by: Palmer Dabbelt --- include/dt-bindings/clock/sifive-fu540-prci.h | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) create mode 100644 include/dt-bindings/clock/sifive-fu540-prci.h diff --git a/include/dt-bindings/clock/sifive-fu540-prci.h b/include/dt-bindings/clock/sifive-fu540-prci.h new file mode 100644 index 000000000000..6a0b70a37d78 --- /dev/null +++ b/include/dt-bindings/clock/sifive-fu540-prci.h @@ -0,0 +1,18 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2018-2019 SiFive, Inc. + * Wesley Terpstra + * Paul Walmsley + */ + +#ifndef __DT_BINDINGS_CLOCK_SIFIVE_FU540_PRCI_H +#define __DT_BINDINGS_CLOCK_SIFIVE_FU540_PRCI_H + +/* Clock indexes for use by Device Tree data and the PRCI driver */ + +#define PRCI_CLK_COREPLL 0 +#define PRCI_CLK_DDRPLL 1 +#define PRCI_CLK_GEMGXLPLL 2 +#define PRCI_CLK_TLCLK 3 + +#endif -- cgit v1.2.3 From 771acc7e4a6e5dba779cb1a7fd851a164bc81033 Mon Sep 17 00:00:00 2001 From: Brian Norris Date: Tue, 9 Apr 2019 11:49:17 -0700 Subject: Bluetooth: btusb: request wake pin with NOAUTOEN Badly-designed systems might have (for example) active-high wake pins that default to high (e.g., because of external pull ups) until they have an active firmware which starts driving it low. This can cause an interrupt storm in the time between request_irq() and disable_irq(). We don't support shared interrupts here, so let's just pre-configure the interrupt to avoid auto-enabling it. Fixes: fd913ef7ce61 ("Bluetooth: btusb: Add out-of-band wakeup support") Fixes: 5364a0b4f4be ("arm64: dts: rockchip: move QCA6174A wakeup pin into its USB node") Signed-off-by: Brian Norris Reviewed-by: Matthias Kaehlcke Signed-off-by: Linus Torvalds --- drivers/bluetooth/btusb.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/bluetooth/btusb.c b/drivers/bluetooth/btusb.c index ded198328f21..7db48ae65cd2 100644 --- a/drivers/bluetooth/btusb.c +++ b/drivers/bluetooth/btusb.c @@ -2942,6 +2942,7 @@ static int btusb_config_oob_wake(struct hci_dev *hdev) return 0; } + irq_set_status_flags(irq, IRQ_NOAUTOEN); ret = devm_request_irq(&hdev->dev, irq, btusb_oob_wake_handler, 0, "OOB Wake-on-BT", data); if (ret) { @@ -2956,7 +2957,6 @@ static int btusb_config_oob_wake(struct hci_dev *hdev) } data->oob_wake_irq = irq; - disable_irq(irq); bt_dev_info(hdev, "OOB Wake-on-BT configured at IRQ %u", irq); return 0; } -- cgit v1.2.3 From cf7cf6977f531acd5dfe55250d0ee8cbbb6f1ae8 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Tue, 9 Apr 2019 15:43:11 +1000 Subject: powerpc/mm: Define MAX_PHYSMEM_BITS for all 64-bit configs The recent commit 8bc086899816 ("powerpc/mm: Only define MAX_PHYSMEM_BITS in SPARSEMEM configurations") removed our definition of MAX_PHYSMEM_BITS when SPARSEMEM is disabled. This inadvertently broke some 64-bit FLATMEM using configs with eg: arch/powerpc/include/asm/book3s/64/mmu-hash.h:584:6: error: "MAX_PHYSMEM_BITS" is not defined, evaluates to 0 #if (MAX_PHYSMEM_BITS > MAX_EA_BITS_PER_CONTEXT) ^~~~~~~~~~~~~~~~ Fix it by making sure we define MAX_PHYSMEM_BITS for all 64-bit configs regardless of SPARSEMEM. Fixes: 8bc086899816 ("powerpc/mm: Only define MAX_PHYSMEM_BITS in SPARSEMEM configurations") Reported-by: Andreas Schwab Reported-by: Hugh Dickins Reviewed-by: Aneesh Kumar K.V Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/mmu.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/include/asm/mmu.h b/arch/powerpc/include/asm/mmu.h index 598cdcdd1355..8ddd4a91bdc1 100644 --- a/arch/powerpc/include/asm/mmu.h +++ b/arch/powerpc/include/asm/mmu.h @@ -352,7 +352,7 @@ static inline bool strict_kernel_rwx_enabled(void) #if defined(CONFIG_SPARSEMEM_VMEMMAP) && defined(CONFIG_SPARSEMEM_EXTREME) && \ defined (CONFIG_PPC_64K_PAGES) #define MAX_PHYSMEM_BITS 51 -#elif defined(CONFIG_SPARSEMEM) +#elif defined(CONFIG_PPC64) #define MAX_PHYSMEM_BITS 46 #endif -- cgit v1.2.3 From 9752c37cc89f43675e70cf9acff23519fa84b48c Mon Sep 17 00:00:00 2001 From: Vitor Soares Date: Tue, 9 Apr 2019 18:59:59 +0200 Subject: i3c: Fix the verification of random PID The validation of random PID should be done by checking the boardinfo->pid instead of info.pid which is empty. Doing the change the info struture declaration is no longer necessary. Cc: Boris Brezillon Cc: Fixes: 3a379bbcea0a ("i3c: Add core I3C infrastructure") Signed-off-by: Vitor Soares Signed-off-by: Boris Brezillon --- drivers/i3c/master.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/i3c/master.c b/drivers/i3c/master.c index 2dc628d4f1ae..1412abcff010 100644 --- a/drivers/i3c/master.c +++ b/drivers/i3c/master.c @@ -1980,7 +1980,6 @@ of_i3c_master_add_i3c_boardinfo(struct i3c_master_controller *master, { struct i3c_dev_boardinfo *boardinfo; struct device *dev = &master->dev; - struct i3c_device_info info = { }; enum i3c_addr_slot_status addrstatus; u32 init_dyn_addr = 0; @@ -2012,8 +2011,8 @@ of_i3c_master_add_i3c_boardinfo(struct i3c_master_controller *master, boardinfo->pid = ((u64)reg[1] << 32) | reg[2]; - if ((info.pid & GENMASK_ULL(63, 48)) || - I3C_PID_RND_LOWER_32BITS(info.pid)) + if ((boardinfo->pid & GENMASK_ULL(63, 48)) || + I3C_PID_RND_LOWER_32BITS(boardinfo->pid)) return -EINVAL; boardinfo->init_dyn_addr = init_dyn_addr; -- cgit v1.2.3 From 907621e94d49b85cd76f13110eceb940a182c69e Mon Sep 17 00:00:00 2001 From: Vitor Soares Date: Mon, 8 Apr 2019 13:13:34 +0200 Subject: i3c: dw: Fix dw_i3c_master_disable controller by using correct mask The controller was being disabled incorrectly. The correct way is to clear the DEV_CTRL_ENABLE bit. Fix this by clearing this bit. Cc: Boris Brezillon Cc: Fixes: 1dd728f5d4d4 ("i3c: master: Add driver for Synopsys DesignWare IP") Signed-off-by: Vitor Soares Signed-off-by: Boris Brezillon --- drivers/i3c/master/dw-i3c-master.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/i3c/master/dw-i3c-master.c b/drivers/i3c/master/dw-i3c-master.c index 59279224e07f..10c26ffaa8ef 100644 --- a/drivers/i3c/master/dw-i3c-master.c +++ b/drivers/i3c/master/dw-i3c-master.c @@ -300,7 +300,7 @@ to_dw_i3c_master(struct i3c_master_controller *master) static void dw_i3c_master_disable(struct dw_i3c_master *master) { - writel(readl(master->regs + DEVICE_CTRL) & DEV_CTRL_ENABLE, + writel(readl(master->regs + DEVICE_CTRL) & ~DEV_CTRL_ENABLE, master->regs + DEVICE_CTRL); } -- cgit v1.2.3 From 709a53e1948494cc4f6c4636c6f84a4d36a8117e Mon Sep 17 00:00:00 2001 From: Boris Brezillon Date: Sat, 30 Mar 2019 09:02:14 +0100 Subject: MAINTAINERS: Fix the I3C entry There's no include/dt-bindings/i3c/ directory, remove this F: entry from the I3C file patterns. Cc: Greg Kroah-Hartman Cc: Joe Perches Reported-by: Joe Perches Fixes: 4f26d0666961 ("MAINTAINERS: Add myself as the I3C subsystem maintainer") Signed-off-by: Boris Brezillon --- MAINTAINERS | 1 - 1 file changed, 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index 2359e12e4c41..30d3010c8825 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -7333,7 +7333,6 @@ F: Documentation/devicetree/bindings/i3c/ F: Documentation/driver-api/i3c F: drivers/i3c/ F: include/linux/i3c/ -F: include/dt-bindings/i3c/ I3C DRIVER FOR SYNOPSYS DESIGNWARE M: Vitor Soares -- cgit v1.2.3 From 3966c3feca3fd10b2935caa0b4a08c7dd59469e5 Mon Sep 17 00:00:00 2001 From: "Lendacky, Thomas" Date: Tue, 2 Apr 2019 15:21:18 +0000 Subject: x86/perf/amd: Remove need to check "running" bit in NMI handler Spurious interrupt support was added to perf in the following commit, almost a decade ago: 63e6be6d98e1 ("perf, x86: Catch spurious interrupts after disabling counters") The two previous patches (resolving the race condition when disabling a PMC and NMI latency mitigation) allow for the removal of this older spurious interrupt support. Currently in x86_pmu_stop(), the bit for the PMC in the active_mask bitmap is cleared before disabling the PMC, which sets up a race condition. This race condition was mitigated by introducing the running bitmap. That race condition can be eliminated by first disabling the PMC, waiting for PMC reset on overflow and then clearing the bit for the PMC in the active_mask bitmap. The NMI handler will not re-enable a disabled counter. If x86_pmu_stop() is called from the perf NMI handler, the NMI latency mitigation support will guard against any unhandled NMI messages. Signed-off-by: Tom Lendacky Signed-off-by: Peter Zijlstra (Intel) Cc: # 4.14.x- Cc: Alexander Shishkin Cc: Arnaldo Carvalho de Melo Cc: Arnaldo Carvalho de Melo Cc: Borislav Petkov Cc: Jiri Olsa Cc: Linus Torvalds Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Thomas Gleixner Cc: Vince Weaver Link: https://lkml.kernel.org/r/Message-ID: Signed-off-by: Ingo Molnar --- arch/x86/events/amd/core.c | 21 +++++++++++++++++++-- arch/x86/events/core.c | 13 +++---------- 2 files changed, 22 insertions(+), 12 deletions(-) diff --git a/arch/x86/events/amd/core.c b/arch/x86/events/amd/core.c index 34c191453ce3..0ecfac84ba91 100644 --- a/arch/x86/events/amd/core.c +++ b/arch/x86/events/amd/core.c @@ -4,8 +4,8 @@ #include #include #include -#include #include +#include #include "../perf_event.h" @@ -491,6 +491,23 @@ static void amd_pmu_disable_all(void) } } +static void amd_pmu_disable_event(struct perf_event *event) +{ + x86_pmu_disable_event(event); + + /* + * This can be called from NMI context (via x86_pmu_stop). The counter + * may have overflowed, but either way, we'll never see it get reset + * by the NMI if we're already in the NMI. And the NMI latency support + * below will take care of any pending NMI that might have been + * generated by the overflow. + */ + if (in_nmi()) + return; + + amd_pmu_wait_on_overflow(event->hw.idx); +} + /* * Because of NMI latency, if multiple PMC counters are active or other sources * of NMIs are received, the perf NMI handler can handle one or more overflowed @@ -738,7 +755,7 @@ static __initconst const struct x86_pmu amd_pmu = { .disable_all = amd_pmu_disable_all, .enable_all = x86_pmu_enable_all, .enable = x86_pmu_enable_event, - .disable = x86_pmu_disable_event, + .disable = amd_pmu_disable_event, .hw_config = amd_pmu_hw_config, .schedule_events = x86_schedule_events, .eventsel = MSR_K7_EVNTSEL0, diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c index e2b1447192a8..81911e11a15d 100644 --- a/arch/x86/events/core.c +++ b/arch/x86/events/core.c @@ -1349,8 +1349,9 @@ void x86_pmu_stop(struct perf_event *event, int flags) struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); struct hw_perf_event *hwc = &event->hw; - if (__test_and_clear_bit(hwc->idx, cpuc->active_mask)) { + if (test_bit(hwc->idx, cpuc->active_mask)) { x86_pmu.disable(event); + __clear_bit(hwc->idx, cpuc->active_mask); cpuc->events[hwc->idx] = NULL; WARN_ON_ONCE(hwc->state & PERF_HES_STOPPED); hwc->state |= PERF_HES_STOPPED; @@ -1447,16 +1448,8 @@ int x86_pmu_handle_irq(struct pt_regs *regs) apic_write(APIC_LVTPC, APIC_DM_NMI); for (idx = 0; idx < x86_pmu.num_counters; idx++) { - if (!test_bit(idx, cpuc->active_mask)) { - /* - * Though we deactivated the counter some cpus - * might still deliver spurious interrupts still - * in flight. Catch them: - */ - if (__test_and_clear_bit(idx, cpuc->running)) - handled++; + if (!test_bit(idx, cpuc->active_mask)) continue; - } event = cpuc->events[idx]; -- cgit v1.2.3 From d6ba3f815bc5f3c4249d15c8bc5fbb012651b4a4 Mon Sep 17 00:00:00 2001 From: Tzung-Bi Shih Date: Mon, 8 Apr 2019 17:08:58 +0800 Subject: ASoC: Intel: kbl: fix wrong number of channels Fix wrong setting on number of channels. The context wants to set constraint to 2 channels instead of 4. Signed-off-by: Tzung-Bi Shih Acked-by: Pierre-Louis Bossart Signed-off-by: Mark Brown --- sound/soc/intel/boards/kbl_rt5663_rt5514_max98927.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/soc/intel/boards/kbl_rt5663_rt5514_max98927.c b/sound/soc/intel/boards/kbl_rt5663_rt5514_max98927.c index 7044d8c2b187..879f14257a3e 100644 --- a/sound/soc/intel/boards/kbl_rt5663_rt5514_max98927.c +++ b/sound/soc/intel/boards/kbl_rt5663_rt5514_max98927.c @@ -405,7 +405,7 @@ static const struct snd_pcm_hw_constraint_list constraints_dmic_channels = { }; static const unsigned int dmic_2ch[] = { - 4, + 2, }; static const struct snd_pcm_hw_constraint_list constraints_dmic_2ch = { -- cgit v1.2.3 From e37c2deafe7058cf7989c4c47bbf1140cc867d89 Mon Sep 17 00:00:00 2001 From: Olivier Moysan Date: Wed, 10 Apr 2019 10:08:36 +0200 Subject: ASoC: stm32: sai: fix master clock management When master clock is used, master clock rate is set exclusively. Parent clocks of master clock cannot be changed after a call to clk_set_rate_exclusive(). So the parent clock of SAI kernel clock must be set before. Ensure also that exclusive rate operations are balanced in STM32 SAI driver. Signed-off-by: Olivier Moysan Signed-off-by: Mark Brown --- sound/soc/stm/stm32_sai_sub.c | 64 +++++++++++++++++++++++++++++++------------ 1 file changed, 47 insertions(+), 17 deletions(-) diff --git a/sound/soc/stm/stm32_sai_sub.c b/sound/soc/stm/stm32_sai_sub.c index 83d8a7ac56f4..d7045aa520de 100644 --- a/sound/soc/stm/stm32_sai_sub.c +++ b/sound/soc/stm/stm32_sai_sub.c @@ -70,6 +70,7 @@ #define SAI_IEC60958_STATUS_BYTES 24 #define SAI_MCLK_NAME_LEN 32 +#define SAI_RATE_11K 11025 /** * struct stm32_sai_sub_data - private data of SAI sub block (block A or B) @@ -309,6 +310,25 @@ static int stm32_sai_set_clk_div(struct stm32_sai_sub_data *sai, return ret; } +static int stm32_sai_set_parent_clock(struct stm32_sai_sub_data *sai, + unsigned int rate) +{ + struct platform_device *pdev = sai->pdev; + struct clk *parent_clk = sai->pdata->clk_x8k; + int ret; + + if (!(rate % SAI_RATE_11K)) + parent_clk = sai->pdata->clk_x11k; + + ret = clk_set_parent(sai->sai_ck, parent_clk); + if (ret) + dev_err(&pdev->dev, " Error %d setting sai_ck parent clock. %s", + ret, ret == -EBUSY ? + "Active stream rates conflict\n" : "\n"); + + return ret; +} + static long stm32_sai_mclk_round_rate(struct clk_hw *hw, unsigned long rate, unsigned long *prate) { @@ -490,25 +510,29 @@ static int stm32_sai_set_sysclk(struct snd_soc_dai *cpu_dai, struct stm32_sai_sub_data *sai = snd_soc_dai_get_drvdata(cpu_dai); int ret; - if (dir == SND_SOC_CLOCK_OUT) { + if (dir == SND_SOC_CLOCK_OUT && sai->sai_mclk) { ret = regmap_update_bits(sai->regmap, STM_SAI_CR1_REGX, SAI_XCR1_NODIV, (unsigned int)~SAI_XCR1_NODIV); if (ret < 0) return ret; - dev_dbg(cpu_dai->dev, "SAI MCLK frequency is %uHz\n", freq); - sai->mclk_rate = freq; + /* If master clock is used, set parent clock now */ + ret = stm32_sai_set_parent_clock(sai, freq); + if (ret) + return ret; - if (sai->sai_mclk) { - ret = clk_set_rate_exclusive(sai->sai_mclk, - sai->mclk_rate); - if (ret) { - dev_err(cpu_dai->dev, - "Could not set mclk rate\n"); - return ret; - } + ret = clk_set_rate_exclusive(sai->sai_mclk, freq); + if (ret) { + dev_err(cpu_dai->dev, + ret == -EBUSY ? + "Active streams have incompatible rates" : + "Could not set mclk rate\n"); + return ret; } + + dev_dbg(cpu_dai->dev, "SAI MCLK frequency is %uHz\n", freq); + sai->mclk_rate = freq; } return 0; @@ -916,11 +940,13 @@ static int stm32_sai_configure_clock(struct snd_soc_dai *cpu_dai, int div = 0, cr1 = 0; int sai_clk_rate, mclk_ratio, den; unsigned int rate = params_rate(params); + int ret; - if (!(rate % 11025)) - clk_set_parent(sai->sai_ck, sai->pdata->clk_x11k); - else - clk_set_parent(sai->sai_ck, sai->pdata->clk_x8k); + if (!sai->sai_mclk) { + ret = stm32_sai_set_parent_clock(sai, rate); + if (ret) + return ret; + } sai_clk_rate = clk_get_rate(sai->sai_ck); if (STM_SAI_IS_F4(sai->pdata)) { @@ -1079,9 +1105,13 @@ static void stm32_sai_shutdown(struct snd_pcm_substream *substream, regmap_update_bits(sai->regmap, STM_SAI_CR1_REGX, SAI_XCR1_NODIV, SAI_XCR1_NODIV); - clk_disable_unprepare(sai->sai_ck); + /* Release mclk rate only if rate was actually set */ + if (sai->mclk_rate) { + clk_rate_exclusive_put(sai->sai_mclk); + sai->mclk_rate = 0; + } - clk_rate_exclusive_put(sai->sai_mclk); + clk_disable_unprepare(sai->sai_ck); spin_lock_irqsave(&sai->irq_lock, flags); sai->substream = NULL; -- cgit v1.2.3 From e33c1b9923775d17ad246946fe67fcb9be288677 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Mon, 8 Apr 2019 09:07:06 -0700 Subject: apparmor: Restore Y/N in /sys for apparmor's "enabled" Before commit c5459b829b71 ("LSM: Plumb visibility into optional "enabled" state"), /sys/module/apparmor/parameters/enabled would show "Y" or "N" since it was using the "bool" handler. After being changed to "int", this switched to "1" or "0", breaking the userspace AppArmor detection of dbus-broker. This restores the Y/N output while keeping the LSM infrastructure happy. Before: $ cat /sys/module/apparmor/parameters/enabled 1 After: $ cat /sys/module/apparmor/parameters/enabled Y Reported-by: David Rheinsberg Reviewed-by: David Rheinsberg Link: https://lkml.kernel.org/r/CADyDSO6k8vYb1eryT4g6+EHrLCvb68GAbHVWuULkYjcZcYNhhw@mail.gmail.com Fixes: c5459b829b71 ("LSM: Plumb visibility into optional "enabled" state") Signed-off-by: Kees Cook Signed-off-by: John Johansen --- security/apparmor/lsm.c | 49 ++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 48 insertions(+), 1 deletion(-) diff --git a/security/apparmor/lsm.c b/security/apparmor/lsm.c index 49d664ddff44..87500bde5a92 100644 --- a/security/apparmor/lsm.c +++ b/security/apparmor/lsm.c @@ -1336,9 +1336,16 @@ module_param_named(path_max, aa_g_path_max, aauint, S_IRUSR); bool aa_g_paranoid_load = true; module_param_named(paranoid_load, aa_g_paranoid_load, aabool, S_IRUGO); +static int param_get_aaintbool(char *buffer, const struct kernel_param *kp); +static int param_set_aaintbool(const char *val, const struct kernel_param *kp); +#define param_check_aaintbool param_check_int +static const struct kernel_param_ops param_ops_aaintbool = { + .set = param_set_aaintbool, + .get = param_get_aaintbool +}; /* Boot time disable flag */ static int apparmor_enabled __lsm_ro_after_init = 1; -module_param_named(enabled, apparmor_enabled, int, 0444); +module_param_named(enabled, apparmor_enabled, aaintbool, 0444); static int __init apparmor_enabled_setup(char *str) { @@ -1413,6 +1420,46 @@ static int param_get_aauint(char *buffer, const struct kernel_param *kp) return param_get_uint(buffer, kp); } +/* Can only be set before AppArmor is initialized (i.e. on boot cmdline). */ +static int param_set_aaintbool(const char *val, const struct kernel_param *kp) +{ + struct kernel_param kp_local; + bool value; + int error; + + if (apparmor_initialized) + return -EPERM; + + /* Create local copy, with arg pointing to bool type. */ + value = !!*((int *)kp->arg); + memcpy(&kp_local, kp, sizeof(kp_local)); + kp_local.arg = &value; + + error = param_set_bool(val, &kp_local); + if (!error) + *((int *)kp->arg) = *((bool *)kp_local.arg); + return error; +} + +/* + * To avoid changing /sys/module/apparmor/parameters/enabled from Y/N to + * 1/0, this converts the "int that is actually bool" back to bool for + * display in the /sys filesystem, while keeping it "int" for the LSM + * infrastructure. + */ +static int param_get_aaintbool(char *buffer, const struct kernel_param *kp) +{ + struct kernel_param kp_local; + bool value; + + /* Create local copy, with arg pointing to bool type. */ + value = !!*((int *)kp->arg); + memcpy(&kp_local, kp, sizeof(kp_local)); + kp_local.arg = &value; + + return param_get_bool(buffer, &kp_local); +} + static int param_get_audit(char *buffer, const struct kernel_param *kp) { if (!apparmor_enabled) -- cgit v1.2.3 From 70802487bb9145a4f8b26f5a11d0e7f83c25100a Mon Sep 17 00:00:00 2001 From: Ranjani Sridharan Date: Mon, 8 Apr 2019 12:30:25 -0700 Subject: ASoC: pcm: fix error handling when try_module_get() fails. Handle error before returning when try_module_get() fails to prevent inconsistent mutex lock/unlock. Fixes: 52034add7 (ASoC: pcm: update module refcount if module_get_upon_open is set) Signed-off-by: Ranjani Sridharan Signed-off-by: Mark Brown --- sound/soc/soc-pcm.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/sound/soc/soc-pcm.c b/sound/soc/soc-pcm.c index d21247546f7f..be80a12fba27 100644 --- a/sound/soc/soc-pcm.c +++ b/sound/soc/soc-pcm.c @@ -518,8 +518,10 @@ static int soc_pcm_open(struct snd_pcm_substream *substream) continue; if (component->driver->module_get_upon_open && - !try_module_get(component->dev->driver->owner)) - return -ENODEV; + !try_module_get(component->dev->driver->owner)) { + ret = -ENODEV; + goto module_err; + } ret = component->driver->ops->open(substream); if (ret < 0) { @@ -636,7 +638,7 @@ codec_dai_err: component_err: soc_pcm_components_close(substream, component); - +module_err: if (cpu_dai->driver->ops->shutdown) cpu_dai->driver->ops->shutdown(substream, cpu_dai); out: -- cgit v1.2.3 From 90c1cba2b3b3851c151229f61801919b2904d437 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Wed, 3 Apr 2019 16:35:52 -0700 Subject: locking/lockdep: Zap lock classes even with lock debugging disabled The following commit: a0b0fd53e1e6 ("locking/lockdep: Free lock classes that are no longer in use") changed the behavior of lockdep_free_key_range() from unconditionally zapping lock classes into only zapping lock classes if debug_lock == true. Not zapping lock classes if debug_lock == false leaves dangling pointers in several lockdep datastructures, e.g. lock_class::name in the all_lock_classes list. The shell command "cat /proc/lockdep" causes the kernel to iterate the all_lock_classes list. Hence the "unable to handle kernel paging request" cash that Shenghui encountered by running cat /proc/lockdep. Since the new behavior can cause cat /proc/lockdep to crash, restore the pre-v5.1 behavior. This patch avoids that cat /proc/lockdep triggers the following crash with debug_lock == false: BUG: unable to handle kernel paging request at fffffbfff40ca448 RIP: 0010:__asan_load1+0x28/0x50 Call Trace: string+0xac/0x180 vsnprintf+0x23e/0x820 seq_vprintf+0x82/0xc0 seq_printf+0x92/0xb0 print_name+0x34/0xb0 l_show+0x184/0x200 seq_read+0x59e/0x6c0 proc_reg_read+0x11f/0x170 __vfs_read+0x4d/0x90 vfs_read+0xc5/0x1f0 ksys_read+0xab/0x130 __x64_sys_read+0x43/0x50 do_syscall_64+0x71/0x210 entry_SYSCALL_64_after_hwframe+0x49/0xbe Reported-by: shenghui Signed-off-by: Bart Van Assche Signed-off-by: Peter Zijlstra (Intel) Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Waiman Long Cc: Will Deacon Fixes: a0b0fd53e1e6 ("locking/lockdep: Free lock classes that are no longer in use") # v5.1-rc1. Link: https://lkml.kernel.org/r/20190403233552.124673-1-bvanassche@acm.org Signed-off-by: Ingo Molnar --- kernel/locking/lockdep.c | 29 ++++++++++++----------------- 1 file changed, 12 insertions(+), 17 deletions(-) diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c index 34cdcbedda49..e16766ff184b 100644 --- a/kernel/locking/lockdep.c +++ b/kernel/locking/lockdep.c @@ -4689,8 +4689,8 @@ static void free_zapped_rcu(struct rcu_head *ch) return; raw_local_irq_save(flags); - if (!graph_lock()) - goto out_irq; + arch_spin_lock(&lockdep_lock); + current->lockdep_recursion = 1; /* closed head */ pf = delayed_free.pf + (delayed_free.index ^ 1); @@ -4702,8 +4702,8 @@ static void free_zapped_rcu(struct rcu_head *ch) */ call_rcu_zapped(delayed_free.pf + delayed_free.index); - graph_unlock(); -out_irq: + current->lockdep_recursion = 0; + arch_spin_unlock(&lockdep_lock); raw_local_irq_restore(flags); } @@ -4744,21 +4744,17 @@ static void lockdep_free_key_range_reg(void *start, unsigned long size) { struct pending_free *pf; unsigned long flags; - int locked; init_data_structures_once(); raw_local_irq_save(flags); - locked = graph_lock(); - if (!locked) - goto out_irq; - + arch_spin_lock(&lockdep_lock); + current->lockdep_recursion = 1; pf = get_pending_free(); __lockdep_free_key_range(pf, start, size); call_rcu_zapped(pf); - - graph_unlock(); -out_irq: + current->lockdep_recursion = 0; + arch_spin_unlock(&lockdep_lock); raw_local_irq_restore(flags); /* @@ -4911,9 +4907,8 @@ void lockdep_unregister_key(struct lock_class_key *key) return; raw_local_irq_save(flags); - if (!graph_lock()) - goto out_irq; - + arch_spin_lock(&lockdep_lock); + current->lockdep_recursion = 1; pf = get_pending_free(); hlist_for_each_entry_rcu(k, hash_head, hash_entry) { if (k == key) { @@ -4925,8 +4920,8 @@ void lockdep_unregister_key(struct lock_class_key *key) WARN_ON_ONCE(!found); __lockdep_free_key_range(pf, key, 1); call_rcu_zapped(pf); - graph_unlock(); -out_irq: + current->lockdep_recursion = 0; + arch_spin_unlock(&lockdep_lock); raw_local_irq_restore(flags); /* Wait until is_dynamic_key() has finished accessing k->hash_entry. */ -- cgit v1.2.3 From e9f33a8fee53c2d4bcdeec9a89478b4bf17bfbbc Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 9 Apr 2019 14:11:20 +0200 Subject: mac80211: fix RX STBC override byte order The original patch neglected to take byte order conversions into account, fix that. Fixes: d9bb410888ce ("mac80211: allow overriding HT STBC capabilities") Signed-off-by: Johannes Berg Reviewed-by: Sergey Matyukevich Signed-off-by: Johannes Berg --- net/mac80211/ht.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/net/mac80211/ht.c b/net/mac80211/ht.c index e03c46ac8e4d..c62101857b9b 100644 --- a/net/mac80211/ht.c +++ b/net/mac80211/ht.c @@ -112,8 +112,9 @@ void ieee80211_apply_htcap_overrides(struct ieee80211_sub_if_data *sdata, IEEE80211_HT_CAP_TX_STBC); /* Allow user to configure RX STBC bits */ - if (ht_capa_mask->cap_info & IEEE80211_HT_CAP_RX_STBC) - ht_cap->cap |= ht_capa->cap_info & IEEE80211_HT_CAP_RX_STBC; + if (ht_capa_mask->cap_info & cpu_to_le16(IEEE80211_HT_CAP_RX_STBC)) + ht_cap->cap |= le16_to_cpu(ht_capa->cap_info) & + IEEE80211_HT_CAP_RX_STBC; /* Allow user to decrease AMPDU factor */ if (ht_capa_mask->ampdu_params_info & -- cgit v1.2.3 From 07d7e12091f4ab869cc6a4bb276399057e73b0b3 Mon Sep 17 00:00:00 2001 From: Andrei Vagin Date: Sun, 7 Apr 2019 21:15:42 -0700 Subject: alarmtimer: Return correct remaining time To calculate a remaining time, it's required to subtract the current time from the expiration time. In alarm_timer_remaining() the arguments of ktime_sub are swapped. Fixes: d653d8457c76 ("alarmtimer: Implement remaining callback") Signed-off-by: Andrei Vagin Signed-off-by: Thomas Gleixner Reviewed-by: Mukesh Ojha Cc: Stephen Boyd Cc: John Stultz Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/20190408041542.26338-1-avagin@gmail.com --- kernel/time/alarmtimer.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/time/alarmtimer.c b/kernel/time/alarmtimer.c index 2c97e8c2d29f..0519a8805aab 100644 --- a/kernel/time/alarmtimer.c +++ b/kernel/time/alarmtimer.c @@ -594,7 +594,7 @@ static ktime_t alarm_timer_remaining(struct k_itimer *timr, ktime_t now) { struct alarm *alarm = &timr->it.alarm.alarmtimer; - return ktime_sub(now, alarm->node.expires); + return ktime_sub(alarm->node.expires, now); } /** -- cgit v1.2.3 From d7a181da2dfa3190487c446042ba01e07d851c74 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Wed, 10 Apr 2019 12:49:55 +0200 Subject: ALSA: hda: Fix racy display power access snd_hdac_display_power() doesn't handle the concurrent calls carefully enough, and it may lead to the doubly get_power or put_power calls, when a runtime PM and an async work get called in racy way. This patch addresses it by reusing the bus->lock mutex that has been used for protecting the link state change in ext bus code, so that it can protect against racy display state changes. The initialization of bus->lock was moved from snd_hdac_ext_bus_init() to snd_hdac_bus_init() as well accordingly. Testcase: igt/i915_pm_rpm/module-reload #glk-dsi Reported-by: Chris Wilson Reviewed-by: Chris Wilson Cc: Imre Deak Signed-off-by: Takashi Iwai --- sound/hda/ext/hdac_ext_bus.c | 1 - sound/hda/hdac_bus.c | 1 + sound/hda/hdac_component.c | 6 +++++- 3 files changed, 6 insertions(+), 2 deletions(-) diff --git a/sound/hda/ext/hdac_ext_bus.c b/sound/hda/ext/hdac_ext_bus.c index 9c37d9af3023..ec7715c6b0c0 100644 --- a/sound/hda/ext/hdac_ext_bus.c +++ b/sound/hda/ext/hdac_ext_bus.c @@ -107,7 +107,6 @@ int snd_hdac_ext_bus_init(struct hdac_bus *bus, struct device *dev, INIT_LIST_HEAD(&bus->hlink_list); bus->idx = idx++; - mutex_init(&bus->lock); bus->cmd_dma_state = true; return 0; diff --git a/sound/hda/hdac_bus.c b/sound/hda/hdac_bus.c index 012305177f68..ad8eee08013f 100644 --- a/sound/hda/hdac_bus.c +++ b/sound/hda/hdac_bus.c @@ -38,6 +38,7 @@ int snd_hdac_bus_init(struct hdac_bus *bus, struct device *dev, INIT_WORK(&bus->unsol_work, snd_hdac_bus_process_unsol_events); spin_lock_init(&bus->reg_lock); mutex_init(&bus->cmd_mutex); + mutex_init(&bus->lock); bus->irq = -1; return 0; } diff --git a/sound/hda/hdac_component.c b/sound/hda/hdac_component.c index a6d37b9d6413..6b5caee61c6e 100644 --- a/sound/hda/hdac_component.c +++ b/sound/hda/hdac_component.c @@ -69,13 +69,15 @@ void snd_hdac_display_power(struct hdac_bus *bus, unsigned int idx, bool enable) dev_dbg(bus->dev, "display power %s\n", enable ? "enable" : "disable"); + + mutex_lock(&bus->lock); if (enable) set_bit(idx, &bus->display_power_status); else clear_bit(idx, &bus->display_power_status); if (!acomp || !acomp->ops) - return; + goto unlock; if (bus->display_power_status) { if (!bus->display_power_active) { @@ -92,6 +94,8 @@ void snd_hdac_display_power(struct hdac_bus *bus, unsigned int idx, bool enable) bus->display_power_active = false; } } + unlock: + mutex_unlock(&bus->lock); } EXPORT_SYMBOL_GPL(snd_hdac_display_power); -- cgit v1.2.3 From eed47d19d9362bdd958e4ab56af480b9dbf6b2b6 Mon Sep 17 00:00:00 2001 From: Paolo Valente Date: Wed, 10 Apr 2019 10:38:33 +0200 Subject: block, bfq: fix use after free in bfq_bfqq_expire The function bfq_bfqq_expire() invokes the function __bfq_bfqq_expire(), and the latter may free the in-service bfq-queue. If this happens, then no other instruction of bfq_bfqq_expire() must be executed, or a use-after-free will occur. Basing on the assumption that __bfq_bfqq_expire() invokes bfq_put_queue() on the in-service bfq-queue exactly once, the queue is assumed to be freed if its refcounter is equal to one right before invoking __bfq_bfqq_expire(). But, since commit 9dee8b3b057e ("block, bfq: fix queue removal from weights tree") this assumption is false. __bfq_bfqq_expire() may also invoke bfq_weights_tree_remove() and, since commit 9dee8b3b057e ("block, bfq: fix queue removal from weights tree"), also the latter function may invoke bfq_put_queue(). So __bfq_bfqq_expire() may invoke bfq_put_queue() twice, and this is the actual case where the in-service queue may happen to be freed. To address this issue, this commit moves the check on the refcounter of the queue right around the last bfq_put_queue() that may be invoked on the queue. Fixes: 9dee8b3b057e ("block, bfq: fix queue removal from weights tree") Reported-by: Dmitrii Tcvetkov Reported-by: Douglas Anderson Tested-by: Dmitrii Tcvetkov Tested-by: Douglas Anderson Signed-off-by: Paolo Valente Signed-off-by: Jens Axboe --- block/bfq-iosched.c | 15 +++++++-------- block/bfq-iosched.h | 2 +- block/bfq-wf2q.c | 17 +++++++++++++++-- 3 files changed, 23 insertions(+), 11 deletions(-) diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c index fac188dd78fa..dfb8cb0af13a 100644 --- a/block/bfq-iosched.c +++ b/block/bfq-iosched.c @@ -2822,7 +2822,7 @@ static void bfq_dispatch_remove(struct request_queue *q, struct request *rq) bfq_remove_request(q, rq); } -static void __bfq_bfqq_expire(struct bfq_data *bfqd, struct bfq_queue *bfqq) +static bool __bfq_bfqq_expire(struct bfq_data *bfqd, struct bfq_queue *bfqq) { /* * If this bfqq is shared between multiple processes, check @@ -2855,9 +2855,11 @@ static void __bfq_bfqq_expire(struct bfq_data *bfqd, struct bfq_queue *bfqq) /* * All in-service entities must have been properly deactivated * or requeued before executing the next function, which - * resets all in-service entites as no more in service. + * resets all in-service entities as no more in service. This + * may cause bfqq to be freed. If this happens, the next + * function returns true. */ - __bfq_bfqd_reset_in_service(bfqd); + return __bfq_bfqd_reset_in_service(bfqd); } /** @@ -3262,7 +3264,6 @@ void bfq_bfqq_expire(struct bfq_data *bfqd, bool slow; unsigned long delta = 0; struct bfq_entity *entity = &bfqq->entity; - int ref; /* * Check whether the process is slow (see bfq_bfqq_is_slow). @@ -3347,10 +3348,8 @@ void bfq_bfqq_expire(struct bfq_data *bfqd, * reason. */ __bfq_bfqq_recalc_budget(bfqd, bfqq, reason); - ref = bfqq->ref; - __bfq_bfqq_expire(bfqd, bfqq); - - if (ref == 1) /* bfqq is gone, no more actions on it */ + if (__bfq_bfqq_expire(bfqd, bfqq)) + /* bfqq is gone, no more actions on it */ return; bfqq->injected_service = 0; diff --git a/block/bfq-iosched.h b/block/bfq-iosched.h index 062e1c4787f4..86394e503ca9 100644 --- a/block/bfq-iosched.h +++ b/block/bfq-iosched.h @@ -995,7 +995,7 @@ bool __bfq_deactivate_entity(struct bfq_entity *entity, bool ins_into_idle_tree); bool next_queue_may_preempt(struct bfq_data *bfqd); struct bfq_queue *bfq_get_next_queue(struct bfq_data *bfqd); -void __bfq_bfqd_reset_in_service(struct bfq_data *bfqd); +bool __bfq_bfqd_reset_in_service(struct bfq_data *bfqd); void bfq_deactivate_bfqq(struct bfq_data *bfqd, struct bfq_queue *bfqq, bool ins_into_idle_tree, bool expiration); void bfq_activate_bfqq(struct bfq_data *bfqd, struct bfq_queue *bfqq); diff --git a/block/bfq-wf2q.c b/block/bfq-wf2q.c index a11bef75483d..ae4d000ac0af 100644 --- a/block/bfq-wf2q.c +++ b/block/bfq-wf2q.c @@ -1605,7 +1605,8 @@ struct bfq_queue *bfq_get_next_queue(struct bfq_data *bfqd) return bfqq; } -void __bfq_bfqd_reset_in_service(struct bfq_data *bfqd) +/* returns true if the in-service queue gets freed */ +bool __bfq_bfqd_reset_in_service(struct bfq_data *bfqd) { struct bfq_queue *in_serv_bfqq = bfqd->in_service_queue; struct bfq_entity *in_serv_entity = &in_serv_bfqq->entity; @@ -1629,8 +1630,20 @@ void __bfq_bfqd_reset_in_service(struct bfq_data *bfqd) * service tree either, then release the service reference to * the queue it represents (taken with bfq_get_entity). */ - if (!in_serv_entity->on_st) + if (!in_serv_entity->on_st) { + /* + * If no process is referencing in_serv_bfqq any + * longer, then the service reference may be the only + * reference to the queue. If this is the case, then + * bfqq gets freed here. + */ + int ref = in_serv_bfqq->ref; bfq_put_queue(in_serv_bfqq); + if (ref == 1) + return true; + } + + return false; } void bfq_deactivate_bfqq(struct bfq_data *bfqd, struct bfq_queue *bfqq, -- cgit v1.2.3 From bf348f9b78d413e75bb079462751a1d86b6de36c Mon Sep 17 00:00:00 2001 From: Dongli Zhang Date: Wed, 27 Mar 2019 18:36:34 +0800 Subject: virtio-blk: limit number of hw queues by nr_cpu_ids When tag_set->nr_maps is 1, the block layer limits the number of hw queues by nr_cpu_ids. No matter how many hw queues are used by virtio-blk, as it has (tag_set->nr_maps == 1), it can use at most nr_cpu_ids hw queues. In addition, specifically for pci scenario, when the 'num-queues' specified by qemu is more than maxcpus, virtio-blk would not be able to allocate more than maxcpus vectors in order to have a vector for each queue. As a result, it falls back into MSI-X with one vector for config and one shared for queues. Considering above reasons, this patch limits the number of hw queues used by virtio-blk by nr_cpu_ids. Reviewed-by: Stefan Hajnoczi Signed-off-by: Dongli Zhang Signed-off-by: Jens Axboe --- drivers/block/virtio_blk.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c index 4bc083b7c9b5..2a7ca4a1e6f7 100644 --- a/drivers/block/virtio_blk.c +++ b/drivers/block/virtio_blk.c @@ -513,6 +513,8 @@ static int init_vq(struct virtio_blk *vblk) if (err) num_vqs = 1; + num_vqs = min_t(unsigned int, nr_cpu_ids, num_vqs); + vblk->vqs = kmalloc_array(num_vqs, sizeof(*vblk->vqs), GFP_KERNEL); if (!vblk->vqs) return -ENOMEM; -- cgit v1.2.3 From 1978f30a87732d4d9072a20abeded9fe17884f1b Mon Sep 17 00:00:00 2001 From: Dongli Zhang Date: Wed, 27 Mar 2019 18:36:35 +0800 Subject: scsi: virtio_scsi: limit number of hw queues by nr_cpu_ids When tag_set->nr_maps is 1, the block layer limits the number of hw queues by nr_cpu_ids. No matter how many hw queues are used by virtio-scsi, as it has (tag_set->nr_maps == 1), it can use at most nr_cpu_ids hw queues. In addition, specifically for pci scenario, when the 'num_queues' specified by qemu is more than maxcpus, virtio-scsi would not be able to allocate more than maxcpus vectors in order to have a vector for each queue. As a result, it falls back into MSI-X with one vector for config and one shared for queues. Considering above reasons, this patch limits the number of hw queues used by virtio-scsi by nr_cpu_ids. Reviewed-by: Stefan Hajnoczi Signed-off-by: Dongli Zhang Signed-off-by: Jens Axboe --- drivers/scsi/virtio_scsi.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/scsi/virtio_scsi.c b/drivers/scsi/virtio_scsi.c index 8af01777d09c..f8cb7c23305b 100644 --- a/drivers/scsi/virtio_scsi.c +++ b/drivers/scsi/virtio_scsi.c @@ -793,6 +793,7 @@ static int virtscsi_probe(struct virtio_device *vdev) /* We need to know how many queues before we allocate. */ num_queues = virtscsi_config_get(vdev, num_queues) ? : 1; + num_queues = min_t(unsigned int, nr_cpu_ids, num_queues); num_targets = virtscsi_config_get(vdev, max_target) + 1; -- cgit v1.2.3 From 5712f3301a12c0c3de9cc423484496b0464f2faf Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Wed, 3 Apr 2019 09:13:34 +0200 Subject: s390/3270: fix lockdep false positive on view->lock The spinlock in the raw3270_view structure is used by con3270, tty3270 and fs3270 in different ways. For con3270 the lock can be acquired in irq context, for tty3270 and fs3270 the highest context is bh. Lockdep sees the view->lock as a single class and if the 3270 driver is used for the console the following message is generated: WARNING: inconsistent lock state 5.1.0-rc3-05157-g5c168033979d #12 Not tainted -------------------------------- inconsistent {IN-HARDIRQ-W} -> {HARDIRQ-ON-W} usage. swapper/0/1 [HC0[0]:SC1[1]:HE1:SE0] takes: (____ptrval____) (&(&view->lock)->rlock){?.-.}, at: tty3270_update+0x7c/0x330 Introduce a lockdep subclass for the view lock to distinguish bh from irq locks. Signed-off-by: Martin Schwidefsky --- drivers/s390/char/con3270.c | 2 +- drivers/s390/char/fs3270.c | 3 ++- drivers/s390/char/raw3270.c | 3 ++- drivers/s390/char/raw3270.h | 4 +++- drivers/s390/char/tty3270.c | 3 ++- 5 files changed, 10 insertions(+), 5 deletions(-) diff --git a/drivers/s390/char/con3270.c b/drivers/s390/char/con3270.c index fd2146bcc0ad..e17364e13d2f 100644 --- a/drivers/s390/char/con3270.c +++ b/drivers/s390/char/con3270.c @@ -629,7 +629,7 @@ con3270_init(void) (void (*)(unsigned long)) con3270_read_tasklet, (unsigned long) condev->read); - raw3270_add_view(&condev->view, &con3270_fn, 1); + raw3270_add_view(&condev->view, &con3270_fn, 1, RAW3270_VIEW_LOCK_IRQ); INIT_LIST_HEAD(&condev->freemem); for (i = 0; i < CON3270_STRING_PAGES; i++) { diff --git a/drivers/s390/char/fs3270.c b/drivers/s390/char/fs3270.c index 8f3a2eeb28dc..8b48ba9c598e 100644 --- a/drivers/s390/char/fs3270.c +++ b/drivers/s390/char/fs3270.c @@ -463,7 +463,8 @@ fs3270_open(struct inode *inode, struct file *filp) init_waitqueue_head(&fp->wait); fp->fs_pid = get_pid(task_pid(current)); - rc = raw3270_add_view(&fp->view, &fs3270_fn, minor); + rc = raw3270_add_view(&fp->view, &fs3270_fn, minor, + RAW3270_VIEW_LOCK_BH); if (rc) { fs3270_free_view(&fp->view); goto out; diff --git a/drivers/s390/char/raw3270.c b/drivers/s390/char/raw3270.c index f8cd2935fbfd..63a41b168761 100644 --- a/drivers/s390/char/raw3270.c +++ b/drivers/s390/char/raw3270.c @@ -920,7 +920,7 @@ raw3270_deactivate_view(struct raw3270_view *view) * Add view to device with minor "minor". */ int -raw3270_add_view(struct raw3270_view *view, struct raw3270_fn *fn, int minor) +raw3270_add_view(struct raw3270_view *view, struct raw3270_fn *fn, int minor, int subclass) { unsigned long flags; struct raw3270 *rp; @@ -942,6 +942,7 @@ raw3270_add_view(struct raw3270_view *view, struct raw3270_fn *fn, int minor) view->cols = rp->cols; view->ascebc = rp->ascebc; spin_lock_init(&view->lock); + lockdep_set_subclass(&view->lock, subclass); list_add(&view->list, &rp->view_list); rc = 0; spin_unlock_irqrestore(get_ccwdev_lock(rp->cdev), flags); diff --git a/drivers/s390/char/raw3270.h b/drivers/s390/char/raw3270.h index 114ca7cbf889..3afaa35f7351 100644 --- a/drivers/s390/char/raw3270.h +++ b/drivers/s390/char/raw3270.h @@ -150,6 +150,8 @@ struct raw3270_fn { struct raw3270_view { struct list_head list; spinlock_t lock; +#define RAW3270_VIEW_LOCK_IRQ 0 +#define RAW3270_VIEW_LOCK_BH 1 atomic_t ref_count; struct raw3270 *dev; struct raw3270_fn *fn; @@ -158,7 +160,7 @@ struct raw3270_view { unsigned char *ascebc; /* ascii -> ebcdic table */ }; -int raw3270_add_view(struct raw3270_view *, struct raw3270_fn *, int); +int raw3270_add_view(struct raw3270_view *, struct raw3270_fn *, int, int); int raw3270_activate_view(struct raw3270_view *); void raw3270_del_view(struct raw3270_view *); void raw3270_deactivate_view(struct raw3270_view *); diff --git a/drivers/s390/char/tty3270.c b/drivers/s390/char/tty3270.c index 2b0c36c2c568..98d7fc152e32 100644 --- a/drivers/s390/char/tty3270.c +++ b/drivers/s390/char/tty3270.c @@ -980,7 +980,8 @@ static int tty3270_install(struct tty_driver *driver, struct tty_struct *tty) return PTR_ERR(tp); rc = raw3270_add_view(&tp->view, &tty3270_fn, - tty->index + RAW3270_FIRSTMINOR); + tty->index + RAW3270_FIRSTMINOR, + RAW3270_VIEW_LOCK_BH); if (rc) { tty3270_free_view(tp); return rc; -- cgit v1.2.3 From 16222cfb96b02a4a3e38e52012f2a6304850c3c9 Mon Sep 17 00:00:00 2001 From: Harald Freudenberger Date: Wed, 3 Apr 2019 13:18:22 +0200 Subject: s390/zcrypt: fix possible deadlock situation on ap queue remove With commit 01396a374c3d ("s390/zcrypt: revisit ap device remove procedure") the ap queue remove is now a two stage process. However, a del_timer_sync() call may trigger the timer function which may try to lock the very same spinlock as is held by the function just initiating the del_timer_sync() call. This could end up in a deadlock situation. Very unlikely but possible as you need to remove an ap queue at the exact sime time when a timeout of a request occurs. Signed-off-by: Harald Freudenberger Reported-by: Pierre Morel Fixes: commit 01396a374c3d ("s390/zcrypt: revisit ap device remove procedure") Signed-off-by: Martin Schwidefsky --- drivers/s390/crypto/ap_queue.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/s390/crypto/ap_queue.c b/drivers/s390/crypto/ap_queue.c index 6a340f2c3556..5ea83dc4f1d7 100644 --- a/drivers/s390/crypto/ap_queue.c +++ b/drivers/s390/crypto/ap_queue.c @@ -751,8 +751,8 @@ void ap_queue_prepare_remove(struct ap_queue *aq) __ap_flush_queue(aq); /* set REMOVE state to prevent new messages are queued in */ aq->state = AP_STATE_REMOVE; - del_timer_sync(&aq->timeout); spin_unlock_bh(&aq->lock); + del_timer_sync(&aq->timeout); } void ap_queue_remove(struct ap_queue *aq) -- cgit v1.2.3 From 1b8f21b74c3c9c82fce5a751d7aefb7cc0b8d33d Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Tue, 9 Apr 2019 06:31:21 +0800 Subject: blk-mq: introduce blk_mq_complete_request_sync() In NVMe's error handler, follows the typical steps of tearing down hardware for recovering controller: 1) stop blk_mq hw queues 2) stop the real hw queues 3) cancel in-flight requests via blk_mq_tagset_busy_iter(tags, cancel_request, ...) cancel_request(): mark the request as abort blk_mq_complete_request(req); 4) destroy real hw queues However, there may be race between #3 and #4, because blk_mq_complete_request() may run q->mq_ops->complete(rq) remotelly and asynchronously, and ->complete(rq) may be run after #4. This patch introduces blk_mq_complete_request_sync() for fixing the above race. Cc: Sagi Grimberg Cc: Bart Van Assche Cc: James Smart Cc: linux-nvme@lists.infradead.org Reviewed-by: Keith Busch Reviewed-by: Christoph Hellwig Signed-off-by: Ming Lei Signed-off-by: Jens Axboe --- block/blk-mq.c | 7 +++++++ include/linux/blk-mq.h | 1 + 2 files changed, 8 insertions(+) diff --git a/block/blk-mq.c b/block/blk-mq.c index a9354835cf51..9516304a38ee 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -654,6 +654,13 @@ bool blk_mq_complete_request(struct request *rq) } EXPORT_SYMBOL(blk_mq_complete_request); +void blk_mq_complete_request_sync(struct request *rq) +{ + WRITE_ONCE(rq->state, MQ_RQ_COMPLETE); + rq->q->mq_ops->complete(rq); +} +EXPORT_SYMBOL_GPL(blk_mq_complete_request_sync); + int blk_mq_request_started(struct request *rq) { return blk_mq_rq_state(rq) != MQ_RQ_IDLE; diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index cb2aa7ecafff..db29928de467 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -302,6 +302,7 @@ void blk_mq_requeue_request(struct request *rq, bool kick_requeue_list); void blk_mq_kick_requeue_list(struct request_queue *q); void blk_mq_delay_kick_requeue_list(struct request_queue *q, unsigned long msecs); bool blk_mq_complete_request(struct request *rq); +void blk_mq_complete_request_sync(struct request *rq); bool blk_mq_bio_list_merge(struct request_queue *q, struct list_head *list, struct bio *bio); bool blk_mq_queue_stopped(struct request_queue *q); -- cgit v1.2.3 From eb3afb75b57c28599af0dfa03a99579d410749e9 Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Tue, 9 Apr 2019 06:31:22 +0800 Subject: nvme: cancel request synchronously nvme_cancel_request() is used in error handler, and it is always reliable to cancel request synchronously, and avoids possible race in which request may be completed after real hw queue is destroyed. One issue is reported by our customer on NVMe RDMA, in which freed ib queue pair may be used in nvme_rdma_complete_rq(). Cc: Sagi Grimberg Cc: Bart Van Assche Cc: James Smart Cc: linux-nvme@lists.infradead.org Reviewed-by: Keith Busch Reviewed-by: Christoph Hellwig Signed-off-by: Ming Lei Signed-off-by: Jens Axboe --- drivers/nvme/host/core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 470601980794..2c43e12b70af 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -288,7 +288,7 @@ bool nvme_cancel_request(struct request *req, void *data, bool reserved) "Cancelling I/O %d", req->tag); nvme_req(req)->status = NVME_SC_ABORT_REQ; - blk_mq_complete_request(req); + blk_mq_complete_request_sync(req); return true; } EXPORT_SYMBOL_GPL(nvme_cancel_request); -- cgit v1.2.3 From 20eea462bf2fbff3a4be375cc8424a544235a432 Mon Sep 17 00:00:00 2001 From: Vandita Kulkarni Date: Mon, 25 Mar 2019 16:56:41 +0530 Subject: drm/i915/icl: Ungate ddi clocks before IO enable IO enable sequencing needs ddi clocks enabled. These clocks will be gated at a later point in the enable sequence. v2: Fix the commit header (Uma) v3: Remove the redundant read (Ville) Fixes: 949fc52af19e ("drm/i915/icl: add pll mapping for DSI") Signed-off-by: Vandita Kulkarni Reviewed-by: Uma Shankar Signed-off-by: Jani Nikula Link: https://patchwork.freedesktop.org/patch/msgid/1553513202-13863-1-git-send-email-vandita.kulkarni@intel.com (cherry picked from commit c5b81a325263a891d5811aabe938c87e03db4c37) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/i915/icl_dsi.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/gpu/drm/i915/icl_dsi.c b/drivers/gpu/drm/i915/icl_dsi.c index 83cd8284e807..43b7b80ffa21 100644 --- a/drivers/gpu/drm/i915/icl_dsi.c +++ b/drivers/gpu/drm/i915/icl_dsi.c @@ -598,6 +598,12 @@ static void gen11_dsi_map_pll(struct intel_encoder *encoder, val |= DPCLKA_CFGCR0_DDI_CLK_SEL(pll->info->id, port); } I915_WRITE(DPCLKA_CFGCR0_ICL, val); + + for_each_dsi_port(port, intel_dsi->ports) { + val &= ~DPCLKA_CFGCR0_DDI_CLK_OFF(port); + } + I915_WRITE(DPCLKA_CFGCR0_ICL, val); + POSTING_READ(DPCLKA_CFGCR0_ICL); mutex_unlock(&dev_priv->dpll_lock); -- cgit v1.2.3 From 4690985e00ac38334d4a68f99c56ac310ef0527b Mon Sep 17 00:00:00 2001 From: Vandita Kulkarni Date: Mon, 25 Mar 2019 16:56:42 +0530 Subject: drm/i915/icl: Fix port disable sequence for mipi-dsi Re-enable clock gating of DDI clocks. v2: Fix the default ddi clk state for mipi-dsi (Imre) Fixes: 1026bea00381 ("drm/i915/icl: Ungate DSI clocks") Signed-off-by: Vandita Kulkarni Reviewed-by: Uma Shankar Signed-off-by: Jani Nikula Link: https://patchwork.freedesktop.org/patch/msgid/1553513202-13863-2-git-send-email-vandita.kulkarni@intel.com (cherry picked from commit 942d1cf48eae3fcd7e973cfb708d5c4860f0c713) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/i915/icl_dsi.c | 2 +- drivers/gpu/drm/i915/intel_ddi.c | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/i915/icl_dsi.c b/drivers/gpu/drm/i915/icl_dsi.c index 43b7b80ffa21..641e0778fa9c 100644 --- a/drivers/gpu/drm/i915/icl_dsi.c +++ b/drivers/gpu/drm/i915/icl_dsi.c @@ -1132,7 +1132,7 @@ static void gen11_dsi_disable_port(struct intel_encoder *encoder) DRM_ERROR("DDI port:%c buffer not idle\n", port_name(port)); } - gen11_dsi_ungate_clocks(encoder); + gen11_dsi_gate_clocks(encoder); } static void gen11_dsi_disable_io_power(struct intel_encoder *encoder) diff --git a/drivers/gpu/drm/i915/intel_ddi.c b/drivers/gpu/drm/i915/intel_ddi.c index deba8e90360a..ab4e60dfd6a3 100644 --- a/drivers/gpu/drm/i915/intel_ddi.c +++ b/drivers/gpu/drm/i915/intel_ddi.c @@ -2824,10 +2824,10 @@ void icl_sanitize_encoder_pll_mapping(struct intel_encoder *encoder) return; } /* - * DSI ports should have their DDI clock ungated when disabled - * and gated when enabled. + * For DSI we keep the ddi clocks gated + * except during enable/disable sequence. */ - ddi_clk_needed = !encoder->base.crtc; + ddi_clk_needed = false; } val = I915_READ(DPCLKA_CFGCR0_ICL); -- cgit v1.2.3 From 21635d7311734d2d1b177f8a95e2f9386174b76d Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Fri, 5 Apr 2019 10:52:20 +0300 Subject: drm/i915/dp: revert back to max link rate and lane count on eDP MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Commit 7769db588384 ("drm/i915/dp: optimize eDP 1.4+ link config fast and narrow") started to optize the eDP 1.4+ link config, both per spec and as preparation for display stream compression support. Sadly, we again face panels that flat out fail with parameters they claim to support. Revert, and go back to the drawing board. v2: Actually revert to max params instead of just wide-and-slow. Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=109959 Fixes: 7769db588384 ("drm/i915/dp: optimize eDP 1.4+ link config fast and narrow") Cc: Ville Syrjälä Cc: Manasi Navare Cc: Rodrigo Vivi Cc: Matt Atwood Cc: "Lee, Shawn C" Cc: Dave Airlie Cc: intel-gfx@lists.freedesktop.org Cc: # v5.0+ Reviewed-by: Rodrigo Vivi Reviewed-by: Manasi Navare Tested-by: Albert Astals Cid # v5.0 backport Tested-by: Emanuele Panigati # v5.0 backport Tested-by: Matteo Iervasi # v5.0 backport Signed-off-by: Jani Nikula Link: https://patchwork.freedesktop.org/patch/msgid/20190405075220.9815-1-jani.nikula@intel.com (cherry picked from commit f11cb1c19ad0563b3c1ea5eb16a6bac0e401f428) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/i915/intel_dp.c | 69 ++++++----------------------------------- 1 file changed, 10 insertions(+), 59 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_dp.c b/drivers/gpu/drm/i915/intel_dp.c index cf709835fb9a..8891f29a8c7f 100644 --- a/drivers/gpu/drm/i915/intel_dp.c +++ b/drivers/gpu/drm/i915/intel_dp.c @@ -1859,42 +1859,6 @@ intel_dp_compute_link_config_wide(struct intel_dp *intel_dp, return -EINVAL; } -/* Optimize link config in order: max bpp, min lanes, min clock */ -static int -intel_dp_compute_link_config_fast(struct intel_dp *intel_dp, - struct intel_crtc_state *pipe_config, - const struct link_config_limits *limits) -{ - struct drm_display_mode *adjusted_mode = &pipe_config->base.adjusted_mode; - int bpp, clock, lane_count; - int mode_rate, link_clock, link_avail; - - for (bpp = limits->max_bpp; bpp >= limits->min_bpp; bpp -= 2 * 3) { - mode_rate = intel_dp_link_required(adjusted_mode->crtc_clock, - bpp); - - for (lane_count = limits->min_lane_count; - lane_count <= limits->max_lane_count; - lane_count <<= 1) { - for (clock = limits->min_clock; clock <= limits->max_clock; clock++) { - link_clock = intel_dp->common_rates[clock]; - link_avail = intel_dp_max_data_rate(link_clock, - lane_count); - - if (mode_rate <= link_avail) { - pipe_config->lane_count = lane_count; - pipe_config->pipe_bpp = bpp; - pipe_config->port_clock = link_clock; - - return 0; - } - } - } - } - - return -EINVAL; -} - static int intel_dp_dsc_compute_bpp(struct intel_dp *intel_dp, u8 dsc_max_bpc) { int i, num_bpc; @@ -2031,15 +1995,13 @@ intel_dp_compute_link_config(struct intel_encoder *encoder, limits.min_bpp = 6 * 3; limits.max_bpp = intel_dp_compute_bpp(intel_dp, pipe_config); - if (intel_dp_is_edp(intel_dp) && intel_dp->edp_dpcd[0] < DP_EDP_14) { + if (intel_dp_is_edp(intel_dp)) { /* * Use the maximum clock and number of lanes the eDP panel - * advertizes being capable of. The eDP 1.3 and earlier panels - * are generally designed to support only a single clock and - * lane configuration, and typically these values correspond to - * the native resolution of the panel. With eDP 1.4 rate select - * and DSC, this is decreasingly the case, and we need to be - * able to select less than maximum link config. + * advertizes being capable of. The panels are generally + * designed to support only a single clock and lane + * configuration, and typically these values correspond to the + * native resolution of the panel. */ limits.min_lane_count = limits.max_lane_count; limits.min_clock = limits.max_clock; @@ -2053,22 +2015,11 @@ intel_dp_compute_link_config(struct intel_encoder *encoder, intel_dp->common_rates[limits.max_clock], limits.max_bpp, adjusted_mode->crtc_clock); - if (intel_dp_is_edp(intel_dp)) - /* - * Optimize for fast and narrow. eDP 1.3 section 3.3 and eDP 1.4 - * section A.1: "It is recommended that the minimum number of - * lanes be used, using the minimum link rate allowed for that - * lane configuration." - * - * Note that we use the max clock and lane count for eDP 1.3 and - * earlier, and fast vs. wide is irrelevant. - */ - ret = intel_dp_compute_link_config_fast(intel_dp, pipe_config, - &limits); - else - /* Optimize for slow and wide. */ - ret = intel_dp_compute_link_config_wide(intel_dp, pipe_config, - &limits); + /* + * Optimize for slow and wide. This is the place to add alternative + * optimization policy. + */ + ret = intel_dp_compute_link_config_wide(intel_dp, pipe_config, &limits); /* enable compression if the mode doesn't fit available BW */ DRM_DEBUG_KMS("Force DSC en = %d\n", intel_dp->force_dsc_en); -- cgit v1.2.3 From ac71317e6be01812cc0c54d8be6d3c1139c8380b Mon Sep 17 00:00:00 2001 From: Marc Gonzalez Date: Wed, 10 Apr 2019 16:23:38 +0200 Subject: ASoC: wcd9335: Fix missing regmap requirement wcd9335.c: undefined reference to 'devm_regmap_add_irq_chip' Signed-off-by: Marc Gonzalez Signed-off-by: Mark Brown --- sound/soc/codecs/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/soc/codecs/Kconfig b/sound/soc/codecs/Kconfig index 419114edfd57..667fc1d59e18 100644 --- a/sound/soc/codecs/Kconfig +++ b/sound/soc/codecs/Kconfig @@ -1151,6 +1151,7 @@ config SND_SOC_WCD9335 tristate "WCD9335 Codec" depends on SLIMBUS select REGMAP_SLIMBUS + select REGMAP_IRQ help The WCD9335 is a standalone Hi-Fi audio CODEC IC, supports Qualcomm Technologies, Inc. (QTI) multimedia solutions, -- cgit v1.2.3 From f05badde4e20d2e0f8c39d07a6873b2bfb0754f8 Mon Sep 17 00:00:00 2001 From: Anup Patel Date: Fri, 5 Apr 2019 05:49:34 +0000 Subject: RISC-V: Fix Maximum Physical Memory 2GiB option for 64bit systems The Maximum Physical Memory 2GiB option for 64bit systems is currently broken because kernel hangs at boot-time when this option is enabled and the underlying system has more than 2GiB memory. This issue can be easily reproduced on SiFive Unleashed board where we have 8GiB of memory. This patch fixes above issue by removing unusable memory region in setup_bootmem(). Signed-off-by: Anup Patel Reviewed-by: Christoph Hellwig Signed-off-by: Palmer Dabbelt --- arch/riscv/mm/init.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c index 5fd8c922e1c2..bc7b77e34d09 100644 --- a/arch/riscv/mm/init.c +++ b/arch/riscv/mm/init.c @@ -121,6 +121,14 @@ void __init setup_bootmem(void) */ memblock_reserve(reg->base, vmlinux_end - reg->base); mem_size = min(reg->size, (phys_addr_t)-PAGE_OFFSET); + + /* + * Remove memblock from the end of usable area to the + * end of region + */ + if (reg->base + mem_size < end) + memblock_remove(reg->base + mem_size, + end - reg->base - mem_size); } } BUG_ON(mem_size == 0); -- cgit v1.2.3 From d737b25b1ae0540ba13cbd45ebb9b58a1d6d7f0d Mon Sep 17 00:00:00 2001 From: Kaike Wan Date: Wed, 10 Apr 2019 06:27:05 -0700 Subject: IB/hfi1: Do not flush send queue in the TID RDMA second leg When a QP is put into error state, the send queue will be flushed. This mechanism is implemented in both the first and the second leg of the send engine. Since the second leg is only responsible for data transactions in the KDETH space for the TID RDMA WRITE request, it should not perform the flushing of the send queue. This patch removes the flushing function of the second leg, but still keeps the bailing out of the QP if it is put into error state. Fixes: 70dcb2e3dc6a ("IB/hfi1: Add the TID second leg send packet builder") Reviewed-by: Mike Marciniszyn Signed-off-by: Kaike Wan Signed-off-by: Dennis Dalessandro Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hfi1/tid_rdma.c | 31 ++++++++----------------------- 1 file changed, 8 insertions(+), 23 deletions(-) diff --git a/drivers/infiniband/hw/hfi1/tid_rdma.c b/drivers/infiniband/hw/hfi1/tid_rdma.c index fdda33aca77f..43cbce7a19ea 100644 --- a/drivers/infiniband/hw/hfi1/tid_rdma.c +++ b/drivers/infiniband/hw/hfi1/tid_rdma.c @@ -5017,24 +5017,14 @@ int hfi1_make_tid_rdma_pkt(struct rvt_qp *qp, struct hfi1_pkt_state *ps) make_tid_rdma_ack(qp, ohdr, ps)) return 1; - if (!(ib_rvt_state_ops[qp->state] & RVT_PROCESS_SEND_OK)) { - if (!(ib_rvt_state_ops[qp->state] & RVT_FLUSH_SEND)) - goto bail; - /* We are in the error state, flush the work request. */ - if (qp->s_last == READ_ONCE(qp->s_head)) - goto bail; - /* If DMAs are in progress, we can't flush immediately. */ - if (iowait_sdma_pending(&priv->s_iowait)) { - qp->s_flags |= RVT_S_WAIT_DMA; - goto bail; - } - clear_ahg(qp); - wqe = rvt_get_swqe_ptr(qp, qp->s_last); - hfi1_trdma_send_complete(qp, wqe, qp->s_last != qp->s_acked ? - IB_WC_SUCCESS : IB_WC_WR_FLUSH_ERR); - /* will get called again */ - goto done_free_tx; - } + /* + * Bail out if we can't send data. + * Be reminded that this check must been done after the call to + * make_tid_rdma_ack() because the responding QP could be in + * RTR state where it can send TID RDMA ACK, not TID RDMA WRITE DATA. + */ + if (!(ib_rvt_state_ops[qp->state] & RVT_PROCESS_SEND_OK)) + goto bail; if (priv->s_flags & RVT_S_WAIT_ACK) goto bail; @@ -5144,11 +5134,6 @@ int hfi1_make_tid_rdma_pkt(struct rvt_qp *qp, struct hfi1_pkt_state *ps) hfi1_make_ruc_header(qp, ohdr, (opcode << 24), bth1, bth2, middle, ps); return 1; -done_free_tx: - hfi1_put_txreq(ps->s_txreq); - ps->s_txreq = NULL; - return 1; - bail: hfi1_put_txreq(ps->s_txreq); bail_no_tx: -- cgit v1.2.3 From b2b3a70cd9984fe39ed5aaa9ce596476051ce5de Mon Sep 17 00:00:00 2001 From: Hans Holmberg Date: Wed, 10 Apr 2019 19:56:43 +0200 Subject: lightnvm: pblk: fix crash in pblk_end_partial_read due to multipage bvecs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The introduction of multipage bio vectors broke pblk's partial read logic due to it not being prepared for multipage bio vectors. Use bio vector iterators instead of direct bio vector indexing. Fixes: 07173c3ec276 ("block: enable multipage bvecs") Reported-by: Klaus Jensen Signed-off-by: Hans Holmberg Updated description. Signed-off-by: Matias Bjørling Signed-off-by: Jens Axboe --- drivers/lightnvm/pblk-read.c | 50 +++++++++++++++++++++++++------------------- 1 file changed, 28 insertions(+), 22 deletions(-) diff --git a/drivers/lightnvm/pblk-read.c b/drivers/lightnvm/pblk-read.c index 3789185144da..0b7d5fb4548d 100644 --- a/drivers/lightnvm/pblk-read.c +++ b/drivers/lightnvm/pblk-read.c @@ -231,14 +231,14 @@ static void pblk_end_partial_read(struct nvm_rq *rqd) struct pblk_sec_meta *meta; struct bio *new_bio = rqd->bio; struct bio *bio = pr_ctx->orig_bio; - struct bio_vec src_bv, dst_bv; void *meta_list = rqd->meta_list; - int bio_init_idx = pr_ctx->bio_init_idx; unsigned long *read_bitmap = pr_ctx->bitmap; + struct bvec_iter orig_iter = BVEC_ITER_ALL_INIT; + struct bvec_iter new_iter = BVEC_ITER_ALL_INIT; int nr_secs = pr_ctx->orig_nr_secs; int nr_holes = nr_secs - bitmap_weight(read_bitmap, nr_secs); void *src_p, *dst_p; - int hole, i; + int bit, i; if (unlikely(nr_holes == 1)) { struct ppa_addr ppa; @@ -257,33 +257,39 @@ static void pblk_end_partial_read(struct nvm_rq *rqd) /* Fill the holes in the original bio */ i = 0; - hole = find_first_zero_bit(read_bitmap, nr_secs); - do { - struct pblk_line *line; + for (bit = 0; bit < nr_secs; bit++) { + if (!test_bit(bit, read_bitmap)) { + struct bio_vec dst_bv, src_bv; + struct pblk_line *line; - line = pblk_ppa_to_line(pblk, rqd->ppa_list[i]); - kref_put(&line->ref, pblk_line_put); + line = pblk_ppa_to_line(pblk, rqd->ppa_list[i]); + kref_put(&line->ref, pblk_line_put); - meta = pblk_get_meta(pblk, meta_list, hole); - meta->lba = cpu_to_le64(pr_ctx->lba_list_media[i]); + meta = pblk_get_meta(pblk, meta_list, bit); + meta->lba = cpu_to_le64(pr_ctx->lba_list_media[i]); - src_bv = new_bio->bi_io_vec[i++]; - dst_bv = bio->bi_io_vec[bio_init_idx + hole]; + dst_bv = bio_iter_iovec(bio, orig_iter); + src_bv = bio_iter_iovec(new_bio, new_iter); - src_p = kmap_atomic(src_bv.bv_page); - dst_p = kmap_atomic(dst_bv.bv_page); + src_p = kmap_atomic(src_bv.bv_page); + dst_p = kmap_atomic(dst_bv.bv_page); - memcpy(dst_p + dst_bv.bv_offset, - src_p + src_bv.bv_offset, - PBLK_EXPOSED_PAGE_SIZE); + memcpy(dst_p + dst_bv.bv_offset, + src_p + src_bv.bv_offset, + PBLK_EXPOSED_PAGE_SIZE); - kunmap_atomic(src_p); - kunmap_atomic(dst_p); + kunmap_atomic(src_p); + kunmap_atomic(dst_p); - mempool_free(src_bv.bv_page, &pblk->page_bio_pool); + flush_dcache_page(dst_bv.bv_page); + mempool_free(src_bv.bv_page, &pblk->page_bio_pool); - hole = find_next_zero_bit(read_bitmap, nr_secs, hole + 1); - } while (hole < nr_secs); + bio_advance_iter(new_bio, &new_iter, + PBLK_EXPOSED_PAGE_SIZE); + i++; + } + bio_advance_iter(bio, &orig_iter, PBLK_EXPOSED_PAGE_SIZE); + } bio_put(new_bio); kfree(pr_ctx); -- cgit v1.2.3 From d4d0e40977ac450f32f2db5e4d8e23c9d2578899 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Wed, 10 Apr 2019 06:58:12 +0000 Subject: mlxsw: spectrum_switchdev: Add MDB entries in prepare phase The driver cannot guarantee in the prepare phase that it will be able to write an MDB entry to the device. In case the driver returned success during the prepare phase, but then failed to add the entry in the commit phase, a WARNING [1] will be generated by the switchdev core. Fix this by doing the work in the prepare phase instead. [1] [ 358.544486] swp12s0: Commit of object (id=2) failed. [ 358.550061] WARNING: CPU: 0 PID: 30 at net/switchdev/switchdev.c:281 switchdev_port_obj_add_now+0x9b/0xe0 [ 358.560754] CPU: 0 PID: 30 Comm: kworker/0:1 Not tainted 5.0.0-custom-13382-gf2449babf221 #1350 [ 358.570472] Hardware name: Mellanox Technologies Ltd. MSN2100-CB2FO/SA001017, BIOS 5.6.5 06/07/2016 [ 358.580582] Workqueue: events switchdev_deferred_process_work [ 358.587001] RIP: 0010:switchdev_port_obj_add_now+0x9b/0xe0 ... [ 358.614109] RSP: 0018:ffffa6b900d6fe18 EFLAGS: 00010286 [ 358.619943] RAX: 0000000000000000 RBX: ffff8b00797ff000 RCX: 0000000000000000 [ 358.627912] RDX: ffff8b00b7a1d4c0 RSI: ffff8b00b7a152e8 RDI: ffff8b00b7a152e8 [ 358.635881] RBP: ffff8b005c3f5bc0 R08: 000000000000022b R09: 0000000000000000 [ 358.643850] R10: 0000000000000000 R11: ffffa6b900d6fcc8 R12: 0000000000000000 [ 358.651819] R13: dead000000000100 R14: ffff8b00b65a23c0 R15: 0ffff8b00b7a2200 [ 358.659790] FS: 0000000000000000(0000) GS:ffff8b00b7a00000(0000) knlGS:0000000000000000 [ 358.668820] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 358.675228] CR2: 00007f00aad90de0 CR3: 00000001ca80d000 CR4: 00000000001006f0 [ 358.683188] Call Trace: [ 358.685918] switchdev_port_obj_add_deferred+0x13/0x60 [ 358.691655] switchdev_deferred_process+0x6b/0xf0 [ 358.696907] switchdev_deferred_process_work+0xa/0x10 [ 358.702548] process_one_work+0x1f5/0x3f0 [ 358.707022] worker_thread+0x28/0x3c0 [ 358.711099] ? process_one_work+0x3f0/0x3f0 [ 358.715768] kthread+0x10d/0x130 [ 358.719369] ? __kthread_create_on_node+0x180/0x180 [ 358.724815] ret_from_fork+0x35/0x40 Fixes: 3a49b4fde2a1 ("mlxsw: Adding layer 2 multicast support") Signed-off-by: Ido Schimmel Reported-by: Alex Kushnarov Tested-by: Alex Kushnarov Acked-by: Jiri Pirko Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c index f6ce386c3036..50111f228d77 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c @@ -1630,7 +1630,7 @@ static int mlxsw_sp_port_mdb_add(struct mlxsw_sp_port *mlxsw_sp_port, u16 fid_index; int err = 0; - if (switchdev_trans_ph_prepare(trans)) + if (switchdev_trans_ph_commit(trans)) return 0; bridge_port = mlxsw_sp_bridge_port_find(mlxsw_sp->bridge, orig_dev); -- cgit v1.2.3 From a8c133b06183c529c51cd0d54eb57d6b7078370c Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Wed, 10 Apr 2019 06:58:13 +0000 Subject: mlxsw: core: Do not use WQ_MEM_RECLAIM for EMAD workqueue The EMAD workqueue is used to handle retransmission of EMAD packets that contain configuration data for the device's firmware. Given the workers need to allocate these packets and that the code is not called as part of memory reclaim path, remove the WQ_MEM_RECLAIM flag. Fixes: d965465b60ba ("mlxsw: core: Fix possible deadlock") Signed-off-by: Ido Schimmel Acked-by: Jiri Pirko Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlxsw/core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/core.c b/drivers/net/ethernet/mellanox/mlxsw/core.c index d23d53c0e284..91cd6fa42e9a 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/core.c +++ b/drivers/net/ethernet/mellanox/mlxsw/core.c @@ -568,7 +568,7 @@ static int mlxsw_emad_init(struct mlxsw_core *mlxsw_core) if (!(mlxsw_core->bus->features & MLXSW_BUS_F_TXRX)) return 0; - emad_wq = alloc_workqueue("mlxsw_core_emad", WQ_MEM_RECLAIM, 0); + emad_wq = alloc_workqueue("mlxsw_core_emad", 0, 0); if (!emad_wq) return -ENOMEM; mlxsw_core->emad_wq = emad_wq; -- cgit v1.2.3 From 4af0699782e2cc7d0d89db9eb6f8844dd3df82dc Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Wed, 10 Apr 2019 06:58:14 +0000 Subject: mlxsw: core: Do not use WQ_MEM_RECLAIM for mlxsw ordered workqueue The ordered workqueue is used to offload various objects such as routes and neighbours in the order they are notified. It should not be called as part of memory reclaim path, so remove the WQ_MEM_RECLAIM flag. This can also result in a warning [1], if a worker tries to flush a non-WQ_MEM_RECLAIM workqueue. [1] [97703.542861] workqueue: WQ_MEM_RECLAIM mlxsw_core_ordered:mlxsw_sp_router_fib6_event_work [mlxsw_spectrum] is flushing !WQ_MEM_RECLAIM events:rht_deferred_worker [97703.542884] WARNING: CPU: 1 PID: 32492 at kernel/workqueue.c:2605 check_flush_dependency+0xb5/0x130 ... [97703.542988] Hardware name: Mellanox Technologies Ltd. MSN3700C/VMOD0008, BIOS 5.11 10/10/2018 [97703.543049] Workqueue: mlxsw_core_ordered mlxsw_sp_router_fib6_event_work [mlxsw_spectrum] [97703.543061] RIP: 0010:check_flush_dependency+0xb5/0x130 ... [97703.543071] RSP: 0018:ffffb3f08137bc00 EFLAGS: 00010086 [97703.543076] RAX: 0000000000000000 RBX: ffff96e07740ae00 RCX: 0000000000000000 [97703.543080] RDX: 0000000000000094 RSI: ffffffff82dc1934 RDI: 0000000000000046 [97703.543084] RBP: ffffb3f08137bc20 R08: ffffffff82dc18a0 R09: 00000000000225c0 [97703.543087] R10: 0000000000000000 R11: 0000000000007eec R12: ffffffff816e4ee0 [97703.543091] R13: ffff96e06f6a5c00 R14: ffff96e077ba7700 R15: ffffffff812ab0c0 [97703.543097] FS: 0000000000000000(0000) GS:ffff96e077a80000(0000) knlGS:0000000000000000 [97703.543101] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [97703.543104] CR2: 00007f8cd135b280 CR3: 00000001e860e003 CR4: 00000000003606e0 [97703.543109] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [97703.543112] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [97703.543115] Call Trace: [97703.543129] __flush_work+0xbd/0x1e0 [97703.543137] ? __cancel_work_timer+0x136/0x1b0 [97703.543145] ? pwq_dec_nr_in_flight+0x49/0xa0 [97703.543154] __cancel_work_timer+0x136/0x1b0 [97703.543175] ? mlxsw_reg_trans_bulk_wait+0x145/0x400 [mlxsw_core] [97703.543184] cancel_work_sync+0x10/0x20 [97703.543191] rhashtable_free_and_destroy+0x23/0x140 [97703.543198] rhashtable_destroy+0xd/0x10 [97703.543254] mlxsw_sp_fib_destroy+0xb1/0xf0 [mlxsw_spectrum] [97703.543310] mlxsw_sp_vr_put+0xa8/0xc0 [mlxsw_spectrum] [97703.543364] mlxsw_sp_fib_node_put+0xbf/0x140 [mlxsw_spectrum] [97703.543418] ? mlxsw_sp_fib6_entry_destroy+0xe8/0x110 [mlxsw_spectrum] [97703.543475] mlxsw_sp_router_fib6_event_work+0x6cd/0x7f0 [mlxsw_spectrum] [97703.543484] process_one_work+0x1fd/0x400 [97703.543493] worker_thread+0x34/0x410 [97703.543500] kthread+0x121/0x140 [97703.543507] ? process_one_work+0x400/0x400 [97703.543512] ? kthread_park+0x90/0x90 [97703.543523] ret_from_fork+0x35/0x40 Fixes: a3832b31898f ("mlxsw: core: Create an ordered workqueue for FIB offload") Signed-off-by: Ido Schimmel Reported-by: Semion Lisyansky Acked-by: Jiri Pirko Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlxsw/core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/core.c b/drivers/net/ethernet/mellanox/mlxsw/core.c index 91cd6fa42e9a..2c0e3281bd7a 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/core.c +++ b/drivers/net/ethernet/mellanox/mlxsw/core.c @@ -1961,7 +1961,7 @@ static int __init mlxsw_core_module_init(void) mlxsw_wq = alloc_workqueue(mlxsw_core_driver_name, WQ_MEM_RECLAIM, 0); if (!mlxsw_wq) return -ENOMEM; - mlxsw_owq = alloc_ordered_workqueue("%s_ordered", WQ_MEM_RECLAIM, + mlxsw_owq = alloc_ordered_workqueue("%s_ordered", 0, mlxsw_core_driver_name); if (!mlxsw_owq) { err = -ENOMEM; -- cgit v1.2.3 From b442fed1b724af0de087912a5718ddde1b87acbb Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Wed, 10 Apr 2019 06:58:15 +0000 Subject: mlxsw: core: Do not use WQ_MEM_RECLAIM for mlxsw workqueue The workqueue is used to periodically update the networking stack about activity / statistics of various objects such as neighbours and TC actions. It should not be called as part of memory reclaim path, so remove the WQ_MEM_RECLAIM flag. Fixes: 3d5479e92087 ("mlxsw: core: Remove deprecated create_workqueue") Signed-off-by: Ido Schimmel Acked-by: Jiri Pirko Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlxsw/core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/core.c b/drivers/net/ethernet/mellanox/mlxsw/core.c index 2c0e3281bd7a..f26a4ca29363 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/core.c +++ b/drivers/net/ethernet/mellanox/mlxsw/core.c @@ -1958,7 +1958,7 @@ static int __init mlxsw_core_module_init(void) { int err; - mlxsw_wq = alloc_workqueue(mlxsw_core_driver_name, WQ_MEM_RECLAIM, 0); + mlxsw_wq = alloc_workqueue(mlxsw_core_driver_name, 0, 0); if (!mlxsw_wq) return -ENOMEM; mlxsw_owq = alloc_ordered_workqueue("%s_ordered", 0, -- cgit v1.2.3 From 972fae683cbad5cf348268e76abc6d55cfb3ba87 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Wed, 10 Apr 2019 06:58:15 +0000 Subject: mlxsw: spectrum_router: Do not check VRF MAC address Commit 74bc99397438 ("mlxsw: spectrum_router: Veto unsupported RIF MAC addresses") enabled the driver to veto router interface (RIF) MAC addresses that it cannot support. This check should only be performed for interfaces for which the driver actually configures a RIF. A VRF upper is not one of them, so ignore it. Without this patch it is not possible to set an IP address on the VRF device and use it as a loopback. Fixes: 74bc99397438 ("mlxsw: spectrum_router: Veto unsupported RIF MAC addresses") Signed-off-by: Ido Schimmel Reported-by: Alexander Petrovskiy Tested-by: Alexander Petrovskiy Acked-by: Jiri Pirko Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c index 52fed8c7bf1e..902e766a8ed3 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c @@ -6781,7 +6781,7 @@ static int mlxsw_sp_router_port_check_rif_addr(struct mlxsw_sp *mlxsw_sp, /* A RIF is not created for macvlan netdevs. Their MAC is used to * populate the FDB */ - if (netif_is_macvlan(dev)) + if (netif_is_macvlan(dev) || netif_is_l3_master(dev)) return 0; for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS); i++) { -- cgit v1.2.3 From 7052e2436373cc2c46981e165d1cbc5023f20dd7 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Wed, 10 Apr 2019 06:58:16 +0000 Subject: selftests: mlxsw: Test VRF MAC vetoing Test that it is possible to set an IP address on a VRF and that it is not vetoed. Signed-off-by: Ido Schimmel Acked-by: Jiri Pirko Signed-off-by: David S. Miller --- .../testing/selftests/drivers/net/mlxsw/rtnetlink.sh | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/tools/testing/selftests/drivers/net/mlxsw/rtnetlink.sh b/tools/testing/selftests/drivers/net/mlxsw/rtnetlink.sh index c4cf6e6d800e..a6c196c8534c 100755 --- a/tools/testing/selftests/drivers/net/mlxsw/rtnetlink.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/rtnetlink.sh @@ -11,6 +11,7 @@ lib_dir=$(dirname $0)/../../../net/forwarding ALL_TESTS=" rif_set_addr_test + rif_vrf_set_addr_test rif_inherit_bridge_addr_test rif_non_inherit_bridge_addr_test vlan_interface_deletion_test @@ -98,6 +99,25 @@ rif_set_addr_test() ip link set dev $swp1 addr $swp1_mac } +rif_vrf_set_addr_test() +{ + # Test that it is possible to set an IP address on a VRF upper despite + # its random MAC address. + RET=0 + + ip link add name vrf-test type vrf table 10 + ip link set dev $swp1 master vrf-test + + ip -4 address add 192.0.2.1/24 dev vrf-test + check_err $? "failed to set IPv4 address on VRF" + ip -6 address add 2001:db8:1::1/64 dev vrf-test + check_err $? "failed to set IPv6 address on VRF" + + log_test "RIF - setting IP address on VRF" + + ip link del dev vrf-test +} + rif_inherit_bridge_addr_test() { RET=0 -- cgit v1.2.3 From d5949d92c29ce147a9cb9e21fcf8ad7c1ff327b1 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Wed, 10 Apr 2019 06:58:17 +0000 Subject: mlxsw: spectrum_buffers: Add a multicast pool for Spectrum-2 In Spectrum-1, when a multicast packet is admitted to the shared buffer it increases the quotas of all the ports and {port, TC} to which it is forwarded to. The above means that multicast packets are accounted multiple times in the shared buffer and can therefore cause the associated shared buffer pool to fill up very quickly. To work around this issue, commit e83c045e53d7 ("mlxsw: spectrum_buffers: Configure MC pool") added a dedicated multicast pool in which multicast packets are accounted. The issue is not present in Spectrum-2, but in order to be backward compatible with Spectrum-1, its default behavior is to allow a multicast packet to increase multiple egress quotas instead of one. Until the new (non-backward compatible) mode is supported, configure a dedicated multicast pool as in Spectrum-1. Fixes: fe099bf682ab ("mlxsw: spectrum_buffers: Add Spectrum-2 shared buffer configuration") Signed-off-by: Ido Schimmel Reviewed-by: Petr Machata Acked-by: Jiri Pirko Signed-off-by: David S. Miller --- .../net/ethernet/mellanox/mlxsw/spectrum_buffers.c | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_buffers.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_buffers.c index 9a79b5e11597..d633bef5f105 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_buffers.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_buffers.c @@ -70,6 +70,7 @@ static const struct mlxsw_sp_sb_pool_des mlxsw_sp2_sb_pool_dess[] = { {MLXSW_REG_SBXX_DIR_EGRESS, 1}, {MLXSW_REG_SBXX_DIR_EGRESS, 2}, {MLXSW_REG_SBXX_DIR_EGRESS, 3}, + {MLXSW_REG_SBXX_DIR_EGRESS, 15}, }; #define MLXSW_SP_SB_ING_TC_COUNT 8 @@ -428,6 +429,7 @@ static const struct mlxsw_sp_sb_pr mlxsw_sp2_sb_prs[] = { MLXSW_SP_SB_PR(MLXSW_REG_SBPR_MODE_STATIC, 0), MLXSW_SP_SB_PR(MLXSW_REG_SBPR_MODE_STATIC, 0), MLXSW_SP_SB_PR(MLXSW_REG_SBPR_MODE_STATIC, 0), + MLXSW_SP_SB_PR(MLXSW_REG_SBPR_MODE_STATIC, MLXSW_SP_SB_INFI), }; static int mlxsw_sp_sb_prs_init(struct mlxsw_sp *mlxsw_sp, @@ -517,14 +519,14 @@ static const struct mlxsw_sp_sb_cm mlxsw_sp2_sb_cms_egress[] = { MLXSW_SP_SB_CM(0, 7, 4), MLXSW_SP_SB_CM(0, 7, 4), MLXSW_SP_SB_CM(0, 7, 4), - MLXSW_SP_SB_CM(0, 7, 4), - MLXSW_SP_SB_CM(0, 7, 4), - MLXSW_SP_SB_CM(0, 7, 4), - MLXSW_SP_SB_CM(0, 7, 4), - MLXSW_SP_SB_CM(0, 7, 4), - MLXSW_SP_SB_CM(0, 7, 4), - MLXSW_SP_SB_CM(0, 7, 4), - MLXSW_SP_SB_CM(0, 7, 4), + MLXSW_SP_SB_CM(0, MLXSW_SP_SB_INFI, 8), + MLXSW_SP_SB_CM(0, MLXSW_SP_SB_INFI, 8), + MLXSW_SP_SB_CM(0, MLXSW_SP_SB_INFI, 8), + MLXSW_SP_SB_CM(0, MLXSW_SP_SB_INFI, 8), + MLXSW_SP_SB_CM(0, MLXSW_SP_SB_INFI, 8), + MLXSW_SP_SB_CM(0, MLXSW_SP_SB_INFI, 8), + MLXSW_SP_SB_CM(0, MLXSW_SP_SB_INFI, 8), + MLXSW_SP_SB_CM(0, MLXSW_SP_SB_INFI, 8), MLXSW_SP_SB_CM(1, 0xff, 4), }; @@ -671,6 +673,7 @@ static const struct mlxsw_sp_sb_pm mlxsw_sp2_sb_pms[] = { MLXSW_SP_SB_PM(0, 0), MLXSW_SP_SB_PM(0, 0), MLXSW_SP_SB_PM(0, 0), + MLXSW_SP_SB_PM(10000, 90000), }; static int mlxsw_sp_port_sb_pms_init(struct mlxsw_sp_port *mlxsw_sp_port) -- cgit v1.2.3 From 1bfb97b9a51901103677a4d1a2386d223c15bc71 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Tue, 12 Mar 2019 10:50:05 -0700 Subject: MAINTAINERS: BMIPS: Add internal Broadcom mailing list There is a patchwork instance behind bcm-kernel-feedback-list that is helpful to track submissions, add this list for the MIPS BMIPS entry. Signed-off-by: Florian Fainelli Signed-off-by: Paul Burton Cc: linux-mips@linux-mips.org --- MAINTAINERS | 1 + 1 file changed, 1 insertion(+) diff --git a/MAINTAINERS b/MAINTAINERS index 2359e12e4c41..92df0eb1aa6c 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -3121,6 +3121,7 @@ F: drivers/cpufreq/bmips-cpufreq.c BROADCOM BMIPS MIPS ARCHITECTURE M: Kevin Cernekee M: Florian Fainelli +L: bcm-kernel-feedback-list@broadcom.com L: linux-mips@vger.kernel.org T: git git://github.com/broadcom/stblinux.git S: Maintained -- cgit v1.2.3 From b66b7bd2bdc1a74c46a0a470f9ac19629320d212 Mon Sep 17 00:00:00 2001 From: Thomas Falcon Date: Wed, 10 Apr 2019 11:06:59 -0500 Subject: ibmvnic: Enable GRO Enable Generic Receive Offload in the ibmvnic driver. Signed-off-by: Thomas Falcon Signed-off-by: David S. Miller --- drivers/net/ethernet/ibm/ibmvnic.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c index 51cfe95f3e24..cc22c5351513 100644 --- a/drivers/net/ethernet/ibm/ibmvnic.c +++ b/drivers/net/ethernet/ibm/ibmvnic.c @@ -3837,7 +3837,7 @@ static void handle_query_ip_offload_rsp(struct ibmvnic_adapter *adapter) adapter->ip_offload_ctrl.large_rx_ipv4 = 0; adapter->ip_offload_ctrl.large_rx_ipv6 = 0; - adapter->netdev->features = NETIF_F_SG | NETIF_F_GSO; + adapter->netdev->hw_features = NETIF_F_SG | NETIF_F_GSO | NETIF_F_GRO; if (buf->tcp_ipv4_chksum || buf->udp_ipv4_chksum) adapter->netdev->features |= NETIF_F_IP_CSUM; -- cgit v1.2.3 From dde746a35f8b7da4b9515dd3dc4708a9926fbd65 Mon Sep 17 00:00:00 2001 From: Thomas Falcon Date: Wed, 10 Apr 2019 11:07:00 -0500 Subject: ibmvnic: Fix netdev feature clobbering during a reset While determining offload capabilities of backing hardware during a device reset, the driver is clobbering current feature settings. Update hw_features on reset instead of features unless a feature is enabled that is no longer supported on the current backing device. Also enable features that were not supported prior to the reset but were previously enabled or requested by the user. This can occur if the reset is the result of a carrier change, such as a device failover or partition migration. Signed-off-by: Thomas Falcon Signed-off-by: David S. Miller --- drivers/net/ethernet/ibm/ibmvnic.c | 30 ++++++++++++++++++++++++------ 1 file changed, 24 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c index cc22c5351513..3dfb2d131eb7 100644 --- a/drivers/net/ethernet/ibm/ibmvnic.c +++ b/drivers/net/ethernet/ibm/ibmvnic.c @@ -3762,6 +3762,7 @@ static void handle_query_ip_offload_rsp(struct ibmvnic_adapter *adapter) { struct device *dev = &adapter->vdev->dev; struct ibmvnic_query_ip_offload_buffer *buf = &adapter->ip_offload_buf; + netdev_features_t old_hw_features = 0; union ibmvnic_crq crq; int i; @@ -3837,24 +3838,41 @@ static void handle_query_ip_offload_rsp(struct ibmvnic_adapter *adapter) adapter->ip_offload_ctrl.large_rx_ipv4 = 0; adapter->ip_offload_ctrl.large_rx_ipv6 = 0; + if (adapter->state != VNIC_PROBING) { + old_hw_features = adapter->netdev->hw_features; + adapter->netdev->hw_features = 0; + } + adapter->netdev->hw_features = NETIF_F_SG | NETIF_F_GSO | NETIF_F_GRO; if (buf->tcp_ipv4_chksum || buf->udp_ipv4_chksum) - adapter->netdev->features |= NETIF_F_IP_CSUM; + adapter->netdev->hw_features |= NETIF_F_IP_CSUM; if (buf->tcp_ipv6_chksum || buf->udp_ipv6_chksum) - adapter->netdev->features |= NETIF_F_IPV6_CSUM; + adapter->netdev->hw_features |= NETIF_F_IPV6_CSUM; if ((adapter->netdev->features & (NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM))) - adapter->netdev->features |= NETIF_F_RXCSUM; + adapter->netdev->hw_features |= NETIF_F_RXCSUM; if (buf->large_tx_ipv4) - adapter->netdev->features |= NETIF_F_TSO; + adapter->netdev->hw_features |= NETIF_F_TSO; if (buf->large_tx_ipv6) - adapter->netdev->features |= NETIF_F_TSO6; + adapter->netdev->hw_features |= NETIF_F_TSO6; - adapter->netdev->hw_features |= adapter->netdev->features; + if (adapter->state == VNIC_PROBING) { + adapter->netdev->features |= adapter->netdev->hw_features; + } else if (old_hw_features != adapter->netdev->hw_features) { + netdev_features_t tmp = 0; + + /* disable features no longer supported */ + adapter->netdev->features &= adapter->netdev->hw_features; + /* turn on features now supported if previously enabled */ + tmp = (old_hw_features ^ adapter->netdev->hw_features) & + adapter->netdev->hw_features; + adapter->netdev->features |= + tmp & adapter->netdev->wanted_features; + } memset(&crq, 0, sizeof(crq)); crq.control_ip_offload.first = IBMVNIC_CRQ_CMD; -- cgit v1.2.3 From 2a29e9f6b9b499f1fc5f4a48220dc3f4428499f9 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 3 Apr 2019 21:34:34 +0200 Subject: sparc64/pci_sun4v: fix ATU checks for large DMA masks Now that we allow drivers to always need to set larger than required DMA masks we need to be a little more careful in the sun4v PCI iommu driver to chose when to select the ATU support - a larger DMA mask can be set even when the platform does not support ATU, so we always have to check if it is avaiable before using it. Add a little helper for that and use it in all the places where we make ATU usage decisions based on the DMA mask. Fixes: 24132a419c68 ("sparc64/pci_sun4v: allow large DMA masks") Reported-by: Meelis Roos Signed-off-by: Christoph Hellwig Tested-by: Meelis Roos Acked-by: David S. Miller --- arch/sparc/kernel/pci_sun4v.c | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/arch/sparc/kernel/pci_sun4v.c b/arch/sparc/kernel/pci_sun4v.c index a8af6023c126..14b93c5564e3 100644 --- a/arch/sparc/kernel/pci_sun4v.c +++ b/arch/sparc/kernel/pci_sun4v.c @@ -73,6 +73,11 @@ static inline void iommu_batch_start(struct device *dev, unsigned long prot, uns p->npages = 0; } +static inline bool iommu_use_atu(struct iommu *iommu, u64 mask) +{ + return iommu->atu && mask > DMA_BIT_MASK(32); +} + /* Interrupts must be disabled. */ static long iommu_batch_flush(struct iommu_batch *p, u64 mask) { @@ -92,7 +97,7 @@ static long iommu_batch_flush(struct iommu_batch *p, u64 mask) prot &= (HV_PCI_MAP_ATTR_READ | HV_PCI_MAP_ATTR_WRITE); while (npages != 0) { - if (mask <= DMA_BIT_MASK(32) || !pbm->iommu->atu) { + if (!iommu_use_atu(pbm->iommu, mask)) { num = pci_sun4v_iommu_map(devhandle, HV_PCI_TSBID(0, entry), npages, @@ -179,7 +184,6 @@ static void *dma_4v_alloc_coherent(struct device *dev, size_t size, unsigned long flags, order, first_page, npages, n; unsigned long prot = 0; struct iommu *iommu; - struct atu *atu; struct iommu_map_table *tbl; struct page *page; void *ret; @@ -205,13 +209,11 @@ static void *dma_4v_alloc_coherent(struct device *dev, size_t size, memset((char *)first_page, 0, PAGE_SIZE << order); iommu = dev->archdata.iommu; - atu = iommu->atu; - mask = dev->coherent_dma_mask; - if (mask <= DMA_BIT_MASK(32) || !atu) + if (!iommu_use_atu(iommu, mask)) tbl = &iommu->tbl; else - tbl = &atu->tbl; + tbl = &iommu->atu->tbl; entry = iommu_tbl_range_alloc(dev, tbl, npages, NULL, (unsigned long)(-1), 0); @@ -333,7 +335,7 @@ static void dma_4v_free_coherent(struct device *dev, size_t size, void *cpu, atu = iommu->atu; devhandle = pbm->devhandle; - if (dvma <= DMA_BIT_MASK(32)) { + if (!iommu_use_atu(iommu, dvma)) { tbl = &iommu->tbl; iotsb_num = 0; /* we don't care for legacy iommu */ } else { @@ -374,7 +376,7 @@ static dma_addr_t dma_4v_map_page(struct device *dev, struct page *page, npages >>= IO_PAGE_SHIFT; mask = *dev->dma_mask; - if (mask <= DMA_BIT_MASK(32)) + if (!iommu_use_atu(iommu, mask)) tbl = &iommu->tbl; else tbl = &atu->tbl; @@ -510,7 +512,7 @@ static int dma_4v_map_sg(struct device *dev, struct scatterlist *sglist, IO_PAGE_SIZE) >> IO_PAGE_SHIFT; mask = *dev->dma_mask; - if (mask <= DMA_BIT_MASK(32)) + if (!iommu_use_atu(iommu, mask)) tbl = &iommu->tbl; else tbl = &atu->tbl; -- cgit v1.2.3 From 5a03bc73abed6ae196c15e9950afde19d48be12c Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 10 Apr 2019 11:04:30 -0700 Subject: net/tls: fix the IV leaks Commit f66de3ee2c16 ("net/tls: Split conf to rx + tx") made freeing of IV and record sequence number conditional to SW path only, but commit e8f69799810c ("net/tls: Add generic NIC offload infrastructure") also allocates that state for the device offload configuration. Remember to free it. Fixes: e8f69799810c ("net/tls: Add generic NIC offload infrastructure") Signed-off-by: Jakub Kicinski Reviewed-by: Dirk van der Merwe Reviewed-by: Simon Horman Signed-off-by: David S. Miller --- net/tls/tls_device.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/net/tls/tls_device.c b/net/tls/tls_device.c index 135a7ee9db03..38b3b2a9835a 100644 --- a/net/tls/tls_device.c +++ b/net/tls/tls_device.c @@ -52,8 +52,11 @@ static DEFINE_SPINLOCK(tls_device_lock); static void tls_device_free_ctx(struct tls_context *ctx) { - if (ctx->tx_conf == TLS_HW) + if (ctx->tx_conf == TLS_HW) { kfree(tls_offload_ctx_tx(ctx)); + kfree(ctx->tx.rec_seq); + kfree(ctx->tx.iv); + } if (ctx->rx_conf == TLS_HW) kfree(tls_offload_ctx_rx(ctx)); -- cgit v1.2.3 From 35b71a34ada62c9573847a324bf06a133fe11b11 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 10 Apr 2019 11:04:31 -0700 Subject: net/tls: don't leak partially sent record in device mode David reports that tls triggers warnings related to sk->sk_forward_alloc not being zero at destruction time: WARNING: CPU: 5 PID: 6831 at net/core/stream.c:206 sk_stream_kill_queues+0x103/0x110 WARNING: CPU: 5 PID: 6831 at net/ipv4/af_inet.c:160 inet_sock_destruct+0x15b/0x170 When sender fills up the write buffer and dies from SIGPIPE. This is due to the device implementation not cleaning up the partially_sent_record. This is because commit a42055e8d2c3 ("net/tls: Add support for async encryption of records for performance") moved the partial record cleanup to the SW-only path. Fixes: a42055e8d2c3 ("net/tls: Add support for async encryption of records for performance") Reported-by: David Beckett Signed-off-by: Jakub Kicinski Reviewed-by: Dirk van der Merwe Reviewed-by: Simon Horman Signed-off-by: David S. Miller --- include/net/tls.h | 2 ++ net/tls/tls_device.c | 7 +++++++ net/tls/tls_main.c | 22 ++++++++++++++++++++++ net/tls/tls_sw.c | 15 +-------------- 4 files changed, 32 insertions(+), 14 deletions(-) diff --git a/include/net/tls.h b/include/net/tls.h index a5a938583295..c7f7dc344e73 100644 --- a/include/net/tls.h +++ b/include/net/tls.h @@ -307,6 +307,7 @@ int tls_device_sendmsg(struct sock *sk, struct msghdr *msg, size_t size); int tls_device_sendpage(struct sock *sk, struct page *page, int offset, size_t size, int flags); void tls_device_sk_destruct(struct sock *sk); +void tls_device_free_resources_tx(struct sock *sk); void tls_device_init(void); void tls_device_cleanup(void); int tls_tx_records(struct sock *sk, int flags); @@ -330,6 +331,7 @@ int tls_push_sg(struct sock *sk, struct tls_context *ctx, int flags); int tls_push_partial_record(struct sock *sk, struct tls_context *ctx, int flags); +bool tls_free_partial_record(struct sock *sk, struct tls_context *ctx); static inline struct tls_msg *tls_msg(struct sk_buff *skb) { diff --git a/net/tls/tls_device.c b/net/tls/tls_device.c index 38b3b2a9835a..9f3bdbc1e593 100644 --- a/net/tls/tls_device.c +++ b/net/tls/tls_device.c @@ -219,6 +219,13 @@ void tls_device_sk_destruct(struct sock *sk) } EXPORT_SYMBOL(tls_device_sk_destruct); +void tls_device_free_resources_tx(struct sock *sk) +{ + struct tls_context *tls_ctx = tls_get_ctx(sk); + + tls_free_partial_record(sk, tls_ctx); +} + static void tls_append_frag(struct tls_record_info *record, struct page_frag *pfrag, int size) diff --git a/net/tls/tls_main.c b/net/tls/tls_main.c index df921a2904b9..a3cca1ef0098 100644 --- a/net/tls/tls_main.c +++ b/net/tls/tls_main.c @@ -208,6 +208,26 @@ int tls_push_partial_record(struct sock *sk, struct tls_context *ctx, return tls_push_sg(sk, ctx, sg, offset, flags); } +bool tls_free_partial_record(struct sock *sk, struct tls_context *ctx) +{ + struct scatterlist *sg; + + sg = ctx->partially_sent_record; + if (!sg) + return false; + + while (1) { + put_page(sg_page(sg)); + sk_mem_uncharge(sk, sg->length); + + if (sg_is_last(sg)) + break; + sg++; + } + ctx->partially_sent_record = NULL; + return true; +} + static void tls_write_space(struct sock *sk) { struct tls_context *ctx = tls_get_ctx(sk); @@ -267,6 +287,8 @@ static void tls_sk_proto_close(struct sock *sk, long timeout) kfree(ctx->tx.rec_seq); kfree(ctx->tx.iv); tls_sw_free_resources_tx(sk); + } else if (ctx->tx_conf == TLS_HW) { + tls_device_free_resources_tx(sk); } if (ctx->rx_conf == TLS_SW) { diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c index 20b191227969..b50ced862f6f 100644 --- a/net/tls/tls_sw.c +++ b/net/tls/tls_sw.c @@ -2052,20 +2052,7 @@ void tls_sw_free_resources_tx(struct sock *sk) /* Free up un-sent records in tx_list. First, free * the partially sent record if any at head of tx_list. */ - if (tls_ctx->partially_sent_record) { - struct scatterlist *sg = tls_ctx->partially_sent_record; - - while (1) { - put_page(sg_page(sg)); - sk_mem_uncharge(sk, sg->length); - - if (sg_is_last(sg)) - break; - sg++; - } - - tls_ctx->partially_sent_record = NULL; - + if (tls_free_partial_record(sk, tls_ctx)) { rec = list_first_entry(&ctx->tx_list, struct tls_rec, list); list_del(&rec->list); -- cgit v1.2.3 From 4a9c2e3746e6151fd5d077259d79ce9ca86d47d7 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 10 Apr 2019 11:04:32 -0700 Subject: net: strparser: partially revert "strparser: Call skb_unclone conditionally" This reverts the first part of commit 4e485d06bb8c ("strparser: Call skb_unclone conditionally"). To build a message with multiple fragments we need our own root of frag_list. We can't simply use the frag_list of orig_skb, because it will lead to linking all orig_skbs together creating very long frag chains, and causing stack overflow on kfree_skb() (which is called recursively on the frag_lists). BUG: stack guard page was hit at 00000000d40fad41 (stack is 0000000029dde9f4..000000008cce03d5) kernel stack overflow (double-fault): 0000 [#1] PREEMPT SMP RIP: 0010:free_one_page+0x2b/0x490 Call Trace: __free_pages_ok+0x143/0x2c0 skb_release_data+0x8e/0x140 ? skb_release_data+0xad/0x140 kfree_skb+0x32/0xb0 [...] skb_release_data+0xad/0x140 ? skb_release_data+0xad/0x140 kfree_skb+0x32/0xb0 skb_release_data+0xad/0x140 ? skb_release_data+0xad/0x140 kfree_skb+0x32/0xb0 skb_release_data+0xad/0x140 ? skb_release_data+0xad/0x140 kfree_skb+0x32/0xb0 skb_release_data+0xad/0x140 ? skb_release_data+0xad/0x140 kfree_skb+0x32/0xb0 skb_release_data+0xad/0x140 __kfree_skb+0xe/0x20 tcp_disconnect+0xd6/0x4d0 tcp_close+0xf4/0x430 ? tcp_check_oom+0xf0/0xf0 tls_sk_proto_close+0xe4/0x1e0 [tls] inet_release+0x36/0x60 __sock_release+0x37/0xa0 sock_close+0x11/0x20 __fput+0xa2/0x1d0 task_work_run+0x89/0xb0 exit_to_usermode_loop+0x9a/0xa0 do_syscall_64+0xc0/0xf0 entry_SYSCALL_64_after_hwframe+0x44/0xa9 Let's leave the second unclone conditional, as I'm not entirely sure what is its purpose :) Fixes: 4e485d06bb8c ("strparser: Call skb_unclone conditionally") Signed-off-by: Jakub Kicinski Reviewed-by: Dirk van der Merwe Reviewed-by: Eric Dumazet Signed-off-by: David S. Miller --- net/strparser/strparser.c | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/net/strparser/strparser.c b/net/strparser/strparser.c index 860dcfb95ee4..fa6c977b4c41 100644 --- a/net/strparser/strparser.c +++ b/net/strparser/strparser.c @@ -140,13 +140,11 @@ static int __strp_recv(read_descriptor_t *desc, struct sk_buff *orig_skb, /* We are going to append to the frags_list of head. * Need to unshare the frag_list. */ - if (skb_has_frag_list(head)) { - err = skb_unclone(head, GFP_ATOMIC); - if (err) { - STRP_STATS_INCR(strp->stats.mem_fail); - desc->error = err; - return 0; - } + err = skb_unclone(head, GFP_ATOMIC); + if (err) { + STRP_STATS_INCR(strp->stats.mem_fail); + desc->error = err; + return 0; } if (unlikely(skb_shinfo(head)->frag_list)) { -- cgit v1.2.3 From 3943af9d01e94330d0cfac6fccdbc829aad50c92 Mon Sep 17 00:00:00 2001 From: Sergey Miroshnichenko Date: Tue, 12 Mar 2019 15:05:48 +0300 Subject: PCI: pciehp: Ignore Link State Changes after powering off a slot During a safe hot remove, the OS powers off the slot, which may cause a Data Link Layer State Changed event. The slot has already been set to OFF_STATE, so that event results in re-enabling the device, making it impossible to safely remove it. Clear out the Presence Detect Changed and Data Link Layer State Changed events when the disabled slot has settled down. It is still possible to re-enable the device if it remains in the slot after pressing the Attention Button by pressing it again. Fixes the problem that Micah reported below: an NVMe drive power button may not actually turn off the drive. Link: https://bugzilla.kernel.org/show_bug.cgi?id=203237 Reported-by: Micah Parrish Tested-by: Micah Parrish Signed-off-by: Sergey Miroshnichenko [bhelgaas: changelog, add bugzilla URL] Signed-off-by: Bjorn Helgaas Reviewed-by: Lukas Wunner Cc: stable@vger.kernel.org # v4.19+ --- drivers/pci/hotplug/pciehp_ctrl.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/pci/hotplug/pciehp_ctrl.c b/drivers/pci/hotplug/pciehp_ctrl.c index 3f3df4c29f6e..905282a8ddaa 100644 --- a/drivers/pci/hotplug/pciehp_ctrl.c +++ b/drivers/pci/hotplug/pciehp_ctrl.c @@ -115,6 +115,10 @@ static void remove_board(struct controller *ctrl, bool safe_removal) * removed from the slot/adapter. */ msleep(1000); + + /* Ignore link or presence changes caused by power off */ + atomic_and(~(PCI_EXP_SLTSTA_DLLSC | PCI_EXP_SLTSTA_PDC), + &ctrl->pending_events); } /* turn off Green LED */ -- cgit v1.2.3 From a3761c3c91209b58b6f33bf69dd8bb8ec0c9d925 Mon Sep 17 00:00:00 2001 From: Jérôme Glisse Date: Wed, 10 Apr 2019 16:27:51 -0400 Subject: block: do not leak memory in bio_copy_user_iov() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When bio_add_pc_page() fails in bio_copy_user_iov() we should free the page we just allocated otherwise we are leaking it. Cc: linux-block@vger.kernel.org Cc: Linus Torvalds Cc: stable@vger.kernel.org Reviewed-by: Chaitanya Kulkarni Signed-off-by: Jérôme Glisse Signed-off-by: Jens Axboe --- block/bio.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/block/bio.c b/block/bio.c index b64cedc7f87c..716510ecd7ff 100644 --- a/block/bio.c +++ b/block/bio.c @@ -1298,8 +1298,11 @@ struct bio *bio_copy_user_iov(struct request_queue *q, } } - if (bio_add_pc_page(q, bio, page, bytes, offset) < bytes) + if (bio_add_pc_page(q, bio, page, bytes, offset) < bytes) { + if (!map_data) + __free_page(page); break; + } len -= bytes; offset = 0; -- cgit v1.2.3 From 7c2e07130090ae001a97a6b65597830d6815e93e Mon Sep 17 00:00:00 2001 From: David Müller Date: Mon, 8 Apr 2019 15:33:54 +0200 Subject: clk: x86: Add system specific quirk to mark clocks as critical MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Since commit 648e921888ad ("clk: x86: Stop marking clocks as CLK_IS_CRITICAL"), the pmc_plt_clocks of the Bay Trail SoC are unconditionally gated off. Unfortunately this will break systems where these clocks are used for external purposes beyond the kernel's knowledge. Fix it by implementing a system specific quirk to mark the necessary pmc_plt_clks as critical. Fixes: 648e921888ad ("clk: x86: Stop marking clocks as CLK_IS_CRITICAL") Signed-off-by: David Müller Signed-off-by: Hans de Goede Reviewed-by: Andy Shevchenko Signed-off-by: Stephen Boyd --- drivers/clk/x86/clk-pmc-atom.c | 14 +++++++++++--- drivers/platform/x86/pmc_atom.c | 21 +++++++++++++++++++++ include/linux/platform_data/x86/clk-pmc-atom.h | 3 +++ 3 files changed, 35 insertions(+), 3 deletions(-) diff --git a/drivers/clk/x86/clk-pmc-atom.c b/drivers/clk/x86/clk-pmc-atom.c index d977193842df..19174835693b 100644 --- a/drivers/clk/x86/clk-pmc-atom.c +++ b/drivers/clk/x86/clk-pmc-atom.c @@ -165,7 +165,7 @@ static const struct clk_ops plt_clk_ops = { }; static struct clk_plt *plt_clk_register(struct platform_device *pdev, int id, - void __iomem *base, + const struct pmc_clk_data *pmc_data, const char **parent_names, int num_parents) { @@ -184,9 +184,17 @@ static struct clk_plt *plt_clk_register(struct platform_device *pdev, int id, init.num_parents = num_parents; pclk->hw.init = &init; - pclk->reg = base + PMC_CLK_CTL_OFFSET + id * PMC_CLK_CTL_SIZE; + pclk->reg = pmc_data->base + PMC_CLK_CTL_OFFSET + id * PMC_CLK_CTL_SIZE; spin_lock_init(&pclk->lock); + /* + * On some systems, the pmc_plt_clocks already enabled by the + * firmware are being marked as critical to avoid them being + * gated by the clock framework. + */ + if (pmc_data->critical && plt_clk_is_enabled(&pclk->hw)) + init.flags |= CLK_IS_CRITICAL; + ret = devm_clk_hw_register(&pdev->dev, &pclk->hw); if (ret) { pclk = ERR_PTR(ret); @@ -332,7 +340,7 @@ static int plt_clk_probe(struct platform_device *pdev) return PTR_ERR(parent_names); for (i = 0; i < PMC_CLK_NUM; i++) { - data->clks[i] = plt_clk_register(pdev, i, pmc_data->base, + data->clks[i] = plt_clk_register(pdev, i, pmc_data, parent_names, data->nparents); if (IS_ERR(data->clks[i])) { err = PTR_ERR(data->clks[i]); diff --git a/drivers/platform/x86/pmc_atom.c b/drivers/platform/x86/pmc_atom.c index 8f018b3f3cd4..eaec2d306481 100644 --- a/drivers/platform/x86/pmc_atom.c +++ b/drivers/platform/x86/pmc_atom.c @@ -17,6 +17,7 @@ #include #include +#include #include #include #include @@ -391,11 +392,27 @@ static int pmc_dbgfs_register(struct pmc_dev *pmc) } #endif /* CONFIG_DEBUG_FS */ +/* + * Some systems need one or more of their pmc_plt_clks to be + * marked as critical. + */ +static const struct dmi_system_id critclk_systems[] __initconst = { + { + .ident = "MPL CEC1x", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "MPL AG"), + DMI_MATCH(DMI_PRODUCT_NAME, "CEC10 Family"), + }, + }, + { /*sentinel*/ } +}; + static int pmc_setup_clks(struct pci_dev *pdev, void __iomem *pmc_regmap, const struct pmc_data *pmc_data) { struct platform_device *clkdev; struct pmc_clk_data *clk_data; + const struct dmi_system_id *d = dmi_first_match(critclk_systems); clk_data = kzalloc(sizeof(*clk_data), GFP_KERNEL); if (!clk_data) @@ -403,6 +420,10 @@ static int pmc_setup_clks(struct pci_dev *pdev, void __iomem *pmc_regmap, clk_data->base = pmc_regmap; /* offset is added by client */ clk_data->clks = pmc_data->clks; + if (d) { + clk_data->critical = true; + pr_info("%s critclks quirk enabled\n", d->ident); + } clkdev = platform_device_register_data(&pdev->dev, "clk-pmc-atom", PLATFORM_DEVID_NONE, diff --git a/include/linux/platform_data/x86/clk-pmc-atom.h b/include/linux/platform_data/x86/clk-pmc-atom.h index 3ab892208343..7a37ac27d0fb 100644 --- a/include/linux/platform_data/x86/clk-pmc-atom.h +++ b/include/linux/platform_data/x86/clk-pmc-atom.h @@ -35,10 +35,13 @@ struct pmc_clk { * * @base: PMC clock register base offset * @clks: pointer to set of registered clocks, typically 0..5 + * @critical: flag to indicate if firmware enabled pmc_plt_clks + * should be marked as critial or not */ struct pmc_clk_data { void __iomem *base; const struct pmc_clk *clks; + bool critical; }; #endif /* __PLATFORM_DATA_X86_CLK_PMC_ATOM_H */ -- cgit v1.2.3 From 903f1a187776bb8d79b13618ec05b25f86318885 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 10 Apr 2019 16:23:39 -0700 Subject: net/tls: fix build without CONFIG_TLS_DEVICE buildbot noticed that TLS_HW is not defined if CONFIG_TLS_DEVICE=n. Wrap the cleanup branch into an ifdef, tls_device_free_resources_tx() wouldn't be compiled either in this case. Fixes: 35b71a34ada6 ("net/tls: don't leak partially sent record in device mode") Signed-off-by: Jakub Kicinski Signed-off-by: David S. Miller --- net/tls/tls_main.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/tls/tls_main.c b/net/tls/tls_main.c index a3cca1ef0098..9547cea0ce3b 100644 --- a/net/tls/tls_main.c +++ b/net/tls/tls_main.c @@ -287,8 +287,10 @@ static void tls_sk_proto_close(struct sock *sk, long timeout) kfree(ctx->tx.rec_seq); kfree(ctx->tx.iv); tls_sw_free_resources_tx(sk); +#ifdef CONFIG_TLS_DEVICE } else if (ctx->tx_conf == TLS_HW) { tls_device_free_resources_tx(sk); +#endif } if (ctx->rx_conf == TLS_SW) { -- cgit v1.2.3 From 43c2adb9df7ddd6560fd3546d925b42cef92daa0 Mon Sep 17 00:00:00 2001 From: Hangbin Liu Date: Mon, 8 Apr 2019 16:45:17 +0800 Subject: team: set slave to promisc if team is already in promisc mode After adding a team interface to bridge, the team interface will enter promisc mode. Then if we add a new slave to team0, the slave will keep promisc off. Fix it by setting slave to promisc on if team master is already in promisc mode, also do the same for allmulti. v2: add promisc and allmulti checking when delete ports Fixes: 3d249d4ca7d0 ("net: introduce ethernet teaming device") Signed-off-by: Hangbin Liu Signed-off-by: David S. Miller --- drivers/net/team/team.c | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/drivers/net/team/team.c b/drivers/net/team/team.c index 6ed96fdfd96d..9ce61b019aad 100644 --- a/drivers/net/team/team.c +++ b/drivers/net/team/team.c @@ -1246,6 +1246,23 @@ static int team_port_add(struct team *team, struct net_device *port_dev, goto err_option_port_add; } + /* set promiscuity level to new slave */ + if (dev->flags & IFF_PROMISC) { + err = dev_set_promiscuity(port_dev, 1); + if (err) + goto err_set_slave_promisc; + } + + /* set allmulti level to new slave */ + if (dev->flags & IFF_ALLMULTI) { + err = dev_set_allmulti(port_dev, 1); + if (err) { + if (dev->flags & IFF_PROMISC) + dev_set_promiscuity(port_dev, -1); + goto err_set_slave_promisc; + } + } + netif_addr_lock_bh(dev); dev_uc_sync_multiple(port_dev, dev); dev_mc_sync_multiple(port_dev, dev); @@ -1262,6 +1279,9 @@ static int team_port_add(struct team *team, struct net_device *port_dev, return 0; +err_set_slave_promisc: + __team_option_inst_del_port(team, port); + err_option_port_add: team_upper_dev_unlink(team, port); @@ -1307,6 +1327,12 @@ static int team_port_del(struct team *team, struct net_device *port_dev) team_port_disable(team, port); list_del_rcu(&port->list); + + if (dev->flags & IFF_PROMISC) + dev_set_promiscuity(port_dev, -1); + if (dev->flags & IFF_ALLMULTI) + dev_set_allmulti(port_dev, -1); + team_upper_dev_unlink(team, port); netdev_rx_handler_unregister(port_dev); team_port_disable_netpoll(port); -- cgit v1.2.3 From cd7879f79f83aec4bb13f0f823f323911dc5397b Mon Sep 17 00:00:00 2001 From: Xiong Zhang Date: Wed, 10 Apr 2019 12:16:33 +0800 Subject: drm/i915/gvt: Roundup fb->height into tile's height at calucation fb->size When fb is tiled and fb->height isn't the multiple of tile's height, the format fb->size = fb->stride * fb->height, will get a smaller size than the actual size. As the memory height of tiled fb should be multiple of tile's height. Fixes: 7f1a93b1f1d1 ("drm/i915/gvt: Correct the calculation of plane size") Reviewed-by: Zhenyu Wang Signed-off-by: Xiong Zhang Signed-off-by: Zhenyu Wang --- drivers/gpu/drm/i915/gvt/dmabuf.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/gvt/dmabuf.c b/drivers/gpu/drm/i915/gvt/dmabuf.c index 5d887f7cc0d5..69a9a1b2ea4a 100644 --- a/drivers/gpu/drm/i915/gvt/dmabuf.c +++ b/drivers/gpu/drm/i915/gvt/dmabuf.c @@ -209,7 +209,7 @@ static int vgpu_get_plane_info(struct drm_device *dev, struct drm_i915_private *dev_priv = to_i915(dev); struct intel_vgpu_primary_plane_format p; struct intel_vgpu_cursor_plane_format c; - int ret; + int ret, tile_height = 1; if (plane_id == DRM_PLANE_TYPE_PRIMARY) { ret = intel_vgpu_decode_primary_plane(vgpu, &p); @@ -228,12 +228,15 @@ static int vgpu_get_plane_info(struct drm_device *dev, break; case PLANE_CTL_TILED_X: info->drm_format_mod = I915_FORMAT_MOD_X_TILED; + tile_height = 8; break; case PLANE_CTL_TILED_Y: info->drm_format_mod = I915_FORMAT_MOD_Y_TILED; + tile_height = 32; break; case PLANE_CTL_TILED_YF: info->drm_format_mod = I915_FORMAT_MOD_Yf_TILED; + tile_height = 32; break; default: gvt_vgpu_err("invalid tiling mode: %x\n", p.tiled); @@ -264,8 +267,8 @@ static int vgpu_get_plane_info(struct drm_device *dev, return -EINVAL; } - info->size = (info->stride * info->height + PAGE_SIZE - 1) - >> PAGE_SHIFT; + info->size = (info->stride * roundup(info->height, tile_height) + + PAGE_SIZE - 1) >> PAGE_SHIFT; if (info->size == 0) { gvt_vgpu_err("fb size is zero\n"); return -EINVAL; -- cgit v1.2.3 From 8065a779f17e94536a1c4dcee4f9d88011672f97 Mon Sep 17 00:00:00 2001 From: Si-Wei Liu Date: Mon, 8 Apr 2019 19:45:27 -0400 Subject: failover: allow name change on IFF_UP slave interfaces When a netdev appears through hot plug then gets enslaved by a failover master that is already up and running, the slave will be opened right away after getting enslaved. Today there's a race that userspace (udev) may fail to rename the slave if the kernel (net_failover) opens the slave earlier than when the userspace rename happens. Unlike bond or team, the primary slave of failover can't be renamed by userspace ahead of time, since the kernel initiated auto-enslavement is unable to, or rather, is never meant to be synchronized with the rename request from userspace. As the failover slave interfaces are not designed to be operated directly by userspace apps: IP configuration, filter rules with regard to network traffic passing and etc., should all be done on master interface. In general, userspace apps only care about the name of master interface, while slave names are less important as long as admin users can see reliable names that may carry other information describing the netdev. For e.g., they can infer that "ens3nsby" is a standby slave of "ens3", while for a name like "eth0" they can't tell which master it belongs to. Historically the name of IFF_UP interface can't be changed because there might be admin script or management software that is already relying on such behavior and assumes that the slave name can't be changed once UP. But failover is special: with the in-kernel auto-enslavement mechanism, the userspace expectation for device enumeration and bring-up order is already broken. Previously initramfs and various userspace config tools were modified to bypass failover slaves because of auto-enslavement and duplicate MAC address. Similarly, in case that users care about seeing reliable slave name, the new type of failover slaves needs to be taken care of specifically in userspace anyway. It's less risky to lift up the rename restriction on failover slave which is already UP. Although it's possible this change may potentially break userspace component (most likely configuration scripts or management software) that assumes slave name can't be changed while UP, it's relatively a limited and controllable set among all userspace components, which can be fixed specifically to listen for the rename events on failover slaves. Userspace component interacting with slaves is expected to be changed to operate on failover master interface instead, as the failover slave is dynamic in nature which may come and go at any point. The goal is to make the role of failover slaves less relevant, and userspace components should only deal with failover master in the long run. Fixes: 30c8bd5aa8b2 ("net: Introduce generic failover module") Signed-off-by: Si-Wei Liu Reviewed-by: Liran Alon Acked-by: Sridhar Samudrala Signed-off-by: David S. Miller --- include/linux/netdevice.h | 3 +++ net/core/dev.c | 16 +++++++++++++++- net/core/failover.c | 6 +++--- 3 files changed, 21 insertions(+), 4 deletions(-) diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 26f69cf763f4..324e872c91d1 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1500,6 +1500,7 @@ struct net_device_ops { * @IFF_FAILOVER: device is a failover master device * @IFF_FAILOVER_SLAVE: device is lower dev of a failover master device * @IFF_L3MDEV_RX_HANDLER: only invoke the rx handler of L3 master device + * @IFF_LIVE_RENAME_OK: rename is allowed while device is up and running */ enum netdev_priv_flags { IFF_802_1Q_VLAN = 1<<0, @@ -1532,6 +1533,7 @@ enum netdev_priv_flags { IFF_FAILOVER = 1<<27, IFF_FAILOVER_SLAVE = 1<<28, IFF_L3MDEV_RX_HANDLER = 1<<29, + IFF_LIVE_RENAME_OK = 1<<30, }; #define IFF_802_1Q_VLAN IFF_802_1Q_VLAN @@ -1563,6 +1565,7 @@ enum netdev_priv_flags { #define IFF_FAILOVER IFF_FAILOVER #define IFF_FAILOVER_SLAVE IFF_FAILOVER_SLAVE #define IFF_L3MDEV_RX_HANDLER IFF_L3MDEV_RX_HANDLER +#define IFF_LIVE_RENAME_OK IFF_LIVE_RENAME_OK /** * struct net_device - The DEVICE structure. diff --git a/net/core/dev.c b/net/core/dev.c index fdcff29df915..f409406254dd 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1184,7 +1184,21 @@ int dev_change_name(struct net_device *dev, const char *newname) BUG_ON(!dev_net(dev)); net = dev_net(dev); - if (dev->flags & IFF_UP) + + /* Some auto-enslaved devices e.g. failover slaves are + * special, as userspace might rename the device after + * the interface had been brought up and running since + * the point kernel initiated auto-enslavement. Allow + * live name change even when these slave devices are + * up and running. + * + * Typically, users of these auto-enslaving devices + * don't actually care about slave name change, as + * they are supposed to operate on master interface + * directly. + */ + if (dev->flags & IFF_UP && + likely(!(dev->priv_flags & IFF_LIVE_RENAME_OK))) return -EBUSY; write_seqcount_begin(&devnet_rename_seq); diff --git a/net/core/failover.c b/net/core/failover.c index 4a92a98ccce9..b5cd3c727285 100644 --- a/net/core/failover.c +++ b/net/core/failover.c @@ -80,14 +80,14 @@ static int failover_slave_register(struct net_device *slave_dev) goto err_upper_link; } - slave_dev->priv_flags |= IFF_FAILOVER_SLAVE; + slave_dev->priv_flags |= (IFF_FAILOVER_SLAVE | IFF_LIVE_RENAME_OK); if (fops && fops->slave_register && !fops->slave_register(slave_dev, failover_dev)) return NOTIFY_OK; netdev_upper_dev_unlink(slave_dev, failover_dev); - slave_dev->priv_flags &= ~IFF_FAILOVER_SLAVE; + slave_dev->priv_flags &= ~(IFF_FAILOVER_SLAVE | IFF_LIVE_RENAME_OK); err_upper_link: netdev_rx_handler_unregister(slave_dev); done: @@ -121,7 +121,7 @@ int failover_slave_unregister(struct net_device *slave_dev) netdev_rx_handler_unregister(slave_dev); netdev_upper_dev_unlink(slave_dev, failover_dev); - slave_dev->priv_flags &= ~IFF_FAILOVER_SLAVE; + slave_dev->priv_flags &= ~(IFF_FAILOVER_SLAVE | IFF_LIVE_RENAME_OK); if (fops && fops->slave_unregister && !fops->slave_unregister(slave_dev, failover_dev)) -- cgit v1.2.3 From b4f47f3848eb70986f75d06112af7b48b7f5f462 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Mon, 8 Apr 2019 17:59:50 -0700 Subject: net/tls: prevent bad memory access in tls_is_sk_tx_device_offloaded() Unlike '&&' operator, the '&' does not have short-circuit evaluation semantics. IOW both sides of the operator always get evaluated. Fix the wrong operator in tls_is_sk_tx_device_offloaded(), which would lead to out-of-bounds access for for non-full sockets. Fixes: 4799ac81e52a ("tls: Add rx inline crypto offload") Signed-off-by: Jakub Kicinski Reviewed-by: Dirk van der Merwe Reviewed-by: Simon Horman Signed-off-by: David S. Miller --- include/net/tls.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/net/tls.h b/include/net/tls.h index c7f7dc344e73..5934246b2c6f 100644 --- a/include/net/tls.h +++ b/include/net/tls.h @@ -381,7 +381,7 @@ tls_validate_xmit_skb(struct sock *sk, struct net_device *dev, static inline bool tls_is_sk_tx_device_offloaded(struct sock *sk) { #ifdef CONFIG_SOCK_VALIDATE_XMIT - return sk_fullsock(sk) & + return sk_fullsock(sk) && (smp_load_acquire(&sk->sk_validate_xmit_skb) == &tls_validate_xmit_skb); #else -- cgit v1.2.3 From 813dbeb656d6c90266f251d8bd2b02d445afa63f Mon Sep 17 00:00:00 2001 From: Jason Wang Date: Tue, 9 Apr 2019 12:10:25 +0800 Subject: vhost: reject zero size iova range We used to accept zero size iova range which will lead a infinite loop in translate_desc(). Fixing this by failing the request in this case. Reported-by: syzbot+d21e6e297322a900c128@syzkaller.appspotmail.com Fixes: 6b1e6cc7 ("vhost: new device IOTLB API") Signed-off-by: Jason Wang Acked-by: Michael S. Tsirkin Signed-off-by: David S. Miller --- drivers/vhost/vhost.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c index 5ace833de746..351af88231ad 100644 --- a/drivers/vhost/vhost.c +++ b/drivers/vhost/vhost.c @@ -911,8 +911,12 @@ static int vhost_new_umem_range(struct vhost_umem *umem, u64 start, u64 size, u64 end, u64 userspace_addr, int perm) { - struct vhost_umem_node *tmp, *node = kmalloc(sizeof(*node), GFP_ATOMIC); + struct vhost_umem_node *tmp, *node; + if (!size) + return -EFAULT; + + node = kmalloc(sizeof(*node), GFP_ATOMIC); if (!node) return -ENOMEM; -- cgit v1.2.3 From d1841533e54876f152a30ac398a34f47ad6590b1 Mon Sep 17 00:00:00 2001 From: Hoang Le Date: Tue, 9 Apr 2019 14:59:24 +0700 Subject: tipc: missing entries in name table of publications When binding multiple services with specific type 1Ki, 2Ki.., this leads to some entries in the name table of publications missing when listed out via 'tipc name show'. The problem is at identify zero last_type conditional provided via netlink. The first is initial 'type' when starting name table dummping. The second is continuously with zero type (node state service type). Then, lookup function failure to finding node state service type in next iteration. To solve this, adding more conditional to marked as dirty type and lookup correct service type for the next iteration instead of select the first service as initial 'type' zero. Acked-by: Jon Maloy Signed-off-by: Hoang Le Signed-off-by: David S. Miller --- net/tipc/name_table.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/tipc/name_table.c b/net/tipc/name_table.c index bff241f03525..89993afe0fbd 100644 --- a/net/tipc/name_table.c +++ b/net/tipc/name_table.c @@ -909,7 +909,8 @@ static int tipc_nl_service_list(struct net *net, struct tipc_nl_msg *msg, for (; i < TIPC_NAMETBL_SIZE; i++) { head = &tn->nametbl->services[i]; - if (*last_type) { + if (*last_type || + (!i && *last_key && (*last_lower == *last_key))) { service = tipc_service_find(net, *last_type); if (!service) return -EPIPE; -- cgit v1.2.3 From 988dc4a9a3b66be75b30405a5494faf0dc7cffb6 Mon Sep 17 00:00:00 2001 From: Lorenzo Bianconi Date: Tue, 9 Apr 2019 11:47:20 +0200 Subject: net: fou: do not use guehdr after iptunnel_pull_offloads in gue_udp_recv gue tunnels run iptunnel_pull_offloads on received skbs. This can determine a possible use-after-free accessing guehdr pointer since the packet will be 'uncloned' running pskb_expand_head if it is a cloned gso skb (e.g if the packet has been sent though a veth device) Fixes: a09a4c8dd1ec ("tunnels: Remove encapsulation offloads on decap") Signed-off-by: Lorenzo Bianconi Signed-off-by: David S. Miller --- net/ipv4/fou.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/net/ipv4/fou.c b/net/ipv4/fou.c index 79e98e21cdd7..12ce6c526d72 100644 --- a/net/ipv4/fou.c +++ b/net/ipv4/fou.c @@ -121,6 +121,7 @@ static int gue_udp_recv(struct sock *sk, struct sk_buff *skb) struct guehdr *guehdr; void *data; u16 doffset = 0; + u8 proto_ctype; if (!fou) return 1; @@ -212,13 +213,14 @@ static int gue_udp_recv(struct sock *sk, struct sk_buff *skb) if (unlikely(guehdr->control)) return gue_control_message(skb, guehdr); + proto_ctype = guehdr->proto_ctype; __skb_pull(skb, sizeof(struct udphdr) + hdrlen); skb_reset_transport_header(skb); if (iptunnel_pull_offloads(skb)) goto drop; - return -guehdr->proto_ctype; + return -proto_ctype; drop: kfree_skb(skb); -- cgit v1.2.3 From e154592a1d25fa1f50ac1bd8d132d0e1103442ba Mon Sep 17 00:00:00 2001 From: Stefan Agner Date: Thu, 11 Apr 2019 00:47:46 +0200 Subject: gpu: host1x: Fix compile error when IOMMU API is not available MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In case the IOMMU API is not available compiling host1x fails with the following error: In file included from drivers/gpu/host1x/hw/host1x06.c:27: drivers/gpu/host1x/hw/channel_hw.c: In function ‘host1x_channel_set_streamid’: drivers/gpu/host1x/hw/channel_hw.c:118:30: error: implicit declaration of function ‘dev_iommu_fwspec_get’; did you mean ‘iommu_fwspec_free’? [-Werror=implicit-function-declaration] struct iommu_fwspec *spec = dev_iommu_fwspec_get(channel->dev->parent); ^~~~~~~~~~~~~~~~~~~~ iommu_fwspec_free Fixes: de5469c21ff9 ("gpu: host1x: Program the channel stream ID") Signed-off-by: Stefan Agner Signed-off-by: Thierry Reding --- drivers/gpu/host1x/hw/channel_hw.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/host1x/hw/channel_hw.c b/drivers/gpu/host1x/hw/channel_hw.c index 27101c04a827..4030d64916f0 100644 --- a/drivers/gpu/host1x/hw/channel_hw.c +++ b/drivers/gpu/host1x/hw/channel_hw.c @@ -114,7 +114,7 @@ static inline void synchronize_syncpt_base(struct host1x_job *job) static void host1x_channel_set_streamid(struct host1x_channel *channel) { -#if HOST1X_HW >= 6 +#if IS_ENABLED(CONFIG_IOMMU_API) && HOST1X_HW >= 6 struct iommu_fwspec *spec = dev_iommu_fwspec_get(channel->dev->parent); u32 sid = spec ? spec->ids[0] & 0xffff : 0x7f; -- cgit v1.2.3 From 5c41ea6d52003b5bc77c2a82fd5ca7d480237d89 Mon Sep 17 00:00:00 2001 From: Faiz Abbas Date: Thu, 11 Apr 2019 14:29:37 +0530 Subject: mmc: sdhci-omap: Don't finish_mrq() on a command error during tuning commit 5b0d62108b46 ("mmc: sdhci-omap: Add platform specific reset callback") skips data resets during tuning operation. Because of this, a data error or data finish interrupt might still arrive after a command error has been handled and the mrq ended. This ends up with a "mmc0: Got data interrupt 0x00000002 even though no data operation was in progress" error message. Fix this by adding a platform specific callback for sdhci_irq. Mark the mrq as a failure but wait for a data interrupt instead of calling finish_mrq(). Fixes: 5b0d62108b46 ("mmc: sdhci-omap: Add platform specific reset callback") Signed-off-by: Faiz Abbas Acked-by: Adrian Hunter Cc: stable@vger.kernel.org Signed-off-by: Ulf Hansson --- drivers/mmc/host/sdhci-omap.c | 38 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 38 insertions(+) diff --git a/drivers/mmc/host/sdhci-omap.c b/drivers/mmc/host/sdhci-omap.c index 5bbed477c9b1..9f20fff9781b 100644 --- a/drivers/mmc/host/sdhci-omap.c +++ b/drivers/mmc/host/sdhci-omap.c @@ -797,6 +797,43 @@ void sdhci_omap_reset(struct sdhci_host *host, u8 mask) sdhci_reset(host, mask); } +#define CMD_ERR_MASK (SDHCI_INT_CRC | SDHCI_INT_END_BIT | SDHCI_INT_INDEX |\ + SDHCI_INT_TIMEOUT) +#define CMD_MASK (CMD_ERR_MASK | SDHCI_INT_RESPONSE) + +static u32 sdhci_omap_irq(struct sdhci_host *host, u32 intmask) +{ + struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host); + struct sdhci_omap_host *omap_host = sdhci_pltfm_priv(pltfm_host); + + if (omap_host->is_tuning && host->cmd && !host->data_early && + (intmask & CMD_ERR_MASK)) { + + /* + * Since we are not resetting data lines during tuning + * operation, data error or data complete interrupts + * might still arrive. Mark this request as a failure + * but still wait for the data interrupt + */ + if (intmask & SDHCI_INT_TIMEOUT) + host->cmd->error = -ETIMEDOUT; + else + host->cmd->error = -EILSEQ; + + host->cmd = NULL; + + /* + * Sometimes command error interrupts and command complete + * interrupt will arrive together. Clear all command related + * interrupts here. + */ + sdhci_writel(host, intmask & CMD_MASK, SDHCI_INT_STATUS); + intmask &= ~CMD_MASK; + } + + return intmask; +} + static struct sdhci_ops sdhci_omap_ops = { .set_clock = sdhci_omap_set_clock, .set_power = sdhci_omap_set_power, @@ -807,6 +844,7 @@ static struct sdhci_ops sdhci_omap_ops = { .platform_send_init_74_clocks = sdhci_omap_init_74_clocks, .reset = sdhci_omap_reset, .set_uhs_signaling = sdhci_omap_set_uhs_signaling, + .irq = sdhci_omap_irq, }; static int sdhci_omap_set_capabilities(struct sdhci_omap_host *omap_host) -- cgit v1.2.3 From 102bbe34b31c9159e714432afd64458f6f3876d7 Mon Sep 17 00:00:00 2001 From: Baolin Wang Date: Wed, 10 Apr 2019 15:47:54 +0800 Subject: gpio: eic: sprd: Fix incorrect irq type setting for the sync EIC When setting sync EIC as IRQ_TYPE_EDGE_BOTH type, we missed to set the SPRD_EIC_SYNC_INTMODE register to 0, which means detecting edge signals. Thus this patch fixes the issue. Fixes: 25518e024e3a ("gpio: Add Spreadtrum EIC driver support") Cc: Signed-off-by: Baolin Wang Signed-off-by: Linus Walleij --- drivers/gpio/gpio-eic-sprd.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpio/gpio-eic-sprd.c b/drivers/gpio/gpio-eic-sprd.c index f0223cee9774..77092268ee95 100644 --- a/drivers/gpio/gpio-eic-sprd.c +++ b/drivers/gpio/gpio-eic-sprd.c @@ -414,6 +414,7 @@ static int sprd_eic_irq_set_type(struct irq_data *data, unsigned int flow_type) irq_set_handler_locked(data, handle_edge_irq); break; case IRQ_TYPE_EDGE_BOTH: + sprd_eic_update(chip, offset, SPRD_EIC_SYNC_INTMODE, 0); sprd_eic_update(chip, offset, SPRD_EIC_SYNC_INTBOTH, 1); irq_set_handler_locked(data, handle_edge_irq); break; -- cgit v1.2.3 From a66477b0efe511d98dde3e4aaeb189790e6f0a39 Mon Sep 17 00:00:00 2001 From: Christian König Date: Tue, 2 Apr 2019 09:26:52 +0200 Subject: drm/ttm: fix out-of-bounds read in ttm_put_pages() v2 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When ttm_put_pages() tries to figure out whether it's dealing with transparent hugepages, it just reads past the bounds of the pages array without a check. v2: simplify the test if enough pages are left in the array (Christian). Signed-off-by: Jann Horn Signed-off-by: Christian König Fixes: 5c42c64f7d54 ("drm/ttm: fix the fix for huge compound pages") Cc: stable@vger.kernel.org Reviewed-by: Michel Dänzer Reviewed-by: Junwei Zhang Reviewed-by: Huang Rui Signed-off-by: Alex Deucher --- drivers/gpu/drm/ttm/ttm_page_alloc.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/ttm/ttm_page_alloc.c b/drivers/gpu/drm/ttm/ttm_page_alloc.c index f841accc2c00..f77c81db161b 100644 --- a/drivers/gpu/drm/ttm/ttm_page_alloc.c +++ b/drivers/gpu/drm/ttm/ttm_page_alloc.c @@ -730,7 +730,8 @@ static void ttm_put_pages(struct page **pages, unsigned npages, int flags, } #ifdef CONFIG_TRANSPARENT_HUGEPAGE - if (!(flags & TTM_PAGE_FLAG_DMA32)) { + if (!(flags & TTM_PAGE_FLAG_DMA32) && + (npages - i) >= HPAGE_PMD_NR) { for (j = 0; j < HPAGE_PMD_NR; ++j) if (p++ != pages[i + j]) break; @@ -759,7 +760,7 @@ static void ttm_put_pages(struct page **pages, unsigned npages, int flags, unsigned max_size, n2free; spin_lock_irqsave(&huge->lock, irq_flags); - while (i < npages) { + while ((npages - i) >= HPAGE_PMD_NR) { struct page *p = pages[i]; unsigned j; -- cgit v1.2.3 From ac1e516d5a4c56bf0cb4a3dfc0672f689131cfd4 Mon Sep 17 00:00:00 2001 From: Christian König Date: Tue, 2 Apr 2019 09:29:35 +0200 Subject: drm/ttm: fix start page for huge page check in ttm_put_pages() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The first page entry is always the same with itself. Signed-off-by: Christian König Reviewed-by: Michel Dänzer Reviewed-by: Junwei Zhang Reviewed-by: Huang Rui Signed-off-by: Alex Deucher --- drivers/gpu/drm/ttm/ttm_page_alloc.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/ttm/ttm_page_alloc.c b/drivers/gpu/drm/ttm/ttm_page_alloc.c index f77c81db161b..c74147f0cbe3 100644 --- a/drivers/gpu/drm/ttm/ttm_page_alloc.c +++ b/drivers/gpu/drm/ttm/ttm_page_alloc.c @@ -732,7 +732,7 @@ static void ttm_put_pages(struct page **pages, unsigned npages, int flags, #ifdef CONFIG_TRANSPARENT_HUGEPAGE if (!(flags & TTM_PAGE_FLAG_DMA32) && (npages - i) >= HPAGE_PMD_NR) { - for (j = 0; j < HPAGE_PMD_NR; ++j) + for (j = 1; j < HPAGE_PMD_NR; ++j) if (p++ != pages[i + j]) break; @@ -767,7 +767,7 @@ static void ttm_put_pages(struct page **pages, unsigned npages, int flags, if (!p) break; - for (j = 0; j < HPAGE_PMD_NR; ++j) + for (j = 1; j < HPAGE_PMD_NR; ++j) if (p++ != pages[i + j]) break; -- cgit v1.2.3 From 453393369dc9806d2455151e329c599684762428 Mon Sep 17 00:00:00 2001 From: Christian König Date: Wed, 10 Apr 2019 11:43:43 +0200 Subject: drm/ttm: fix incrementing the page pointer for huge pages MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When we increment the counter we need to increment the pointer as well. Signed-off-by: Christian König Fixes: e16858a7e6e7 drm/ttm: fix start page for huge page check in ttm_put_pages() Reviewed-by: Michel Dänzer Acked-by: Huang Rui Signed-off-by: Alex Deucher --- drivers/gpu/drm/ttm/ttm_page_alloc.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/ttm/ttm_page_alloc.c b/drivers/gpu/drm/ttm/ttm_page_alloc.c index c74147f0cbe3..627f8dc91d0e 100644 --- a/drivers/gpu/drm/ttm/ttm_page_alloc.c +++ b/drivers/gpu/drm/ttm/ttm_page_alloc.c @@ -733,7 +733,7 @@ static void ttm_put_pages(struct page **pages, unsigned npages, int flags, if (!(flags & TTM_PAGE_FLAG_DMA32) && (npages - i) >= HPAGE_PMD_NR) { for (j = 1; j < HPAGE_PMD_NR; ++j) - if (p++ != pages[i + j]) + if (++p != pages[i + j]) break; if (j == HPAGE_PMD_NR) @@ -768,7 +768,7 @@ static void ttm_put_pages(struct page **pages, unsigned npages, int flags, break; for (j = 1; j < HPAGE_PMD_NR; ++j) - if (p++ != pages[i + j]) + if (++p != pages[i + j]) break; if (j != HPAGE_PMD_NR) -- cgit v1.2.3 From 543c364d8eeeb42c0edfaac9764f4e9f3d777ec1 Mon Sep 17 00:00:00 2001 From: Lin Yi Date: Wed, 10 Apr 2019 10:23:34 +0800 Subject: drm/ttm: fix dma_fence refcount imbalance on error path MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit the ttm_bo_add_move_fence takes a reference to the struct dma_fence, but failed to release it on the error path, leading to a memory leak. add dma_fence_put before return when error occur. Signed-off-by: Lin Yi Reviewed-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/ttm/ttm_bo.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c index 3f56647cdb35..0fa5034b9f9e 100644 --- a/drivers/gpu/drm/ttm/ttm_bo.c +++ b/drivers/gpu/drm/ttm/ttm_bo.c @@ -876,8 +876,10 @@ static int ttm_bo_add_move_fence(struct ttm_buffer_object *bo, reservation_object_add_shared_fence(bo->resv, fence); ret = reservation_object_reserve_shared(bo->resv, 1); - if (unlikely(ret)) + if (unlikely(ret)) { + dma_fence_put(fence); return ret; + } dma_fence_put(bo->moving); bo->moving = fence; -- cgit v1.2.3 From f4bbebf8e7eb4d294b040ab2d2ba71e70e69b930 Mon Sep 17 00:00:00 2001 From: Martin Leung Date: Tue, 26 Mar 2019 13:14:11 -0400 Subject: drm/amd/display: extending AUX SW Timeout [Why] AUX takes longer to reply when using active DP-DVI dongle on some asics resulting in up to 2000+ us edid read (timeout). [How] 1. Adjust AUX poll to match spec 2. Extend the SW timeout. This does not affect normal operation since we exit the loop as soon as AUX acks. Signed-off-by: Martin Leung Reviewed-by: Jun Lei Acked-by: Joshua Aberback Acked-by: Leo Li Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dce/dce_aux.c | 9 ++++++--- drivers/gpu/drm/amd/display/dc/dce/dce_aux.h | 6 +++--- 2 files changed, 9 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_aux.c b/drivers/gpu/drm/amd/display/dc/dce/dce_aux.c index 4febf4ef7240..4fe3664fb495 100644 --- a/drivers/gpu/drm/amd/display/dc/dce/dce_aux.c +++ b/drivers/gpu/drm/amd/display/dc/dce/dce_aux.c @@ -190,6 +190,12 @@ static void submit_channel_request( 1, 0); } + + REG_UPDATE(AUX_INTERRUPT_CONTROL, AUX_SW_DONE_ACK, 1); + + REG_WAIT(AUX_SW_STATUS, AUX_SW_DONE, 0, + 10, aux110->timeout_period/10); + /* set the delay and the number of bytes to write */ /* The length include @@ -242,9 +248,6 @@ static void submit_channel_request( } } - REG_UPDATE(AUX_INTERRUPT_CONTROL, AUX_SW_DONE_ACK, 1); - REG_WAIT(AUX_SW_STATUS, AUX_SW_DONE, 0, - 10, aux110->timeout_period/10); REG_UPDATE(AUX_SW_CONTROL, AUX_SW_GO, 1); } diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_aux.h b/drivers/gpu/drm/amd/display/dc/dce/dce_aux.h index d27f22c05e4b..e28ed6a00ff4 100644 --- a/drivers/gpu/drm/amd/display/dc/dce/dce_aux.h +++ b/drivers/gpu/drm/amd/display/dc/dce/dce_aux.h @@ -71,11 +71,11 @@ enum { /* This is the timeout as defined in DP 1.2a, * at most within ~240usec. That means, * increasing this timeout will not affect normal operation, * and we'll timeout after - * SW_AUX_TIMEOUT_PERIOD_MULTIPLIER * AUX_TIMEOUT_PERIOD = 1600usec. + * SW_AUX_TIMEOUT_PERIOD_MULTIPLIER * AUX_TIMEOUT_PERIOD = 2400usec. * This timeout is especially important for - * resume from S3 and CTS. + * converters, resume from S3, and CTS. */ - SW_AUX_TIMEOUT_PERIOD_MULTIPLIER = 4 + SW_AUX_TIMEOUT_PERIOD_MULTIPLIER = 6 }; struct dce_aux { -- cgit v1.2.3 From 67f471b6ed3b09033c4ac77ea03f92afdb1989fe Mon Sep 17 00:00:00 2001 From: James Smart Date: Mon, 8 Apr 2019 11:15:19 -0700 Subject: nvme-fc: correct csn initialization and increments on error This patch fixes a long-standing bug that initialized the FC-NVME cmnd iu CSN value to 1. Early FC-NVME specs had the connection starting with CSN=1. By the time the spec reached approval, the language had changed to state a connection should start with CSN=0. This patch corrects the initialization value for FC-NVME connections. Additionally, in reviewing the transport, the CSN value is assigned to the new IU early in the start routine. It's possible that a later dma map request may fail, causing the command to never be sent to the controller. Change the location of the assignment so that it is immediately prior to calling the lldd. Add a comment block to explain the impacts if the lldd were to additionally fail sending the command. Signed-off-by: Dick Kennedy Signed-off-by: James Smart Reviewed-by: Ewan D. Milne Signed-off-by: Christoph Hellwig --- drivers/nvme/host/fc.c | 20 +++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c index f3b9d91ba0df..6d8451356eac 100644 --- a/drivers/nvme/host/fc.c +++ b/drivers/nvme/host/fc.c @@ -1845,7 +1845,7 @@ nvme_fc_init_queue(struct nvme_fc_ctrl *ctrl, int idx) memset(queue, 0, sizeof(*queue)); queue->ctrl = ctrl; queue->qnum = idx; - atomic_set(&queue->csn, 1); + atomic_set(&queue->csn, 0); queue->dev = ctrl->dev; if (idx > 0) @@ -1887,7 +1887,7 @@ nvme_fc_free_queue(struct nvme_fc_queue *queue) */ queue->connection_id = 0; - atomic_set(&queue->csn, 1); + atomic_set(&queue->csn, 0); } static void @@ -2183,7 +2183,6 @@ nvme_fc_start_fcp_op(struct nvme_fc_ctrl *ctrl, struct nvme_fc_queue *queue, { struct nvme_fc_cmd_iu *cmdiu = &op->cmd_iu; struct nvme_command *sqe = &cmdiu->sqe; - u32 csn; int ret, opstate; /* @@ -2198,8 +2197,6 @@ nvme_fc_start_fcp_op(struct nvme_fc_ctrl *ctrl, struct nvme_fc_queue *queue, /* format the FC-NVME CMD IU and fcp_req */ cmdiu->connection_id = cpu_to_be64(queue->connection_id); - csn = atomic_inc_return(&queue->csn); - cmdiu->csn = cpu_to_be32(csn); cmdiu->data_len = cpu_to_be32(data_len); switch (io_dir) { case NVMEFC_FCP_WRITE: @@ -2257,11 +2254,24 @@ nvme_fc_start_fcp_op(struct nvme_fc_ctrl *ctrl, struct nvme_fc_queue *queue, if (!(op->flags & FCOP_FLAGS_AEN)) blk_mq_start_request(op->rq); + cmdiu->csn = cpu_to_be32(atomic_inc_return(&queue->csn)); ret = ctrl->lport->ops->fcp_io(&ctrl->lport->localport, &ctrl->rport->remoteport, queue->lldd_handle, &op->fcp_req); if (ret) { + /* + * If the lld fails to send the command is there an issue with + * the csn value? If the command that fails is the Connect, + * no - as the connection won't be live. If it is a command + * post-connect, it's possible a gap in csn may be created. + * Does this matter? As Linux initiators don't send fused + * commands, no. The gap would exist, but as there's nothing + * that depends on csn order to be delivered on the target + * side, it shouldn't hurt. It would be difficult for a + * target to even detect the csn gap as it has no idea when the + * cmd with the csn was supposed to arrive. + */ opstate = atomic_xchg(&op->state, FCPOP_STATE_COMPLETE); __nvme_fc_fcpop_chk_teardowns(ctrl, op, opstate); -- cgit v1.2.3 From d808b7f759b50acf0784ce6230ffa63e12ef465d Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Tue, 9 Apr 2019 10:03:59 -0600 Subject: nvmet: fix discover log page when offsets are used The nvme target hadn't been taking the Get Log Page offset parameter into consideration, and so has been returning corrupted log pages when offsets are used. Since many tools, including nvme-cli, split the log request to 4k, we've been breaking discovery log responses when more than 3 subsystems exist. Fix the returned data by internally generating the entire discovery log page and copying only the requested bytes into the user buffer. The command log page offset type has been modified to a native __le64 to make it easier to extract the value from a command. Signed-off-by: Keith Busch Tested-by: Minwoo Im Reviewed-by: Chaitanya Kulkarni Reviewed-by: Hannes Reinecke Reviewed-by: James Smart Signed-off-by: Christoph Hellwig --- drivers/nvme/target/admin-cmd.c | 5 +++ drivers/nvme/target/discovery.c | 68 +++++++++++++++++++++++++++-------------- drivers/nvme/target/nvmet.h | 1 + include/linux/nvme.h | 9 ++++-- 4 files changed, 58 insertions(+), 25 deletions(-) diff --git a/drivers/nvme/target/admin-cmd.c b/drivers/nvme/target/admin-cmd.c index 76250181fee0..9f72d515fc4b 100644 --- a/drivers/nvme/target/admin-cmd.c +++ b/drivers/nvme/target/admin-cmd.c @@ -24,6 +24,11 @@ u32 nvmet_get_log_page_len(struct nvme_command *cmd) return len; } +u64 nvmet_get_log_page_offset(struct nvme_command *cmd) +{ + return le64_to_cpu(cmd->get_log_page.lpo); +} + static void nvmet_execute_get_log_page_noop(struct nvmet_req *req) { nvmet_req_complete(req, nvmet_zero_sgl(req, 0, req->data_len)); diff --git a/drivers/nvme/target/discovery.c b/drivers/nvme/target/discovery.c index c872b47a88f3..33ed95e72d6b 100644 --- a/drivers/nvme/target/discovery.c +++ b/drivers/nvme/target/discovery.c @@ -131,54 +131,76 @@ static void nvmet_set_disc_traddr(struct nvmet_req *req, struct nvmet_port *port memcpy(traddr, port->disc_addr.traddr, NVMF_TRADDR_SIZE); } +static size_t discovery_log_entries(struct nvmet_req *req) +{ + struct nvmet_ctrl *ctrl = req->sq->ctrl; + struct nvmet_subsys_link *p; + struct nvmet_port *r; + size_t entries = 0; + + list_for_each_entry(p, &req->port->subsystems, entry) { + if (!nvmet_host_allowed(p->subsys, ctrl->hostnqn)) + continue; + entries++; + } + list_for_each_entry(r, &req->port->referrals, entry) + entries++; + return entries; +} + static void nvmet_execute_get_disc_log_page(struct nvmet_req *req) { const int entry_size = sizeof(struct nvmf_disc_rsp_page_entry); struct nvmet_ctrl *ctrl = req->sq->ctrl; struct nvmf_disc_rsp_page_hdr *hdr; + u64 offset = nvmet_get_log_page_offset(req->cmd); size_t data_len = nvmet_get_log_page_len(req->cmd); - size_t alloc_len = max(data_len, sizeof(*hdr)); - int residual_len = data_len - sizeof(*hdr); + size_t alloc_len; struct nvmet_subsys_link *p; struct nvmet_port *r; u32 numrec = 0; u16 status = 0; + void *buffer; + + /* Spec requires dword aligned offsets */ + if (offset & 0x3) { + status = NVME_SC_INVALID_FIELD | NVME_SC_DNR; + goto out; + } /* * Make sure we're passing at least a buffer of response header size. * If host provided data len is less than the header size, only the * number of bytes requested by host will be sent to host. */ - hdr = kzalloc(alloc_len, GFP_KERNEL); - if (!hdr) { + down_read(&nvmet_config_sem); + alloc_len = sizeof(*hdr) + entry_size * discovery_log_entries(req); + buffer = kzalloc(alloc_len, GFP_KERNEL); + if (!buffer) { + up_read(&nvmet_config_sem); status = NVME_SC_INTERNAL; goto out; } - down_read(&nvmet_config_sem); + hdr = buffer; list_for_each_entry(p, &req->port->subsystems, entry) { + char traddr[NVMF_TRADDR_SIZE]; + if (!nvmet_host_allowed(p->subsys, ctrl->hostnqn)) continue; - if (residual_len >= entry_size) { - char traddr[NVMF_TRADDR_SIZE]; - - nvmet_set_disc_traddr(req, req->port, traddr); - nvmet_format_discovery_entry(hdr, req->port, - p->subsys->subsysnqn, traddr, - NVME_NQN_NVME, numrec); - residual_len -= entry_size; - } + + nvmet_set_disc_traddr(req, req->port, traddr); + nvmet_format_discovery_entry(hdr, req->port, + p->subsys->subsysnqn, traddr, + NVME_NQN_NVME, numrec); numrec++; } list_for_each_entry(r, &req->port->referrals, entry) { - if (residual_len >= entry_size) { - nvmet_format_discovery_entry(hdr, r, - NVME_DISC_SUBSYS_NAME, - r->disc_addr.traddr, - NVME_NQN_DISC, numrec); - residual_len -= entry_size; - } + nvmet_format_discovery_entry(hdr, r, + NVME_DISC_SUBSYS_NAME, + r->disc_addr.traddr, + NVME_NQN_DISC, numrec); numrec++; } @@ -190,8 +212,8 @@ static void nvmet_execute_get_disc_log_page(struct nvmet_req *req) up_read(&nvmet_config_sem); - status = nvmet_copy_to_sgl(req, 0, hdr, data_len); - kfree(hdr); + status = nvmet_copy_to_sgl(req, 0, buffer + offset, data_len); + kfree(buffer); out: nvmet_req_complete(req, status); } diff --git a/drivers/nvme/target/nvmet.h b/drivers/nvme/target/nvmet.h index 51e49efd7849..1653d19b187f 100644 --- a/drivers/nvme/target/nvmet.h +++ b/drivers/nvme/target/nvmet.h @@ -428,6 +428,7 @@ u16 nvmet_copy_from_sgl(struct nvmet_req *req, off_t off, void *buf, u16 nvmet_zero_sgl(struct nvmet_req *req, off_t off, size_t len); u32 nvmet_get_log_page_len(struct nvme_command *cmd); +u64 nvmet_get_log_page_offset(struct nvme_command *cmd); extern struct list_head *nvmet_ports; void nvmet_port_disc_changed(struct nvmet_port *port, diff --git a/include/linux/nvme.h b/include/linux/nvme.h index baa49e6a23cc..c40720cb59ac 100644 --- a/include/linux/nvme.h +++ b/include/linux/nvme.h @@ -967,8 +967,13 @@ struct nvme_get_log_page_command { __le16 numdl; __le16 numdu; __u16 rsvd11; - __le32 lpol; - __le32 lpou; + union { + struct { + __le32 lpol; + __le32 lpou; + }; + __le64 lpo; + }; __u32 rsvd14[2]; }; -- cgit v1.2.3 From 8bbad1ba3196814487438d1299cec75de5c74615 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 6 Mar 2019 14:57:43 +0100 Subject: gpu: host1x: Program stream ID to bypass without SMMU If SMMU support is not available, fall back to programming the bypass stream ID (0x7f). Fixes: de5469c21ff9 ("gpu: host1x: Program the channel stream ID") Suggested-by: Mikko Perttunen Signed-off-by: Arnd Bergmann Reviewed-by: Mikko Perttunen [treding@nvidia.com: rebase this on top of a later build fix] Signed-off-by: Thierry Reding --- drivers/gpu/host1x/hw/channel_hw.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/host1x/hw/channel_hw.c b/drivers/gpu/host1x/hw/channel_hw.c index 4030d64916f0..0c0eb43abf65 100644 --- a/drivers/gpu/host1x/hw/channel_hw.c +++ b/drivers/gpu/host1x/hw/channel_hw.c @@ -114,9 +114,13 @@ static inline void synchronize_syncpt_base(struct host1x_job *job) static void host1x_channel_set_streamid(struct host1x_channel *channel) { -#if IS_ENABLED(CONFIG_IOMMU_API) && HOST1X_HW >= 6 +#if HOST1X_HW >= 6 + u32 sid = 0x7f; +#ifdef CONFIG_IOMMU_API struct iommu_fwspec *spec = dev_iommu_fwspec_get(channel->dev->parent); - u32 sid = spec ? spec->ids[0] & 0xffff : 0x7f; + if (spec) + sid = spec->ids[0] & 0xffff; +#endif host1x_ch_writel(channel, sid, HOST1X_CHANNEL_SMMU_STREAMID); #endif -- cgit v1.2.3 From b995dcca7cf12f208cfd95fd9d5768dca7cccec7 Mon Sep 17 00:00:00 2001 From: Stephen Boyd Date: Thu, 11 Apr 2019 10:22:43 -0700 Subject: platform/x86: pmc_atom: Drop __initconst on dmi table MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit It's used by probe and that isn't an init function. Drop this so that we don't get a section mismatch. Reported-by: kbuild test robot Cc: David Müller Cc: Hans de Goede Cc: Andy Shevchenko Fixes: 7c2e07130090 ("clk: x86: Add system specific quirk to mark clocks as critical") Signed-off-by: Stephen Boyd --- drivers/platform/x86/pmc_atom.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/platform/x86/pmc_atom.c b/drivers/platform/x86/pmc_atom.c index eaec2d306481..c7039f52ad51 100644 --- a/drivers/platform/x86/pmc_atom.c +++ b/drivers/platform/x86/pmc_atom.c @@ -396,7 +396,7 @@ static int pmc_dbgfs_register(struct pmc_dev *pmc) * Some systems need one or more of their pmc_plt_clks to be * marked as critical. */ -static const struct dmi_system_id critclk_systems[] __initconst = { +static const struct dmi_system_id critclk_systems[] = { { .ident = "MPL CEC1x", .matches = { -- cgit v1.2.3 From 8c5165430c0194df92369162d1c7f53f8672baa5 Mon Sep 17 00:00:00 2001 From: Scott Wood Date: Wed, 10 Apr 2019 16:59:25 -0500 Subject: dma-debug: only skip one stackframe entry With skip set to 1, I get a traceback like this: [ 106.867637] DMA-API: Mapped at: [ 106.870784] afu_dma_map_region+0x2cd/0x4f0 [dfl_afu] [ 106.875839] afu_ioctl+0x258/0x380 [dfl_afu] [ 106.880108] do_vfs_ioctl+0xa9/0x720 [ 106.883688] ksys_ioctl+0x60/0x90 [ 106.887007] __x64_sys_ioctl+0x16/0x20 With the previous value of 2, afu_dma_map_region was being omitted. I suspect that the code paths have simply changed since the value of 2 was chosen a decade ago, but it's also possible that it varies based on which mapping function was used, compiler inlining choices, etc. In any case, it's best to err on the side of skipping less. Signed-off-by: Scott Wood Signed-off-by: Christoph Hellwig --- kernel/dma/debug.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/dma/debug.c b/kernel/dma/debug.c index 45d51e8e26f6..a218e43cc382 100644 --- a/kernel/dma/debug.c +++ b/kernel/dma/debug.c @@ -706,7 +706,7 @@ static struct dma_debug_entry *dma_entry_alloc(void) #ifdef CONFIG_STACKTRACE entry->stacktrace.max_entries = DMA_DEBUG_STACKTRACE_ENTRIES; entry->stacktrace.entries = entry->st_entries; - entry->stacktrace.skip = 2; + entry->stacktrace.skip = 1; save_stack_trace(&entry->stacktrace); #endif -- cgit v1.2.3 From fd57770dd198f5b2ddd5b9e6bf282cf98d63adb9 Mon Sep 17 00:00:00 2001 From: Karsten Graul Date: Thu, 11 Apr 2019 11:17:30 +0200 Subject: net/smc: wait for pending work before clcsock release_sock When the clcsock is already released using sock_release() and a pending smc_listen_work accesses the clcsock than that will fail. Solve this by canceling and waiting for the work to complete first. Because the work holds the sock_lock it must make sure that the lock is not hold before the new helper smc_clcsock_release() is invoked. And before the smc_listen_work starts working check if the parent listen socket is still valid, otherwise stop the work early. Signed-off-by: Karsten Graul Signed-off-by: Ursula Braun Signed-off-by: David S. Miller --- net/smc/af_smc.c | 14 ++++++++------ net/smc/smc_close.c | 25 +++++++++++++++++++++---- net/smc/smc_close.h | 1 + 3 files changed, 30 insertions(+), 10 deletions(-) diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c index 77ef53596d18..9bdaed2f2e35 100644 --- a/net/smc/af_smc.c +++ b/net/smc/af_smc.c @@ -167,10 +167,9 @@ static int smc_release(struct socket *sock) if (sk->sk_state == SMC_CLOSED) { if (smc->clcsock) { - mutex_lock(&smc->clcsock_release_lock); - sock_release(smc->clcsock); - smc->clcsock = NULL; - mutex_unlock(&smc->clcsock_release_lock); + release_sock(sk); + smc_clcsock_release(smc); + lock_sock(sk); } if (!smc->use_fallback) smc_conn_free(&smc->conn); @@ -1037,13 +1036,13 @@ static void smc_listen_out(struct smc_sock *new_smc) struct smc_sock *lsmc = new_smc->listen_smc; struct sock *newsmcsk = &new_smc->sk; - lock_sock_nested(&lsmc->sk, SINGLE_DEPTH_NESTING); if (lsmc->sk.sk_state == SMC_LISTEN) { + lock_sock_nested(&lsmc->sk, SINGLE_DEPTH_NESTING); smc_accept_enqueue(&lsmc->sk, newsmcsk); + release_sock(&lsmc->sk); } else { /* no longer listening */ smc_close_non_accepted(newsmcsk); } - release_sock(&lsmc->sk); /* Wake up accept */ lsmc->sk.sk_data_ready(&lsmc->sk); @@ -1237,6 +1236,9 @@ static void smc_listen_work(struct work_struct *work) int rc = 0; u8 ibport; + if (new_smc->listen_smc->sk.sk_state != SMC_LISTEN) + return smc_listen_out_err(new_smc); + if (new_smc->use_fallback) { smc_listen_out_connected(new_smc); return; diff --git a/net/smc/smc_close.c b/net/smc/smc_close.c index 2ad37e998509..fc06720b53c1 100644 --- a/net/smc/smc_close.c +++ b/net/smc/smc_close.c @@ -21,6 +21,22 @@ #define SMC_CLOSE_WAIT_LISTEN_CLCSOCK_TIME (5 * HZ) +/* release the clcsock that is assigned to the smc_sock */ +void smc_clcsock_release(struct smc_sock *smc) +{ + struct socket *tcp; + + if (smc->listen_smc && current_work() != &smc->smc_listen_work) + cancel_work_sync(&smc->smc_listen_work); + mutex_lock(&smc->clcsock_release_lock); + if (smc->clcsock) { + tcp = smc->clcsock; + smc->clcsock = NULL; + sock_release(tcp); + } + mutex_unlock(&smc->clcsock_release_lock); +} + static void smc_close_cleanup_listen(struct sock *parent) { struct sock *sk; @@ -321,6 +337,7 @@ static void smc_close_passive_work(struct work_struct *work) close_work); struct smc_sock *smc = container_of(conn, struct smc_sock, conn); struct smc_cdc_conn_state_flags *rxflags; + bool release_clcsock = false; struct sock *sk = &smc->sk; int old_state; @@ -400,13 +417,13 @@ wakeup: if ((sk->sk_state == SMC_CLOSED) && (sock_flag(sk, SOCK_DEAD) || !sk->sk_socket)) { smc_conn_free(conn); - if (smc->clcsock) { - sock_release(smc->clcsock); - smc->clcsock = NULL; - } + if (smc->clcsock) + release_clcsock = true; } } release_sock(sk); + if (release_clcsock) + smc_clcsock_release(smc); sock_put(sk); /* sock_hold done by schedulers of close_work */ } diff --git a/net/smc/smc_close.h b/net/smc/smc_close.h index 19eb6a211c23..e0e3b5df25d2 100644 --- a/net/smc/smc_close.h +++ b/net/smc/smc_close.h @@ -23,5 +23,6 @@ void smc_close_wake_tx_prepared(struct smc_sock *smc); int smc_close_active(struct smc_sock *smc); int smc_close_shutdown_write(struct smc_sock *smc); void smc_close_init(struct smc_sock *smc); +void smc_clcsock_release(struct smc_sock *smc); #endif /* SMC_CLOSE_H */ -- cgit v1.2.3 From e183d4e414b64711baf7a04e214b61969ca08dfa Mon Sep 17 00:00:00 2001 From: Kangjie Lu Date: Thu, 11 Apr 2019 11:17:31 +0200 Subject: net/smc: fix a NULL pointer dereference In case alloc_ordered_workqueue fails, the fix returns NULL to avoid NULL pointer dereference. Signed-off-by: Kangjie Lu Signed-off-by: Ursula Braun Signed-off-by: David S. Miller --- net/smc/smc_ism.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/net/smc/smc_ism.c b/net/smc/smc_ism.c index 2fff79db1a59..e89e918b88e0 100644 --- a/net/smc/smc_ism.c +++ b/net/smc/smc_ism.c @@ -289,6 +289,11 @@ struct smcd_dev *smcd_alloc_dev(struct device *parent, const char *name, INIT_LIST_HEAD(&smcd->vlan); smcd->event_wq = alloc_ordered_workqueue("ism_evt_wq-%s)", WQ_MEM_RECLAIM, name); + if (!smcd->event_wq) { + kfree(smcd->conn); + kfree(smcd); + return NULL; + } return smcd; } EXPORT_SYMBOL_GPL(smcd_alloc_dev); -- cgit v1.2.3 From 07603b230895a74ebb1e2a1231ac45c29c2a8cd3 Mon Sep 17 00:00:00 2001 From: Ursula Braun Date: Thu, 11 Apr 2019 11:17:32 +0200 Subject: net/smc: propagate file from SMC to TCP socket fcntl(fd, F_SETOWN, getpid()) selects the recipient of SIGURG signals that are delivered when out-of-band data arrives on socket fd. If an SMC socket program makes use of such an fcntl() call, it fails in case of fallback to TCP-mode. In case of fallback the traffic is processed with the internal TCP socket. Propagating field "file" from the SMC socket to the internal TCP socket fixes the issue. Reviewed-by: Karsten Graul Signed-off-by: Ursula Braun Signed-off-by: David S. Miller --- include/net/sock.h | 6 ------ net/smc/af_smc.c | 38 ++++++++++++++++++++++++++++---------- 2 files changed, 28 insertions(+), 16 deletions(-) diff --git a/include/net/sock.h b/include/net/sock.h index 8de5ee258b93..341f8bafa0cf 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -2084,12 +2084,6 @@ static inline bool skwq_has_sleeper(struct socket_wq *wq) * @p: poll_table * * See the comments in the wq_has_sleeper function. - * - * Do not derive sock from filp->private_data here. An SMC socket establishes - * an internal TCP socket that is used in the fallback case. All socket - * operations on the SMC socket are then forwarded to the TCP socket. In case of - * poll, the filp->private_data pointer references the SMC socket because the - * TCP socket has no file assigned. */ static inline void sock_poll_wait(struct file *filp, struct socket *sock, poll_table *p) diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c index 9bdaed2f2e35..d2a0d15f809c 100644 --- a/net/smc/af_smc.c +++ b/net/smc/af_smc.c @@ -445,10 +445,19 @@ static void smc_link_save_peer_info(struct smc_link *link, link->peer_mtu = clc->qp_mtu; } +static void smc_switch_to_fallback(struct smc_sock *smc) +{ + smc->use_fallback = true; + if (smc->sk.sk_socket && smc->sk.sk_socket->file) { + smc->clcsock->file = smc->sk.sk_socket->file; + smc->clcsock->file->private_data = smc->clcsock; + } +} + /* fall back during connect */ static int smc_connect_fallback(struct smc_sock *smc, int reason_code) { - smc->use_fallback = true; + smc_switch_to_fallback(smc); smc->fallback_rsn = reason_code; smc_copy_sock_settings_to_clc(smc); if (smc->sk.sk_state == SMC_INIT) @@ -774,10 +783,14 @@ static void smc_connect_work(struct work_struct *work) smc->sk.sk_err = -rc; out: - if (smc->sk.sk_err) - smc->sk.sk_state_change(&smc->sk); - else - smc->sk.sk_write_space(&smc->sk); + if (!sock_flag(&smc->sk, SOCK_DEAD)) { + if (smc->sk.sk_err) { + smc->sk.sk_state_change(&smc->sk); + } else { /* allow polling before and after fallback decision */ + smc->clcsock->sk->sk_write_space(smc->clcsock->sk); + smc->sk.sk_write_space(&smc->sk); + } + } kfree(smc->connect_info); smc->connect_info = NULL; release_sock(&smc->sk); @@ -934,8 +947,13 @@ struct sock *smc_accept_dequeue(struct sock *parent, sock_put(new_sk); /* final */ continue; } - if (new_sock) + if (new_sock) { sock_graft(new_sk, new_sock); + if (isk->use_fallback) { + smc_sk(new_sk)->clcsock->file = new_sock->file; + isk->clcsock->file->private_data = isk->clcsock; + } + } return new_sk; } return NULL; @@ -1086,7 +1104,7 @@ static void smc_listen_decline(struct smc_sock *new_smc, int reason_code, return; } smc_conn_free(&new_smc->conn); - new_smc->use_fallback = true; + smc_switch_to_fallback(new_smc); new_smc->fallback_rsn = reason_code; if (reason_code && reason_code != SMC_CLC_DECL_PEERDECL) { if (smc_clc_send_decline(new_smc, reason_code) < 0) { @@ -1246,7 +1264,7 @@ static void smc_listen_work(struct work_struct *work) /* check if peer is smc capable */ if (!tcp_sk(newclcsock->sk)->syn_smc) { - new_smc->use_fallback = true; + smc_switch_to_fallback(new_smc); new_smc->fallback_rsn = SMC_CLC_DECL_PEERNOSMC; smc_listen_out_connected(new_smc); return; @@ -1503,7 +1521,7 @@ static int smc_sendmsg(struct socket *sock, struct msghdr *msg, size_t len) if (msg->msg_flags & MSG_FASTOPEN) { if (sk->sk_state == SMC_INIT) { - smc->use_fallback = true; + smc_switch_to_fallback(smc); smc->fallback_rsn = SMC_CLC_DECL_OPTUNSUPP; } else { rc = -EINVAL; @@ -1705,7 +1723,7 @@ static int smc_setsockopt(struct socket *sock, int level, int optname, case TCP_FASTOPEN_NO_COOKIE: /* option not supported by SMC */ if (sk->sk_state == SMC_INIT) { - smc->use_fallback = true; + smc_switch_to_fallback(smc); smc->fallback_rsn = SMC_CLC_DECL_OPTUNSUPP; } else { if (!smc->use_fallback) -- cgit v1.2.3 From 8ef659f1a840c953a59442ff1400ec73baf3b601 Mon Sep 17 00:00:00 2001 From: Karsten Graul Date: Thu, 11 Apr 2019 11:17:33 +0200 Subject: net/smc: fix return code from FLUSH command The FLUSH command is used to empty the pnet table. No return code is expected from the command. Commit a9d8b0b1e3d6 added namespace support for the pnet table and changed the FLUSH command processing to call smc_pnet_remove_by_pnetid() to remove the pnet entries. This function returns -ENOENT when no entry was deleted, which is now the return code of the FLUSH command. As a result the FLUSH command will return an error when the pnet table is already empty. Restore the expected behavior and let FLUSH always return 0. Fixes: a9d8b0b1e3d6 ("net/smc: add pnet table namespace support") Signed-off-by: Karsten Graul Signed-off-by: Ursula Braun Signed-off-by: David S. Miller --- net/smc/smc_pnet.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/smc/smc_pnet.c b/net/smc/smc_pnet.c index 8d2f6296279c..0285c7f9e79b 100644 --- a/net/smc/smc_pnet.c +++ b/net/smc/smc_pnet.c @@ -603,7 +603,8 @@ static int smc_pnet_flush(struct sk_buff *skb, struct genl_info *info) { struct net *net = genl_info_net(info); - return smc_pnet_remove_by_pnetid(net, NULL); + smc_pnet_remove_by_pnetid(net, NULL); + return 0; } /* SMC_PNETID generic netlink operation definition */ -- cgit v1.2.3 From f61bca58f6c36e666c2b807697f25e5e98708162 Mon Sep 17 00:00:00 2001 From: Ursula Braun Date: Thu, 11 Apr 2019 11:17:34 +0200 Subject: net/smc: move unhash before release of clcsock Commit <26d92e951fe0> ("net/smc: move unhash as early as possible in smc_release()") fixes one occurrence in the smc code, but the same pattern exists in other places. This patch covers the remaining occurrences and makes sure, the unhash operation is done before the smc->clcsock is released. This avoids a potential use-after-free in smc_diag_dump(). Reviewed-by: Karsten Graul Signed-off-by: Ursula Braun Signed-off-by: David S. Miller --- net/smc/af_smc.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c index d2a0d15f809c..6f869ef49b32 100644 --- a/net/smc/af_smc.c +++ b/net/smc/af_smc.c @@ -884,11 +884,11 @@ static int smc_clcsock_accept(struct smc_sock *lsmc, struct smc_sock **new_smc) if (rc < 0) lsk->sk_err = -rc; if (rc < 0 || lsk->sk_state == SMC_CLOSED) { + new_sk->sk_prot->unhash(new_sk); if (new_clcsock) sock_release(new_clcsock); new_sk->sk_state = SMC_CLOSED; sock_set_flag(new_sk, SOCK_DEAD); - new_sk->sk_prot->unhash(new_sk); sock_put(new_sk); /* final */ *new_smc = NULL; goto out; @@ -939,11 +939,11 @@ struct sock *smc_accept_dequeue(struct sock *parent, smc_accept_unlink(new_sk); if (new_sk->sk_state == SMC_CLOSED) { + new_sk->sk_prot->unhash(new_sk); if (isk->clcsock) { sock_release(isk->clcsock); isk->clcsock = NULL; } - new_sk->sk_prot->unhash(new_sk); sock_put(new_sk); /* final */ continue; } @@ -973,6 +973,7 @@ void smc_close_non_accepted(struct sock *sk) sock_set_flag(sk, SOCK_DEAD); sk->sk_shutdown |= SHUTDOWN_MASK; } + sk->sk_prot->unhash(sk); if (smc->clcsock) { struct socket *tcp; @@ -988,7 +989,6 @@ void smc_close_non_accepted(struct sock *sk) smc_conn_free(&smc->conn); } release_sock(sk); - sk->sk_prot->unhash(sk); sock_put(sk); /* final sock_put */ } -- cgit v1.2.3 From 5ee15c101f29e0093ffb5448773ccbc786eb313b Mon Sep 17 00:00:00 2001 From: Matteo Croce Date: Thu, 11 Apr 2019 12:26:32 +0200 Subject: net: thunderx: raise XDP MTU to 1508 The thunderx driver splits frames bigger than 1530 bytes to multiple pages, making impossible to run an eBPF program on it. This leads to a maximum MTU of 1508 if QinQ is in use. The thunderx driver forbids to load an eBPF program if the MTU is higher than 1500 bytes. Raise the limit to 1508 so it is possible to use L2 protocols which need some more headroom. Fixes: 05c773f52b96e ("net: thunderx: Add basic XDP support") Signed-off-by: Matteo Croce Acked-by: Jesper Dangaard Brouer Signed-off-by: David S. Miller --- drivers/net/ethernet/cavium/thunder/nicvf_main.c | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/cavium/thunder/nicvf_main.c b/drivers/net/ethernet/cavium/thunder/nicvf_main.c index 28eac9056211..debc8c861c6b 100644 --- a/drivers/net/ethernet/cavium/thunder/nicvf_main.c +++ b/drivers/net/ethernet/cavium/thunder/nicvf_main.c @@ -32,6 +32,13 @@ #define DRV_NAME "nicvf" #define DRV_VERSION "1.0" +/* NOTE: Packets bigger than 1530 are split across multiple pages and XDP needs + * the buffer to be contiguous. Allow XDP to be set up only if we don't exceed + * this value, keeping headroom for the 14 byte Ethernet header and two + * VLAN tags (for QinQ) + */ +#define MAX_XDP_MTU (1530 - ETH_HLEN - VLAN_HLEN * 2) + /* Supported devices */ static const struct pci_device_id nicvf_id_table[] = { { PCI_DEVICE_SUB(PCI_VENDOR_ID_CAVIUM, @@ -1830,8 +1837,10 @@ static int nicvf_xdp_setup(struct nicvf *nic, struct bpf_prog *prog) bool bpf_attached = false; int ret = 0; - /* For now just support only the usual MTU sized frames */ - if (prog && (dev->mtu > 1500)) { + /* For now just support only the usual MTU sized frames, + * plus some headroom for VLAN, QinQ. + */ + if (prog && dev->mtu > MAX_XDP_MTU) { netdev_warn(dev, "Jumbo frames not yet supported with XDP, current MTU %d.\n", dev->mtu); return -EOPNOTSUPP; -- cgit v1.2.3 From 1f227d16083b2e280b7dde4ca78883d75593f2fd Mon Sep 17 00:00:00 2001 From: Matteo Croce Date: Thu, 11 Apr 2019 12:26:33 +0200 Subject: net: thunderx: don't allow jumbo frames with XDP The thunderx driver forbids to load an eBPF program if the MTU is too high, but this can be circumvented by loading the eBPF, then raising the MTU. Fix this by limiting the MTU if an eBPF program is already loaded. Fixes: 05c773f52b96e ("net: thunderx: Add basic XDP support") Signed-off-by: Matteo Croce Acked-by: Jesper Dangaard Brouer Signed-off-by: David S. Miller --- drivers/net/ethernet/cavium/thunder/nicvf_main.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/drivers/net/ethernet/cavium/thunder/nicvf_main.c b/drivers/net/ethernet/cavium/thunder/nicvf_main.c index debc8c861c6b..c032bef1b776 100644 --- a/drivers/net/ethernet/cavium/thunder/nicvf_main.c +++ b/drivers/net/ethernet/cavium/thunder/nicvf_main.c @@ -1589,6 +1589,15 @@ static int nicvf_change_mtu(struct net_device *netdev, int new_mtu) struct nicvf *nic = netdev_priv(netdev); int orig_mtu = netdev->mtu; + /* For now just support only the usual MTU sized frames, + * plus some headroom for VLAN, QinQ. + */ + if (nic->xdp_prog && new_mtu > MAX_XDP_MTU) { + netdev_warn(netdev, "Jumbo frames not yet supported with XDP, current MTU %d.\n", + netdev->mtu); + return -EINVAL; + } + netdev->mtu = new_mtu; if (!netif_running(netdev)) -- cgit v1.2.3 From c5b493ce192bd7a4e7bd073b5685aad121eeef82 Mon Sep 17 00:00:00 2001 From: Nikolay Aleksandrov Date: Thu, 11 Apr 2019 15:08:25 +0300 Subject: net: bridge: multicast: use rcu to access port list from br_multicast_start_querier br_multicast_start_querier() walks over the port list but it can be called from a timer with only multicast_lock held which doesn't protect the port list, so use RCU to walk over it. Fixes: c83b8fab06fc ("bridge: Restart queries when last querier expires") Signed-off-by: Nikolay Aleksandrov Signed-off-by: David S. Miller --- net/bridge/br_multicast.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c index 02da21d771c9..45e7f4173bba 100644 --- a/net/bridge/br_multicast.c +++ b/net/bridge/br_multicast.c @@ -2031,7 +2031,8 @@ static void br_multicast_start_querier(struct net_bridge *br, __br_multicast_open(br, query); - list_for_each_entry(port, &br->port_list, list) { + rcu_read_lock(); + list_for_each_entry_rcu(port, &br->port_list, list) { if (port->state == BR_STATE_DISABLED || port->state == BR_STATE_BLOCKING) continue; @@ -2043,6 +2044,7 @@ static void br_multicast_start_querier(struct net_bridge *br, br_multicast_enable(&port->ip6_own_query); #endif } + rcu_read_unlock(); } int br_multicast_toggle(struct net_bridge *br, unsigned long val) -- cgit v1.2.3 From 7c2bd9a39845bfb6d72ddb55ce737650271f6f96 Mon Sep 17 00:00:00 2001 From: Tetsuo Handa Date: Sat, 30 Mar 2019 10:21:07 +0900 Subject: NFS: Forbid setting AF_INET6 to "struct sockaddr_in"->sin_family. syzbot is reporting uninitialized value at rpc_sockaddr2uaddr() [1]. This is because syzbot is setting AF_INET6 to "struct sockaddr_in"->sin_family (which is embedded into user-visible "struct nfs_mount_data" structure) despite nfs23_validate_mount_data() cannot pass sizeof(struct sockaddr_in6) bytes of AF_INET6 address to rpc_sockaddr2uaddr(). Since "struct nfs_mount_data" structure is user-visible, we can't change "struct nfs_mount_data" to use "struct sockaddr_storage". Therefore, assuming that everybody is using AF_INET family when passing address via "struct nfs_mount_data"->addr, reject if its sin_family is not AF_INET. [1] https://syzkaller.appspot.com/bug?id=599993614e7cbbf66bc2656a919ab2a95fb5d75c Reported-by: syzbot Signed-off-by: Tetsuo Handa Signed-off-by: Trond Myklebust --- fs/nfs/super.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/nfs/super.c b/fs/nfs/super.c index 23790c7b2289..c27ac96a95bd 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -2041,7 +2041,8 @@ static int nfs23_validate_mount_data(void *options, memcpy(sap, &data->addr, sizeof(data->addr)); args->nfs_server.addrlen = sizeof(data->addr); args->nfs_server.port = ntohs(data->addr.sin_port); - if (!nfs_verify_server_address(sap)) + if (sap->sa_family != AF_INET || + !nfs_verify_server_address(sap)) goto out_no_address; if (!(data->flags & NFS_MOUNT_TCP)) -- cgit v1.2.3 From 29e7ca715f2a0b6c0a99b1aec1b0956d1f271955 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Tue, 9 Apr 2019 10:44:16 -0400 Subject: NFS: Fix handling of reply page vector NFSv4 GETACL and FS_LOCATIONS requests stopped working in v5.1-rc. These two need the extra padding to be added directly to the reply length. Reported-by: Olga Kornievskaia Fixes: 02ef04e432ba ("NFS: Account for XDR pad of buf->pages") Signed-off-by: Chuck Lever Tested-by: Olga Kornievskaia Signed-off-by: Trond Myklebust --- fs/nfs/nfs4xdr.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index cfcabc33e24d..602446158bfb 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -2589,7 +2589,7 @@ static void nfs4_xdr_enc_getacl(struct rpc_rqst *req, struct xdr_stream *xdr, ARRAY_SIZE(nfs4_acl_bitmap), &hdr); rpc_prepare_reply_pages(req, args->acl_pages, 0, - args->acl_len, replen); + args->acl_len, replen + 1); encode_nops(&hdr); } @@ -2811,7 +2811,7 @@ static void nfs4_xdr_enc_fs_locations(struct rpc_rqst *req, } rpc_prepare_reply_pages(req, (struct page **)&args->page, 0, - PAGE_SIZE, replen); + PAGE_SIZE, replen + 1); encode_nops(&hdr); } -- cgit v1.2.3 From e1ede312f17e96a9c5cda9aaa1cdcf442c1a5da8 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Tue, 9 Apr 2019 17:04:09 -0400 Subject: xprtrdma: Fix helper that drains the transport We want to drain only the RQ first. Otherwise the transport can deadlock on ->close if there are outstanding Send completions. Fixes: 6d2d0ee27c7a ("xprtrdma: Replace rpcrdma_receive_wq ... ") Signed-off-by: Chuck Lever Cc: stable@vger.kernel.org # v5.0+ Signed-off-by: Trond Myklebust --- net/sunrpc/xprtrdma/verbs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c index 89a63391d4d4..30cfc0efe699 100644 --- a/net/sunrpc/xprtrdma/verbs.c +++ b/net/sunrpc/xprtrdma/verbs.c @@ -90,7 +90,7 @@ static void rpcrdma_xprt_drain(struct rpcrdma_xprt *r_xprt) /* Flush Receives, then wait for deferred Reply work * to complete. */ - ib_drain_qp(ia->ri_id->qp); + ib_drain_rq(ia->ri_id->qp); drain_workqueue(buf->rb_completion_wq); /* Deferred Reply processing might have scheduled -- cgit v1.2.3 From 0769663b4f580566ef6cdf366f3073dbe8022c39 Mon Sep 17 00:00:00 2001 From: Olga Kornievskaia Date: Thu, 11 Apr 2019 14:34:18 -0400 Subject: NFSv4.1 fix incorrect return value in copy_file_range According to the NFSv4.2 spec if the input and output file is the same file, operation should fail with EINVAL. However, linux copy_file_range() system call has no such restrictions. Therefore, in such case let's return EOPNOTSUPP and allow VFS to fallback to doing do_splice_direct(). Also when copy_file_range is called on an NFSv4.0 or 4.1 mount (ie., a server that doesn't support COPY functionality), we also need to return EOPNOTSUPP and fallback to a regular copy. Fixes xfstest generic/075, generic/091, generic/112, generic/263 for all NFSv4.x versions. Signed-off-by: Olga Kornievskaia Signed-off-by: Trond Myklebust --- fs/nfs/nfs42proc.c | 3 --- fs/nfs/nfs4file.c | 4 +++- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/fs/nfs/nfs42proc.c b/fs/nfs/nfs42proc.c index ff6f85fb676b..5196bfa7894d 100644 --- a/fs/nfs/nfs42proc.c +++ b/fs/nfs/nfs42proc.c @@ -329,9 +329,6 @@ ssize_t nfs42_proc_copy(struct file *src, loff_t pos_src, }; ssize_t err, err2; - if (!nfs_server_capable(file_inode(dst), NFS_CAP_COPY)) - return -EOPNOTSUPP; - src_lock = nfs_get_lock_context(nfs_file_open_context(src)); if (IS_ERR(src_lock)) return PTR_ERR(src_lock); diff --git a/fs/nfs/nfs4file.c b/fs/nfs/nfs4file.c index 45b2322e092d..00d17198ee12 100644 --- a/fs/nfs/nfs4file.c +++ b/fs/nfs/nfs4file.c @@ -133,8 +133,10 @@ static ssize_t nfs4_copy_file_range(struct file *file_in, loff_t pos_in, struct file *file_out, loff_t pos_out, size_t count, unsigned int flags) { + if (!nfs_server_capable(file_inode(file_out), NFS_CAP_COPY)) + return -EOPNOTSUPP; if (file_inode(file_in) == file_inode(file_out)) - return -EINVAL; + return -EOPNOTSUPP; return nfs42_proc_copy(file_in, pos_in, file_out, pos_out, count); } -- cgit v1.2.3 From af6b61d7ef58099c82d854395a0e002be6bd036c Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 11 Apr 2019 15:16:52 -0400 Subject: Revert "SUNRPC: Micro-optimise when the task is known not to be sleeping" This reverts commit 009a82f6437490c262584d65a14094a818bcb747. The ability to optimise here relies on compiler being able to optimise away tail calls to avoid stack overflows. Unfortunately, we are seeing reports of problems, so let's just revert. Reported-by: Daniel Mack Signed-off-by: Trond Myklebust --- include/linux/sunrpc/sched.h | 8 -------- net/sunrpc/clnt.c | 45 ++++++++------------------------------------ 2 files changed, 8 insertions(+), 45 deletions(-) diff --git a/include/linux/sunrpc/sched.h b/include/linux/sunrpc/sched.h index ec861cd0cfe8..52d41d0c1ae1 100644 --- a/include/linux/sunrpc/sched.h +++ b/include/linux/sunrpc/sched.h @@ -304,12 +304,4 @@ rpc_clnt_swap_deactivate(struct rpc_clnt *clnt) } #endif /* CONFIG_SUNRPC_SWAP */ -static inline bool -rpc_task_need_resched(const struct rpc_task *task) -{ - if (RPC_IS_QUEUED(task) || task->tk_callback) - return true; - return false; -} - #endif /* _LINUX_SUNRPC_SCHED_H_ */ diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index 187d10443a15..1d0395ef62c9 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -1540,7 +1540,6 @@ call_start(struct rpc_task *task) clnt->cl_stats->rpccnt++; task->tk_action = call_reserve; rpc_task_set_transport(task, clnt); - call_reserve(task); } /* @@ -1554,9 +1553,6 @@ call_reserve(struct rpc_task *task) task->tk_status = 0; task->tk_action = call_reserveresult; xprt_reserve(task); - if (rpc_task_need_resched(task)) - return; - call_reserveresult(task); } static void call_retry_reserve(struct rpc_task *task); @@ -1579,7 +1575,6 @@ call_reserveresult(struct rpc_task *task) if (status >= 0) { if (task->tk_rqstp) { task->tk_action = call_refresh; - call_refresh(task); return; } @@ -1605,7 +1600,6 @@ call_reserveresult(struct rpc_task *task) /* fall through */ case -EAGAIN: /* woken up; retry */ task->tk_action = call_retry_reserve; - call_retry_reserve(task); return; case -EIO: /* probably a shutdown */ break; @@ -1628,9 +1622,6 @@ call_retry_reserve(struct rpc_task *task) task->tk_status = 0; task->tk_action = call_reserveresult; xprt_retry_reserve(task); - if (rpc_task_need_resched(task)) - return; - call_reserveresult(task); } /* @@ -1645,9 +1636,6 @@ call_refresh(struct rpc_task *task) task->tk_status = 0; task->tk_client->cl_stats->rpcauthrefresh++; rpcauth_refreshcred(task); - if (rpc_task_need_resched(task)) - return; - call_refreshresult(task); } /* @@ -1666,7 +1654,6 @@ call_refreshresult(struct rpc_task *task) case 0: if (rpcauth_uptodatecred(task)) { task->tk_action = call_allocate; - call_allocate(task); return; } /* Use rate-limiting and a max number of retries if refresh @@ -1685,7 +1672,6 @@ call_refreshresult(struct rpc_task *task) task->tk_cred_retry--; dprintk("RPC: %5u %s: retry refresh creds\n", task->tk_pid, __func__); - call_refresh(task); return; } dprintk("RPC: %5u %s: refresh creds failed with error %d\n", @@ -1711,10 +1697,8 @@ call_allocate(struct rpc_task *task) task->tk_status = 0; task->tk_action = call_encode; - if (req->rq_buffer) { - call_encode(task); + if (req->rq_buffer) return; - } if (proc->p_proc != 0) { BUG_ON(proc->p_arglen == 0); @@ -1740,12 +1724,8 @@ call_allocate(struct rpc_task *task) status = xprt->ops->buf_alloc(task); xprt_inject_disconnect(xprt); - if (status == 0) { - if (rpc_task_need_resched(task)) - return; - call_encode(task); + if (status == 0) return; - } if (status != -ENOMEM) { rpc_exit(task, status); return; @@ -1828,8 +1808,12 @@ call_encode(struct rpc_task *task) xprt_request_enqueue_receive(task); xprt_request_enqueue_transmit(task); out: - task->tk_action = call_bind; - call_bind(task); + task->tk_action = call_transmit; + /* Check that the connection is OK */ + if (!xprt_bound(task->tk_xprt)) + task->tk_action = call_bind; + else if (!xprt_connected(task->tk_xprt)) + task->tk_action = call_connect; } /* @@ -1847,7 +1831,6 @@ rpc_task_handle_transmitted(struct rpc_task *task) { xprt_end_transmit(task); task->tk_action = call_transmit_status; - call_transmit_status(task); } /* @@ -1865,7 +1848,6 @@ call_bind(struct rpc_task *task) if (xprt_bound(xprt)) { task->tk_action = call_connect; - call_connect(task); return; } @@ -1896,7 +1878,6 @@ call_bind_status(struct rpc_task *task) dprint_status(task); task->tk_status = 0; task->tk_action = call_connect; - call_connect(task); return; } @@ -1981,7 +1962,6 @@ call_connect(struct rpc_task *task) if (xprt_connected(xprt)) { task->tk_action = call_transmit; - call_transmit(task); return; } @@ -2051,7 +2031,6 @@ call_connect_status(struct rpc_task *task) case 0: clnt->cl_stats->netreconn++; task->tk_action = call_transmit; - call_transmit(task); return; } rpc_exit(task, status); @@ -2087,9 +2066,6 @@ call_transmit(struct rpc_task *task) xprt_transmit(task); } xprt_end_transmit(task); - if (rpc_task_need_resched(task)) - return; - call_transmit_status(task); } /* @@ -2107,9 +2083,6 @@ call_transmit_status(struct rpc_task *task) if (rpc_task_transmitted(task)) { if (task->tk_status == 0) xprt_request_wait_receive(task); - if (rpc_task_need_resched(task)) - return; - call_status(task); return; } @@ -2170,7 +2143,6 @@ call_bc_encode(struct rpc_task *task) { xprt_request_enqueue_transmit(task); task->tk_action = call_bc_transmit; - call_bc_transmit(task); } /* @@ -2261,7 +2233,6 @@ call_status(struct rpc_task *task) status = task->tk_status; if (status >= 0) { task->tk_action = call_decode; - call_decode(task); return; } -- cgit v1.2.3 From 99834eead2a04e93a120abb112542b87c42ff5e1 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 5 Mar 2019 14:24:48 +0100 Subject: clocksource/drivers/npcm: select TIMER_OF When this is disabled, we get a link failure: drivers/clocksource/timer-npcm7xx.o: In function `npcm7xx_timer_init': timer-npcm7xx.c:(.init.text+0xf): undefined reference to `timer_of_init' Fixes: 1c00289ecd12 ("clocksource/drivers/npcm: Add NPCM7xx timer driver") Signed-off-by: Arnd Bergmann Signed-off-by: Daniel Lezcano --- drivers/clocksource/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/clocksource/Kconfig b/drivers/clocksource/Kconfig index 171502a356aa..4b3d143f0f8a 100644 --- a/drivers/clocksource/Kconfig +++ b/drivers/clocksource/Kconfig @@ -145,6 +145,7 @@ config VT8500_TIMER config NPCM7XX_TIMER bool "NPCM7xx timer driver" if COMPILE_TEST depends on HAS_IOMEM + select TIMER_OF select CLKSRC_MMIO help Enable 24-bit TIMER0 and TIMER1 counters in the NPCM7xx architecture, -- cgit v1.2.3 From 9155697e20040658438b89e4ceec415ec125f478 Mon Sep 17 00:00:00 2001 From: Yangtao Li Date: Tue, 5 Mar 2019 12:08:51 -0500 Subject: clocksource/drivers/arm_arch_timer: Remove unneeded pr_fmt macro After this commit ded24019b6b6f(clocksource: arm_arch_timer: clean up printk usage), the previous macro is redundant, so delete it. And move the new macro to the previous position. Signed-off-by: Yangtao Li Signed-off-by: Daniel Lezcano --- drivers/clocksource/arm_arch_timer.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/drivers/clocksource/arm_arch_timer.c b/drivers/clocksource/arm_arch_timer.c index aa4ec53281ce..ea373cfbcecb 100644 --- a/drivers/clocksource/arm_arch_timer.c +++ b/drivers/clocksource/arm_arch_timer.c @@ -9,7 +9,7 @@ * published by the Free Software Foundation. */ -#define pr_fmt(fmt) "arm_arch_timer: " fmt +#define pr_fmt(fmt) "arch_timer: " fmt #include #include @@ -33,9 +33,6 @@ #include -#undef pr_fmt -#define pr_fmt(fmt) "arch_timer: " fmt - #define CNTTIDR 0x08 #define CNTTIDR_VIRT(n) (BIT(1) << ((n) * 4)) -- cgit v1.2.3 From fbc87aa0f7c429999dc31f1bac3b2615008cac32 Mon Sep 17 00:00:00 2001 From: Neil Armstrong Date: Tue, 12 Mar 2019 11:32:56 +0100 Subject: clocksource/drivers/oxnas: Fix OX820 compatible The OX820 compatible is wrong is the driver, fix it. Fixes: 2ea3401e2a84 ("clocksource/drivers/oxnas: Add OX820 compatible") Reported-by: Daniel Golle Signed-off-by: Neil Armstrong Signed-off-by: Daniel Lezcano --- drivers/clocksource/timer-oxnas-rps.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/clocksource/timer-oxnas-rps.c b/drivers/clocksource/timer-oxnas-rps.c index eed6feff8b5f..30c6f4ce672b 100644 --- a/drivers/clocksource/timer-oxnas-rps.c +++ b/drivers/clocksource/timer-oxnas-rps.c @@ -296,4 +296,4 @@ err_alloc: TIMER_OF_DECLARE(ox810se_rps, "oxsemi,ox810se-rps-timer", oxnas_rps_timer_init); TIMER_OF_DECLARE(ox820_rps, - "oxsemi,ox820se-rps-timer", oxnas_rps_timer_init); + "oxsemi,ox820-rps-timer", oxnas_rps_timer_init); -- cgit v1.2.3 From d7c3a206e6338e4ccdf030719dec028e26a521d5 Mon Sep 17 00:00:00 2001 From: Andy Duan Date: Tue, 9 Apr 2019 03:40:56 +0000 Subject: net: fec: manage ahb clock in runtime pm Some SOC like i.MX6SX clock have some limits: - ahb clock should be disabled before ipg. - ahb and ipg clocks are required for MAC MII bus. So, move the ahb clock to runtime management together with ipg clock. Signed-off-by: Fugang Duan Signed-off-by: David S. Miller --- drivers/net/ethernet/freescale/fec_main.c | 30 +++++++++++++++++++++--------- 1 file changed, 21 insertions(+), 9 deletions(-) diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c index 697c2427f2b7..a96ad20ee484 100644 --- a/drivers/net/ethernet/freescale/fec_main.c +++ b/drivers/net/ethernet/freescale/fec_main.c @@ -1840,13 +1840,9 @@ static int fec_enet_clk_enable(struct net_device *ndev, bool enable) int ret; if (enable) { - ret = clk_prepare_enable(fep->clk_ahb); - if (ret) - return ret; - ret = clk_prepare_enable(fep->clk_enet_out); if (ret) - goto failed_clk_enet_out; + return ret; if (fep->clk_ptp) { mutex_lock(&fep->ptp_clk_mutex); @@ -1866,7 +1862,6 @@ static int fec_enet_clk_enable(struct net_device *ndev, bool enable) phy_reset_after_clk_enable(ndev->phydev); } else { - clk_disable_unprepare(fep->clk_ahb); clk_disable_unprepare(fep->clk_enet_out); if (fep->clk_ptp) { mutex_lock(&fep->ptp_clk_mutex); @@ -1885,8 +1880,6 @@ failed_clk_ref: failed_clk_ptp: if (fep->clk_enet_out) clk_disable_unprepare(fep->clk_enet_out); -failed_clk_enet_out: - clk_disable_unprepare(fep->clk_ahb); return ret; } @@ -3470,6 +3463,9 @@ fec_probe(struct platform_device *pdev) ret = clk_prepare_enable(fep->clk_ipg); if (ret) goto failed_clk_ipg; + ret = clk_prepare_enable(fep->clk_ahb); + if (ret) + goto failed_clk_ahb; fep->reg_phy = devm_regulator_get_optional(&pdev->dev, "phy"); if (!IS_ERR(fep->reg_phy)) { @@ -3563,6 +3559,9 @@ failed_reset: pm_runtime_put(&pdev->dev); pm_runtime_disable(&pdev->dev); failed_regulator: + clk_disable_unprepare(fep->clk_ahb); +failed_clk_ahb: + clk_disable_unprepare(fep->clk_ipg); failed_clk_ipg: fec_enet_clk_enable(ndev, false); failed_clk: @@ -3686,6 +3685,7 @@ static int __maybe_unused fec_runtime_suspend(struct device *dev) struct net_device *ndev = dev_get_drvdata(dev); struct fec_enet_private *fep = netdev_priv(ndev); + clk_disable_unprepare(fep->clk_ahb); clk_disable_unprepare(fep->clk_ipg); return 0; @@ -3695,8 +3695,20 @@ static int __maybe_unused fec_runtime_resume(struct device *dev) { struct net_device *ndev = dev_get_drvdata(dev); struct fec_enet_private *fep = netdev_priv(ndev); + int ret; - return clk_prepare_enable(fep->clk_ipg); + ret = clk_prepare_enable(fep->clk_ahb); + if (ret) + return ret; + ret = clk_prepare_enable(fep->clk_ipg); + if (ret) + goto failed_clk_ipg; + + return 0; + +failed_clk_ipg: + clk_disable_unprepare(fep->clk_ahb); + return ret; } static const struct dev_pm_ops fec_pm_ops = { -- cgit v1.2.3 From d3706566ae3d92677b932dd156157fd6c72534b1 Mon Sep 17 00:00:00 2001 From: YueHaibing Date: Tue, 9 Apr 2019 19:53:55 +0800 Subject: net: netrom: Fix error cleanup path of nr_proto_init Syzkaller report this: BUG: unable to handle kernel paging request at fffffbfff830524b PGD 237fe8067 P4D 237fe8067 PUD 237e64067 PMD 1c9716067 PTE 0 Oops: 0000 [#1] SMP KASAN PTI CPU: 1 PID: 4465 Comm: syz-executor.0 Not tainted 5.0.0+ #5 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.10.2-1ubuntu1 04/01/2014 RIP: 0010:__list_add_valid+0x21/0xe0 lib/list_debug.c:23 Code: 8b 0c 24 e9 17 fd ff ff 90 55 48 89 fd 48 8d 7a 08 53 48 89 d3 48 b8 00 00 00 00 00 fc ff df 48 89 fa 48 c1 ea 03 48 83 ec 08 <80> 3c 02 00 0f 85 8b 00 00 00 48 8b 53 08 48 39 f2 75 35 48 89 f2 RSP: 0018:ffff8881ea2278d0 EFLAGS: 00010282 RAX: dffffc0000000000 RBX: ffffffffc1829250 RCX: 1ffff1103d444ef4 RDX: 1ffffffff830524b RSI: ffffffff85659300 RDI: ffffffffc1829258 RBP: ffffffffc1879250 R08: fffffbfff0acb269 R09: fffffbfff0acb269 R10: ffff8881ea2278f0 R11: fffffbfff0acb268 R12: ffffffffc1829250 R13: dffffc0000000000 R14: 0000000000000008 R15: ffffffffc187c830 FS: 00007fe0361df700(0000) GS:ffff8881f7300000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: fffffbfff830524b CR3: 00000001eb39a001 CR4: 00000000007606e0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 PKRU: 55555554 Call Trace: __list_add include/linux/list.h:60 [inline] list_add include/linux/list.h:79 [inline] proto_register+0x444/0x8f0 net/core/sock.c:3375 nr_proto_init+0x73/0x4b3 [netrom] ? 0xffffffffc1628000 ? 0xffffffffc1628000 do_one_initcall+0xbc/0x47d init/main.c:887 do_init_module+0x1b5/0x547 kernel/module.c:3456 load_module+0x6405/0x8c10 kernel/module.c:3804 __do_sys_finit_module+0x162/0x190 kernel/module.c:3898 do_syscall_64+0x9f/0x450 arch/x86/entry/common.c:290 entry_SYSCALL_64_after_hwframe+0x49/0xbe RIP: 0033:0x462e99 Code: f7 d8 64 89 02 b8 ff ff ff ff c3 66 0f 1f 44 00 00 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 c7 c1 bc ff ff ff f7 d8 64 89 01 48 RSP: 002b:00007fe0361dec58 EFLAGS: 00000246 ORIG_RAX: 0000000000000139 RAX: ffffffffffffffda RBX: 000000000073bf00 RCX: 0000000000462e99 RDX: 0000000000000000 RSI: 0000000020000100 RDI: 0000000000000003 RBP: 00007fe0361dec70 R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000246 R12: 00007fe0361df6bc R13: 00000000004bcefa R14: 00000000006f6fb0 R15: 0000000000000004 Modules linked in: netrom(+) ax25 fcrypt pcbc af_alg arizona_ldo1 v4l2_common videodev media v4l2_dv_timings hdlc ide_cd_mod snd_soc_sigmadsp_regmap snd_soc_sigmadsp intel_spi_platform intel_spi mtd spi_nor snd_usbmidi_lib usbcore lcd ti_ads7950 hi6421_regulator snd_soc_kbl_rt5663_max98927 snd_soc_hdac_hdmi snd_hda_ext_core snd_hda_core snd_soc_rt5663 snd_soc_core snd_pcm_dmaengine snd_compress snd_soc_rl6231 mac80211 rtc_rc5t583 spi_slave_time leds_pwm hid_gt683r hid industrialio_triggered_buffer kfifo_buf industrialio ir_kbd_i2c rc_core led_class_flash dwc_xlgmac snd_ymfpci gameport snd_mpu401_uart snd_rawmidi snd_ac97_codec snd_pcm ac97_bus snd_opl3_lib snd_timer snd_seq_device snd_hwdep snd soundcore iptable_security iptable_raw iptable_mangle iptable_nat nf_nat nf_conntrack nf_defrag_ipv6 nf_defrag_ipv4 iptable_filter bpfilter ip6_vti ip_vti ip_gre ipip sit tunnel4 ip_tunnel hsr veth netdevsim vxcan batman_adv cfg80211 rfkill chnl_net caif nlmon dummy team bonding vcan bridge stp llc ip6_gre gre ip6_tunnel tunnel6 tun joydev mousedev ppdev tpm kvm_intel kvm irqbypass crct10dif_pclmul crc32_pclmul crc32c_intel ghash_clmulni_intel ide_pci_generic piix aesni_intel aes_x86_64 crypto_simd cryptd glue_helper ide_core psmouse input_leds i2c_piix4 serio_raw intel_agp intel_gtt ata_generic agpgart pata_acpi parport_pc rtc_cmos parport floppy sch_fq_codel ip_tables x_tables sha1_ssse3 sha1_generic ipv6 [last unloaded: rxrpc] Dumping ftrace buffer: (ftrace buffer empty) CR2: fffffbfff830524b ---[ end trace 039ab24b305c4b19 ]--- If nr_proto_init failed, it may forget to call proto_unregister, tiggering this issue.This patch rearrange code of nr_proto_init to avoid such issues. Reported-by: Hulk Robot Signed-off-by: YueHaibing Signed-off-by: David S. Miller --- include/net/netrom.h | 2 +- net/netrom/af_netrom.c | 76 ++++++++++++++++++++++++++++++------------ net/netrom/nr_loopback.c | 2 +- net/netrom/nr_route.c | 2 +- net/netrom/sysctl_net_netrom.c | 5 ++- 5 files changed, 61 insertions(+), 26 deletions(-) diff --git a/include/net/netrom.h b/include/net/netrom.h index 5a0714ff500f..80f15b1c1a48 100644 --- a/include/net/netrom.h +++ b/include/net/netrom.h @@ -266,7 +266,7 @@ void nr_stop_idletimer(struct sock *); int nr_t1timer_running(struct sock *); /* sysctl_net_netrom.c */ -void nr_register_sysctl(void); +int nr_register_sysctl(void); void nr_unregister_sysctl(void); #endif diff --git a/net/netrom/af_netrom.c b/net/netrom/af_netrom.c index 1d3144d19903..71ffd1a6dc7c 100644 --- a/net/netrom/af_netrom.c +++ b/net/netrom/af_netrom.c @@ -1392,18 +1392,22 @@ static int __init nr_proto_init(void) int i; int rc = proto_register(&nr_proto, 0); - if (rc != 0) - goto out; + if (rc) + return rc; if (nr_ndevs > 0x7fffffff/sizeof(struct net_device *)) { - printk(KERN_ERR "NET/ROM: nr_proto_init - nr_ndevs parameter to large\n"); - return -1; + pr_err("NET/ROM: %s - nr_ndevs parameter too large\n", + __func__); + rc = -EINVAL; + goto unregister_proto; } dev_nr = kcalloc(nr_ndevs, sizeof(struct net_device *), GFP_KERNEL); - if (dev_nr == NULL) { - printk(KERN_ERR "NET/ROM: nr_proto_init - unable to allocate device array\n"); - return -1; + if (!dev_nr) { + pr_err("NET/ROM: %s - unable to allocate device array\n", + __func__); + rc = -ENOMEM; + goto unregister_proto; } for (i = 0; i < nr_ndevs; i++) { @@ -1413,13 +1417,13 @@ static int __init nr_proto_init(void) sprintf(name, "nr%d", i); dev = alloc_netdev(0, name, NET_NAME_UNKNOWN, nr_setup); if (!dev) { - printk(KERN_ERR "NET/ROM: nr_proto_init - unable to allocate device structure\n"); + rc = -ENOMEM; goto fail; } dev->base_addr = i; - if (register_netdev(dev)) { - printk(KERN_ERR "NET/ROM: nr_proto_init - unable to register network device\n"); + rc = register_netdev(dev); + if (rc) { free_netdev(dev); goto fail; } @@ -1427,36 +1431,64 @@ static int __init nr_proto_init(void) dev_nr[i] = dev; } - if (sock_register(&nr_family_ops)) { - printk(KERN_ERR "NET/ROM: nr_proto_init - unable to register socket family\n"); + rc = sock_register(&nr_family_ops); + if (rc) goto fail; - } - register_netdevice_notifier(&nr_dev_notifier); + rc = register_netdevice_notifier(&nr_dev_notifier); + if (rc) + goto out_sock; ax25_register_pid(&nr_pid); ax25_linkfail_register(&nr_linkfail_notifier); #ifdef CONFIG_SYSCTL - nr_register_sysctl(); + rc = nr_register_sysctl(); + if (rc) + goto out_sysctl; #endif nr_loopback_init(); - proc_create_seq("nr", 0444, init_net.proc_net, &nr_info_seqops); - proc_create_seq("nr_neigh", 0444, init_net.proc_net, &nr_neigh_seqops); - proc_create_seq("nr_nodes", 0444, init_net.proc_net, &nr_node_seqops); -out: - return rc; + rc = -ENOMEM; + if (!proc_create_seq("nr", 0444, init_net.proc_net, &nr_info_seqops)) + goto proc_remove1; + if (!proc_create_seq("nr_neigh", 0444, init_net.proc_net, + &nr_neigh_seqops)) + goto proc_remove2; + if (!proc_create_seq("nr_nodes", 0444, init_net.proc_net, + &nr_node_seqops)) + goto proc_remove3; + + return 0; + +proc_remove3: + remove_proc_entry("nr_neigh", init_net.proc_net); +proc_remove2: + remove_proc_entry("nr", init_net.proc_net); +proc_remove1: + + nr_loopback_clear(); + nr_rt_free(); + +#ifdef CONFIG_SYSCTL + nr_unregister_sysctl(); +out_sysctl: +#endif + ax25_linkfail_release(&nr_linkfail_notifier); + ax25_protocol_release(AX25_P_NETROM); + unregister_netdevice_notifier(&nr_dev_notifier); +out_sock: + sock_unregister(PF_NETROM); fail: while (--i >= 0) { unregister_netdev(dev_nr[i]); free_netdev(dev_nr[i]); } kfree(dev_nr); +unregister_proto: proto_unregister(&nr_proto); - rc = -1; - goto out; + return rc; } module_init(nr_proto_init); diff --git a/net/netrom/nr_loopback.c b/net/netrom/nr_loopback.c index 215ad22a9647..93d13f019981 100644 --- a/net/netrom/nr_loopback.c +++ b/net/netrom/nr_loopback.c @@ -70,7 +70,7 @@ static void nr_loopback_timer(struct timer_list *unused) } } -void __exit nr_loopback_clear(void) +void nr_loopback_clear(void) { del_timer_sync(&loopback_timer); skb_queue_purge(&loopback_queue); diff --git a/net/netrom/nr_route.c b/net/netrom/nr_route.c index 6485f593e2f0..b76aa668a94b 100644 --- a/net/netrom/nr_route.c +++ b/net/netrom/nr_route.c @@ -953,7 +953,7 @@ const struct seq_operations nr_neigh_seqops = { /* * Free all memory associated with the nodes and routes lists. */ -void __exit nr_rt_free(void) +void nr_rt_free(void) { struct nr_neigh *s = NULL; struct nr_node *t = NULL; diff --git a/net/netrom/sysctl_net_netrom.c b/net/netrom/sysctl_net_netrom.c index ba1c368b3f18..771011b84270 100644 --- a/net/netrom/sysctl_net_netrom.c +++ b/net/netrom/sysctl_net_netrom.c @@ -146,9 +146,12 @@ static struct ctl_table nr_table[] = { { } }; -void __init nr_register_sysctl(void) +int __init nr_register_sysctl(void) { nr_table_header = register_net_sysctl(&init_net, "net/netrom", nr_table); + if (!nr_table_header) + return -ENOMEM; + return 0; } void nr_unregister_sysctl(void) -- cgit v1.2.3 From a5f622984a623df9a84cf43f6b098d8dd76fbe05 Mon Sep 17 00:00:00 2001 From: David Ahern Date: Tue, 9 Apr 2019 14:23:10 -0700 Subject: selftests: fib_tests: Fix 'Command line is not complete' errors A couple of tests are verifying a route has been removed. The helper expects the prefix as the first part of the expected output. When checking that a route has been deleted the prefix is empty leading to an invalid ip command: $ ip ro ls match Command line is not complete. Try option "help" Fix by moving the comparison of expected output and output to a new function that is used by both check_route and check_route6. Use the new helper for the 2 checks on route removal. Also, remove the reset of 'set -x' in route_setup which overrides the user managed setting. Fixes: d69faad76584c ("selftests: fib_tests: Add prefix route tests with metric") Signed-off-by: David Ahern Signed-off-by: David S. Miller --- tools/testing/selftests/net/fib_tests.sh | 94 ++++++++++++++------------------ 1 file changed, 40 insertions(+), 54 deletions(-) diff --git a/tools/testing/selftests/net/fib_tests.sh b/tools/testing/selftests/net/fib_tests.sh index 1080ff55a788..0d2a5f4f1e63 100755 --- a/tools/testing/selftests/net/fib_tests.sh +++ b/tools/testing/selftests/net/fib_tests.sh @@ -605,6 +605,39 @@ run_cmd() return $rc } +check_expected() +{ + local out="$1" + local expected="$2" + local rc=0 + + [ "${out}" = "${expected}" ] && return 0 + + if [ -z "${out}" ]; then + if [ "$VERBOSE" = "1" ]; then + printf "\nNo route entry found\n" + printf "Expected:\n" + printf " ${expected}\n" + fi + return 1 + fi + + # tricky way to convert output to 1-line without ip's + # messy '\'; this drops all extra white space + out=$(echo ${out}) + if [ "${out}" != "${expected}" ]; then + rc=1 + if [ "${VERBOSE}" = "1" ]; then + printf " Unexpected route entry. Have:\n" + printf " ${out}\n" + printf " Expected:\n" + printf " ${expected}\n\n" + fi + fi + + return $rc +} + # add route for a prefix, flushing any existing routes first # expected to be the first step of a test add_route6() @@ -652,31 +685,7 @@ check_route6() pfx=$1 out=$($IP -6 ro ls match ${pfx} | sed -e 's/ pref medium//') - [ "${out}" = "${expected}" ] && return 0 - - if [ -z "${out}" ]; then - if [ "$VERBOSE" = "1" ]; then - printf "\nNo route entry found\n" - printf "Expected:\n" - printf " ${expected}\n" - fi - return 1 - fi - - # tricky way to convert output to 1-line without ip's - # messy '\'; this drops all extra white space - out=$(echo ${out}) - if [ "${out}" != "${expected}" ]; then - rc=1 - if [ "${VERBOSE}" = "1" ]; then - printf " Unexpected route entry. Have:\n" - printf " ${out}\n" - printf " Expected:\n" - printf " ${expected}\n\n" - fi - fi - - return $rc + check_expected "${out}" "${expected}" } route_cleanup() @@ -725,7 +734,7 @@ route_setup() ip -netns ns2 addr add 172.16.103.2/24 dev veth4 ip -netns ns2 addr add 172.16.104.1/24 dev dummy1 - set +ex + set +e } # assumption is that basic add of a single path route works @@ -960,7 +969,8 @@ ipv6_addr_metric_test() run_cmd "$IP li set dev dummy2 down" rc=$? if [ $rc -eq 0 ]; then - check_route6 "" + out=$($IP -6 ro ls match 2001:db8:104::/64) + check_expected "${out}" "" rc=$? fi log_test $rc 0 "Prefix route removed on link down" @@ -1091,38 +1101,13 @@ check_route() local pfx local expected="$1" local out - local rc=0 set -- $expected pfx=$1 [ "${pfx}" = "unreachable" ] && pfx=$2 out=$($IP ro ls match ${pfx}) - [ "${out}" = "${expected}" ] && return 0 - - if [ -z "${out}" ]; then - if [ "$VERBOSE" = "1" ]; then - printf "\nNo route entry found\n" - printf "Expected:\n" - printf " ${expected}\n" - fi - return 1 - fi - - # tricky way to convert output to 1-line without ip's - # messy '\'; this drops all extra white space - out=$(echo ${out}) - if [ "${out}" != "${expected}" ]; then - rc=1 - if [ "${VERBOSE}" = "1" ]; then - printf " Unexpected route entry. Have:\n" - printf " ${out}\n" - printf " Expected:\n" - printf " ${expected}\n\n" - fi - fi - - return $rc + check_expected "${out}" "${expected}" } # assumption is that basic add of a single path route works @@ -1387,7 +1372,8 @@ ipv4_addr_metric_test() run_cmd "$IP li set dev dummy2 down" rc=$? if [ $rc -eq 0 ]; then - check_route "" + out=$($IP ro ls match 172.16.104.0/24) + check_expected "${out}" "" rc=$? fi log_test $rc 0 "Prefix route removed on link down" -- cgit v1.2.3 From a89afe58f1a74aac768a5eb77af95ef4ee15beaa Mon Sep 17 00:00:00 2001 From: Jason Yan Date: Fri, 12 Apr 2019 10:09:16 +0800 Subject: block: fix the return errno for direct IO If the last bio returned is not dio->bio, the status of the bio will not assigned to dio->bio if it is error. This will cause the whole IO status wrong. ksoftirqd/21-117 [021] ..s. 4017.966090: 8,0 C N 4883648 [0] -0 [018] ..s. 4017.970888: 8,0 C WS 4924800 + 1024 [0] -0 [018] ..s. 4017.970909: 8,0 D WS 4935424 + 1024 [] -0 [018] ..s. 4017.970924: 8,0 D WS 4936448 + 321 [] ksoftirqd/21-117 [021] ..s. 4017.995033: 8,0 C R 4883648 + 336 [65475] ksoftirqd/21-117 [021] d.s. 4018.001988: myprobe1: (blkdev_bio_end_io+0x0/0x168) bi_status=7 ksoftirqd/21-117 [021] d.s. 4018.001992: myprobe: (aio_complete_rw+0x0/0x148) x0=0xffff802f2595ad80 res=0x12a000 res2=0x0 We always have to assign bio->bi_status to dio->bio.bi_status because we will only check dio->bio.bi_status when we return the whole IO to the upper layer. Fixes: 542ff7bf18c6 ("block: new direct I/O implementation") Cc: stable@vger.kernel.org Cc: Christoph Hellwig Cc: Jens Axboe Reviewed-by: Ming Lei Signed-off-by: Jason Yan Signed-off-by: Jens Axboe --- fs/block_dev.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/fs/block_dev.c b/fs/block_dev.c index 78d3257435c0..24615c76c1d0 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -307,10 +307,10 @@ static void blkdev_bio_end_io(struct bio *bio) struct blkdev_dio *dio = bio->bi_private; bool should_dirty = dio->should_dirty; - if (dio->multi_bio && !atomic_dec_and_test(&dio->ref)) { - if (bio->bi_status && !dio->bio.bi_status) - dio->bio.bi_status = bio->bi_status; - } else { + if (bio->bi_status && !dio->bio.bi_status) + dio->bio.bi_status = bio->bi_status; + + if (!dio->multi_bio || atomic_dec_and_test(&dio->ref)) { if (!dio->is_sync) { struct kiocb *iocb = dio->iocb; ssize_t ret; -- cgit v1.2.3 From e3058450965972e67cc0e5492c08c4cdadafc134 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 11 Apr 2019 05:55:23 -0700 Subject: dctcp: more accurate tracking of packets delivery After commit e21db6f69a95 ("tcp: track total bytes delivered with ECN CE marks") core TCP stack does a very good job tracking ECN signals. The "sender's best estimate of CE information" Yuchung mentioned in his patch is indeed the best we can do. DCTCP can use tp->delivered_ce and tp->delivered to not duplicate the logic, and use the existing best estimate. This solves some problems, since current DCTCP logic does not deal with losses and/or GRO or ack aggregation very well. This also removes a dubious use of inet_csk(sk)->icsk_ack.rcv_mss (this should have been tp->mss_cache), and a 64 bit divide. Finally, we can see that the DCTCP logic, calling dctcp_update_alpha() for every ACK could be done differently, calling it only once per RTT. Signed-off-by: Eric Dumazet Cc: Yuchung Cheng Cc: Neal Cardwell Cc: Soheil Hassas Yeganeh Cc: Florian Westphal Cc: Daniel Borkmann Cc: Lawrence Brakmo Cc: Abdul Kabbani Acked-by: Soheil Hassas Yeganeh Acked-by: Neal Cardwell Signed-off-by: David S. Miller --- net/ipv4/tcp_dctcp.c | 45 +++++++++++++++++---------------------------- 1 file changed, 17 insertions(+), 28 deletions(-) diff --git a/net/ipv4/tcp_dctcp.c b/net/ipv4/tcp_dctcp.c index 359da68d7c06..477cb4aa456c 100644 --- a/net/ipv4/tcp_dctcp.c +++ b/net/ipv4/tcp_dctcp.c @@ -49,9 +49,8 @@ #define DCTCP_MAX_ALPHA 1024U struct dctcp { - u32 acked_bytes_ecn; - u32 acked_bytes_total; - u32 prior_snd_una; + u32 old_delivered; + u32 old_delivered_ce; u32 prior_rcv_nxt; u32 dctcp_alpha; u32 next_seq; @@ -73,8 +72,8 @@ static void dctcp_reset(const struct tcp_sock *tp, struct dctcp *ca) { ca->next_seq = tp->snd_nxt; - ca->acked_bytes_ecn = 0; - ca->acked_bytes_total = 0; + ca->old_delivered = tp->delivered; + ca->old_delivered_ce = tp->delivered_ce; } static void dctcp_init(struct sock *sk) @@ -86,7 +85,6 @@ static void dctcp_init(struct sock *sk) sk->sk_state == TCP_CLOSE)) { struct dctcp *ca = inet_csk_ca(sk); - ca->prior_snd_una = tp->snd_una; ca->prior_rcv_nxt = tp->rcv_nxt; ca->dctcp_alpha = min(dctcp_alpha_on_init, DCTCP_MAX_ALPHA); @@ -118,37 +116,25 @@ static void dctcp_update_alpha(struct sock *sk, u32 flags) { const struct tcp_sock *tp = tcp_sk(sk); struct dctcp *ca = inet_csk_ca(sk); - u32 acked_bytes = tp->snd_una - ca->prior_snd_una; - - /* If ack did not advance snd_una, count dupack as MSS size. - * If ack did update window, do not count it at all. - */ - if (acked_bytes == 0 && !(flags & CA_ACK_WIN_UPDATE)) - acked_bytes = inet_csk(sk)->icsk_ack.rcv_mss; - if (acked_bytes) { - ca->acked_bytes_total += acked_bytes; - ca->prior_snd_una = tp->snd_una; - - if (flags & CA_ACK_ECE) - ca->acked_bytes_ecn += acked_bytes; - } /* Expired RTT */ if (!before(tp->snd_una, ca->next_seq)) { - u64 bytes_ecn = ca->acked_bytes_ecn; + u32 delivered_ce = tp->delivered_ce - ca->old_delivered_ce; u32 alpha = ca->dctcp_alpha; /* alpha = (1 - g) * alpha + g * F */ alpha -= min_not_zero(alpha, alpha >> dctcp_shift_g); - if (bytes_ecn) { + if (delivered_ce) { + u32 delivered = tp->delivered - ca->old_delivered; + /* If dctcp_shift_g == 1, a 32bit value would overflow - * after 8 Mbytes. + * after 8 M packets. */ - bytes_ecn <<= (10 - dctcp_shift_g); - do_div(bytes_ecn, max(1U, ca->acked_bytes_total)); + delivered_ce <<= (10 - dctcp_shift_g); + delivered_ce /= max(1U, delivered); - alpha = min(alpha + (u32)bytes_ecn, DCTCP_MAX_ALPHA); + alpha = min(alpha + delivered_ce, DCTCP_MAX_ALPHA); } /* dctcp_alpha can be read from dctcp_get_info() without * synchro, so we ask compiler to not use dctcp_alpha @@ -200,6 +186,7 @@ static size_t dctcp_get_info(struct sock *sk, u32 ext, int *attr, union tcp_cc_info *info) { const struct dctcp *ca = inet_csk_ca(sk); + const struct tcp_sock *tp = tcp_sk(sk); /* Fill it also in case of VEGASINFO due to req struct limits. * We can still correctly retrieve it later. @@ -211,8 +198,10 @@ static size_t dctcp_get_info(struct sock *sk, u32 ext, int *attr, info->dctcp.dctcp_enabled = 1; info->dctcp.dctcp_ce_state = (u16) ca->ce_state; info->dctcp.dctcp_alpha = ca->dctcp_alpha; - info->dctcp.dctcp_ab_ecn = ca->acked_bytes_ecn; - info->dctcp.dctcp_ab_tot = ca->acked_bytes_total; + info->dctcp.dctcp_ab_ecn = tp->mss_cache * + (tp->delivered_ce - ca->old_delivered_ce); + info->dctcp.dctcp_ab_tot = tp->mss_cache * + (tp->delivered - ca->old_delivered); } *attr = INET_DIAG_DCTCPINFO; -- cgit v1.2.3 From d15d9fd02575ecfada92d42f655940c4f10af842 Mon Sep 17 00:00:00 2001 From: Jonas Karlman Date: Wed, 20 Feb 2019 07:52:31 +0000 Subject: drm: bridge: dw-hdmi: Fix overflow workaround for Rockchip SoCs The Rockchip RK3288 SoC (v2.00a) and RK3328/RK3399 SoCs (v2.11a) have also been identified as needing this workaround with a single iteration. Fixes: be41fc55f1aa ("drm: bridge: dw-hdmi: Handle overflow workaround based on device version") Signed-off-by: Jonas Karlman Tested-by: Heiko Stueber Signed-off-by: Andrzej Hajda Link: https://patchwork.freedesktop.org/patch/msgid/AM3PR03MB0966818FAAAE6192FF4ED11AAC7D0@AM3PR03MB0966.eurprd03.prod.outlook.com --- drivers/gpu/drm/bridge/synopsys/dw-hdmi.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/gpu/drm/bridge/synopsys/dw-hdmi.c b/drivers/gpu/drm/bridge/synopsys/dw-hdmi.c index db761329a1e3..b74ccb6a24c2 100644 --- a/drivers/gpu/drm/bridge/synopsys/dw-hdmi.c +++ b/drivers/gpu/drm/bridge/synopsys/dw-hdmi.c @@ -1800,6 +1800,8 @@ static void dw_hdmi_clear_overflow(struct dw_hdmi *hdmi) * iteration for others. * The Amlogic Meson GX SoCs (v2.01a) have been identified as needing * the workaround with a single iteration. + * The Rockchip RK3288 SoC (v2.00a) and RK3328/RK3399 SoCs (v2.11a) have + * been identified as needing the workaround with a single iteration. */ switch (hdmi->version) { @@ -1808,7 +1810,9 @@ static void dw_hdmi_clear_overflow(struct dw_hdmi *hdmi) break; case 0x131a: case 0x132a: + case 0x200a: case 0x201a: + case 0x211a: case 0x212a: count = 1; break; -- cgit v1.2.3 From 1d54ad944074010609562da5c89e4f5df2f4e5db Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 4 Apr 2019 15:03:00 +0200 Subject: perf/core: Fix perf_event_disable_inatomic() race Thomas-Mich Richter reported he triggered a WARN()ing from event_function_local() on his s390. The problem boils down to: CPU-A CPU-B perf_event_overflow() perf_event_disable_inatomic() @pending_disable = 1 irq_work_queue(); sched-out event_sched_out() @pending_disable = 0 sched-in perf_event_overflow() perf_event_disable_inatomic() @pending_disable = 1; irq_work_queue(); // FAILS irq_work_run() perf_pending_event() if (@pending_disable) perf_event_disable_local(); // WHOOPS The problem exists in generic, but s390 is particularly sensitive because it doesn't implement arch_irq_work_raise(), nor does it call irq_work_run() from it's PMU interrupt handler (nor would that be sufficient in this case, because s390 also generates perf_event_overflow() from pmu::stop). Add to that the fact that s390 is a virtual architecture and (virtual) CPU-A can stall long enough for the above race to happen, even if it would self-IPI. Adding a irq_work_sync() to event_sched_in() would work for all hardare PMUs that properly use irq_work_run() but fails for software PMUs. Instead encode the CPU number in @pending_disable, such that we can tell which CPU requested the disable. This then allows us to detect the above scenario and even redirect the IPI to make up for the failed queue. Reported-by: Thomas-Mich Richter Tested-by: Thomas Richter Signed-off-by: Peter Zijlstra (Intel) Acked-by: Mark Rutland Cc: Alexander Shishkin Cc: Arnaldo Carvalho de Melo Cc: Heiko Carstens Cc: Hendrik Brueckner Cc: Jiri Olsa Cc: Kees Cook Cc: Linus Torvalds Cc: Martin Schwidefsky Cc: Peter Zijlstra Cc: Thomas Gleixner Signed-off-by: Ingo Molnar --- kernel/events/core.c | 52 +++++++++++++++++++++++++++++++++++++-------- kernel/events/ring_buffer.c | 4 ++-- 2 files changed, 45 insertions(+), 11 deletions(-) diff --git a/kernel/events/core.c b/kernel/events/core.c index 72d06e302e99..534e01e7bc36 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -2009,8 +2009,8 @@ event_sched_out(struct perf_event *event, event->pmu->del(event, 0); event->oncpu = -1; - if (event->pending_disable) { - event->pending_disable = 0; + if (READ_ONCE(event->pending_disable) >= 0) { + WRITE_ONCE(event->pending_disable, -1); state = PERF_EVENT_STATE_OFF; } perf_event_set_state(event, state); @@ -2198,7 +2198,8 @@ EXPORT_SYMBOL_GPL(perf_event_disable); void perf_event_disable_inatomic(struct perf_event *event) { - event->pending_disable = 1; + WRITE_ONCE(event->pending_disable, smp_processor_id()); + /* can fail, see perf_pending_event_disable() */ irq_work_queue(&event->pending); } @@ -5810,10 +5811,45 @@ void perf_event_wakeup(struct perf_event *event) } } +static void perf_pending_event_disable(struct perf_event *event) +{ + int cpu = READ_ONCE(event->pending_disable); + + if (cpu < 0) + return; + + if (cpu == smp_processor_id()) { + WRITE_ONCE(event->pending_disable, -1); + perf_event_disable_local(event); + return; + } + + /* + * CPU-A CPU-B + * + * perf_event_disable_inatomic() + * @pending_disable = CPU-A; + * irq_work_queue(); + * + * sched-out + * @pending_disable = -1; + * + * sched-in + * perf_event_disable_inatomic() + * @pending_disable = CPU-B; + * irq_work_queue(); // FAILS + * + * irq_work_run() + * perf_pending_event() + * + * But the event runs on CPU-B and wants disabling there. + */ + irq_work_queue_on(&event->pending, cpu); +} + static void perf_pending_event(struct irq_work *entry) { - struct perf_event *event = container_of(entry, - struct perf_event, pending); + struct perf_event *event = container_of(entry, struct perf_event, pending); int rctx; rctx = perf_swevent_get_recursion_context(); @@ -5822,10 +5858,7 @@ static void perf_pending_event(struct irq_work *entry) * and we won't recurse 'further'. */ - if (event->pending_disable) { - event->pending_disable = 0; - perf_event_disable_local(event); - } + perf_pending_event_disable(event); if (event->pending_wakeup) { event->pending_wakeup = 0; @@ -10236,6 +10269,7 @@ perf_event_alloc(struct perf_event_attr *attr, int cpu, init_waitqueue_head(&event->waitq); + event->pending_disable = -1; init_irq_work(&event->pending, perf_pending_event); mutex_init(&event->mmap_mutex); diff --git a/kernel/events/ring_buffer.c b/kernel/events/ring_buffer.c index a4047321d7d8..2545ac08cc77 100644 --- a/kernel/events/ring_buffer.c +++ b/kernel/events/ring_buffer.c @@ -392,7 +392,7 @@ void *perf_aux_output_begin(struct perf_output_handle *handle, * store that will be enabled on successful return */ if (!handle->size) { /* A, matches D */ - event->pending_disable = 1; + event->pending_disable = smp_processor_id(); perf_output_wakeup(handle); local_set(&rb->aux_nest, 0); goto err_put; @@ -480,7 +480,7 @@ void perf_aux_output_end(struct perf_output_handle *handle, unsigned long size) if (wakeup) { if (handle->aux_flags & PERF_AUX_FLAG_TRUNCATED) - handle->event->pending_disable = 1; + handle->event->pending_disable = smp_processor_id(); perf_output_wakeup(handle); } -- cgit v1.2.3 From 837f74116585dcd235fae1696e1e1471b6bb9e01 Mon Sep 17 00:00:00 2001 From: Nicolas Dichtel Date: Tue, 9 Apr 2019 17:16:59 +0200 Subject: xfrm: update doc about xfrm[46]_gc_thresh Those entries are not used anymore. CC: Florian Westphal Fixes: 09c7570480f7 ("xfrm: remove flow cache") Signed-off-by: Nicolas Dichtel Signed-off-by: Steffen Klassert --- Documentation/networking/ip-sysctl.txt | 2 ++ 1 file changed, 2 insertions(+) diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt index acdfb5d2bcaa..f1843b132453 100644 --- a/Documentation/networking/ip-sysctl.txt +++ b/Documentation/networking/ip-sysctl.txt @@ -1336,6 +1336,7 @@ tag - INTEGER Default value is 0. xfrm4_gc_thresh - INTEGER + (Obsolete since linux-4.14) The threshold at which we will start garbage collecting for IPv4 destination cache entries. At twice this value the system will refuse new allocations. @@ -1919,6 +1920,7 @@ echo_ignore_all - BOOLEAN Default: 0 xfrm6_gc_thresh - INTEGER + (Obsolete since linux-4.14) The threshold at which we will start garbage collecting for IPv6 destination cache entries. At twice this value the system will refuse new allocations. -- cgit v1.2.3 From f16628d6e8c6616b071ffe775908b95e07404cab Mon Sep 17 00:00:00 2001 From: Miguel Ojeda Date: Sat, 30 Mar 2019 09:20:16 +0100 Subject: clang-format: Update with the latest for_each macro list Re-run the shell fragment that generated the original list now that there are two dozens of new entries after v5.1's merge window. Signed-off-by: Miguel Ojeda --- .clang-format | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/.clang-format b/.clang-format index f49620f506f1..f3923a1f9858 100644 --- a/.clang-format +++ b/.clang-format @@ -78,6 +78,8 @@ ForEachMacros: - 'ata_qc_for_each_with_internal' - 'ax25_for_each' - 'ax25_uid_for_each' + - '__bio_for_each_bvec' + - 'bio_for_each_bvec' - 'bio_for_each_integrity_vec' - '__bio_for_each_segment' - 'bio_for_each_segment' @@ -118,10 +120,12 @@ ForEachMacros: - 'drm_for_each_legacy_plane' - 'drm_for_each_plane' - 'drm_for_each_plane_mask' + - 'drm_for_each_privobj' - 'drm_mm_for_each_hole' - 'drm_mm_for_each_node' - 'drm_mm_for_each_node_in_range' - 'drm_mm_for_each_node_safe' + - 'flow_action_for_each' - 'for_each_active_drhd_unit' - 'for_each_active_iommu' - 'for_each_available_child_of_node' @@ -158,6 +162,9 @@ ForEachMacros: - 'for_each_dss_dev' - 'for_each_efi_memory_desc' - 'for_each_efi_memory_desc_in_map' + - 'for_each_element' + - 'for_each_element_extid' + - 'for_each_element_id' - 'for_each_endpoint_of_node' - 'for_each_evictable_lru' - 'for_each_fib6_node_rt_rcu' @@ -195,6 +202,7 @@ ForEachMacros: - 'for_each_net_rcu' - 'for_each_new_connector_in_state' - 'for_each_new_crtc_in_state' + - 'for_each_new_mst_mgr_in_state' - 'for_each_new_plane_in_state' - 'for_each_new_private_obj_in_state' - 'for_each_node' @@ -210,8 +218,10 @@ ForEachMacros: - 'for_each_of_pci_range' - 'for_each_old_connector_in_state' - 'for_each_old_crtc_in_state' + - 'for_each_old_mst_mgr_in_state' - 'for_each_oldnew_connector_in_state' - 'for_each_oldnew_crtc_in_state' + - 'for_each_oldnew_mst_mgr_in_state' - 'for_each_oldnew_plane_in_state' - 'for_each_oldnew_plane_in_state_reverse' - 'for_each_oldnew_private_obj_in_state' @@ -243,6 +253,9 @@ ForEachMacros: - 'for_each_sg_dma_page' - 'for_each_sg_page' - 'for_each_sibling_event' + - 'for_each_subelement' + - 'for_each_subelement_extid' + - 'for_each_subelement_id' - '__for_each_thread' - 'for_each_thread' - 'for_each_zone' @@ -252,6 +265,8 @@ ForEachMacros: - 'fwnode_for_each_child_node' - 'fwnode_graph_for_each_endpoint' - 'gadget_for_each_ep' + - 'genradix_for_each' + - 'genradix_for_each_from' - 'hash_for_each' - 'hash_for_each_possible' - 'hash_for_each_possible_rcu' @@ -293,7 +308,11 @@ ForEachMacros: - 'key_for_each' - 'key_for_each_safe' - 'klp_for_each_func' + - 'klp_for_each_func_safe' + - 'klp_for_each_func_static' - 'klp_for_each_object' + - 'klp_for_each_object_safe' + - 'klp_for_each_object_static' - 'kvm_for_each_memslot' - 'kvm_for_each_vcpu' - 'list_for_each' @@ -324,6 +343,8 @@ ForEachMacros: - 'media_device_for_each_intf' - 'media_device_for_each_link' - 'media_device_for_each_pad' + - 'mp_bvec_for_each_page' + - 'mp_bvec_for_each_segment' - 'nanddev_io_for_each_page' - 'netdev_for_each_lower_dev' - 'netdev_for_each_lower_private' @@ -375,6 +396,7 @@ ForEachMacros: - 'rht_for_each_rcu' - 'rht_for_each_rcu_continue' - '__rq_for_each_bio' + - 'rq_for_each_bvec' - 'rq_for_each_segment' - 'scsi_for_each_prot_sg' - 'scsi_for_each_sg' @@ -410,6 +432,8 @@ ForEachMacros: - 'v4l2_m2m_for_each_src_buf_safe' - 'virtio_device_for_each_vq' - 'xa_for_each' + - 'xa_for_each_marked' + - 'xa_for_each_start' - 'xas_for_each' - 'xas_for_each_conflict' - 'xas_for_each_marked' -- cgit v1.2.3 From 3c677d206210f53a4be972211066c0f1cd47fe12 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Fri, 12 Apr 2019 12:50:31 +0200 Subject: iommu/amd: Set exclusion range correctly The exlcusion range limit register needs to contain the base-address of the last page that is part of the range, as bits 0-11 of this register are treated as 0xfff by the hardware for comparisons. So correctly set the exclusion range in the hardware to the last page which is _in_ the range. Fixes: b2026aa2dce44 ('x86, AMD IOMMU: add functions for programming IOMMU MMIO space') Signed-off-by: Joerg Roedel --- drivers/iommu/amd_iommu_init.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iommu/amd_iommu_init.c b/drivers/iommu/amd_iommu_init.c index 1b1378619fc9..ff40ba758cf3 100644 --- a/drivers/iommu/amd_iommu_init.c +++ b/drivers/iommu/amd_iommu_init.c @@ -359,7 +359,7 @@ static void iommu_write_l2(struct amd_iommu *iommu, u8 address, u32 val) static void iommu_set_exclusion_range(struct amd_iommu *iommu) { u64 start = iommu->exclusion_start & PAGE_MASK; - u64 limit = (start + iommu->exclusion_length) & PAGE_MASK; + u64 limit = (start + iommu->exclusion_length - 1) & PAGE_MASK; u64 entry; if (!iommu->exclusion_start) -- cgit v1.2.3 From 045afc24124d80c6998d9c770844c67912083506 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Mon, 8 Apr 2019 12:45:09 +0100 Subject: arm64: futex: Fix FUTEX_WAKE_OP atomic ops with non-zero result value Rather embarrassingly, our futex() FUTEX_WAKE_OP implementation doesn't explicitly set the return value on the non-faulting path and instead leaves it holding the result of the underlying atomic operation. This means that any FUTEX_WAKE_OP atomic operation which computes a non-zero value will be reported as having failed. Regrettably, I wrote the buggy code back in 2011 and it was upstreamed as part of the initial arm64 support in 2012. The reasons we appear to get away with this are: 1. FUTEX_WAKE_OP is rarely used and therefore doesn't appear to get exercised by futex() test applications 2. If the result of the atomic operation is zero, the system call behaves correctly 3. Prior to version 2.25, the only operation used by GLIBC set the futex to zero, and therefore worked as expected. From 2.25 onwards, FUTEX_WAKE_OP is not used by GLIBC at all. Fix the implementation by ensuring that the return value is either 0 to indicate that the atomic operation completed successfully, or -EFAULT if we encountered a fault when accessing the user mapping. Cc: Fixes: 6170a97460db ("arm64: Atomic operations") Signed-off-by: Will Deacon --- arch/arm64/include/asm/futex.h | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/arch/arm64/include/asm/futex.h b/arch/arm64/include/asm/futex.h index cccb83ad7fa8..e1d95f08f8e1 100644 --- a/arch/arm64/include/asm/futex.h +++ b/arch/arm64/include/asm/futex.h @@ -30,8 +30,8 @@ do { \ " prfm pstl1strm, %2\n" \ "1: ldxr %w1, %2\n" \ insn "\n" \ -"2: stlxr %w3, %w0, %2\n" \ -" cbnz %w3, 1b\n" \ +"2: stlxr %w0, %w3, %2\n" \ +" cbnz %w0, 1b\n" \ " dmb ish\n" \ "3:\n" \ " .pushsection .fixup,\"ax\"\n" \ @@ -50,30 +50,30 @@ do { \ static inline int arch_futex_atomic_op_inuser(int op, int oparg, int *oval, u32 __user *_uaddr) { - int oldval = 0, ret, tmp; + int oldval, ret, tmp; u32 __user *uaddr = __uaccess_mask_ptr(_uaddr); pagefault_disable(); switch (op) { case FUTEX_OP_SET: - __futex_atomic_op("mov %w0, %w4", + __futex_atomic_op("mov %w3, %w4", ret, oldval, uaddr, tmp, oparg); break; case FUTEX_OP_ADD: - __futex_atomic_op("add %w0, %w1, %w4", + __futex_atomic_op("add %w3, %w1, %w4", ret, oldval, uaddr, tmp, oparg); break; case FUTEX_OP_OR: - __futex_atomic_op("orr %w0, %w1, %w4", + __futex_atomic_op("orr %w3, %w1, %w4", ret, oldval, uaddr, tmp, oparg); break; case FUTEX_OP_ANDN: - __futex_atomic_op("and %w0, %w1, %w4", + __futex_atomic_op("and %w3, %w1, %w4", ret, oldval, uaddr, tmp, ~oparg); break; case FUTEX_OP_XOR: - __futex_atomic_op("eor %w0, %w1, %w4", + __futex_atomic_op("eor %w3, %w1, %w4", ret, oldval, uaddr, tmp, oparg); break; default: -- cgit v1.2.3 From b575f10dbd6f84c2c8744ff1f486bfae1e4f6f38 Mon Sep 17 00:00:00 2001 From: wentalou Date: Fri, 12 Apr 2019 15:01:14 +0800 Subject: drm/amdgpu: shadow in shadow_list without tbo.mem.start cause page fault in sriov TDR MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit shadow was added into shadow_list by amdgpu_bo_create_shadow. meanwhile, shadow->tbo.mem was not fully configured. tbo.mem would be fully configured by amdgpu_vm_sdma_map_table until calling amdgpu_vm_clear_bo. If sriov TDR occurred between amdgpu_bo_create_shadow and amdgpu_vm_sdma_map_table, amdgpu_device_recover_vram would deal with shadow without tbo.mem.start. Signed-off-by: Wentao Lou Reviewed-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 5d8b30fd4534..79fb302fb954 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -3165,6 +3165,7 @@ static int amdgpu_device_recover_vram(struct amdgpu_device *adev) /* No need to recover an evicted BO */ if (shadow->tbo.mem.mem_type != TTM_PL_TT || + shadow->tbo.mem.start == AMDGPU_BO_INVALID_OFFSET || shadow->parent->tbo.mem.mem_type != TTM_PL_VRAM) continue; -- cgit v1.2.3 From 1925e7d3d4677e681cc2e878c2bdbeaee988c8e2 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Thu, 11 Apr 2019 14:54:40 -0500 Subject: drm/amdgpu/gmc9: fix VM_L2_CNTL3 programming MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Got accidently dropped when 2+1 level support was added. Fixes: 6a42fd6fbf534096 ("drm/amdgpu: implement 2+1 PD support for Raven v3") Reviewed-by: Christian König Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c index d0d966d6080a..1696644ec022 100644 --- a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c +++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c @@ -182,6 +182,7 @@ static void mmhub_v1_0_init_cache_regs(struct amdgpu_device *adev) tmp = REG_SET_FIELD(tmp, VM_L2_CNTL3, L2_CACHE_BIGK_FRAGMENT_SIZE, 6); } + WREG32_SOC15(MMHUB, 0, mmVM_L2_CNTL3, tmp); tmp = mmVM_L2_CNTL4_DEFAULT; tmp = REG_SET_FIELD(tmp, VM_L2_CNTL4, VMC_TAP_PDE_REQUEST_PHYSICAL, 0); -- cgit v1.2.3 From b3cf181c65c4d49f86b67b399fe7203ecac730a9 Mon Sep 17 00:00:00 2001 From: Weiyi Lu Date: Fri, 12 Apr 2019 11:30:27 +0800 Subject: clk: mediatek: fix clk-gate flag setting CLK_SET_RATE_PARENT would be dropped. Merge two flag setting together to correct the error. Fixes: 5a1cc4c27ad2 ("clk: mediatek: Add flags to mtk_gate") Cc: Signed-off-by: Weiyi Lu Reviewed-by: Matthias Brugger Signed-off-by: Stephen Boyd --- drivers/clk/mediatek/clk-gate.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/clk/mediatek/clk-gate.c b/drivers/clk/mediatek/clk-gate.c index 9628d4e7690b..85daf826619a 100644 --- a/drivers/clk/mediatek/clk-gate.c +++ b/drivers/clk/mediatek/clk-gate.c @@ -169,11 +169,10 @@ struct clk *mtk_clk_register_gate( return ERR_PTR(-ENOMEM); init.name = name; - init.flags = CLK_SET_RATE_PARENT; + init.flags = flags | CLK_SET_RATE_PARENT; init.parent_names = parent_name ? &parent_name : NULL; init.num_parents = parent_name ? 1 : 0; init.ops = ops; - init.flags = flags; cg->regmap = regmap; cg->set_ofs = set_ofs; -- cgit v1.2.3 From dd3ac9a684358b8c1d5c432ca8322aaf5e4f28ee Mon Sep 17 00:00:00 2001 From: Tetsuo Handa Date: Fri, 12 Apr 2019 19:51:52 +0900 Subject: net/rds: Check address length before reading address family syzbot is reporting uninitialized value at rds_connect() [1] and rds_bind() [2]. This is because syzbot is passing ulen == 0 whereas these functions expect that it is safe to access sockaddr->family field in order to determine minimal address length for validation. [1] https://syzkaller.appspot.com/bug?id=f4e61c010416c1e6f0fa3ffe247561b60a50ad71 [2] https://syzkaller.appspot.com/bug?id=a4bf9e41b7e055c3823fdcd83e8c58ca7270e38f Reported-by: syzbot Reported-by: syzbot Signed-off-by: Tetsuo Handa Acked-by: Santosh Shilimkar Signed-off-by: David S. Miller --- net/rds/af_rds.c | 3 +++ net/rds/bind.c | 2 ++ 2 files changed, 5 insertions(+) diff --git a/net/rds/af_rds.c b/net/rds/af_rds.c index d6cc97fbbbb0..2b969f99ef13 100644 --- a/net/rds/af_rds.c +++ b/net/rds/af_rds.c @@ -543,6 +543,9 @@ static int rds_connect(struct socket *sock, struct sockaddr *uaddr, struct rds_sock *rs = rds_sk_to_rs(sk); int ret = 0; + if (addr_len < offsetofend(struct sockaddr, sa_family)) + return -EINVAL; + lock_sock(sk); switch (uaddr->sa_family) { diff --git a/net/rds/bind.c b/net/rds/bind.c index 17c9d9f0c848..0f4398e7f2a7 100644 --- a/net/rds/bind.c +++ b/net/rds/bind.c @@ -173,6 +173,8 @@ int rds_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) /* We allow an RDS socket to be bound to either IPv4 or IPv6 * address. */ + if (addr_len < offsetofend(struct sockaddr, sa_family)) + return -EINVAL; if (uaddr->sa_family == AF_INET) { struct sockaddr_in *sin = (struct sockaddr_in *)uaddr; -- cgit v1.2.3 From 238ffdc49ef98b15819cfd5e3fb23194e3ea3d39 Mon Sep 17 00:00:00 2001 From: Tetsuo Handa Date: Fri, 12 Apr 2019 19:52:36 +0900 Subject: mISDN: Check address length before reading address family KMSAN will complain if valid address length passed to bind() is shorter than sizeof("struct sockaddr_mISDN"->family) bytes. Signed-off-by: Tetsuo Handa Signed-off-by: David S. Miller --- drivers/isdn/mISDN/socket.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/isdn/mISDN/socket.c b/drivers/isdn/mISDN/socket.c index 4ab8b1b6608f..a14e35d40538 100644 --- a/drivers/isdn/mISDN/socket.c +++ b/drivers/isdn/mISDN/socket.c @@ -710,10 +710,10 @@ base_sock_bind(struct socket *sock, struct sockaddr *addr, int addr_len) struct sock *sk = sock->sk; int err = 0; - if (!maddr || maddr->family != AF_ISDN) + if (addr_len < sizeof(struct sockaddr_mISDN)) return -EINVAL; - if (addr_len < sizeof(struct sockaddr_mISDN)) + if (!maddr || maddr->family != AF_ISDN) return -EINVAL; lock_sock(sk); -- cgit v1.2.3 From 175f7c1f01d30b2088491bee4636fbf846fb76ce Mon Sep 17 00:00:00 2001 From: Tetsuo Handa Date: Fri, 12 Apr 2019 19:53:10 +0900 Subject: sctp: Check address length before reading address family KMSAN will complain if valid address length passed to connect() is shorter than sizeof("struct sockaddr"->sa_family) bytes. Signed-off-by: Tetsuo Handa Acked-by: Neil Horman Signed-off-by: David S. Miller --- net/sctp/socket.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/sctp/socket.c b/net/sctp/socket.c index 9874e60c9b0d..4583fa914e62 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -4847,7 +4847,8 @@ static int sctp_connect(struct sock *sk, struct sockaddr *addr, } /* Validate addr_len before calling common connect/connectx routine. */ - af = sctp_get_af_specific(addr->sa_family); + af = addr_len < offsetofend(struct sockaddr, sa_family) ? NULL : + sctp_get_af_specific(addr->sa_family); if (!af || addr_len < af->sockaddr_len) { err = -EINVAL; } else { -- cgit v1.2.3 From d852be84770c0611f8b76bd7046c6a814c5b9f11 Mon Sep 17 00:00:00 2001 From: Tetsuo Handa Date: Fri, 12 Apr 2019 19:53:38 +0900 Subject: net: netlink: Check address length before reading groups field KMSAN will complain if valid address length passed to bind() is shorter than sizeof(struct sockaddr_nl) bytes. Signed-off-by: Tetsuo Handa Signed-off-by: David S. Miller --- net/netlink/af_netlink.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index f28e937320a3..216ab915dd54 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -988,7 +988,7 @@ static int netlink_bind(struct socket *sock, struct sockaddr *addr, struct netlink_sock *nlk = nlk_sk(sk); struct sockaddr_nl *nladdr = (struct sockaddr_nl *)addr; int err = 0; - unsigned long groups = nladdr->nl_groups; + unsigned long groups; bool bound; if (addr_len < sizeof(struct sockaddr_nl)) @@ -996,6 +996,7 @@ static int netlink_bind(struct socket *sock, struct sockaddr *addr, if (nladdr->nl_family != AF_NETLINK) return -EINVAL; + groups = nladdr->nl_groups; /* Only superuser is allowed to listen multicasts */ if (groups) { -- cgit v1.2.3 From a9107a14a9b9112775459ad291fc5de0f2513ce0 Mon Sep 17 00:00:00 2001 From: Tetsuo Handa Date: Fri, 12 Apr 2019 19:54:05 +0900 Subject: rxrpc: Check address length before reading srx_service field KMSAN will complain if valid address length passed to bind() is shorter than sizeof(struct sockaddr_rxrpc) bytes. Signed-off-by: Tetsuo Handa Signed-off-by: David S. Miller --- net/rxrpc/af_rxrpc.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/rxrpc/af_rxrpc.c b/net/rxrpc/af_rxrpc.c index 96f2952bbdfd..c54dce3ca0dd 100644 --- a/net/rxrpc/af_rxrpc.c +++ b/net/rxrpc/af_rxrpc.c @@ -135,7 +135,7 @@ static int rxrpc_bind(struct socket *sock, struct sockaddr *saddr, int len) struct sockaddr_rxrpc *srx = (struct sockaddr_rxrpc *)saddr; struct rxrpc_local *local; struct rxrpc_sock *rx = rxrpc_sk(sock->sk); - u16 service_id = srx->srx_service; + u16 service_id; int ret; _enter("%p,%p,%d", rx, saddr, len); @@ -143,6 +143,7 @@ static int rxrpc_bind(struct socket *sock, struct sockaddr *saddr, int len) ret = rxrpc_validate_address(rx, srx, len); if (ret < 0) goto error; + service_id = srx->srx_service; lock_sock(&rx->sk); -- cgit v1.2.3 From bd7d46ddca06f1fadd68ceb99bc6e6f808ab50f2 Mon Sep 17 00:00:00 2001 From: Tetsuo Handa Date: Fri, 12 Apr 2019 19:54:33 +0900 Subject: Bluetooth: Check address length before reading address field KMSAN will complain if valid address length passed to bind() is shorter than sizeof(struct sockaddr_sco) bytes. Signed-off-by: Tetsuo Handa Signed-off-by: David S. Miller --- net/bluetooth/sco.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/bluetooth/sco.c b/net/bluetooth/sco.c index 9a580999ca57..d892b7c3cc42 100644 --- a/net/bluetooth/sco.c +++ b/net/bluetooth/sco.c @@ -523,12 +523,12 @@ static int sco_sock_bind(struct socket *sock, struct sockaddr *addr, struct sock *sk = sock->sk; int err = 0; - BT_DBG("sk %p %pMR", sk, &sa->sco_bdaddr); - if (!addr || addr_len < sizeof(struct sockaddr_sco) || addr->sa_family != AF_BLUETOOTH) return -EINVAL; + BT_DBG("sk %p %pMR", sk, &sa->sco_bdaddr); + lock_sock(sk); if (sk->sk_state != BT_OPEN) { -- cgit v1.2.3 From c68e747d0a98f44a4e49071940a692fa83630e47 Mon Sep 17 00:00:00 2001 From: Tetsuo Handa Date: Fri, 12 Apr 2019 19:55:14 +0900 Subject: llc: Check address length before reading address field KMSAN will complain if valid address length passed to bind() is shorter than sizeof(struct sockaddr_llc) bytes. Signed-off-by: Tetsuo Handa Signed-off-by: David S. Miller --- net/llc/af_llc.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/net/llc/af_llc.c b/net/llc/af_llc.c index b99e73a7e7e0..2017b7d780f5 100644 --- a/net/llc/af_llc.c +++ b/net/llc/af_llc.c @@ -320,14 +320,13 @@ static int llc_ui_bind(struct socket *sock, struct sockaddr *uaddr, int addrlen) struct llc_sap *sap; int rc = -EINVAL; - dprintk("%s: binding %02X\n", __func__, addr->sllc_sap); - lock_sock(sk); if (unlikely(!sock_flag(sk, SOCK_ZAPPED) || addrlen != sizeof(*addr))) goto out; rc = -EAFNOSUPPORT; if (unlikely(addr->sllc_family != AF_LLC)) goto out; + dprintk("%s: binding %02X\n", __func__, addr->sllc_sap); rc = -ENODEV; rcu_read_lock(); if (sk->sk_bound_dev_if) { -- cgit v1.2.3 From ba024f2574a19557f92116ec6be129b26ae66e97 Mon Sep 17 00:00:00 2001 From: Tetsuo Handa Date: Fri, 12 Apr 2019 19:55:47 +0900 Subject: bpf: Check address length before reading address family KMSAN will complain if valid address length passed to bpf_bind() is shorter than sizeof("struct sockaddr"->sa_family) bytes. Signed-off-by: Tetsuo Handa Acked-by: Andrey Ignatov Signed-off-by: David S. Miller --- net/core/filter.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/core/filter.c b/net/core/filter.c index fc92ebc4e200..27e61ffd9039 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -4383,6 +4383,8 @@ BPF_CALL_3(bpf_bind, struct bpf_sock_addr_kern *, ctx, struct sockaddr *, addr, * Only binding to IP is supported. */ err = -EINVAL; + if (addr_len < offsetofend(struct sockaddr, sa_family)) + return err; if (addr->sa_family == AF_INET) { if (addr_len < sizeof(struct sockaddr_in)) return err; -- cgit v1.2.3 From bddc028a4f2ac8cf4d0cd1c696b5f95d8305a553 Mon Sep 17 00:00:00 2001 From: Tetsuo Handa Date: Fri, 12 Apr 2019 19:56:39 +0900 Subject: udpv6: Check address length before reading address family KMSAN will complain if valid address length passed to udpv6_pre_connect() is shorter than sizeof("struct sockaddr"->sa_family) bytes. (This patch is bogus if it is guaranteed that udpv6_pre_connect() is always called after checking "struct sockaddr"->sa_family. In that case, we want a comment why we don't need to check valid address length here.) Signed-off-by: Tetsuo Handa Acked-by: Song Liu Signed-off-by: David S. Miller --- net/ipv6/udp.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index b444483cdb2b..622eeaf5732b 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -1047,6 +1047,8 @@ static void udp_v6_flush_pending_frames(struct sock *sk) static int udpv6_pre_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) { + if (addr_len < offsetofend(struct sockaddr, sa_family)) + return -EINVAL; /* The following checks are replicated from __ip6_datagram_connect() * and intended to prevent BPF program called below from accessing * bytes that are out of the bound specified by user in addr_len. -- cgit v1.2.3 From 2170e2157d7c5398f84477935553d63a93a1f6b8 Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Tue, 26 Mar 2019 09:34:19 +0100 Subject: mt76: mt7603: add missing initialization for dev->ps_lock Fixes lockdep complaint and a potential race condition Signed-off-by: Felix Fietkau Signed-off-by: Kalle Valo --- drivers/net/wireless/mediatek/mt76/mt7603/init.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/wireless/mediatek/mt76/mt7603/init.c b/drivers/net/wireless/mediatek/mt76/mt7603/init.c index d54dda67d036..3af45949e868 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7603/init.c +++ b/drivers/net/wireless/mediatek/mt76/mt7603/init.c @@ -510,6 +510,8 @@ int mt7603_register_device(struct mt7603_dev *dev) bus_ops->rmw = mt7603_rmw; dev->mt76.bus = bus_ops; + spin_lock_init(&dev->ps_lock); + INIT_DELAYED_WORK(&dev->mac_work, mt7603_mac_work); tasklet_init(&dev->pre_tbtt_tasklet, mt7603_pre_tbtt_tasklet, (unsigned long)dev); -- cgit v1.2.3 From aa3cb24be18b9b537750c354c5cff96c3d17ae44 Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Tue, 26 Mar 2019 09:34:20 +0100 Subject: mt76: mt7603: fix sequence number assignment If the MT_TXD3_SN_VALID flag is not set in the tx descriptor, the hardware assigns the sequence number. However, the rest of the code assumes that the sequence number specified in the 802.11 header gets transmitted. This was causing issues with the aggregation setup, which worked for the initial one (where the sequence numbers were still close), but not for further teardown/re-establishing of sessions. Additionally, the overwrite of the TID sequence number in WTBL2 was resetting the hardware assigned sequence numbers, causing them to drift further apart. Fix this by using the software assigned sequence numbers Signed-off-by: Felix Fietkau Signed-off-by: Kalle Valo --- drivers/net/wireless/mediatek/mt76/mt7603/mac.c | 53 ++++++---------------- drivers/net/wireless/mediatek/mt76/mt7603/main.c | 6 +-- drivers/net/wireless/mediatek/mt76/mt7603/mt7603.h | 2 +- 3 files changed, 18 insertions(+), 43 deletions(-) diff --git a/drivers/net/wireless/mediatek/mt76/mt7603/mac.c b/drivers/net/wireless/mediatek/mt76/mt7603/mac.c index 5e31d7da96fc..5abc02b57818 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7603/mac.c +++ b/drivers/net/wireless/mediatek/mt76/mt7603/mac.c @@ -343,7 +343,7 @@ void mt7603_mac_rx_ba_reset(struct mt7603_dev *dev, void *addr, u8 tid) MT_BA_CONTROL_1_RESET)); } -void mt7603_mac_tx_ba_reset(struct mt7603_dev *dev, int wcid, int tid, int ssn, +void mt7603_mac_tx_ba_reset(struct mt7603_dev *dev, int wcid, int tid, int ba_size) { u32 addr = mt7603_wtbl2_addr(wcid); @@ -358,43 +358,6 @@ void mt7603_mac_tx_ba_reset(struct mt7603_dev *dev, int wcid, int tid, int ssn, mt76_clear(dev, addr + (15 * 4), tid_mask); return; } - mt76_poll(dev, MT_WTBL_UPDATE, MT_WTBL_UPDATE_BUSY, 0, 5000); - - mt7603_mac_stop(dev); - switch (tid) { - case 0: - mt76_rmw_field(dev, addr + (2 * 4), MT_WTBL2_W2_TID0_SN, ssn); - break; - case 1: - mt76_rmw_field(dev, addr + (2 * 4), MT_WTBL2_W2_TID1_SN, ssn); - break; - case 2: - mt76_rmw_field(dev, addr + (2 * 4), MT_WTBL2_W2_TID2_SN_LO, - ssn); - mt76_rmw_field(dev, addr + (3 * 4), MT_WTBL2_W3_TID2_SN_HI, - ssn >> 8); - break; - case 3: - mt76_rmw_field(dev, addr + (3 * 4), MT_WTBL2_W3_TID3_SN, ssn); - break; - case 4: - mt76_rmw_field(dev, addr + (3 * 4), MT_WTBL2_W3_TID4_SN, ssn); - break; - case 5: - mt76_rmw_field(dev, addr + (3 * 4), MT_WTBL2_W3_TID5_SN_LO, - ssn); - mt76_rmw_field(dev, addr + (4 * 4), MT_WTBL2_W4_TID5_SN_HI, - ssn >> 4); - break; - case 6: - mt76_rmw_field(dev, addr + (4 * 4), MT_WTBL2_W4_TID6_SN, ssn); - break; - case 7: - mt76_rmw_field(dev, addr + (4 * 4), MT_WTBL2_W4_TID7_SN, ssn); - break; - } - mt7603_wtbl_update(dev, wcid, MT_WTBL_UPDATE_WTBL2); - mt7603_mac_start(dev); for (i = 7; i > 0; i--) { if (ba_size >= MT_AGG_SIZE_LIMIT(i)) @@ -827,6 +790,7 @@ mt7603_mac_write_txwi(struct mt7603_dev *dev, __le32 *txwi, struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); struct ieee80211_tx_rate *rate = &info->control.rates[0]; struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)skb->data; + struct ieee80211_bar *bar = (struct ieee80211_bar *)skb->data; struct ieee80211_vif *vif = info->control.vif; struct mt7603_vif *mvif; int wlan_idx; @@ -834,6 +798,7 @@ mt7603_mac_write_txwi(struct mt7603_dev *dev, __le32 *txwi, int tx_count = 8; u8 frame_type, frame_subtype; u16 fc = le16_to_cpu(hdr->frame_control); + u16 seqno = 0; u8 vif_idx = 0; u32 val; u8 bw; @@ -919,7 +884,17 @@ mt7603_mac_write_txwi(struct mt7603_dev *dev, __le32 *txwi, tx_count = 0x1f; val = FIELD_PREP(MT_TXD3_REM_TX_COUNT, tx_count) | - FIELD_PREP(MT_TXD3_SEQ, le16_to_cpu(hdr->seq_ctrl)); + MT_TXD3_SN_VALID; + + if (ieee80211_is_data_qos(hdr->frame_control)) + seqno = le16_to_cpu(hdr->seq_ctrl); + else if (ieee80211_is_back_req(hdr->frame_control)) + seqno = le16_to_cpu(bar->start_seq_num); + else + val &= ~MT_TXD3_SN_VALID; + + val |= FIELD_PREP(MT_TXD3_SEQ, seqno >> 4); + txwi[3] = cpu_to_le32(val); if (key) { diff --git a/drivers/net/wireless/mediatek/mt76/mt7603/main.c b/drivers/net/wireless/mediatek/mt76/mt7603/main.c index cc0fe0933b2d..31a7ca691195 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7603/main.c +++ b/drivers/net/wireless/mediatek/mt76/mt7603/main.c @@ -584,13 +584,13 @@ mt7603_ampdu_action(struct ieee80211_hw *hw, struct ieee80211_vif *vif, case IEEE80211_AMPDU_TX_OPERATIONAL: mtxq->aggr = true; mtxq->send_bar = false; - mt7603_mac_tx_ba_reset(dev, msta->wcid.idx, tid, *ssn, ba_size); + mt7603_mac_tx_ba_reset(dev, msta->wcid.idx, tid, ba_size); break; case IEEE80211_AMPDU_TX_STOP_FLUSH: case IEEE80211_AMPDU_TX_STOP_FLUSH_CONT: mtxq->aggr = false; ieee80211_send_bar(vif, sta->addr, tid, mtxq->agg_ssn); - mt7603_mac_tx_ba_reset(dev, msta->wcid.idx, tid, *ssn, -1); + mt7603_mac_tx_ba_reset(dev, msta->wcid.idx, tid, -1); break; case IEEE80211_AMPDU_TX_START: mtxq->agg_ssn = *ssn << 4; @@ -598,7 +598,7 @@ mt7603_ampdu_action(struct ieee80211_hw *hw, struct ieee80211_vif *vif, break; case IEEE80211_AMPDU_TX_STOP_CONT: mtxq->aggr = false; - mt7603_mac_tx_ba_reset(dev, msta->wcid.idx, tid, *ssn, -1); + mt7603_mac_tx_ba_reset(dev, msta->wcid.idx, tid, -1); ieee80211_stop_tx_ba_cb_irqsafe(vif, sta->addr, tid); break; } diff --git a/drivers/net/wireless/mediatek/mt76/mt7603/mt7603.h b/drivers/net/wireless/mediatek/mt76/mt7603/mt7603.h index 79f332429432..6049f3b7c8fe 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7603/mt7603.h +++ b/drivers/net/wireless/mediatek/mt76/mt7603/mt7603.h @@ -200,7 +200,7 @@ void mt7603_beacon_set_timer(struct mt7603_dev *dev, int idx, int intval); int mt7603_mac_fill_rx(struct mt7603_dev *dev, struct sk_buff *skb); void mt7603_mac_add_txs(struct mt7603_dev *dev, void *data); void mt7603_mac_rx_ba_reset(struct mt7603_dev *dev, void *addr, u8 tid); -void mt7603_mac_tx_ba_reset(struct mt7603_dev *dev, int wcid, int tid, int ssn, +void mt7603_mac_tx_ba_reset(struct mt7603_dev *dev, int wcid, int tid, int ba_size); void mt7603_pse_client_reset(struct mt7603_dev *dev); -- cgit v1.2.3 From 9dc27bcbe78c5d3926f48b1105840f349c827766 Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Tue, 26 Mar 2019 09:34:21 +0100 Subject: mt76: mt7603: send BAR after powersave wakeup Now that the sequence number allocation is fixed, we can finally send a BAR at powersave wakeup time to refresh the receiver side reorder window Signed-off-by: Felix Fietkau Signed-off-by: Kalle Valo --- drivers/net/wireless/mediatek/mt76/mt7603/main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/mediatek/mt76/mt7603/main.c b/drivers/net/wireless/mediatek/mt76/mt7603/main.c index 31a7ca691195..a3c4ef198bfe 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7603/main.c +++ b/drivers/net/wireless/mediatek/mt76/mt7603/main.c @@ -372,7 +372,7 @@ mt7603_sta_ps(struct mt76_dev *mdev, struct ieee80211_sta *sta, bool ps) struct mt7603_sta *msta = (struct mt7603_sta *)sta->drv_priv; struct sk_buff_head list; - mt76_stop_tx_queues(&dev->mt76, sta, false); + mt76_stop_tx_queues(&dev->mt76, sta, true); mt7603_wtbl_set_ps(dev, msta, ps); if (ps) return; -- cgit v1.2.3 From 746ba11f170603bf1eaade817553a6c2e9135bbe Mon Sep 17 00:00:00 2001 From: Vijayakumar Durai Date: Wed, 27 Mar 2019 11:03:17 +0100 Subject: rt2x00: do not increment sequence number while re-transmitting Currently rt2x00 devices retransmit the management frames with incremented sequence number if hardware is assigning the sequence. This is HW bug fixed already for non-QOS data frames, but it should be fixed for management frames except beacon. Without fix retransmitted frames have wrong SN: AlphaNet_e8:fb:36 Vivotek_52:31:51 Authentication, SN=1648, FN=0, Flags=........C Frame is not being retransmitted 1648 1 AlphaNet_e8:fb:36 Vivotek_52:31:51 Authentication, SN=1649, FN=0, Flags=....R...C Frame is being retransmitted 1649 1 AlphaNet_e8:fb:36 Vivotek_52:31:51 Authentication, SN=1650, FN=0, Flags=....R...C Frame is being retransmitted 1650 1 With the fix SN stays correctly the same: 88:6a:e3:e8:f9:a2 8c:f5:a3:88:76:87 Authentication, SN=1450, FN=0, Flags=........C 88:6a:e3:e8:f9:a2 8c:f5:a3:88:76:87 Authentication, SN=1450, FN=0, Flags=....R...C 88:6a:e3:e8:f9:a2 8c:f5:a3:88:76:87 Authentication, SN=1450, FN=0, Flags=....R...C Cc: stable@vger.kernel.org Signed-off-by: Vijayakumar Durai [sgruszka: simplify code, change comments and changelog] Signed-off-by: Stanislaw Gruszka Signed-off-by: Kalle Valo --- drivers/net/wireless/ralink/rt2x00/rt2x00.h | 1 - drivers/net/wireless/ralink/rt2x00/rt2x00mac.c | 10 ---------- drivers/net/wireless/ralink/rt2x00/rt2x00queue.c | 15 +++++++++------ 3 files changed, 9 insertions(+), 17 deletions(-) diff --git a/drivers/net/wireless/ralink/rt2x00/rt2x00.h b/drivers/net/wireless/ralink/rt2x00/rt2x00.h index 4b1744e9fb78..50b92ca92bd7 100644 --- a/drivers/net/wireless/ralink/rt2x00/rt2x00.h +++ b/drivers/net/wireless/ralink/rt2x00/rt2x00.h @@ -673,7 +673,6 @@ enum rt2x00_state_flags { CONFIG_CHANNEL_HT40, CONFIG_POWERSAVING, CONFIG_HT_DISABLED, - CONFIG_QOS_DISABLED, CONFIG_MONITORING, /* diff --git a/drivers/net/wireless/ralink/rt2x00/rt2x00mac.c b/drivers/net/wireless/ralink/rt2x00/rt2x00mac.c index 2825560e2424..e8462f25d252 100644 --- a/drivers/net/wireless/ralink/rt2x00/rt2x00mac.c +++ b/drivers/net/wireless/ralink/rt2x00/rt2x00mac.c @@ -642,18 +642,8 @@ void rt2x00mac_bss_info_changed(struct ieee80211_hw *hw, rt2x00dev->intf_associated--; rt2x00leds_led_assoc(rt2x00dev, !!rt2x00dev->intf_associated); - - clear_bit(CONFIG_QOS_DISABLED, &rt2x00dev->flags); } - /* - * Check for access point which do not support 802.11e . We have to - * generate data frames sequence number in S/W for such AP, because - * of H/W bug. - */ - if (changes & BSS_CHANGED_QOS && !bss_conf->qos) - set_bit(CONFIG_QOS_DISABLED, &rt2x00dev->flags); - /* * When the erp information has changed, we should perform * additional configuration steps. For all other changes we are done. diff --git a/drivers/net/wireless/ralink/rt2x00/rt2x00queue.c b/drivers/net/wireless/ralink/rt2x00/rt2x00queue.c index 92ddc19e7bf7..4834b4eb0206 100644 --- a/drivers/net/wireless/ralink/rt2x00/rt2x00queue.c +++ b/drivers/net/wireless/ralink/rt2x00/rt2x00queue.c @@ -201,15 +201,18 @@ static void rt2x00queue_create_tx_descriptor_seq(struct rt2x00_dev *rt2x00dev, if (!rt2x00_has_cap_flag(rt2x00dev, REQUIRE_SW_SEQNO)) { /* * rt2800 has a H/W (or F/W) bug, device incorrectly increase - * seqno on retransmited data (non-QOS) frames. To workaround - * the problem let's generate seqno in software if QOS is - * disabled. + * seqno on retransmitted data (non-QOS) and management frames. + * To workaround the problem let's generate seqno in software. + * Except for beacons which are transmitted periodically by H/W + * hence hardware has to assign seqno for them. */ - if (test_bit(CONFIG_QOS_DISABLED, &rt2x00dev->flags)) - __clear_bit(ENTRY_TXD_GENERATE_SEQ, &txdesc->flags); - else + if (ieee80211_is_beacon(hdr->frame_control)) { + __set_bit(ENTRY_TXD_GENERATE_SEQ, &txdesc->flags); /* H/W will generate sequence number */ return; + } + + __clear_bit(ENTRY_TXD_GENERATE_SEQ, &txdesc->flags); } /* -- cgit v1.2.3 From bafdf85dfa59374f927ff597bc8c259193afda30 Mon Sep 17 00:00:00 2001 From: Stanislaw Gruszka Date: Fri, 5 Apr 2019 13:42:56 +0200 Subject: mt76x02: avoid status_list.lock and sta->rate_ctrl_lock dependency Move ieee80211_tx_status_ext() outside of status_list lock section in order to avoid locking dependency and possible deadlock reposed by LOCKDEP in below warning. Also do mt76_tx_status_lock() just before it's needed. [ 440.224832] WARNING: possible circular locking dependency detected [ 440.224833] 5.1.0-rc2+ #22 Not tainted [ 440.224834] ------------------------------------------------------ [ 440.224835] kworker/u16:28/2362 is trying to acquire lock: [ 440.224836] 0000000089b8cacf (&(&q->lock)->rlock#2){+.-.}, at: mt76_wake_tx_queue+0x4c/0xb0 [mt76] [ 440.224842] but task is already holding lock: [ 440.224842] 000000002cfedc59 (&(&sta->lock)->rlock){+.-.}, at: ieee80211_stop_tx_ba_cb+0x32/0x1f0 [mac80211] [ 440.224863] which lock already depends on the new lock. [ 440.224863] the existing dependency chain (in reverse order) is: [ 440.224864] -> #3 (&(&sta->lock)->rlock){+.-.}: [ 440.224869] _raw_spin_lock_bh+0x34/0x40 [ 440.224880] ieee80211_start_tx_ba_session+0xe4/0x3d0 [mac80211] [ 440.224894] minstrel_ht_get_rate+0x45c/0x510 [mac80211] [ 440.224906] rate_control_get_rate+0xc1/0x140 [mac80211] [ 440.224918] ieee80211_tx_h_rate_ctrl+0x195/0x3c0 [mac80211] [ 440.224930] ieee80211_xmit_fast+0x26d/0xa50 [mac80211] [ 440.224942] __ieee80211_subif_start_xmit+0xfc/0x310 [mac80211] [ 440.224954] ieee80211_subif_start_xmit+0x38/0x390 [mac80211] [ 440.224956] dev_hard_start_xmit+0xb8/0x300 [ 440.224957] __dev_queue_xmit+0x7d4/0xbb0 [ 440.224968] ip6_finish_output2+0x246/0x860 [ipv6] [ 440.224978] mld_sendpack+0x1bd/0x360 [ipv6] [ 440.224987] mld_ifc_timer_expire+0x1a4/0x2f0 [ipv6] [ 440.224989] call_timer_fn+0x89/0x2a0 [ 440.224990] run_timer_softirq+0x1bd/0x4d0 [ 440.224992] __do_softirq+0xdb/0x47c [ 440.224994] irq_exit+0xfa/0x100 [ 440.224996] smp_apic_timer_interrupt+0x9a/0x220 [ 440.224997] apic_timer_interrupt+0xf/0x20 [ 440.224999] cpuidle_enter_state+0xc1/0x470 [ 440.225000] do_idle+0x21a/0x260 [ 440.225001] cpu_startup_entry+0x19/0x20 [ 440.225004] start_secondary+0x135/0x170 [ 440.225006] secondary_startup_64+0xa4/0xb0 [ 440.225007] -> #2 (&(&sta->rate_ctrl_lock)->rlock){+.-.}: [ 440.225009] _raw_spin_lock_bh+0x34/0x40 [ 440.225022] rate_control_tx_status+0x4f/0xb0 [mac80211] [ 440.225031] ieee80211_tx_status_ext+0x142/0x1a0 [mac80211] [ 440.225035] mt76x02_send_tx_status+0x2e4/0x340 [mt76x02_lib] [ 440.225037] mt76x02_tx_status_data+0x31/0x40 [mt76x02_lib] [ 440.225040] mt76u_tx_status_data+0x51/0xa0 [mt76_usb] [ 440.225042] process_one_work+0x237/0x5d0 [ 440.225043] worker_thread+0x3c/0x390 [ 440.225045] kthread+0x11d/0x140 [ 440.225046] ret_from_fork+0x3a/0x50 [ 440.225047] -> #1 (&(&list->lock)->rlock#8){+.-.}: [ 440.225049] _raw_spin_lock_bh+0x34/0x40 [ 440.225052] mt76_tx_status_skb_add+0x51/0x100 [mt76] [ 440.225054] mt76x02u_tx_prepare_skb+0xbd/0x116 [mt76x02_usb] [ 440.225056] mt76u_tx_queue_skb+0x5f/0x180 [mt76_usb] [ 440.225058] mt76_tx+0x93/0x190 [mt76] [ 440.225070] ieee80211_tx_frags+0x148/0x210 [mac80211] [ 440.225081] __ieee80211_tx+0x75/0x1b0 [mac80211] [ 440.225092] ieee80211_tx+0xde/0x110 [mac80211] [ 440.225105] __ieee80211_tx_skb_tid_band+0x72/0x90 [mac80211] [ 440.225122] ieee80211_send_auth+0x1f3/0x360 [mac80211] [ 440.225141] ieee80211_auth.cold.40+0x6c/0x100 [mac80211] [ 440.225156] ieee80211_mgd_auth.cold.50+0x132/0x15f [mac80211] [ 440.225171] cfg80211_mlme_auth+0x149/0x360 [cfg80211] [ 440.225181] nl80211_authenticate+0x273/0x2e0 [cfg80211] [ 440.225183] genl_family_rcv_msg+0x196/0x3a0 [ 440.225184] genl_rcv_msg+0x47/0x8e [ 440.225185] netlink_rcv_skb+0x3a/0xf0 [ 440.225187] genl_rcv+0x24/0x40 [ 440.225188] netlink_unicast+0x16d/0x210 [ 440.225189] netlink_sendmsg+0x204/0x3b0 [ 440.225191] sock_sendmsg+0x36/0x40 [ 440.225193] ___sys_sendmsg+0x259/0x2b0 [ 440.225194] __sys_sendmsg+0x47/0x80 [ 440.225196] do_syscall_64+0x60/0x1f0 [ 440.225197] entry_SYSCALL_64_after_hwframe+0x49/0xbe [ 440.225198] -> #0 (&(&q->lock)->rlock#2){+.-.}: [ 440.225200] lock_acquire+0xb9/0x1a0 [ 440.225202] _raw_spin_lock_bh+0x34/0x40 [ 440.225204] mt76_wake_tx_queue+0x4c/0xb0 [mt76] [ 440.225215] ieee80211_agg_start_txq+0xe8/0x2b0 [mac80211] [ 440.225225] ieee80211_stop_tx_ba_cb+0xb8/0x1f0 [mac80211] [ 440.225235] ieee80211_ba_session_work+0x1c1/0x2f0 [mac80211] [ 440.225236] process_one_work+0x237/0x5d0 [ 440.225237] worker_thread+0x3c/0x390 [ 440.225239] kthread+0x11d/0x140 [ 440.225240] ret_from_fork+0x3a/0x50 [ 440.225240] other info that might help us debug this: [ 440.225241] Chain exists of: &(&q->lock)->rlock#2 --> &(&sta->rate_ctrl_lock)->rlock --> &(&sta->lock)->rlock [ 440.225243] Possible unsafe locking scenario: [ 440.225244] CPU0 CPU1 [ 440.225244] ---- ---- [ 440.225245] lock(&(&sta->lock)->rlock); [ 440.225245] lock(&(&sta->rate_ctrl_lock)->rlock); [ 440.225246] lock(&(&sta->lock)->rlock); [ 440.225247] lock(&(&q->lock)->rlock#2); [ 440.225248] *** DEADLOCK *** [ 440.225249] 5 locks held by kworker/u16:28/2362: [ 440.225250] #0: 0000000048fcd291 ((wq_completion)phy0){+.+.}, at: process_one_work+0x1b5/0x5d0 [ 440.225252] #1: 00000000f1c6828f ((work_completion)(&sta->ampdu_mlme.work)){+.+.}, at: process_one_work+0x1b5/0x5d0 [ 440.225254] #2: 00000000433d2b2c (&sta->ampdu_mlme.mtx){+.+.}, at: ieee80211_ba_session_work+0x5c/0x2f0 [mac80211] [ 440.225265] #3: 000000002cfedc59 (&(&sta->lock)->rlock){+.-.}, at: ieee80211_stop_tx_ba_cb+0x32/0x1f0 [mac80211] [ 440.225276] #4: 000000009d7b9a44 (rcu_read_lock){....}, at: ieee80211_agg_start_txq+0x33/0x2b0 [mac80211] [ 440.225286] stack backtrace: [ 440.225288] CPU: 2 PID: 2362 Comm: kworker/u16:28 Not tainted 5.1.0-rc2+ #22 [ 440.225289] Hardware name: LENOVO 20KGS23S0P/20KGS23S0P, BIOS N23ET55W (1.30 ) 08/31/2018 [ 440.225300] Workqueue: phy0 ieee80211_ba_session_work [mac80211] [ 440.225301] Call Trace: [ 440.225304] dump_stack+0x85/0xc0 [ 440.225306] print_circular_bug.isra.38.cold.58+0x15c/0x195 [ 440.225307] check_prev_add.constprop.48+0x5f0/0xc00 [ 440.225309] ? check_prev_add.constprop.48+0x39d/0xc00 [ 440.225311] ? __lock_acquire+0x41d/0x1100 [ 440.225312] __lock_acquire+0xd98/0x1100 [ 440.225313] ? __lock_acquire+0x41d/0x1100 [ 440.225315] lock_acquire+0xb9/0x1a0 [ 440.225317] ? mt76_wake_tx_queue+0x4c/0xb0 [mt76] [ 440.225319] _raw_spin_lock_bh+0x34/0x40 [ 440.225321] ? mt76_wake_tx_queue+0x4c/0xb0 [mt76] [ 440.225323] mt76_wake_tx_queue+0x4c/0xb0 [mt76] [ 440.225334] ieee80211_agg_start_txq+0xe8/0x2b0 [mac80211] [ 440.225344] ieee80211_stop_tx_ba_cb+0xb8/0x1f0 [mac80211] [ 440.225354] ieee80211_ba_session_work+0x1c1/0x2f0 [mac80211] [ 440.225356] process_one_work+0x237/0x5d0 [ 440.225358] worker_thread+0x3c/0x390 [ 440.225359] ? wq_calc_node_cpumask+0x70/0x70 [ 440.225360] kthread+0x11d/0x140 [ 440.225362] ? kthread_create_on_node+0x40/0x40 [ 440.225363] ret_from_fork+0x3a/0x50 Cc: stable@vger.kernel.org Fixes: 88046b2c9f6d ("mt76: add support for reporting tx status with skb") Signed-off-by: Stanislaw Gruszka Acked-by: Felix Fietkau Signed-off-by: Kalle Valo --- drivers/net/wireless/mediatek/mt76/mt76x02_mac.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/drivers/net/wireless/mediatek/mt76/mt76x02_mac.c b/drivers/net/wireless/mediatek/mt76/mt76x02_mac.c index 9ed231abe916..4fe5a83ca5a4 100644 --- a/drivers/net/wireless/mediatek/mt76/mt76x02_mac.c +++ b/drivers/net/wireless/mediatek/mt76/mt76x02_mac.c @@ -466,7 +466,6 @@ void mt76x02_send_tx_status(struct mt76x02_dev *dev, return; rcu_read_lock(); - mt76_tx_status_lock(mdev, &list); if (stat->wcid < ARRAY_SIZE(dev->mt76.wcid)) wcid = rcu_dereference(dev->mt76.wcid[stat->wcid]); @@ -479,6 +478,8 @@ void mt76x02_send_tx_status(struct mt76x02_dev *dev, drv_priv); } + mt76_tx_status_lock(mdev, &list); + if (wcid) { if (stat->pktid >= MT_PACKET_ID_FIRST) status.skb = mt76_tx_status_skb_get(mdev, wcid, @@ -498,7 +499,9 @@ void mt76x02_send_tx_status(struct mt76x02_dev *dev, if (*update == 0 && stat_val == stat_cache && stat->wcid == msta->status.wcid && msta->n_frames < 32) { msta->n_frames++; - goto out; + mt76_tx_status_unlock(mdev, &list); + rcu_read_unlock(); + return; } mt76x02_mac_fill_tx_status(dev, status.info, &msta->status, @@ -514,11 +517,10 @@ void mt76x02_send_tx_status(struct mt76x02_dev *dev, if (status.skb) mt76_tx_status_skb_done(mdev, status.skb, &list); - else - ieee80211_tx_status_ext(mt76_hw(dev), &status); - -out: mt76_tx_status_unlock(mdev, &list); + + if (!status.skb) + ieee80211_tx_status_ext(mt76_hw(dev), &status); rcu_read_unlock(); } -- cgit v1.2.3 From d5bc73f34cc97c4b4b9202cc93182c2515076edf Mon Sep 17 00:00:00 2001 From: Logan Gunthorpe Date: Wed, 10 Apr 2019 15:05:31 -0600 Subject: PCI: Fix issue with "pci=disable_acs_redir" parameter being ignored In most cases, kmalloc() will not be available early in boot when pci_setup() is called. Thus, the kstrdup() call that was added to fix the __initdata bug with the disable_acs_redir parameter usually returns NULL, so the parameter is discarded and has no effect. To fix this, store the string that's in initdata until an initcall function can allocate the memory appropriately. This way we don't need any additional static memory. Fixes: d2fd6e81912a ("PCI: Fix __initdata issue with "pci=disable_acs_redir" parameter") Signed-off-by: Logan Gunthorpe Signed-off-by: Bjorn Helgaas --- drivers/pci/pci.c | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c index 7c1b362f599a..766f5779db92 100644 --- a/drivers/pci/pci.c +++ b/drivers/pci/pci.c @@ -6262,8 +6262,7 @@ static int __init pci_setup(char *str) } else if (!strncmp(str, "pcie_scan_all", 13)) { pci_add_flags(PCI_SCAN_ALL_PCIE_DEVS); } else if (!strncmp(str, "disable_acs_redir=", 18)) { - disable_acs_redir_param = - kstrdup(str + 18, GFP_KERNEL); + disable_acs_redir_param = str + 18; } else { printk(KERN_ERR "PCI: Unknown option `%s'\n", str); @@ -6274,3 +6273,19 @@ static int __init pci_setup(char *str) return 0; } early_param("pci", pci_setup); + +/* + * 'disable_acs_redir_param' is initialized in pci_setup(), above, to point + * to data in the __initdata section which will be freed after the init + * sequence is complete. We can't allocate memory in pci_setup() because some + * architectures do not have any memory allocation service available during + * an early_param() call. So we allocate memory and copy the variable here + * before the init section is freed. + */ +static int __init pci_realloc_setup_params(void) +{ + disable_acs_redir_param = kstrdup(disable_acs_redir_param, GFP_KERNEL); + + return 0; +} +pure_initcall(pci_realloc_setup_params); -- cgit v1.2.3 From f89b9e1be7da8bb0aac667a0206a00975cefe6d3 Mon Sep 17 00:00:00 2001 From: Leonard Crestez Date: Fri, 12 Apr 2019 14:10:03 +0000 Subject: clk: imx: Fix PLL_1416X not rounding rates Code which initializes the "clk_init_data.ops" checks pll->rate_table before that field is ever assigned to so it always picks "clk_pll1416x_min_ops". This breaks dynamic rate rounding for features such as cpufreq. Fix by checking pll_clk->rate_table instead, here pll_clk refers to the constant initialization data coming from per-soc clk driver. Signed-off-by: Leonard Crestez Fixes: 8646d4dcc7fb ("clk: imx: Add PLLs driver for imx8mm soc") Signed-off-by: Stephen Boyd --- drivers/clk/imx/clk-pll14xx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/clk/imx/clk-pll14xx.c b/drivers/clk/imx/clk-pll14xx.c index 1acfa3e3cfb4..113d71042199 100644 --- a/drivers/clk/imx/clk-pll14xx.c +++ b/drivers/clk/imx/clk-pll14xx.c @@ -362,7 +362,7 @@ struct clk *imx_clk_pll14xx(const char *name, const char *parent_name, switch (pll_clk->type) { case PLL_1416X: - if (!pll->rate_table) + if (!pll_clk->rate_table) init.ops = &clk_pll1416x_min_ops; else init.ops = &clk_pll1416x_ops; -- cgit v1.2.3 From 0a2c34f18c94b596562bf3d019fceab998b8b584 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Fri, 12 Apr 2019 14:45:12 +0100 Subject: vxge: fix return of a free'd memblock on a failed dma mapping Currently if a pci dma mapping failure is detected a free'd memblock address is returned rather than a NULL (that indicates an error). Fix this by ensuring NULL is returned on this error case. Addresses-Coverity: ("Use after free") Fixes: 528f727279ae ("vxge: code cleanup and reorganization") Signed-off-by: Colin Ian King Signed-off-by: David S. Miller --- drivers/net/ethernet/neterion/vxge/vxge-config.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/neterion/vxge/vxge-config.c b/drivers/net/ethernet/neterion/vxge/vxge-config.c index 7cde387e5ec6..51cd57ab3d95 100644 --- a/drivers/net/ethernet/neterion/vxge/vxge-config.c +++ b/drivers/net/ethernet/neterion/vxge/vxge-config.c @@ -2366,6 +2366,7 @@ static void *__vxge_hw_blockpool_malloc(struct __vxge_hw_device *devh, u32 size, dma_object->addr))) { vxge_os_dma_free(devh->pdev, memblock, &dma_object->acc_handle); + memblock = NULL; goto exit; } -- cgit v1.2.3 From 1dc2b3d65523780ed1972d446c76e62e13f3e8f5 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Fri, 12 Apr 2019 15:13:27 +0100 Subject: qede: fix write to free'd pointer error and double free of ptp The err2 error return path calls qede_ptp_disable that cleans up on an error and frees ptp. After this, the free'd ptp is dereferenced when ptp->clock is set to NULL and the code falls-through to error path err1 that frees ptp again. Fix this by calling qede_ptp_disable and exiting via an error return path that does not set ptp->clock or kfree ptp. Addresses-Coverity: ("Write to pointer after free") Fixes: 035744975aec ("qede: Add support for PTP resource locking.") Signed-off-by: Colin Ian King Signed-off-by: David S. Miller --- drivers/net/ethernet/qlogic/qede/qede_ptp.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/qlogic/qede/qede_ptp.c b/drivers/net/ethernet/qlogic/qede/qede_ptp.c index 5f3f42a25361..bddb2b5982dc 100644 --- a/drivers/net/ethernet/qlogic/qede/qede_ptp.c +++ b/drivers/net/ethernet/qlogic/qede/qede_ptp.c @@ -490,18 +490,17 @@ int qede_ptp_enable(struct qede_dev *edev, bool init_tc) ptp->clock = ptp_clock_register(&ptp->clock_info, &edev->pdev->dev); if (IS_ERR(ptp->clock)) { - rc = -EINVAL; DP_ERR(edev, "PTP clock registration failed\n"); + qede_ptp_disable(edev); + rc = -EINVAL; goto err2; } return 0; -err2: - qede_ptp_disable(edev); - ptp->clock = NULL; err1: kfree(ptp); +err2: edev->ptp = NULL; return rc; -- cgit v1.2.3 From 56d282d9db19f85f759b7a81f0829b58c00571b0 Mon Sep 17 00:00:00 2001 From: Marc Dionne Date: Fri, 12 Apr 2019 16:33:40 +0100 Subject: rxrpc: Clear socket error When an ICMP or ICMPV6 error is received, the error will be attached to the socket (sk_err) and the report function will get called. Clear any pending error here by calling sock_error(). This would cause the following attempt to use the socket to fail with the error code stored by the ICMP error, resulting in unexpected errors with various side effects depending on the context. Signed-off-by: Marc Dionne Signed-off-by: David Howells Tested-by: Jonathan Billings Signed-off-by: David S. Miller --- net/rxrpc/peer_event.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/net/rxrpc/peer_event.c b/net/rxrpc/peer_event.c index bc05af89fc38..6e84d878053c 100644 --- a/net/rxrpc/peer_event.c +++ b/net/rxrpc/peer_event.c @@ -157,6 +157,11 @@ void rxrpc_error_report(struct sock *sk) _enter("%p{%d}", sk, local->debug_id); + /* Clear the outstanding error value on the socket so that it doesn't + * cause kernel_sendmsg() to return it later. + */ + sock_error(sk); + skb = sock_dequeue_err_skb(sk); if (!skb) { _leave("UDP socket errqueue empty"); -- cgit v1.2.3 From 4611da30d679a4b0a2c2b5d4d7b3fbbafc922df7 Mon Sep 17 00:00:00 2001 From: Marc Dionne Date: Fri, 12 Apr 2019 16:33:47 +0100 Subject: rxrpc: Make rxrpc_kernel_check_life() indicate if call completed Make rxrpc_kernel_check_life() pass back the life counter through the argument list and return true if the call has not yet completed. Suggested-by: Marc Dionne Signed-off-by: David Howells Signed-off-by: David S. Miller --- Documentation/networking/rxrpc.txt | 16 +++++++++------- fs/afs/rxrpc.c | 4 ++-- include/net/af_rxrpc.h | 4 +++- net/rxrpc/af_rxrpc.c | 14 +++++++++----- 4 files changed, 23 insertions(+), 15 deletions(-) diff --git a/Documentation/networking/rxrpc.txt b/Documentation/networking/rxrpc.txt index 2df5894353d6..cd7303d7fa25 100644 --- a/Documentation/networking/rxrpc.txt +++ b/Documentation/networking/rxrpc.txt @@ -1009,16 +1009,18 @@ The kernel interface functions are as follows: (*) Check call still alive. - u32 rxrpc_kernel_check_life(struct socket *sock, - struct rxrpc_call *call); + bool rxrpc_kernel_check_life(struct socket *sock, + struct rxrpc_call *call, + u32 *_life); void rxrpc_kernel_probe_life(struct socket *sock, struct rxrpc_call *call); - The first function returns a number that is updated when ACKs are received - from the peer (notably including PING RESPONSE ACKs which we can elicit by - sending PING ACKs to see if the call still exists on the server). The - caller should compare the numbers of two calls to see if the call is still - alive after waiting for a suitable interval. + The first function passes back in *_life a number that is updated when + ACKs are received from the peer (notably including PING RESPONSE ACKs + which we can elicit by sending PING ACKs to see if the call still exists + on the server). The caller should compare the numbers of two calls to see + if the call is still alive after waiting for a suitable interval. It also + returns true as long as the call hasn't yet reached the completed state. This allows the caller to work out if the server is still contactable and if the call is still alive on the server while waiting for the server to diff --git a/fs/afs/rxrpc.c b/fs/afs/rxrpc.c index 2c588f9bbbda..5cb11aff9298 100644 --- a/fs/afs/rxrpc.c +++ b/fs/afs/rxrpc.c @@ -621,7 +621,7 @@ static long afs_wait_for_call_to_complete(struct afs_call *call, rtt2 = 2; timeout = rtt2; - last_life = rxrpc_kernel_check_life(call->net->socket, call->rxcall); + rxrpc_kernel_check_life(call->net->socket, call->rxcall, &last_life); add_wait_queue(&call->waitq, &myself); for (;;) { @@ -639,7 +639,7 @@ static long afs_wait_for_call_to_complete(struct afs_call *call, if (afs_check_call_state(call, AFS_CALL_COMPLETE)) break; - life = rxrpc_kernel_check_life(call->net->socket, call->rxcall); + rxrpc_kernel_check_life(call->net->socket, call->rxcall, &life); if (timeout == 0 && life == last_life && signal_pending(current)) { if (stalled) diff --git a/include/net/af_rxrpc.h b/include/net/af_rxrpc.h index 2bfb87eb98ce..78c856cba4f5 100644 --- a/include/net/af_rxrpc.h +++ b/include/net/af_rxrpc.h @@ -61,10 +61,12 @@ int rxrpc_kernel_charge_accept(struct socket *, rxrpc_notify_rx_t, rxrpc_user_attach_call_t, unsigned long, gfp_t, unsigned int); void rxrpc_kernel_set_tx_length(struct socket *, struct rxrpc_call *, s64); -u32 rxrpc_kernel_check_life(const struct socket *, const struct rxrpc_call *); +bool rxrpc_kernel_check_life(const struct socket *, const struct rxrpc_call *, + u32 *); void rxrpc_kernel_probe_life(struct socket *, struct rxrpc_call *); u32 rxrpc_kernel_get_epoch(struct socket *, struct rxrpc_call *); bool rxrpc_kernel_get_reply_time(struct socket *, struct rxrpc_call *, ktime_t *); +bool rxrpc_kernel_call_is_complete(struct rxrpc_call *); #endif /* _NET_RXRPC_H */ diff --git a/net/rxrpc/af_rxrpc.c b/net/rxrpc/af_rxrpc.c index c54dce3ca0dd..ae8c5d7f3bf1 100644 --- a/net/rxrpc/af_rxrpc.c +++ b/net/rxrpc/af_rxrpc.c @@ -371,18 +371,22 @@ EXPORT_SYMBOL(rxrpc_kernel_end_call); * rxrpc_kernel_check_life - Check to see whether a call is still alive * @sock: The socket the call is on * @call: The call to check + * @_life: Where to store the life value * * Allow a kernel service to find out whether a call is still alive - ie. we're - * getting ACKs from the server. Returns a number representing the life state - * which can be compared to that returned by a previous call. + * getting ACKs from the server. Passes back in *_life a number representing + * the life state which can be compared to that returned by a previous call and + * return true if the call is still alive. * * If the life state stalls, rxrpc_kernel_probe_life() should be called and * then 2RTT waited. */ -u32 rxrpc_kernel_check_life(const struct socket *sock, - const struct rxrpc_call *call) +bool rxrpc_kernel_check_life(const struct socket *sock, + const struct rxrpc_call *call, + u32 *_life) { - return call->acks_latest; + *_life = call->acks_latest; + return call->state != RXRPC_CALL_COMPLETE; } EXPORT_SYMBOL(rxrpc_kernel_check_life); -- cgit v1.2.3 From 8e8715aaa905f6593f610f950d513e81fab5006a Mon Sep 17 00:00:00 2001 From: Marc Dionne Date: Fri, 12 Apr 2019 16:33:54 +0100 Subject: rxrpc: Allow errors to be returned from rxrpc_queue_packet() Change rxrpc_queue_packet()'s signature so that it can return any error code it may encounter when trying to send the packet. This allows the caller to eventually do something in case of error - though it should be noted that the packet has been queued and a resend is scheduled. Signed-off-by: Marc Dionne Signed-off-by: David Howells Signed-off-by: David S. Miller --- net/rxrpc/sendmsg.c | 21 ++++++++++++--------- 1 file changed, 12 insertions(+), 9 deletions(-) diff --git a/net/rxrpc/sendmsg.c b/net/rxrpc/sendmsg.c index 46c9312085b1..bec64deb7b0a 100644 --- a/net/rxrpc/sendmsg.c +++ b/net/rxrpc/sendmsg.c @@ -152,12 +152,13 @@ static void rxrpc_notify_end_tx(struct rxrpc_sock *rx, struct rxrpc_call *call, } /* - * Queue a DATA packet for transmission, set the resend timeout and send the - * packet immediately + * Queue a DATA packet for transmission, set the resend timeout and send + * the packet immediately. Returns the error from rxrpc_send_data_packet() + * in case the caller wants to do something with it. */ -static void rxrpc_queue_packet(struct rxrpc_sock *rx, struct rxrpc_call *call, - struct sk_buff *skb, bool last, - rxrpc_notify_end_tx_t notify_end_tx) +static int rxrpc_queue_packet(struct rxrpc_sock *rx, struct rxrpc_call *call, + struct sk_buff *skb, bool last, + rxrpc_notify_end_tx_t notify_end_tx) { struct rxrpc_skb_priv *sp = rxrpc_skb(skb); unsigned long now; @@ -250,7 +251,8 @@ static void rxrpc_queue_packet(struct rxrpc_sock *rx, struct rxrpc_call *call, out: rxrpc_free_skb(skb, rxrpc_skb_tx_freed); - _leave(""); + _leave(" = %d", ret); + return ret; } /* @@ -423,9 +425,10 @@ static int rxrpc_send_data(struct rxrpc_sock *rx, if (ret < 0) goto out; - rxrpc_queue_packet(rx, call, skb, - !msg_data_left(msg) && !more, - notify_end_tx); + ret = rxrpc_queue_packet(rx, call, skb, + !msg_data_left(msg) && !more, + notify_end_tx); + /* Should check for failure here */ skb = NULL; } } while (msg_data_left(msg) > 0); -- cgit v1.2.3 From f7f1dd3162efc7ffdbcdb9da1ad1599f8ab51296 Mon Sep 17 00:00:00 2001 From: Marc Dionne Date: Fri, 12 Apr 2019 16:34:02 +0100 Subject: afs: Check for rxrpc call completion in wait loop Check the state of the rxrpc call backing an afs call in each iteration of the call wait loop in case the rxrpc call has already been terminated at the rxrpc layer. Interrupt the wait loop and mark the afs call as complete if the rxrpc layer call is complete. There were cases where rxrpc errors were not passed up to afs, which could result in this loop waiting forever for an afs call to transition to AFS_CALL_COMPLETE while the rx call was already complete. Signed-off-by: Marc Dionne Signed-off-by: David Howells Signed-off-by: David S. Miller --- fs/afs/rxrpc.c | 22 ++++++++++++++++------ 1 file changed, 16 insertions(+), 6 deletions(-) diff --git a/fs/afs/rxrpc.c b/fs/afs/rxrpc.c index 5cb11aff9298..c14001b42d20 100644 --- a/fs/afs/rxrpc.c +++ b/fs/afs/rxrpc.c @@ -610,6 +610,7 @@ static long afs_wait_for_call_to_complete(struct afs_call *call, bool stalled = false; u64 rtt; u32 life, last_life; + bool rxrpc_complete = false; DECLARE_WAITQUEUE(myself, current); @@ -639,7 +640,12 @@ static long afs_wait_for_call_to_complete(struct afs_call *call, if (afs_check_call_state(call, AFS_CALL_COMPLETE)) break; - rxrpc_kernel_check_life(call->net->socket, call->rxcall, &life); + if (!rxrpc_kernel_check_life(call->net->socket, call->rxcall, &life)) { + /* rxrpc terminated the call. */ + rxrpc_complete = true; + break; + } + if (timeout == 0 && life == last_life && signal_pending(current)) { if (stalled) @@ -663,12 +669,16 @@ static long afs_wait_for_call_to_complete(struct afs_call *call, remove_wait_queue(&call->waitq, &myself); __set_current_state(TASK_RUNNING); - /* Kill off the call if it's still live. */ if (!afs_check_call_state(call, AFS_CALL_COMPLETE)) { - _debug("call interrupted"); - if (rxrpc_kernel_abort_call(call->net->socket, call->rxcall, - RX_USER_ABORT, -EINTR, "KWI")) - afs_set_call_complete(call, -EINTR, 0); + if (rxrpc_complete) { + afs_set_call_complete(call, call->error, call->abort_code); + } else { + /* Kill off the call if it's still live. */ + _debug("call interrupted"); + if (rxrpc_kernel_abort_call(call->net->socket, call->rxcall, + RX_USER_ABORT, -EINTR, "KWI")) + afs_set_call_complete(call, -EINTR, 0); + } } spin_lock_bh(&call->state_lock); -- cgit v1.2.3 From 39ce67557568962fa9d1593741f76c4cc6762469 Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 12 Apr 2019 16:34:09 +0100 Subject: rxrpc: Trace received connection aborts Trace received calls that are aborted due to a connection abort, typically because of authentication failure. Without this, connection aborts don't show up in the trace log. Signed-off-by: David Howells Signed-off-by: David S. Miller --- net/rxrpc/conn_event.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/net/rxrpc/conn_event.c b/net/rxrpc/conn_event.c index b6fca8ebb117..8d31fb4c51e1 100644 --- a/net/rxrpc/conn_event.c +++ b/net/rxrpc/conn_event.c @@ -153,7 +153,8 @@ static void rxrpc_conn_retransmit_call(struct rxrpc_connection *conn, * pass a connection-level abort onto all calls on that connection */ static void rxrpc_abort_calls(struct rxrpc_connection *conn, - enum rxrpc_call_completion compl) + enum rxrpc_call_completion compl, + rxrpc_serial_t serial) { struct rxrpc_call *call; int i; @@ -173,6 +174,9 @@ static void rxrpc_abort_calls(struct rxrpc_connection *conn, call->call_id, 0, conn->abort_code, conn->error); + else + trace_rxrpc_rx_abort(call, serial, + conn->abort_code); if (rxrpc_set_call_completion(call, compl, conn->abort_code, conn->error)) @@ -213,8 +217,6 @@ static int rxrpc_abort_connection(struct rxrpc_connection *conn, conn->state = RXRPC_CONN_LOCALLY_ABORTED; spin_unlock_bh(&conn->state_lock); - rxrpc_abort_calls(conn, RXRPC_CALL_LOCALLY_ABORTED); - msg.msg_name = &conn->params.peer->srx.transport; msg.msg_namelen = conn->params.peer->srx.transport_len; msg.msg_control = NULL; @@ -242,6 +244,7 @@ static int rxrpc_abort_connection(struct rxrpc_connection *conn, len = iov[0].iov_len + iov[1].iov_len; serial = atomic_inc_return(&conn->serial); + rxrpc_abort_calls(conn, RXRPC_CALL_LOCALLY_ABORTED, serial); whdr.serial = htonl(serial); _proto("Tx CONN ABORT %%%u { %d }", serial, conn->abort_code); @@ -321,7 +324,7 @@ static int rxrpc_process_event(struct rxrpc_connection *conn, conn->error = -ECONNABORTED; conn->abort_code = abort_code; conn->state = RXRPC_CONN_REMOTELY_ABORTED; - rxrpc_abort_calls(conn, RXRPC_CALL_REMOTELY_ABORTED); + rxrpc_abort_calls(conn, RXRPC_CALL_REMOTELY_ABORTED, sp->hdr.serial); return -ECONNABORTED; case RXRPC_PACKET_TYPE_CHALLENGE: -- cgit v1.2.3 From 1a2391c30c0b9d041bc340f68df81d49c53546cc Mon Sep 17 00:00:00 2001 From: Jeffrey Altman Date: Fri, 12 Apr 2019 16:34:16 +0100 Subject: rxrpc: Fix detection of out of order acks The rxrpc packet serial number cannot be safely used to compute out of order ack packets for several reasons: 1. The allocation of serial numbers cannot be assumed to imply the order by which acks are populated and transmitted. In some rxrpc implementations, delayed acks and ping acks are transmitted asynchronously to the receipt of data packets and so may be transmitted out of order. As a result, they can race with idle acks. 2. Serial numbers are allocated by the rxrpc connection and not the call and as such may wrap independently if multiple channels are in use. In any case, what matters is whether the ack packet provides new information relating to the bounds of the window (the firstPacket and previousPacket in the ACK data). Fix this by discarding packets that appear to wind back the window bounds rather than on serial number procession. Fixes: 298bc15b2079 ("rxrpc: Only take the rwind and mtu values from latest ACK") Signed-off-by: Jeffrey Altman Signed-off-by: David Howells Tested-by: Marc Dionne Signed-off-by: David S. Miller --- net/rxrpc/ar-internal.h | 1 + net/rxrpc/input.c | 18 ++++++++++++------ 2 files changed, 13 insertions(+), 6 deletions(-) diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h index 4b1a534d290a..062ca9dc29b8 100644 --- a/net/rxrpc/ar-internal.h +++ b/net/rxrpc/ar-internal.h @@ -654,6 +654,7 @@ struct rxrpc_call { u8 ackr_reason; /* reason to ACK */ u16 ackr_skew; /* skew on packet being ACK'd */ rxrpc_serial_t ackr_serial; /* serial of packet being ACK'd */ + rxrpc_serial_t ackr_first_seq; /* first sequence number received */ rxrpc_seq_t ackr_prev_seq; /* previous sequence number received */ rxrpc_seq_t ackr_consumed; /* Highest packet shown consumed */ rxrpc_seq_t ackr_seen; /* Highest packet shown seen */ diff --git a/net/rxrpc/input.c b/net/rxrpc/input.c index 9128aa0e40aa..4c6f9d0a00e7 100644 --- a/net/rxrpc/input.c +++ b/net/rxrpc/input.c @@ -837,7 +837,7 @@ static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb, u8 acks[RXRPC_MAXACKS]; } buf; rxrpc_serial_t acked_serial; - rxrpc_seq_t first_soft_ack, hard_ack; + rxrpc_seq_t first_soft_ack, hard_ack, prev_pkt; int nr_acks, offset, ioffset; _enter(""); @@ -851,13 +851,14 @@ static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb, acked_serial = ntohl(buf.ack.serial); first_soft_ack = ntohl(buf.ack.firstPacket); + prev_pkt = ntohl(buf.ack.previousPacket); hard_ack = first_soft_ack - 1; nr_acks = buf.ack.nAcks; summary.ack_reason = (buf.ack.reason < RXRPC_ACK__INVALID ? buf.ack.reason : RXRPC_ACK__INVALID); trace_rxrpc_rx_ack(call, sp->hdr.serial, acked_serial, - first_soft_ack, ntohl(buf.ack.previousPacket), + first_soft_ack, prev_pkt, summary.ack_reason, nr_acks); if (buf.ack.reason == RXRPC_ACK_PING_RESPONSE) @@ -878,8 +879,9 @@ static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb, rxrpc_propose_ack_respond_to_ack); } - /* Discard any out-of-order or duplicate ACKs. */ - if (before_eq(sp->hdr.serial, call->acks_latest)) + /* Discard any out-of-order or duplicate ACKs (outside lock). */ + if (before(first_soft_ack, call->ackr_first_seq) || + before(prev_pkt, call->ackr_prev_seq)) return; buf.info.rxMTU = 0; @@ -890,12 +892,16 @@ static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb, spin_lock(&call->input_lock); - /* Discard any out-of-order or duplicate ACKs. */ - if (before_eq(sp->hdr.serial, call->acks_latest)) + /* Discard any out-of-order or duplicate ACKs (inside lock). */ + if (before(first_soft_ack, call->ackr_first_seq) || + before(prev_pkt, call->ackr_prev_seq)) goto out; call->acks_latest_ts = skb->tstamp; call->acks_latest = sp->hdr.serial; + call->ackr_first_seq = first_soft_ack; + call->ackr_prev_seq = prev_pkt; + /* Parse rwind and mtu sizes if provided. */ if (buf.info.rxMTU) rxrpc_input_ackinfo(call, skb, &buf.info); -- cgit v1.2.3 From ed0de45a1008991fdaa27a0152befcb74d126a8b Mon Sep 17 00:00:00 2001 From: Stephen Suryaputra Date: Fri, 12 Apr 2019 16:19:27 -0400 Subject: ipv4: recompile ip options in ipv4_link_failure Recompile IP options since IPCB may not be valid anymore when ipv4_link_failure is called from arp_error_report. Refer to the commit 3da1ed7ac398 ("net: avoid use IPCB in cipso_v4_error") and the commit before that (9ef6b42ad6fd) for a similar issue. Signed-off-by: Stephen Suryaputra Signed-off-by: David S. Miller --- net/ipv4/route.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/net/ipv4/route.c b/net/ipv4/route.c index a5da63e5faa2..0206789bc2b7 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -1186,8 +1186,16 @@ static struct dst_entry *ipv4_dst_check(struct dst_entry *dst, u32 cookie) static void ipv4_link_failure(struct sk_buff *skb) { struct rtable *rt; + struct ip_options opt; - icmp_send(skb, ICMP_DEST_UNREACH, ICMP_HOST_UNREACH, 0); + /* Recompile ip options since IPCB may not be valid anymore. + */ + memset(&opt, 0, sizeof(opt)); + opt.optlen = ip_hdr(skb)->ihl*4 - sizeof(struct iphdr); + if (__ip_options_compile(dev_net(skb->dev), &opt, skb, NULL)) + return; + + __icmp_send(skb, ICMP_DEST_UNREACH, ICMP_HOST_UNREACH, 0, &opt); rt = skb_rtable(skb); if (rt) -- cgit v1.2.3 From ba25b81e3a420f8345585029d49ee32e73de9d5f Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Sat, 13 Apr 2019 08:37:36 +0100 Subject: afs: avoid deprecated get_seconds() get_seconds() has a limited range on 32-bit architectures and is deprecated because of that. While AFS uses the same limits for its inode timestamps on the wire protocol, let's just use the simpler current_time() as we do for other file systems. This will still zero out the 'tv_nsec' field of the timestamps internally. Signed-off-by: Arnd Bergmann Signed-off-by: David Howells --- fs/afs/inode.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/fs/afs/inode.c b/fs/afs/inode.c index 1a4ce07fb406..9cedc3fc1b77 100644 --- a/fs/afs/inode.c +++ b/fs/afs/inode.c @@ -216,9 +216,7 @@ struct inode *afs_iget_pseudo_dir(struct super_block *sb, bool root) set_nlink(inode, 2); inode->i_uid = GLOBAL_ROOT_UID; inode->i_gid = GLOBAL_ROOT_GID; - inode->i_ctime.tv_sec = get_seconds(); - inode->i_ctime.tv_nsec = 0; - inode->i_atime = inode->i_mtime = inode->i_ctime; + inode->i_ctime = inode->i_atime = inode->i_mtime = current_time(inode); inode->i_blocks = 0; inode_set_iversion_raw(inode, 0); inode->i_generation = 0; -- cgit v1.2.3 From d2abfa86ff373bd00634a656c7ad5531747f2bf8 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Sat, 13 Apr 2019 08:37:36 +0100 Subject: afs: Avoid section confusion in CM_NAME __tracepoint_str cannot be const because the tracepoint_str section is not read-only. Remove the stray const. Cc: dhowells@redhat.com Cc: viro@zeniv.linux.org.uk Signed-off-by: Andi Kleen --- fs/afs/cmservice.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/afs/cmservice.c b/fs/afs/cmservice.c index 8ee5972893ed..2f8acb4c556d 100644 --- a/fs/afs/cmservice.c +++ b/fs/afs/cmservice.c @@ -34,7 +34,7 @@ static void SRXAFSCB_TellMeAboutYourself(struct work_struct *); static int afs_deliver_yfs_cb_callback(struct afs_call *); #define CM_NAME(name) \ - const char afs_SRXCB##name##_name[] __tracepoint_string = \ + char afs_SRXCB##name##_name[] __tracepoint_string = \ "CB." #name /* -- cgit v1.2.3 From 8022c4b95c3793d7ba28ab0701ea15b5deb46e02 Mon Sep 17 00:00:00 2001 From: David Howells Date: Sat, 13 Apr 2019 08:37:37 +0100 Subject: afs: Differentiate abort due to unmarshalling from other errors Differentiate an abort due to an unmarshalling error from an abort due to other errors, such as ENETUNREACH. It doesn't make sense to set abort code RXGEN_*_UNMARSHAL in such a case, so use RX_USER_ABORT instead. Signed-off-by: David Howells --- fs/afs/rxrpc.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/fs/afs/rxrpc.c b/fs/afs/rxrpc.c index c14001b42d20..15c7e82d80cb 100644 --- a/fs/afs/rxrpc.c +++ b/fs/afs/rxrpc.c @@ -572,13 +572,17 @@ static void afs_deliver_to_call(struct afs_call *call) case -ENODATA: case -EBADMSG: case -EMSGSIZE: - default: abort_code = RXGEN_CC_UNMARSHAL; if (state != AFS_CALL_CL_AWAIT_REPLY) abort_code = RXGEN_SS_UNMARSHAL; rxrpc_kernel_abort_call(call->net->socket, call->rxcall, abort_code, ret, "KUM"); goto local_abort; + default: + abort_code = RX_USER_ABORT; + rxrpc_kernel_abort_call(call->net->socket, call->rxcall, + abort_code, ret, "KER"); + goto local_abort; } } -- cgit v1.2.3 From 21bd68f196ca91fc0f3d9bd1b32f6e530e8c1c88 Mon Sep 17 00:00:00 2001 From: Marc Dionne Date: Sat, 13 Apr 2019 08:37:37 +0100 Subject: afs: Unlock pages for __pagevec_release() __pagevec_release() complains loudly if any page in the vector is still locked. The pages need to be locked for generic_error_remove_page(), but that function doesn't actually unlock them. Unlock the pages afterwards. Signed-off-by: Marc Dionne Signed-off-by: David Howells Tested-by: Jonathan Billings --- fs/afs/write.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/afs/write.c b/fs/afs/write.c index 72efcfcf9f95..0122d7445fba 100644 --- a/fs/afs/write.c +++ b/fs/afs/write.c @@ -264,6 +264,7 @@ static void afs_kill_pages(struct address_space *mapping, first = page->index + 1; lock_page(page); generic_error_remove_page(mapping, page); + unlock_page(page); } __pagevec_release(&pv); -- cgit v1.2.3 From eeba1e9cf31d064284dd1fa7bd6cfe01395bd03d Mon Sep 17 00:00:00 2001 From: David Howells Date: Sat, 13 Apr 2019 08:37:37 +0100 Subject: afs: Fix in-progess ops to ignore server-level callback invalidation The in-kernel afs filesystem client counts the number of server-level callback invalidation events (CB.InitCallBackState* RPC operations) that it receives from the server. This is stored in cb_s_break in various structures, including afs_server and afs_vnode. If an inode is examined by afs_validate(), say, the afs_server copy is compared, along with other break counters, to those in afs_vnode, and if one or more of the counters do not match, it is considered that the server's callback promise is broken. At points where this happens, AFS_VNODE_CB_PROMISED is cleared to indicate that the status must be refetched from the server. afs_validate() issues an FS.FetchStatus operation to get updated metadata - and based on the updated data_version may invalidate the pagecache too. However, the break counters are also used to determine whether to note a new callback in the vnode (which would set the AFS_VNODE_CB_PROMISED flag) and whether to cache the permit data included in the YFSFetchStatus record by the server. The problem comes when the server sends us a CB.InitCallBackState op. The first such instance doesn't cause cb_s_break to be incremented, but rather causes AFS_SERVER_FL_NEW to be cleared - but thereafter, say some hours after last use and all the volumes have been automatically unmounted and the server has forgotten about the client[*], this *will* likely cause an increment. [*] There are other circumstances too, such as the server restarting or needing to make space in its callback table. Note that the server won't send us a CB.InitCallBackState op until we talk to it again. So what happens is: (1) A mount for a new volume is attempted, a inode is created for the root vnode and vnode->cb_s_break and AFS_VNODE_CB_PROMISED aren't set immediately, as we don't have a nominated server to talk to yet - and we may iterate through a few to find one. (2) Before the operation happens, afs_fetch_status(), say, notes in the cursor (fc.cb_break) the break counter sum from the vnode, volume and server counters, but the server->cb_s_break is currently 0. (3) We send FS.FetchStatus to the server. The server sends us back CB.InitCallBackState. We increment server->cb_s_break. (4) Our FS.FetchStatus completes. The reply includes a callback record. (5) xdr_decode_AFSCallBack()/xdr_decode_YFSCallBack() check to see whether the callback promise was broken by checking the break counter sum from step (2) against the current sum. This fails because of step (3), so we don't set the callback record and, importantly, don't set AFS_VNODE_CB_PROMISED on the vnode. This does not preclude the syscall from progressing, and we don't loop here rechecking the status, but rather assume it's good enough for one round only and will need to be rechecked next time. (6) afs_validate() it triggered on the vnode, probably called from d_revalidate() checking the parent directory. (7) afs_validate() notes that AFS_VNODE_CB_PROMISED isn't set, so doesn't update vnode->cb_s_break and assumes the vnode to be invalid. (8) afs_validate() needs to calls afs_fetch_status(). Go back to step (2) and repeat, every time the vnode is validated. This primarily affects volume root dir vnodes. Everything subsequent to those inherit an already incremented cb_s_break upon mounting. The issue is that we assume that the callback record and the cached permit information in a reply from the server can't be trusted after getting a server break - but this is wrong since the server makes sure things are done in the right order, holding up our ops if necessary[*]. [*] There is an extremely unlikely scenario where a reply from before the CB.InitCallBackState could get its delivery deferred till after - at which point we think we have a promise when we don't. This, however, requires unlucky mass packet loss to one call. AFS_SERVER_FL_NEW tries to paper over the cracks for the initial mount from a server we've never contacted before, but this should be unnecessary. It's also further insulated from the problem on an initial mount by querying the server first with FS.GetCapabilities, which triggers the CB.InitCallBackState. Fix this by (1) Remove AFS_SERVER_FL_NEW. (2) In afs_calc_vnode_cb_break(), don't include cb_s_break in the calculation. (3) In afs_cb_is_broken(), don't include cb_s_break in the check. Signed-off-by: David Howells --- fs/afs/callback.c | 3 +-- fs/afs/internal.h | 4 +--- fs/afs/server.c | 1 - 3 files changed, 2 insertions(+), 6 deletions(-) diff --git a/fs/afs/callback.c b/fs/afs/callback.c index 1c7955f5cdaf..128f2dbe256a 100644 --- a/fs/afs/callback.c +++ b/fs/afs/callback.c @@ -203,8 +203,7 @@ void afs_put_cb_interest(struct afs_net *net, struct afs_cb_interest *cbi) */ void afs_init_callback_state(struct afs_server *server) { - if (!test_and_clear_bit(AFS_SERVER_FL_NEW, &server->flags)) - server->cb_s_break++; + server->cb_s_break++; } /* diff --git a/fs/afs/internal.h b/fs/afs/internal.h index bb1f244b2b3a..3904ab0b9563 100644 --- a/fs/afs/internal.h +++ b/fs/afs/internal.h @@ -474,7 +474,6 @@ struct afs_server { time64_t put_time; /* Time at which last put */ time64_t update_at; /* Time at which to next update the record */ unsigned long flags; -#define AFS_SERVER_FL_NEW 0 /* New server, don't inc cb_s_break */ #define AFS_SERVER_FL_NOT_READY 1 /* The record is not ready for use */ #define AFS_SERVER_FL_NOT_FOUND 2 /* VL server says no such server */ #define AFS_SERVER_FL_VL_FAIL 3 /* Failed to access VL server */ @@ -827,7 +826,7 @@ static inline struct afs_cb_interest *afs_get_cb_interest(struct afs_cb_interest static inline unsigned int afs_calc_vnode_cb_break(struct afs_vnode *vnode) { - return vnode->cb_break + vnode->cb_s_break + vnode->cb_v_break; + return vnode->cb_break + vnode->cb_v_break; } static inline bool afs_cb_is_broken(unsigned int cb_break, @@ -835,7 +834,6 @@ static inline bool afs_cb_is_broken(unsigned int cb_break, const struct afs_cb_interest *cbi) { return !cbi || cb_break != (vnode->cb_break + - cbi->server->cb_s_break + vnode->volume->cb_v_break); } diff --git a/fs/afs/server.c b/fs/afs/server.c index 642afa2e9783..65b33b6da48b 100644 --- a/fs/afs/server.c +++ b/fs/afs/server.c @@ -226,7 +226,6 @@ static struct afs_server *afs_alloc_server(struct afs_net *net, RCU_INIT_POINTER(server->addresses, alist); server->addr_version = alist->version; server->uuid = *uuid; - server->flags = (1UL << AFS_SERVER_FL_NEW); server->update_at = ktime_get_real_seconds() + afs_server_update_delay; rwlock_init(&server->fs_lock); INIT_HLIST_HEAD(&server->cb_volumes); -- cgit v1.2.3 From 183ab39eb0ea9879bb68422a83e65f750f3192f0 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Sat, 13 Apr 2019 10:04:49 +0200 Subject: ALSA: hda: Initialize power_state field properly The recent commit 98081ca62cba ("ALSA: hda - Record the current power state before suspend/resume calls") made the HD-audio driver to store the PM state in power_state field. This forgot, however, the initialization at power up. Although the codec drivers usually don't need to refer to this field in the normal operation, let's initialize it properly for consistency. Fixes: 98081ca62cba ("ALSA: hda - Record the current power state before suspend/resume calls") Signed-off-by: Takashi Iwai --- sound/pci/hda/hda_codec.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/pci/hda/hda_codec.c b/sound/pci/hda/hda_codec.c index ec0b8595eb4d..701a69d856f5 100644 --- a/sound/pci/hda/hda_codec.c +++ b/sound/pci/hda/hda_codec.c @@ -969,6 +969,7 @@ int snd_hda_codec_device_new(struct hda_bus *bus, struct snd_card *card, /* power-up all before initialization */ hda_set_power_state(codec, AC_PWRST_D0); + codec->core.dev.power.power_state = PMSG_ON; snd_hda_codec_proc_new(codec); -- cgit v1.2.3 From becf2319f320cae43e20cf179cc51a355a0deb5f Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Mon, 25 Mar 2019 23:11:53 +0100 Subject: selftests: netfilter: check icmp pkttoobig errors are set as related When an icmp error such as pkttoobig is received, conntrack checks if the "inner" header (header of packet that did not fit link mtu) is matches an existing connection, and, if so, sets that packet as being related to the conntrack entry it found. It was recently reported that this "related" setting also works if the inner header is from another, different connection (i.e., artificial/forged icmp error). Add a test, followup patch will add additional "inner dst matches outer dst in reverse direction" check before setting related state. Link: https://www.synacktiv.com/posts/systems/icmp-reachable.html Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- tools/testing/selftests/netfilter/Makefile | 2 +- .../selftests/netfilter/conntrack_icmp_related.sh | 283 +++++++++++++++++++++ 2 files changed, 284 insertions(+), 1 deletion(-) create mode 100755 tools/testing/selftests/netfilter/conntrack_icmp_related.sh diff --git a/tools/testing/selftests/netfilter/Makefile b/tools/testing/selftests/netfilter/Makefile index c9ff2b47bd1c..a37cb1192c6a 100644 --- a/tools/testing/selftests/netfilter/Makefile +++ b/tools/testing/selftests/netfilter/Makefile @@ -1,6 +1,6 @@ # SPDX-License-Identifier: GPL-2.0 # Makefile for netfilter selftests -TEST_PROGS := nft_trans_stress.sh nft_nat.sh +TEST_PROGS := nft_trans_stress.sh nft_nat.sh conntrack_icmp_related.sh include ../lib.mk diff --git a/tools/testing/selftests/netfilter/conntrack_icmp_related.sh b/tools/testing/selftests/netfilter/conntrack_icmp_related.sh new file mode 100755 index 000000000000..b48e1833bc89 --- /dev/null +++ b/tools/testing/selftests/netfilter/conntrack_icmp_related.sh @@ -0,0 +1,283 @@ +#!/bin/bash +# +# check that ICMP df-needed/pkttoobig icmp are set are set as related +# state +# +# Setup is: +# +# nsclient1 -> nsrouter1 -> nsrouter2 -> nsclient2 +# MTU 1500, except for nsrouter2 <-> nsclient2 link (1280). +# ping nsclient2 from nsclient1, checking that conntrack did set RELATED +# 'fragmentation needed' icmp packet. +# +# In addition, nsrouter1 will perform IP masquerading, i.e. also +# check the icmp errors are propagated to the correct host as per +# nat of "established" icmp-echo "connection". + +# Kselftest framework requirement - SKIP code is 4. +ksft_skip=4 +ret=0 + +nft --version > /dev/null 2>&1 +if [ $? -ne 0 ];then + echo "SKIP: Could not run test without nft tool" + exit $ksft_skip +fi + +ip -Version > /dev/null 2>&1 +if [ $? -ne 0 ];then + echo "SKIP: Could not run test without ip tool" + exit $ksft_skip +fi + +cleanup() { + for i in 1 2;do ip netns del nsclient$i;done + for i in 1 2;do ip netns del nsrouter$i;done +} + +ipv4() { + echo -n 192.168.$1.2 +} + +ipv6 () { + echo -n dead:$1::2 +} + +check_counter() +{ + ns=$1 + name=$2 + expect=$3 + local lret=0 + + cnt=$(ip netns exec $ns nft list counter inet filter "$name" | grep -q "$expect") + if [ $? -ne 0 ]; then + echo "ERROR: counter $name in $ns has unexpected value (expected $expect)" 1>&2 + ip netns exec $ns nft list counter inet filter "$name" 1>&2 + lret=1 + fi + + return $lret +} + +check_unknown() +{ + expect="packets 0 bytes 0" + for n in nsclient1 nsclient2 nsrouter1 nsrouter2; do + check_counter $n "unknown" "$expect" + if [ $? -ne 0 ] ;then + return 1 + fi + done + + return 0 +} + +for n in nsclient1 nsclient2 nsrouter1 nsrouter2; do + ip netns add $n + ip -net $n link set lo up +done + +DEV=veth0 +ip link add $DEV netns nsclient1 type veth peer name eth1 netns nsrouter1 +DEV=veth0 +ip link add $DEV netns nsclient2 type veth peer name eth1 netns nsrouter2 + +DEV=veth0 +ip link add $DEV netns nsrouter1 type veth peer name eth2 netns nsrouter2 + +DEV=veth0 +for i in 1 2; do + ip -net nsclient$i link set $DEV up + ip -net nsclient$i addr add $(ipv4 $i)/24 dev $DEV + ip -net nsclient$i addr add $(ipv6 $i)/64 dev $DEV +done + +ip -net nsrouter1 link set eth1 up +ip -net nsrouter1 link set veth0 up + +ip -net nsrouter2 link set eth1 up +ip -net nsrouter2 link set eth2 up + +ip -net nsclient1 route add default via 192.168.1.1 +ip -net nsclient1 -6 route add default via dead:1::1 + +ip -net nsclient2 route add default via 192.168.2.1 +ip -net nsclient2 route add default via dead:2::1 + +i=3 +ip -net nsrouter1 addr add 192.168.1.1/24 dev eth1 +ip -net nsrouter1 addr add 192.168.3.1/24 dev veth0 +ip -net nsrouter1 addr add dead:1::1/64 dev eth1 +ip -net nsrouter1 addr add dead:3::1/64 dev veth0 +ip -net nsrouter1 route add default via 192.168.3.10 +ip -net nsrouter1 -6 route add default via dead:3::10 + +ip -net nsrouter2 addr add 192.168.2.1/24 dev eth1 +ip -net nsrouter2 addr add 192.168.3.10/24 dev eth2 +ip -net nsrouter2 addr add dead:2::1/64 dev eth1 +ip -net nsrouter2 addr add dead:3::10/64 dev eth2 +ip -net nsrouter2 route add default via 192.168.3.1 +ip -net nsrouter2 route add default via dead:3::1 + +sleep 2 +for i in 4 6; do + ip netns exec nsrouter1 sysctl -q net.ipv$i.conf.all.forwarding=1 + ip netns exec nsrouter2 sysctl -q net.ipv$i.conf.all.forwarding=1 +done + +for netns in nsrouter1 nsrouter2; do +ip netns exec $netns nft -f - </dev/null +if [ $? -ne 0 ]; then + echo "ERROR: netns ip routing/connectivity broken" 1>&2 + cleanup + exit 1 +fi +ip netns exec nsclient1 ping6 -q -c 1 -s 1000 dead:2::2 >/dev/null +if [ $? -ne 0 ]; then + echo "ERROR: netns ipv6 routing/connectivity broken" 1>&2 + cleanup + exit 1 +fi + +check_unknown +if [ $? -ne 0 ]; then + ret=1 +fi + +expect="packets 0 bytes 0" +for netns in nsrouter1 nsrouter2 nsclient1;do + check_counter "$netns" "related" "$expect" + if [ $? -ne 0 ]; then + ret=1 + fi +done + +expect="packets 2 bytes 2076" +check_counter nsclient2 "new" "$expect" +if [ $? -ne 0 ]; then + ret=1 +fi + +ip netns exec nsclient1 ping -q -c 1 -s 1300 -M do 192.168.2.2 > /dev/null +if [ $? -eq 0 ]; then + echo "ERROR: ping should have failed with PMTU too big error" 1>&2 + ret=1 +fi + +# nsrouter2 should have generated the icmp error, so +# related counter should be 0 (its in forward). +expect="packets 0 bytes 0" +check_counter "nsrouter2" "related" "$expect" +if [ $? -ne 0 ]; then + ret=1 +fi + +# but nsrouter1 should have seen it, same for nsclient1. +expect="packets 1 bytes 576" +for netns in nsrouter1 nsclient1;do + check_counter "$netns" "related" "$expect" + if [ $? -ne 0 ]; then + ret=1 + fi +done + +ip netns exec nsclient1 ping6 -c 1 -s 1300 dead:2::2 > /dev/null +if [ $? -eq 0 ]; then + echo "ERROR: ping6 should have failed with PMTU too big error" 1>&2 + ret=1 +fi + +expect="packets 2 bytes 1856" +for netns in nsrouter1 nsclient1;do + check_counter "$netns" "related" "$expect" + if [ $? -ne 0 ]; then + ret=1 + fi +done + +if [ $ret -eq 0 ];then + echo "PASS: icmp mtu error had RELATED state" +else + echo "ERROR: icmp error RELATED state test has failed" +fi + +cleanup +exit $ret -- cgit v1.2.3 From 1025ce75212bf06d93910297a03ed6a4d41d8213 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Mon, 25 Mar 2019 23:11:54 +0100 Subject: netfilter: conntrack: don't set related state for different outer address Luca Moro says: ------ The issue lies in the filtering of ICMP and ICMPv6 errors that include an inner IP datagram. For these packets, icmp_error_message() extract the ICMP error and inner layer to search of a known state. If a state is found the packet is tagged as related (IP_CT_RELATED). The problem is that there is no correlation check between the inner and outer layer of the packet. So one can encapsulate an error with an inner layer matching a known state, while its outer layer is directed to a filtered host. In this case the whole packet will be tagged as related. This has various implications from a rule bypass (if a rule to related trafic is allow), to a known state oracle. Unfortunately, we could not find a real statement in a RFC on how this case should be filtered. The closest we found is RFC5927 (Section 4.3) but it is not very clear. A possible fix would be to check that the inner IP source is the same than the outer destination. We believed this kind of attack was not documented yet, so we started to write a blog post about it. You can find it attached to this mail (sorry for the extract quality). It contains more technical details, PoC and discussion about the identified behavior. We discovered later that https://www.gont.com.ar/papers/filtering-of-icmp-error-messages.pdf described a similar attack concept in 2004 but without the stateful filtering in mind. ----- This implements above suggested fix: In icmp(v6) error handler, take outer destination address, then pass that into the common function that does the "related" association. After obtaining the nf_conn of the matching inner-headers connection, check that the destination address of the opposite direction tuple is the same as the outer address and only set RELATED if thats the case. Reported-by: Luca Moro Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_conntrack_l4proto.h | 6 ++ net/netfilter/nf_conntrack_proto_icmp.c | 93 +++++++++++++++++++++------- net/netfilter/nf_conntrack_proto_icmpv6.c | 52 ++-------------- 3 files changed, 84 insertions(+), 67 deletions(-) diff --git a/include/net/netfilter/nf_conntrack_l4proto.h b/include/net/netfilter/nf_conntrack_l4proto.h index 778087591983..a49edfdf47e8 100644 --- a/include/net/netfilter/nf_conntrack_l4proto.h +++ b/include/net/netfilter/nf_conntrack_l4proto.h @@ -75,6 +75,12 @@ bool nf_conntrack_invert_icmp_tuple(struct nf_conntrack_tuple *tuple, bool nf_conntrack_invert_icmpv6_tuple(struct nf_conntrack_tuple *tuple, const struct nf_conntrack_tuple *orig); +int nf_conntrack_inet_error(struct nf_conn *tmpl, struct sk_buff *skb, + unsigned int dataoff, + const struct nf_hook_state *state, + u8 l4proto, + union nf_inet_addr *outer_daddr); + int nf_conntrack_icmpv4_error(struct nf_conn *tmpl, struct sk_buff *skb, unsigned int dataoff, diff --git a/net/netfilter/nf_conntrack_proto_icmp.c b/net/netfilter/nf_conntrack_proto_icmp.c index 7df477996b16..9becac953587 100644 --- a/net/netfilter/nf_conntrack_proto_icmp.c +++ b/net/netfilter/nf_conntrack_proto_icmp.c @@ -103,49 +103,94 @@ int nf_conntrack_icmp_packet(struct nf_conn *ct, return NF_ACCEPT; } -/* Returns conntrack if it dealt with ICMP, and filled in skb fields */ -static int -icmp_error_message(struct nf_conn *tmpl, struct sk_buff *skb, - const struct nf_hook_state *state) +/* Check inner header is related to any of the existing connections */ +int nf_conntrack_inet_error(struct nf_conn *tmpl, struct sk_buff *skb, + unsigned int dataoff, + const struct nf_hook_state *state, + u8 l4proto, union nf_inet_addr *outer_daddr) { struct nf_conntrack_tuple innertuple, origtuple; const struct nf_conntrack_tuple_hash *h; const struct nf_conntrack_zone *zone; enum ip_conntrack_info ctinfo; struct nf_conntrack_zone tmp; + union nf_inet_addr *ct_daddr; + enum ip_conntrack_dir dir; + struct nf_conn *ct; WARN_ON(skb_nfct(skb)); zone = nf_ct_zone_tmpl(tmpl, skb, &tmp); /* Are they talking about one of our connections? */ - if (!nf_ct_get_tuplepr(skb, - skb_network_offset(skb) + ip_hdrlen(skb) - + sizeof(struct icmphdr), - PF_INET, state->net, &origtuple)) { - pr_debug("icmp_error_message: failed to get tuple\n"); + if (!nf_ct_get_tuplepr(skb, dataoff, + state->pf, state->net, &origtuple)) return -NF_ACCEPT; - } /* Ordinarily, we'd expect the inverted tupleproto, but it's been preserved inside the ICMP. */ - if (!nf_ct_invert_tuple(&innertuple, &origtuple)) { - pr_debug("icmp_error_message: no match\n"); + if (!nf_ct_invert_tuple(&innertuple, &origtuple)) return -NF_ACCEPT; - } - - ctinfo = IP_CT_RELATED; h = nf_conntrack_find_get(state->net, zone, &innertuple); - if (!h) { - pr_debug("icmp_error_message: no match\n"); + if (!h) + return -NF_ACCEPT; + + /* Consider: A -> T (=This machine) -> B + * Conntrack entry will look like this: + * Original: A->B + * Reply: B->T (SNAT case) OR A + * + * When this function runs, we got packet that looks like this: + * iphdr|icmphdr|inner_iphdr|l4header (tcp, udp, ..). + * + * Above nf_conntrack_find_get() makes lookup based on inner_hdr, + * so we should expect that destination of the found connection + * matches outer header destination address. + * + * In above example, we can consider these two cases: + * 1. Error coming in reply direction from B or M (middle box) to + * T (SNAT case) or A. + * Inner saddr will be B, dst will be T or A. + * The found conntrack will be reply tuple (B->T/A). + * 2. Error coming in original direction from A or M to B. + * Inner saddr will be A, inner daddr will be B. + * The found conntrack will be original tuple (A->B). + * + * In both cases, conntrack[dir].dst == inner.dst. + * + * A bogus packet could look like this: + * Inner: B->T + * Outer: B->X (other machine reachable by T). + * + * In this case, lookup yields connection A->B and will + * set packet from B->X as *RELATED*, even though no connection + * from X was ever seen. + */ + ct = nf_ct_tuplehash_to_ctrack(h); + dir = NF_CT_DIRECTION(h); + ct_daddr = &ct->tuplehash[dir].tuple.dst.u3; + if (!nf_inet_addr_cmp(outer_daddr, ct_daddr)) { + if (state->pf == AF_INET) { + nf_l4proto_log_invalid(skb, state->net, state->pf, + l4proto, + "outer daddr %pI4 != inner %pI4", + &outer_daddr->ip, &ct_daddr->ip); + } else if (state->pf == AF_INET6) { + nf_l4proto_log_invalid(skb, state->net, state->pf, + l4proto, + "outer daddr %pI6 != inner %pI6", + &outer_daddr->ip6, &ct_daddr->ip6); + } + nf_ct_put(ct); return -NF_ACCEPT; } - if (NF_CT_DIRECTION(h) == IP_CT_DIR_REPLY) + ctinfo = IP_CT_RELATED; + if (dir == IP_CT_DIR_REPLY) ctinfo += IP_CT_IS_REPLY; /* Update skb to refer to this connection */ - nf_ct_set(skb, nf_ct_tuplehash_to_ctrack(h), ctinfo); + nf_ct_set(skb, ct, ctinfo); return NF_ACCEPT; } @@ -162,11 +207,12 @@ int nf_conntrack_icmpv4_error(struct nf_conn *tmpl, struct sk_buff *skb, unsigned int dataoff, const struct nf_hook_state *state) { + union nf_inet_addr outer_daddr; const struct icmphdr *icmph; struct icmphdr _ih; /* Not enough header? */ - icmph = skb_header_pointer(skb, ip_hdrlen(skb), sizeof(_ih), &_ih); + icmph = skb_header_pointer(skb, dataoff, sizeof(_ih), &_ih); if (icmph == NULL) { icmp_error_log(skb, state, "short packet"); return -NF_ACCEPT; @@ -199,7 +245,12 @@ int nf_conntrack_icmpv4_error(struct nf_conn *tmpl, icmph->type != ICMP_REDIRECT) return NF_ACCEPT; - return icmp_error_message(tmpl, skb, state); + memset(&outer_daddr, 0, sizeof(outer_daddr)); + outer_daddr.ip = ip_hdr(skb)->daddr; + + dataoff += sizeof(*icmph); + return nf_conntrack_inet_error(tmpl, skb, dataoff, state, + IPPROTO_ICMP, &outer_daddr); } #if IS_ENABLED(CONFIG_NF_CT_NETLINK) diff --git a/net/netfilter/nf_conntrack_proto_icmpv6.c b/net/netfilter/nf_conntrack_proto_icmpv6.c index bec4a3211658..c63ee3612855 100644 --- a/net/netfilter/nf_conntrack_proto_icmpv6.c +++ b/net/netfilter/nf_conntrack_proto_icmpv6.c @@ -123,51 +123,6 @@ int nf_conntrack_icmpv6_packet(struct nf_conn *ct, return NF_ACCEPT; } -static int -icmpv6_error_message(struct net *net, struct nf_conn *tmpl, - struct sk_buff *skb, - unsigned int icmp6off) -{ - struct nf_conntrack_tuple intuple, origtuple; - const struct nf_conntrack_tuple_hash *h; - enum ip_conntrack_info ctinfo; - struct nf_conntrack_zone tmp; - - WARN_ON(skb_nfct(skb)); - - /* Are they talking about one of our connections? */ - if (!nf_ct_get_tuplepr(skb, - skb_network_offset(skb) - + sizeof(struct ipv6hdr) - + sizeof(struct icmp6hdr), - PF_INET6, net, &origtuple)) { - pr_debug("icmpv6_error: Can't get tuple\n"); - return -NF_ACCEPT; - } - - /* Ordinarily, we'd expect the inverted tupleproto, but it's - been preserved inside the ICMP. */ - if (!nf_ct_invert_tuple(&intuple, &origtuple)) { - pr_debug("icmpv6_error: Can't invert tuple\n"); - return -NF_ACCEPT; - } - - ctinfo = IP_CT_RELATED; - - h = nf_conntrack_find_get(net, nf_ct_zone_tmpl(tmpl, skb, &tmp), - &intuple); - if (!h) { - pr_debug("icmpv6_error: no match\n"); - return -NF_ACCEPT; - } else { - if (NF_CT_DIRECTION(h) == IP_CT_DIR_REPLY) - ctinfo += IP_CT_IS_REPLY; - } - - /* Update skb to refer to this connection */ - nf_ct_set(skb, nf_ct_tuplehash_to_ctrack(h), ctinfo); - return NF_ACCEPT; -} static void icmpv6_error_log(const struct sk_buff *skb, const struct nf_hook_state *state, @@ -182,6 +137,7 @@ int nf_conntrack_icmpv6_error(struct nf_conn *tmpl, unsigned int dataoff, const struct nf_hook_state *state) { + union nf_inet_addr outer_daddr; const struct icmp6hdr *icmp6h; struct icmp6hdr _ih; int type; @@ -210,7 +166,11 @@ int nf_conntrack_icmpv6_error(struct nf_conn *tmpl, if (icmp6h->icmp6_type >= 128) return NF_ACCEPT; - return icmpv6_error_message(state->net, tmpl, skb, dataoff); + memcpy(&outer_daddr.ip6, &ipv6_hdr(skb)->daddr, + sizeof(outer_daddr.ip6)); + dataoff += sizeof(*icmp6h); + return nf_conntrack_inet_error(tmpl, skb, dataoff, state, + IPPROTO_ICMPV6, &outer_daddr); } #if IS_ENABLED(CONFIG_NF_CT_NETLINK) -- cgit v1.2.3 From 8176c8332751bf27597488d6e45c9b8f530593bf Mon Sep 17 00:00:00 2001 From: Alexander Potapenko Date: Thu, 28 Mar 2019 10:47:20 +0100 Subject: netfilter: conntrack: initialize ct->timeout KMSAN started reporting an error when accessing ct->timeout for the first time without initialization: BUG: KMSAN: uninit-value in __nf_ct_refresh_acct+0x1ae/0x470 net/netfilter/nf_conntrack_core.c:1765 ... dump_stack+0x173/0x1d0 lib/dump_stack.c:113 kmsan_report+0x131/0x2a0 mm/kmsan/kmsan.c:624 __msan_warning+0x7a/0xf0 mm/kmsan/kmsan_instr.c:310 __nf_ct_refresh_acct+0x1ae/0x470 net/netfilter/nf_conntrack_core.c:1765 nf_ct_refresh_acct ./include/net/netfilter/nf_conntrack.h:201 nf_conntrack_udp_packet+0xb44/0x1040 net/netfilter/nf_conntrack_proto_udp.c:122 nf_conntrack_handle_packet net/netfilter/nf_conntrack_core.c:1605 nf_conntrack_in+0x1250/0x26c9 net/netfilter/nf_conntrack_core.c:1696 ... Uninit was created at: kmsan_save_stack_with_flags mm/kmsan/kmsan.c:205 kmsan_internal_poison_shadow+0x92/0x150 mm/kmsan/kmsan.c:159 kmsan_kmalloc+0xa9/0x130 mm/kmsan/kmsan_hooks.c:173 kmem_cache_alloc+0x554/0xb10 mm/slub.c:2789 __nf_conntrack_alloc+0x16f/0x690 net/netfilter/nf_conntrack_core.c:1342 init_conntrack+0x6cb/0x2490 net/netfilter/nf_conntrack_core.c:1421 Signed-off-by: Alexander Potapenko Fixes: cc16921351d8ba1 ("netfilter: conntrack: avoid same-timeout update") Cc: Florian Westphal Acked-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_conntrack_core.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index 82bfbeef46af..a137d4e7f218 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -1350,6 +1350,7 @@ __nf_conntrack_alloc(struct net *net, /* save hash for reusing when confirming */ *(unsigned long *)(&ct->tuplehash[IP_CT_DIR_REPLY].hnnode.pprev) = hash; ct->status = 0; + ct->timeout = 0; write_pnet(&ct->ct_net, net); memset(&ct->__nfct_init_offset[0], 0, offsetof(struct nf_conn, proto) - -- cgit v1.2.3 From 0261ea1bd1eb0da5c0792a9119b8655cf33c80a3 Mon Sep 17 00:00:00 2001 From: Julian Anastasov Date: Sun, 31 Mar 2019 13:24:52 +0300 Subject: ipvs: do not schedule icmp errors from tunnels We can receive ICMP errors from client or from tunneling real server. While the former can be scheduled to real server, the latter should not be scheduled, they are decapsulated only when existing connection is found. Fixes: 6044eeffafbe ("ipvs: attempt to schedule icmp packets") Signed-off-by: Julian Anastasov Signed-off-by: Simon Horman Signed-off-by: Pablo Neira Ayuso --- net/netfilter/ipvs/ip_vs_core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c index 43bbaa32b1d6..14457551bcb4 100644 --- a/net/netfilter/ipvs/ip_vs_core.c +++ b/net/netfilter/ipvs/ip_vs_core.c @@ -1678,7 +1678,7 @@ ip_vs_in_icmp(struct netns_ipvs *ipvs, struct sk_buff *skb, int *related, if (!cp) { int v; - if (!sysctl_schedule_icmp(ipvs)) + if (ipip || !sysctl_schedule_icmp(ipvs)) return NF_ACCEPT; if (!ip_vs_try_to_schedule(ipvs, AF_INET, skb, pd, &v, &cp, &ciph)) -- cgit v1.2.3 From 06058632464845abb1af91521122fd04dd3daaec Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Sat, 13 Apr 2019 09:26:03 -0600 Subject: io_uring: park SQPOLL thread if it's percpu kthread expects this, or we can throw a warning on exit: WARNING: CPU: 0 PID: 7822 at kernel/kthread.c:399 __kthread_bind_mask+0x3b/0xc0 kernel/kthread.c:399 Kernel panic - not syncing: panic_on_warn set ... CPU: 0 PID: 7822 Comm: syz-executor030 Not tainted 5.1.0-rc4-next-20190412 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Call Trace: __dump_stack lib/dump_stack.c:77 [inline] dump_stack+0x172/0x1f0 lib/dump_stack.c:113 panic+0x2cb/0x72b kernel/panic.c:214 __warn.cold+0x20/0x46 kernel/panic.c:576 report_bug+0x263/0x2b0 lib/bug.c:186 fixup_bug arch/x86/kernel/traps.c:179 [inline] fixup_bug arch/x86/kernel/traps.c:174 [inline] do_error_trap+0x11b/0x200 arch/x86/kernel/traps.c:272 do_invalid_op+0x37/0x50 arch/x86/kernel/traps.c:291 invalid_op+0x14/0x20 arch/x86/entry/entry_64.S:973 RIP: 0010:__kthread_bind_mask+0x3b/0xc0 kernel/kthread.c:399 Code: 48 89 fb e8 f7 ab 24 00 4c 89 e6 48 89 df e8 ac e1 02 00 31 ff 49 89 c4 48 89 c6 e8 7f ad 24 00 4d 85 e4 75 15 e8 d5 ab 24 00 <0f> 0b e8 ce ab 24 00 5b 41 5c 41 5d 41 5e 5d c3 e8 c0 ab 24 00 4c RSP: 0018:ffff8880a89bfbb8 EFLAGS: 00010293 RAX: ffff88808ca7a280 RBX: ffff8880a98e4380 RCX: ffffffff814bdd11 RDX: 0000000000000000 RSI: ffffffff814bdd1b RDI: 0000000000000007 RBP: ffff8880a89bfbd8 R08: ffff88808ca7a280 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000000 R12: 0000000000000000 R13: ffffffff87691148 R14: ffff8880a98e43a0 R15: ffffffff81c91e10 __kthread_bind kernel/kthread.c:412 [inline] kthread_unpark+0x123/0x160 kernel/kthread.c:480 kthread_stop+0xfa/0x6c0 kernel/kthread.c:556 io_sq_thread_stop fs/io_uring.c:2057 [inline] io_sq_thread_stop fs/io_uring.c:2052 [inline] io_finish_async+0xab/0x180 fs/io_uring.c:2064 io_ring_ctx_free fs/io_uring.c:2534 [inline] io_ring_ctx_wait_and_kill+0x133/0x510 fs/io_uring.c:2591 io_uring_release+0x42/0x50 fs/io_uring.c:2599 __fput+0x2e5/0x8d0 fs/file_table.c:278 ____fput+0x16/0x20 fs/file_table.c:309 task_work_run+0x14a/0x1c0 kernel/task_work.c:113 exit_task_work include/linux/task_work.h:22 [inline] do_exit+0x90a/0x2fa0 kernel/exit.c:876 do_group_exit+0x135/0x370 kernel/exit.c:980 __do_sys_exit_group kernel/exit.c:991 [inline] __se_sys_exit_group kernel/exit.c:989 [inline] __x64_sys_exit_group+0x44/0x50 kernel/exit.c:989 do_syscall_64+0x103/0x610 arch/x86/entry/common.c:290 entry_SYSCALL_64_after_hwframe+0x49/0xbe Reported-by: syzbot+6d4a92619eb0ad08602b@syzkaller.appspotmail.com Fixes: 6c271ce2f1d5 ("io_uring: add submission polling") Signed-off-by: Jens Axboe --- fs/io_uring.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/fs/io_uring.c b/fs/io_uring.c index 89aa8412b5f5..e5008c1b82be 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -1920,6 +1920,10 @@ static int io_sq_thread(void *data) unuse_mm(cur_mm); mmput(cur_mm); } + + if (kthread_should_park()) + kthread_parkme(); + return 0; } @@ -2054,6 +2058,7 @@ static void io_sq_thread_stop(struct io_ring_ctx *ctx) if (ctx->sqo_thread) { ctx->sqo_stop = 1; mb(); + kthread_park(ctx->sqo_thread); kthread_stop(ctx->sqo_thread); ctx->sqo_thread = NULL; } -- cgit v1.2.3 From 917257daa0fea7a007102691c0e27d9216a96768 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Sat, 13 Apr 2019 09:28:55 -0600 Subject: io_uring: only test SQPOLL cpu after we've verified it We currently call cpu_possible() even if we don't use the CPU. Move the test under the SQ_AFF branch, which is the only place where we'll use the value. Do the cpu_possible() test AFTER we've limited it to a max of NR_CPUS. This avoids triggering the following warning: WARNING: CPU: 1 PID: 7600 at include/linux/cpumask.h:121 cpu_max_bits_warn if CONFIG_DEBUG_PER_CPU_MAPS is enabled. While in there, also move the SQ thread idle period assignment inside SETUP_SQPOLL, as we don't use it otherwise either. Reported-by: syzbot+cd714a07c6de2bc34293@syzkaller.appspotmail.com Fixes: 6c271ce2f1d5 ("io_uring: add submission polling") Signed-off-by: Jens Axboe --- fs/io_uring.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index e5008c1b82be..24355e0c47f0 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -2241,10 +2241,6 @@ static int io_sq_offload_start(struct io_ring_ctx *ctx, mmgrab(current->mm); ctx->sqo_mm = current->mm; - ctx->sq_thread_idle = msecs_to_jiffies(p->sq_thread_idle); - if (!ctx->sq_thread_idle) - ctx->sq_thread_idle = HZ; - ret = -EINVAL; if (!cpu_possible(p->sq_thread_cpu)) goto err; @@ -2254,10 +2250,18 @@ static int io_sq_offload_start(struct io_ring_ctx *ctx, if (!capable(CAP_SYS_ADMIN)) goto err; + ctx->sq_thread_idle = msecs_to_jiffies(p->sq_thread_idle); + if (!ctx->sq_thread_idle) + ctx->sq_thread_idle = HZ; + if (p->flags & IORING_SETUP_SQ_AFF) { int cpu; cpu = array_index_nospec(p->sq_thread_cpu, NR_CPUS); + ret = -EINVAL; + if (!cpu_possible(p->sq_thread_cpu)) + goto err; + ctx->sqo_thread = kthread_create_on_cpu(io_sq_thread, ctx, cpu, "io_uring-sq"); -- cgit v1.2.3 From 77f1e0a52d26242b6c2dba019f6ebebfb9ff701e Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Fri, 18 Jan 2019 10:34:16 -0700 Subject: bfq: update internal depth state when queue depth changes A previous commit moved the shallow depth and BFQ depth map calculations to be done at init time, moving it outside of the hotter IO path. This potentially causes hangs if the users changes the depth of the scheduler map, by writing to the 'nr_requests' sysfs file for that device. Add a blk-mq-sched hook that allows blk-mq to inform the scheduler if the depth changes, so that the scheduler can update its internal state. Tested-by: Kai Krakow Reported-by: Paolo Valente Fixes: f0635b8a416e ("bfq: calculate shallow depths at init time") Signed-off-by: Jens Axboe --- block/bfq-iosched.c | 8 +++++++- block/blk-mq.c | 2 ++ include/linux/elevator.h | 1 + 3 files changed, 10 insertions(+), 1 deletion(-) diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c index dfb8cb0af13a..5ba1e0d841b4 100644 --- a/block/bfq-iosched.c +++ b/block/bfq-iosched.c @@ -5396,7 +5396,7 @@ static unsigned int bfq_update_depths(struct bfq_data *bfqd, return min_shallow; } -static int bfq_init_hctx(struct blk_mq_hw_ctx *hctx, unsigned int index) +static void bfq_depth_updated(struct blk_mq_hw_ctx *hctx) { struct bfq_data *bfqd = hctx->queue->elevator->elevator_data; struct blk_mq_tags *tags = hctx->sched_tags; @@ -5404,6 +5404,11 @@ static int bfq_init_hctx(struct blk_mq_hw_ctx *hctx, unsigned int index) min_shallow = bfq_update_depths(bfqd, &tags->bitmap_tags); sbitmap_queue_min_shallow_depth(&tags->bitmap_tags, min_shallow); +} + +static int bfq_init_hctx(struct blk_mq_hw_ctx *hctx, unsigned int index) +{ + bfq_depth_updated(hctx); return 0; } @@ -5826,6 +5831,7 @@ static struct elevator_type iosched_bfq_mq = { .requests_merged = bfq_requests_merged, .request_merged = bfq_request_merged, .has_work = bfq_has_work, + .depth_updated = bfq_depth_updated, .init_hctx = bfq_init_hctx, .init_sched = bfq_init_queue, .exit_sched = bfq_exit_queue, diff --git a/block/blk-mq.c b/block/blk-mq.c index 9516304a38ee..fc60ed7e940e 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -3135,6 +3135,8 @@ int blk_mq_update_nr_requests(struct request_queue *q, unsigned int nr) } if (ret) break; + if (q->elevator && q->elevator->type->ops.depth_updated) + q->elevator->type->ops.depth_updated(hctx); } if (!ret) diff --git a/include/linux/elevator.h b/include/linux/elevator.h index 2e9e2763bf47..6e8bc53740f0 100644 --- a/include/linux/elevator.h +++ b/include/linux/elevator.h @@ -31,6 +31,7 @@ struct elevator_mq_ops { void (*exit_sched)(struct elevator_queue *); int (*init_hctx)(struct blk_mq_hw_ctx *, unsigned int); void (*exit_hctx)(struct blk_mq_hw_ctx *, unsigned int); + void (*depth_updated)(struct blk_mq_hw_ctx *); bool (*allow_merge)(struct request_queue *, struct request *, struct bio *); bool (*bio_merge)(struct blk_mq_hw_ctx *, struct bio *); -- cgit v1.2.3 From 3d6770fbd9353988839611bab107e4e891506aad Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Sat, 13 Apr 2019 11:50:54 -0600 Subject: io_uring: drop io_file_put() 'file' argument Since the fget/fput handling was reworked in commit 09bb839434bd, we never call io_file_put() with state == NULL (and hence file != NULL) anymore. Remove that case. Reported-by: Al Viro Signed-off-by: Jens Axboe --- fs/io_uring.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index 24355e0c47f0..f4ddb9d23241 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -682,11 +682,9 @@ static void io_iopoll_req_issued(struct io_kiocb *req) list_add_tail(&req->list, &ctx->poll_list); } -static void io_file_put(struct io_submit_state *state, struct file *file) +static void io_file_put(struct io_submit_state *state) { - if (!state) { - fput(file); - } else if (state->file) { + if (state->file) { int diff = state->has_refs - state->used_refs; if (diff) @@ -711,7 +709,7 @@ static struct file *io_file_get(struct io_submit_state *state, int fd) state->ios_left--; return state->file; } - io_file_put(state, NULL); + io_file_put(state); } state->file = fget_many(fd, state->ios_left); if (!state->file) @@ -1671,7 +1669,7 @@ out: static void io_submit_state_end(struct io_submit_state *state) { blk_finish_plug(&state->plug); - io_file_put(state, NULL); + io_file_put(state); if (state->free_reqs) kmem_cache_free_bulk(req_cachep, state->free_reqs, &state->reqs[state->cur_req]); -- cgit v1.2.3 From 40fba00ffa431c8597ca785ea1cfa4d9f6503390 Mon Sep 17 00:00:00 2001 From: Xiaochen Shen Date: Wed, 10 Apr 2019 03:53:49 +0800 Subject: x86/resctrl: Do not repeat rdtgroup mode initialization When cache allocation is supported and the user creates a new resctrl resource group, the allocations of the new resource group are initialized to all regions that it can possibly use. At this time these regions are all that are shareable by other resource groups as well as regions that are not currently used. The new resource group's mode is also initialized to reflect this initialization and set to "shareable". The new resource group's mode is currently repeatedly initialized within the loop that configures the hardware with the resource group's default allocations. Move the initialization of the resource group's mode outside the hardware configuration loop. The resource group's mode is now initialized only once as the final step to reflect that its configured allocations are "shareable". Fixes: 95f0b77efa57 ("x86/intel_rdt: Initialize new resource group with sane defaults") Signed-off-by: Xiaochen Shen Signed-off-by: Borislav Petkov Reviewed-by: Fenghua Yu Acked-by: Reinette Chatre Cc: "H. Peter Anvin" Cc: Ingo Molnar Cc: pei.p.jia@intel.com Cc: Thomas Gleixner Cc: Tony Luck Cc: x86-ml Link: https://lkml.kernel.org/r/1554839629-5448-1-git-send-email-xiaochen.shen@intel.com --- arch/x86/kernel/cpu/resctrl/rdtgroup.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/x86/kernel/cpu/resctrl/rdtgroup.c b/arch/x86/kernel/cpu/resctrl/rdtgroup.c index 54b9eef3eea9..85212a32b54d 100644 --- a/arch/x86/kernel/cpu/resctrl/rdtgroup.c +++ b/arch/x86/kernel/cpu/resctrl/rdtgroup.c @@ -2610,9 +2610,10 @@ static int rdtgroup_init_alloc(struct rdtgroup *rdtgrp) rdt_last_cmd_puts("Failed to initialize allocations\n"); return ret; } - rdtgrp->mode = RDT_MODE_SHAREABLE; } + rdtgrp->mode = RDT_MODE_SHAREABLE; + return 0; } -- cgit v1.2.3 From f958d7b528b1b40c44cfda5eabe2d82760d868c3 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Thu, 11 Apr 2019 10:06:20 -0700 Subject: mm: make page ref count overflow check tighter and more explicit We have a VM_BUG_ON() to check that the page reference count doesn't underflow (or get close to overflow) by checking the sign of the count. That's all fine, but we actually want to allow people to use a "get page ref unless it's already very high" helper function, and we want that one to use the sign of the page ref (without triggering this VM_BUG_ON). Change the VM_BUG_ON to only check for small underflows (or _very_ close to overflowing), and ignore overflows which have strayed into negative territory. Acked-by: Matthew Wilcox Cc: Jann Horn Cc: stable@kernel.org Signed-off-by: Linus Torvalds --- include/linux/mm.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/include/linux/mm.h b/include/linux/mm.h index 80bb6408fe73..541d99b86aea 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -965,6 +965,10 @@ static inline bool is_pci_p2pdma_page(const struct page *page) } #endif /* CONFIG_DEV_PAGEMAP_OPS */ +/* 127: arbitrary random number, small enough to assemble well */ +#define page_ref_zero_or_close_to_overflow(page) \ + ((unsigned int) page_ref_count(page) + 127u <= 127u) + static inline void get_page(struct page *page) { page = compound_head(page); @@ -972,7 +976,7 @@ static inline void get_page(struct page *page) * Getting a normal page or the head of a compound page * requires to already have an elevated page->_refcount. */ - VM_BUG_ON_PAGE(page_ref_count(page) <= 0, page); + VM_BUG_ON_PAGE(page_ref_zero_or_close_to_overflow(page), page); page_ref_inc(page); } -- cgit v1.2.3 From 88b1a17dfc3ed7728316478fae0f5ad508f50397 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Thu, 11 Apr 2019 10:14:59 -0700 Subject: mm: add 'try_get_page()' helper function This is the same as the traditional 'get_page()' function, but instead of unconditionally incrementing the reference count of the page, it only does so if the count was "safe". It returns whether the reference count was incremented (and is marked __must_check, since the caller obviously has to be aware of it). Also like 'get_page()', you can't use this function unless you already had a reference to the page. The intent is that you can use this exactly like get_page(), but in situations where you want to limit the maximum reference count. The code currently does an unconditional WARN_ON_ONCE() if we ever hit the reference count issues (either zero or negative), as a notification that the conditional non-increment actually happened. NOTE! The count access for the "safety" check is inherently racy, but that doesn't matter since the buffer we use is basically half the range of the reference count (ie we look at the sign of the count). Acked-by: Matthew Wilcox Cc: Jann Horn Cc: stable@kernel.org Signed-off-by: Linus Torvalds --- include/linux/mm.h | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/include/linux/mm.h b/include/linux/mm.h index 541d99b86aea..7000ddd807e0 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -980,6 +980,15 @@ static inline void get_page(struct page *page) page_ref_inc(page); } +static inline __must_check bool try_get_page(struct page *page) +{ + page = compound_head(page); + if (WARN_ON_ONCE(page_ref_count(page) <= 0)) + return false; + page_ref_inc(page); + return true; +} + static inline void put_page(struct page *page) { page = compound_head(page); -- cgit v1.2.3 From 8fde12ca79aff9b5ba951fce1a2641901b8d8e64 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Thu, 11 Apr 2019 10:49:19 -0700 Subject: mm: prevent get_user_pages() from overflowing page refcount If the page refcount wraps around past zero, it will be freed while there are still four billion references to it. One of the possible avenues for an attacker to try to make this happen is by doing direct IO on a page multiple times. This patch makes get_user_pages() refuse to take a new page reference if there are already more than two billion references to the page. Reported-by: Jann Horn Acked-by: Matthew Wilcox Cc: stable@kernel.org Signed-off-by: Linus Torvalds --- mm/gup.c | 48 ++++++++++++++++++++++++++++++++++++------------ mm/hugetlb.c | 13 +++++++++++++ 2 files changed, 49 insertions(+), 12 deletions(-) diff --git a/mm/gup.c b/mm/gup.c index 75029649baca..81e0bdefa2cc 100644 --- a/mm/gup.c +++ b/mm/gup.c @@ -157,8 +157,12 @@ retry: goto retry; } - if (flags & FOLL_GET) - get_page(page); + if (flags & FOLL_GET) { + if (unlikely(!try_get_page(page))) { + page = ERR_PTR(-ENOMEM); + goto out; + } + } if (flags & FOLL_TOUCH) { if ((flags & FOLL_WRITE) && !pte_dirty(pte) && !PageDirty(page)) @@ -295,7 +299,10 @@ retry_locked: if (pmd_trans_unstable(pmd)) ret = -EBUSY; } else { - get_page(page); + if (unlikely(!try_get_page(page))) { + spin_unlock(ptl); + return ERR_PTR(-ENOMEM); + } spin_unlock(ptl); lock_page(page); ret = split_huge_page(page); @@ -497,7 +504,10 @@ static int get_gate_page(struct mm_struct *mm, unsigned long address, if (is_device_public_page(*page)) goto unmap; } - get_page(*page); + if (unlikely(!try_get_page(*page))) { + ret = -ENOMEM; + goto unmap; + } out: ret = 0; unmap: @@ -1393,6 +1403,20 @@ static void undo_dev_pagemap(int *nr, int nr_start, struct page **pages) } } +/* + * Return the compund head page with ref appropriately incremented, + * or NULL if that failed. + */ +static inline struct page *try_get_compound_head(struct page *page, int refs) +{ + struct page *head = compound_head(page); + if (WARN_ON_ONCE(page_ref_count(head) < 0)) + return NULL; + if (unlikely(!page_cache_add_speculative(head, refs))) + return NULL; + return head; +} + #ifdef CONFIG_ARCH_HAS_PTE_SPECIAL static int gup_pte_range(pmd_t pmd, unsigned long addr, unsigned long end, int write, struct page **pages, int *nr) @@ -1427,9 +1451,9 @@ static int gup_pte_range(pmd_t pmd, unsigned long addr, unsigned long end, VM_BUG_ON(!pfn_valid(pte_pfn(pte))); page = pte_page(pte); - head = compound_head(page); - if (!page_cache_get_speculative(head)) + head = try_get_compound_head(page, 1); + if (!head) goto pte_unmap; if (unlikely(pte_val(pte) != pte_val(*ptep))) { @@ -1568,8 +1592,8 @@ static int gup_huge_pmd(pmd_t orig, pmd_t *pmdp, unsigned long addr, refs++; } while (addr += PAGE_SIZE, addr != end); - head = compound_head(pmd_page(orig)); - if (!page_cache_add_speculative(head, refs)) { + head = try_get_compound_head(pmd_page(orig), refs); + if (!head) { *nr -= refs; return 0; } @@ -1606,8 +1630,8 @@ static int gup_huge_pud(pud_t orig, pud_t *pudp, unsigned long addr, refs++; } while (addr += PAGE_SIZE, addr != end); - head = compound_head(pud_page(orig)); - if (!page_cache_add_speculative(head, refs)) { + head = try_get_compound_head(pud_page(orig), refs); + if (!head) { *nr -= refs; return 0; } @@ -1643,8 +1667,8 @@ static int gup_huge_pgd(pgd_t orig, pgd_t *pgdp, unsigned long addr, refs++; } while (addr += PAGE_SIZE, addr != end); - head = compound_head(pgd_page(orig)); - if (!page_cache_add_speculative(head, refs)) { + head = try_get_compound_head(pgd_page(orig), refs); + if (!head) { *nr -= refs; return 0; } diff --git a/mm/hugetlb.c b/mm/hugetlb.c index 8dfdffc34a99..c220315dc533 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -4298,6 +4298,19 @@ long follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma, pfn_offset = (vaddr & ~huge_page_mask(h)) >> PAGE_SHIFT; page = pte_page(huge_ptep_get(pte)); + + /* + * Instead of doing 'try_get_page()' below in the same_page + * loop, just check the count once here. + */ + if (unlikely(page_count(page) <= 0)) { + if (pages) { + spin_unlock(ptl); + remainder = 0; + err = -ENOMEM; + break; + } + } same_page: if (pages) { pages[i] = mem_map_offset(page, pfn_offset); -- cgit v1.2.3 From 15fab63e1e57be9fdb5eec1bbc5916e9825e9acb Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Fri, 5 Apr 2019 14:02:10 -0700 Subject: fs: prevent page refcount overflow in pipe_buf_get Change pipe_buf_get() to return a bool indicating whether it succeeded in raising the refcount of the page (if the thing in the pipe is a page). This removes another mechanism for overflowing the page refcount. All callers converted to handle a failure. Reported-by: Jann Horn Signed-off-by: Matthew Wilcox Cc: stable@kernel.org Signed-off-by: Linus Torvalds --- fs/fuse/dev.c | 12 ++++++------ fs/pipe.c | 4 ++-- fs/splice.c | 12 ++++++++++-- include/linux/pipe_fs_i.h | 10 ++++++---- kernel/trace/trace.c | 6 +++++- 5 files changed, 29 insertions(+), 15 deletions(-) diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c index 809c0f2f9942..64f4de983468 100644 --- a/fs/fuse/dev.c +++ b/fs/fuse/dev.c @@ -2034,10 +2034,8 @@ static ssize_t fuse_dev_splice_write(struct pipe_inode_info *pipe, rem += pipe->bufs[(pipe->curbuf + idx) & (pipe->buffers - 1)].len; ret = -EINVAL; - if (rem < len) { - pipe_unlock(pipe); - goto out; - } + if (rem < len) + goto out_free; rem = len; while (rem) { @@ -2055,7 +2053,9 @@ static ssize_t fuse_dev_splice_write(struct pipe_inode_info *pipe, pipe->curbuf = (pipe->curbuf + 1) & (pipe->buffers - 1); pipe->nrbufs--; } else { - pipe_buf_get(pipe, ibuf); + if (!pipe_buf_get(pipe, ibuf)) + goto out_free; + *obuf = *ibuf; obuf->flags &= ~PIPE_BUF_FLAG_GIFT; obuf->len = rem; @@ -2078,11 +2078,11 @@ static ssize_t fuse_dev_splice_write(struct pipe_inode_info *pipe, ret = fuse_dev_do_write(fud, &cs, len); pipe_lock(pipe); +out_free: for (idx = 0; idx < nbuf; idx++) pipe_buf_release(pipe, &bufs[idx]); pipe_unlock(pipe); -out: kvfree(bufs); return ret; } diff --git a/fs/pipe.c b/fs/pipe.c index bdc5d3c0977d..b1543b85c14a 100644 --- a/fs/pipe.c +++ b/fs/pipe.c @@ -189,9 +189,9 @@ EXPORT_SYMBOL(generic_pipe_buf_steal); * in the tee() system call, when we duplicate the buffers in one * pipe into another. */ -void generic_pipe_buf_get(struct pipe_inode_info *pipe, struct pipe_buffer *buf) +bool generic_pipe_buf_get(struct pipe_inode_info *pipe, struct pipe_buffer *buf) { - get_page(buf->page); + return try_get_page(buf->page); } EXPORT_SYMBOL(generic_pipe_buf_get); diff --git a/fs/splice.c b/fs/splice.c index de2ede048473..f30af82b850d 100644 --- a/fs/splice.c +++ b/fs/splice.c @@ -1588,7 +1588,11 @@ retry: * Get a reference to this pipe buffer, * so we can copy the contents over. */ - pipe_buf_get(ipipe, ibuf); + if (!pipe_buf_get(ipipe, ibuf)) { + if (ret == 0) + ret = -EFAULT; + break; + } *obuf = *ibuf; /* @@ -1660,7 +1664,11 @@ static int link_pipe(struct pipe_inode_info *ipipe, * Get a reference to this pipe buffer, * so we can copy the contents over. */ - pipe_buf_get(ipipe, ibuf); + if (!pipe_buf_get(ipipe, ibuf)) { + if (ret == 0) + ret = -EFAULT; + break; + } obuf = opipe->bufs + nbuf; *obuf = *ibuf; diff --git a/include/linux/pipe_fs_i.h b/include/linux/pipe_fs_i.h index 5a3bb3b7c9ad..3f2a42c11e20 100644 --- a/include/linux/pipe_fs_i.h +++ b/include/linux/pipe_fs_i.h @@ -108,18 +108,20 @@ struct pipe_buf_operations { /* * Get a reference to the pipe buffer. */ - void (*get)(struct pipe_inode_info *, struct pipe_buffer *); + bool (*get)(struct pipe_inode_info *, struct pipe_buffer *); }; /** * pipe_buf_get - get a reference to a pipe_buffer * @pipe: the pipe that the buffer belongs to * @buf: the buffer to get a reference to + * + * Return: %true if the reference was successfully obtained. */ -static inline void pipe_buf_get(struct pipe_inode_info *pipe, +static inline __must_check bool pipe_buf_get(struct pipe_inode_info *pipe, struct pipe_buffer *buf) { - buf->ops->get(pipe, buf); + return buf->ops->get(pipe, buf); } /** @@ -178,7 +180,7 @@ struct pipe_inode_info *alloc_pipe_info(void); void free_pipe_info(struct pipe_inode_info *); /* Generic pipe buffer ops functions */ -void generic_pipe_buf_get(struct pipe_inode_info *, struct pipe_buffer *); +bool generic_pipe_buf_get(struct pipe_inode_info *, struct pipe_buffer *); int generic_pipe_buf_confirm(struct pipe_inode_info *, struct pipe_buffer *); int generic_pipe_buf_steal(struct pipe_inode_info *, struct pipe_buffer *); void generic_pipe_buf_release(struct pipe_inode_info *, struct pipe_buffer *); diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index c4238b441624..0f300d488c9f 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -6835,12 +6835,16 @@ static void buffer_pipe_buf_release(struct pipe_inode_info *pipe, buf->private = 0; } -static void buffer_pipe_buf_get(struct pipe_inode_info *pipe, +static bool buffer_pipe_buf_get(struct pipe_inode_info *pipe, struct pipe_buffer *buf) { struct buffer_ref *ref = (struct buffer_ref *)buf->private; + if (ref->ref > INT_MAX/2) + return false; + ref->ref++; + return true; } /* Pipe buffer operations for a buffer. */ -- cgit v1.2.3 From c543cb4a5f07e09237ec0fc2c60c9f131b2c79ad Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sat, 13 Apr 2019 17:32:21 -0700 Subject: ipv4: ensure rcu_read_lock() in ipv4_link_failure() fib_compute_spec_dst() needs to be called under rcu protection. syzbot reported : WARNING: suspicious RCU usage 5.1.0-rc4+ #165 Not tainted include/linux/inetdevice.h:220 suspicious rcu_dereference_check() usage! other info that might help us debug this: rcu_scheduler_active = 2, debug_locks = 1 1 lock held by swapper/0/0: #0: 0000000051b67925 ((&n->timer)){+.-.}, at: lockdep_copy_map include/linux/lockdep.h:170 [inline] #0: 0000000051b67925 ((&n->timer)){+.-.}, at: call_timer_fn+0xda/0x720 kernel/time/timer.c:1315 stack backtrace: CPU: 0 PID: 0 Comm: swapper/0 Not tainted 5.1.0-rc4+ #165 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Call Trace: __dump_stack lib/dump_stack.c:77 [inline] dump_stack+0x172/0x1f0 lib/dump_stack.c:113 lockdep_rcu_suspicious+0x153/0x15d kernel/locking/lockdep.c:5162 __in_dev_get_rcu include/linux/inetdevice.h:220 [inline] fib_compute_spec_dst+0xbbd/0x1030 net/ipv4/fib_frontend.c:294 spec_dst_fill net/ipv4/ip_options.c:245 [inline] __ip_options_compile+0x15a7/0x1a10 net/ipv4/ip_options.c:343 ipv4_link_failure+0x172/0x400 net/ipv4/route.c:1195 dst_link_failure include/net/dst.h:427 [inline] arp_error_report+0xd1/0x1c0 net/ipv4/arp.c:297 neigh_invalidate+0x24b/0x570 net/core/neighbour.c:995 neigh_timer_handler+0xc35/0xf30 net/core/neighbour.c:1081 call_timer_fn+0x190/0x720 kernel/time/timer.c:1325 expire_timers kernel/time/timer.c:1362 [inline] __run_timers kernel/time/timer.c:1681 [inline] __run_timers kernel/time/timer.c:1649 [inline] run_timer_softirq+0x652/0x1700 kernel/time/timer.c:1694 __do_softirq+0x266/0x95a kernel/softirq.c:293 invoke_softirq kernel/softirq.c:374 [inline] irq_exit+0x180/0x1d0 kernel/softirq.c:414 exiting_irq arch/x86/include/asm/apic.h:536 [inline] smp_apic_timer_interrupt+0x14a/0x570 arch/x86/kernel/apic/apic.c:1062 apic_timer_interrupt+0xf/0x20 arch/x86/entry/entry_64.S:807 Fixes: ed0de45a1008 ("ipv4: recompile ip options in ipv4_link_failure") Signed-off-by: Eric Dumazet Reported-by: syzbot Cc: Stephen Suryaputra Signed-off-by: David S. Miller --- net/ipv4/route.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 0206789bc2b7..88ce038dd495 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -1185,14 +1185,20 @@ static struct dst_entry *ipv4_dst_check(struct dst_entry *dst, u32 cookie) static void ipv4_link_failure(struct sk_buff *skb) { - struct rtable *rt; struct ip_options opt; + struct rtable *rt; + int res; /* Recompile ip options since IPCB may not be valid anymore. */ memset(&opt, 0, sizeof(opt)); opt.optlen = ip_hdr(skb)->ihl*4 - sizeof(struct iphdr); - if (__ip_options_compile(dev_net(skb->dev), &opt, skb, NULL)) + + rcu_read_lock(); + res = __ip_options_compile(dev_net(skb->dev), &opt, skb, NULL); + rcu_read_unlock(); + + if (res) return; __icmp_send(skb, ICMP_DEST_UNREACH, ICMP_HOST_UNREACH, 0, &opt); -- cgit v1.2.3 From 9ac6bb1414ac0d45fe9cefbd1f5b06f0e1a3c98a Mon Sep 17 00:00:00 2001 From: Denis Bolotin Date: Sun, 14 Apr 2019 17:23:05 +0300 Subject: qed: Delete redundant doorbell recovery types DB_REC_DRY_RUN (running doorbell recovery without sending doorbells) is never used. DB_REC_ONCE (send a single doorbell from the doorbell recovery) is not needed anymore because by running the periodic handler we make sure we check the overflow status later instead. This patch is needed because in the next patches, the only doorbell recovery type being used is DB_REC_REAL_DEAL, and the fixes are much cleaner without this enum. Signed-off-by: Denis Bolotin Signed-off-by: Michal Kalderon Signed-off-by: Ariel Elior Signed-off-by: David S. Miller --- drivers/net/ethernet/qlogic/qed/qed.h | 3 +- drivers/net/ethernet/qlogic/qed/qed_dev.c | 69 ++++++++++++------------------- drivers/net/ethernet/qlogic/qed/qed_int.c | 6 +-- drivers/net/ethernet/qlogic/qed/qed_int.h | 4 +- 4 files changed, 31 insertions(+), 51 deletions(-) diff --git a/drivers/net/ethernet/qlogic/qed/qed.h b/drivers/net/ethernet/qlogic/qed/qed.h index 43a57ec296fd..1514ec93b989 100644 --- a/drivers/net/ethernet/qlogic/qed/qed.h +++ b/drivers/net/ethernet/qlogic/qed/qed.h @@ -920,8 +920,7 @@ u16 qed_get_cm_pq_idx_llt_mtc(struct qed_hwfn *p_hwfn, u8 tc); /* doorbell recovery mechanism */ void qed_db_recovery_dp(struct qed_hwfn *p_hwfn); -void qed_db_recovery_execute(struct qed_hwfn *p_hwfn, - enum qed_db_rec_exec db_exec); +void qed_db_recovery_execute(struct qed_hwfn *p_hwfn); bool qed_edpm_enabled(struct qed_hwfn *p_hwfn); /* Other Linux specific common definitions */ diff --git a/drivers/net/ethernet/qlogic/qed/qed_dev.c b/drivers/net/ethernet/qlogic/qed/qed_dev.c index 9df8c4b3b54e..da9df81d651d 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_dev.c +++ b/drivers/net/ethernet/qlogic/qed/qed_dev.c @@ -300,26 +300,19 @@ void qed_db_recovery_dp(struct qed_hwfn *p_hwfn) /* Ring the doorbell of a single doorbell recovery entry */ static void qed_db_recovery_ring(struct qed_hwfn *p_hwfn, - struct qed_db_recovery_entry *db_entry, - enum qed_db_rec_exec db_exec) -{ - if (db_exec != DB_REC_ONCE) { - /* Print according to width */ - if (db_entry->db_width == DB_REC_WIDTH_32B) { - DP_VERBOSE(p_hwfn, QED_MSG_SPQ, - "%s doorbell address %p data %x\n", - db_exec == DB_REC_DRY_RUN ? - "would have rung" : "ringing", - db_entry->db_addr, - *(u32 *)db_entry->db_data); - } else { - DP_VERBOSE(p_hwfn, QED_MSG_SPQ, - "%s doorbell address %p data %llx\n", - db_exec == DB_REC_DRY_RUN ? - "would have rung" : "ringing", - db_entry->db_addr, - *(u64 *)(db_entry->db_data)); - } + struct qed_db_recovery_entry *db_entry) +{ + /* Print according to width */ + if (db_entry->db_width == DB_REC_WIDTH_32B) { + DP_VERBOSE(p_hwfn, QED_MSG_SPQ, + "ringing doorbell address %p data %x\n", + db_entry->db_addr, + *(u32 *)db_entry->db_data); + } else { + DP_VERBOSE(p_hwfn, QED_MSG_SPQ, + "ringing doorbell address %p data %llx\n", + db_entry->db_addr, + *(u64 *)(db_entry->db_data)); } /* Sanity */ @@ -334,14 +327,12 @@ static void qed_db_recovery_ring(struct qed_hwfn *p_hwfn, wmb(); /* Ring the doorbell */ - if (db_exec == DB_REC_REAL_DEAL || db_exec == DB_REC_ONCE) { - if (db_entry->db_width == DB_REC_WIDTH_32B) - DIRECT_REG_WR(db_entry->db_addr, - *(u32 *)(db_entry->db_data)); - else - DIRECT_REG_WR64(db_entry->db_addr, - *(u64 *)(db_entry->db_data)); - } + if (db_entry->db_width == DB_REC_WIDTH_32B) + DIRECT_REG_WR(db_entry->db_addr, + *(u32 *)(db_entry->db_data)); + else + DIRECT_REG_WR64(db_entry->db_addr, + *(u64 *)(db_entry->db_data)); /* Flush the write combined buffer. Next doorbell may come from a * different entity to the same address... @@ -350,29 +341,21 @@ static void qed_db_recovery_ring(struct qed_hwfn *p_hwfn, } /* Traverse the doorbell recovery entry list and ring all the doorbells */ -void qed_db_recovery_execute(struct qed_hwfn *p_hwfn, - enum qed_db_rec_exec db_exec) +void qed_db_recovery_execute(struct qed_hwfn *p_hwfn) { struct qed_db_recovery_entry *db_entry = NULL; - if (db_exec != DB_REC_ONCE) { - DP_NOTICE(p_hwfn, - "Executing doorbell recovery. Counter was %d\n", - p_hwfn->db_recovery_info.db_recovery_counter); + DP_NOTICE(p_hwfn, "Executing doorbell recovery. Counter was %d\n", + p_hwfn->db_recovery_info.db_recovery_counter); - /* Track amount of times recovery was executed */ - p_hwfn->db_recovery_info.db_recovery_counter++; - } + /* Track amount of times recovery was executed */ + p_hwfn->db_recovery_info.db_recovery_counter++; /* Protect the list */ spin_lock_bh(&p_hwfn->db_recovery_info.lock); list_for_each_entry(db_entry, - &p_hwfn->db_recovery_info.list, list_entry) { - qed_db_recovery_ring(p_hwfn, db_entry, db_exec); - if (db_exec == DB_REC_ONCE) - break; - } - + &p_hwfn->db_recovery_info.list, list_entry) + qed_db_recovery_ring(p_hwfn, db_entry); spin_unlock_bh(&p_hwfn->db_recovery_info.lock); } diff --git a/drivers/net/ethernet/qlogic/qed/qed_int.c b/drivers/net/ethernet/qlogic/qed/qed_int.c index e23980e301b6..3546e253c75f 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_int.c +++ b/drivers/net/ethernet/qlogic/qed/qed_int.c @@ -411,10 +411,8 @@ int qed_db_rec_handler(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt) overflow = qed_rd(p_hwfn, p_ptt, DORQ_REG_PF_OVFL_STICKY); DP_NOTICE(p_hwfn, "PF Overflow sticky 0x%x\n", overflow); - if (!overflow) { - qed_db_recovery_execute(p_hwfn, DB_REC_ONCE); + if (!overflow) return 0; - } if (qed_edpm_enabled(p_hwfn)) { rc = qed_db_rec_flush_queue(p_hwfn, p_ptt); @@ -429,7 +427,7 @@ int qed_db_rec_handler(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt) qed_wr(p_hwfn, p_ptt, DORQ_REG_PF_OVFL_STICKY, 0x0); /* Repeat all last doorbells (doorbell drop recovery) */ - qed_db_recovery_execute(p_hwfn, DB_REC_REAL_DEAL); + qed_db_recovery_execute(p_hwfn); return 0; } diff --git a/drivers/net/ethernet/qlogic/qed/qed_int.h b/drivers/net/ethernet/qlogic/qed/qed_int.h index 1f356ed4f761..d473b522afc5 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_int.h +++ b/drivers/net/ethernet/qlogic/qed/qed_int.h @@ -192,8 +192,8 @@ void qed_int_disable_post_isr_release(struct qed_dev *cdev); /** * @brief - Doorbell Recovery handler. - * Run DB_REAL_DEAL doorbell recovery in case of PF overflow - * (and flush DORQ if needed), otherwise run DB_REC_ONCE. + * Run doorbell recovery in case of PF overflow (and flush DORQ if + * needed). * * @param p_hwfn * @param p_ptt -- cgit v1.2.3 From b61b04ad81d5f975349d66abbecabf96ba211140 Mon Sep 17 00:00:00 2001 From: Denis Bolotin Date: Sun, 14 Apr 2019 17:23:06 +0300 Subject: qed: Fix the doorbell address sanity check Fix the condition which verifies that doorbell address is inside the doorbell bar by checking that the end of the address is within range as well. Signed-off-by: Denis Bolotin Signed-off-by: Michal Kalderon Signed-off-by: Ariel Elior Signed-off-by: David S. Miller --- drivers/net/ethernet/qlogic/qed/qed_dev.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/drivers/net/ethernet/qlogic/qed/qed_dev.c b/drivers/net/ethernet/qlogic/qed/qed_dev.c index da9df81d651d..866cdc86a3f2 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_dev.c +++ b/drivers/net/ethernet/qlogic/qed/qed_dev.c @@ -102,11 +102,15 @@ static void qed_db_recovery_dp_entry(struct qed_hwfn *p_hwfn, /* Doorbell address sanity (address within doorbell bar range) */ static bool qed_db_rec_sanity(struct qed_dev *cdev, - void __iomem *db_addr, void *db_data) + void __iomem *db_addr, + enum qed_db_rec_width db_width, + void *db_data) { + u32 width = (db_width == DB_REC_WIDTH_32B) ? 32 : 64; + /* Make sure doorbell address is within the doorbell bar */ if (db_addr < cdev->doorbells || - (u8 __iomem *)db_addr > + (u8 __iomem *)db_addr + width > (u8 __iomem *)cdev->doorbells + cdev->db_size) { WARN(true, "Illegal doorbell address: %p. Legal range for doorbell addresses is [%p..%p]\n", @@ -159,7 +163,7 @@ int qed_db_recovery_add(struct qed_dev *cdev, } /* Sanitize doorbell address */ - if (!qed_db_rec_sanity(cdev, db_addr, db_data)) + if (!qed_db_rec_sanity(cdev, db_addr, db_width, db_data)) return -EINVAL; /* Obtain hwfn from doorbell address */ @@ -205,10 +209,6 @@ int qed_db_recovery_del(struct qed_dev *cdev, return 0; } - /* Sanitize doorbell address */ - if (!qed_db_rec_sanity(cdev, db_addr, db_data)) - return -EINVAL; - /* Obtain hwfn from doorbell address */ p_hwfn = qed_db_rec_find_hwfn(cdev, db_addr); @@ -317,7 +317,7 @@ static void qed_db_recovery_ring(struct qed_hwfn *p_hwfn, /* Sanity */ if (!qed_db_rec_sanity(p_hwfn->cdev, db_entry->db_addr, - db_entry->db_data)) + db_entry->db_width, db_entry->db_data)) return; /* Flush the write combined buffer. Since there are multiple doorbelling -- cgit v1.2.3 From d4476b8a6151b2dd86c09b5acec64f66430db55d Mon Sep 17 00:00:00 2001 From: Denis Bolotin Date: Sun, 14 Apr 2019 17:23:07 +0300 Subject: qed: Fix missing DORQ attentions When the DORQ (doorbell block) is overflowed, all PFs get attentions at the same time. If one PF finished handling the attention before another PF even started, the second PF might miss the DORQ's attention bit and not handle the attention at all. If the DORQ attention is missed and the issue is not resolved, another attention will not be sent, therefore each attention is treated as a potential DORQ attention. As a result, the attention callback is called more frequently so the debug print was moved to reduce its quantity. The number of periodic doorbell recovery handler schedules was reduced because it was the previous way to mitigating the missed attention issue. Signed-off-by: Denis Bolotin Signed-off-by: Michal Kalderon Signed-off-by: Ariel Elior Signed-off-by: David S. Miller --- drivers/net/ethernet/qlogic/qed/qed.h | 1 + drivers/net/ethernet/qlogic/qed/qed_int.c | 20 ++++++++++++++++++-- drivers/net/ethernet/qlogic/qed/qed_main.c | 2 +- 3 files changed, 20 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/qlogic/qed/qed.h b/drivers/net/ethernet/qlogic/qed/qed.h index 1514ec93b989..fcc2d745c375 100644 --- a/drivers/net/ethernet/qlogic/qed/qed.h +++ b/drivers/net/ethernet/qlogic/qed/qed.h @@ -436,6 +436,7 @@ struct qed_db_recovery_info { /* Lock to protect the doorbell recovery mechanism list */ spinlock_t lock; + bool dorq_attn; u32 db_recovery_counter; }; diff --git a/drivers/net/ethernet/qlogic/qed/qed_int.c b/drivers/net/ethernet/qlogic/qed/qed_int.c index 3546e253c75f..fe3286fc956b 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_int.c +++ b/drivers/net/ethernet/qlogic/qed/qed_int.c @@ -438,17 +438,19 @@ static int qed_dorq_attn_cb(struct qed_hwfn *p_hwfn) struct qed_ptt *p_ptt = p_hwfn->p_dpc_ptt; int rc; - int_sts = qed_rd(p_hwfn, p_ptt, DORQ_REG_INT_STS); - DP_NOTICE(p_hwfn->cdev, "DORQ attention. int_sts was %x\n", int_sts); + p_hwfn->db_recovery_info.dorq_attn = true; /* int_sts may be zero since all PFs were interrupted for doorbell * overflow but another one already handled it. Can abort here. If * This PF also requires overflow recovery we will be interrupted again. * The masked almost full indication may also be set. Ignoring. */ + int_sts = qed_rd(p_hwfn, p_ptt, DORQ_REG_INT_STS); if (!(int_sts & ~DORQ_REG_INT_STS_DORQ_FIFO_AFULL)) return 0; + DP_NOTICE(p_hwfn->cdev, "DORQ attention. int_sts was %x\n", int_sts); + /* check if db_drop or overflow happened */ if (int_sts & (DORQ_REG_INT_STS_DB_DROP | DORQ_REG_INT_STS_DORQ_FIFO_OVFL_ERR)) { @@ -505,6 +507,17 @@ static int qed_dorq_attn_cb(struct qed_hwfn *p_hwfn) return -EINVAL; } +static void qed_dorq_attn_handler(struct qed_hwfn *p_hwfn) +{ + if (p_hwfn->db_recovery_info.dorq_attn) + goto out; + + /* Call DORQ callback if the attention was missed */ + qed_dorq_attn_cb(p_hwfn); +out: + p_hwfn->db_recovery_info.dorq_attn = false; +} + /* Instead of major changes to the data-structure, we have a some 'special' * identifiers for sources that changed meaning between adapters. */ @@ -1078,6 +1091,9 @@ static int qed_int_deassertion(struct qed_hwfn *p_hwfn, } } + /* Handle missed DORQ attention */ + qed_dorq_attn_handler(p_hwfn); + /* Clear IGU indication for the deasserted bits */ DIRECT_REG_WR((u8 __iomem *)p_hwfn->regview + GTT_BAR0_MAP_REG_IGU_CMD + diff --git a/drivers/net/ethernet/qlogic/qed/qed_main.c b/drivers/net/ethernet/qlogic/qed/qed_main.c index f164d4acebcb..6de23b56b294 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_main.c +++ b/drivers/net/ethernet/qlogic/qed/qed_main.c @@ -970,7 +970,7 @@ static void qed_update_pf_params(struct qed_dev *cdev, } } -#define QED_PERIODIC_DB_REC_COUNT 100 +#define QED_PERIODIC_DB_REC_COUNT 10 #define QED_PERIODIC_DB_REC_INTERVAL_MS 100 #define QED_PERIODIC_DB_REC_INTERVAL \ msecs_to_jiffies(QED_PERIODIC_DB_REC_INTERVAL_MS) -- cgit v1.2.3 From 0d72c2ac89185f179da1e8a91c40c82f3fa38f0b Mon Sep 17 00:00:00 2001 From: Denis Bolotin Date: Sun, 14 Apr 2019 17:23:08 +0300 Subject: qed: Fix the DORQ's attentions handling Separate the overflow handling from the hardware interrupt status analysis. The interrupt status is a single register and is common for all PFs. The first PF reading the register is not necessarily the one who overflowed. All PFs must check their overflow status on every attention. In this change we clear the sticky indication in the attention handler to allow doorbells to be processed again as soon as possible, but running the doorbell recovery is scheduled for the periodic handler to reduce the time spent in the attention handler. Checking the need for DORQ flush was changed to "db_bar_no_edpm" because qed_edpm_enabled()'s result could change dynamically and might have prevented a needed flush. Signed-off-by: Denis Bolotin Signed-off-by: Michal Kalderon Signed-off-by: Ariel Elior Signed-off-by: David S. Miller --- drivers/net/ethernet/qlogic/qed/qed.h | 3 ++ drivers/net/ethernet/qlogic/qed/qed_int.c | 61 +++++++++++++++++++++++-------- 2 files changed, 48 insertions(+), 16 deletions(-) diff --git a/drivers/net/ethernet/qlogic/qed/qed.h b/drivers/net/ethernet/qlogic/qed/qed.h index fcc2d745c375..127c89b22ef0 100644 --- a/drivers/net/ethernet/qlogic/qed/qed.h +++ b/drivers/net/ethernet/qlogic/qed/qed.h @@ -431,6 +431,8 @@ struct qed_qm_info { u8 num_pf_rls; }; +#define QED_OVERFLOW_BIT 1 + struct qed_db_recovery_info { struct list_head list; @@ -438,6 +440,7 @@ struct qed_db_recovery_info { spinlock_t lock; bool dorq_attn; u32 db_recovery_counter; + unsigned long overflow; }; struct storm_stats { diff --git a/drivers/net/ethernet/qlogic/qed/qed_int.c b/drivers/net/ethernet/qlogic/qed/qed_int.c index fe3286fc956b..8848d5bed6e5 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_int.c +++ b/drivers/net/ethernet/qlogic/qed/qed_int.c @@ -378,6 +378,9 @@ static int qed_db_rec_flush_queue(struct qed_hwfn *p_hwfn, u32 count = QED_DB_REC_COUNT; u32 usage = 1; + /* Flush any pending (e)dpms as they may never arrive */ + qed_wr(p_hwfn, p_ptt, DORQ_REG_DPM_FORCE_ABORT, 0x1); + /* wait for usage to zero or count to run out. This is necessary since * EDPM doorbell transactions can take multiple 64b cycles, and as such * can "split" over the pci. Possibly, the doorbell drop can happen with @@ -406,23 +409,24 @@ static int qed_db_rec_flush_queue(struct qed_hwfn *p_hwfn, int qed_db_rec_handler(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt) { - u32 overflow; + u32 attn_ovfl, cur_ovfl; int rc; - overflow = qed_rd(p_hwfn, p_ptt, DORQ_REG_PF_OVFL_STICKY); - DP_NOTICE(p_hwfn, "PF Overflow sticky 0x%x\n", overflow); - if (!overflow) + attn_ovfl = test_and_clear_bit(QED_OVERFLOW_BIT, + &p_hwfn->db_recovery_info.overflow); + cur_ovfl = qed_rd(p_hwfn, p_ptt, DORQ_REG_PF_OVFL_STICKY); + if (!cur_ovfl && !attn_ovfl) return 0; - if (qed_edpm_enabled(p_hwfn)) { + DP_NOTICE(p_hwfn, "PF Overflow sticky: attn %u current %u\n", + attn_ovfl, cur_ovfl); + + if (cur_ovfl && !p_hwfn->db_bar_no_edpm) { rc = qed_db_rec_flush_queue(p_hwfn, p_ptt); if (rc) return rc; } - /* Flush any pending (e)dpm as they may never arrive */ - qed_wr(p_hwfn, p_ptt, DORQ_REG_DPM_FORCE_ABORT, 0x1); - /* Release overflow sticky indication (stop silently dropping everything) */ qed_wr(p_hwfn, p_ptt, DORQ_REG_PF_OVFL_STICKY, 0x0); @@ -432,13 +436,35 @@ int qed_db_rec_handler(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt) return 0; } -static int qed_dorq_attn_cb(struct qed_hwfn *p_hwfn) +static void qed_dorq_attn_overflow(struct qed_hwfn *p_hwfn) { - u32 int_sts, first_drop_reason, details, address, all_drops_reason; struct qed_ptt *p_ptt = p_hwfn->p_dpc_ptt; + u32 overflow; int rc; - p_hwfn->db_recovery_info.dorq_attn = true; + overflow = qed_rd(p_hwfn, p_ptt, DORQ_REG_PF_OVFL_STICKY); + if (!overflow) + goto out; + + /* Run PF doorbell recovery in next periodic handler */ + set_bit(QED_OVERFLOW_BIT, &p_hwfn->db_recovery_info.overflow); + + if (!p_hwfn->db_bar_no_edpm) { + rc = qed_db_rec_flush_queue(p_hwfn, p_ptt); + if (rc) + goto out; + } + + qed_wr(p_hwfn, p_ptt, DORQ_REG_PF_OVFL_STICKY, 0x0); +out: + /* Schedule the handler even if overflow was not detected */ + qed_periodic_db_rec_start(p_hwfn); +} + +static int qed_dorq_attn_int_sts(struct qed_hwfn *p_hwfn) +{ + u32 int_sts, first_drop_reason, details, address, all_drops_reason; + struct qed_ptt *p_ptt = p_hwfn->p_dpc_ptt; /* int_sts may be zero since all PFs were interrupted for doorbell * overflow but another one already handled it. Can abort here. If @@ -477,11 +503,6 @@ static int qed_dorq_attn_cb(struct qed_hwfn *p_hwfn) GET_FIELD(details, QED_DORQ_ATTENTION_SIZE) * 4, first_drop_reason, all_drops_reason); - rc = qed_db_rec_handler(p_hwfn, p_ptt); - qed_periodic_db_rec_start(p_hwfn); - if (rc) - return rc; - /* Clear the doorbell drop details and prepare for next drop */ qed_wr(p_hwfn, p_ptt, DORQ_REG_DB_DROP_DETAILS_REL, 0); @@ -507,6 +528,14 @@ static int qed_dorq_attn_cb(struct qed_hwfn *p_hwfn) return -EINVAL; } +static int qed_dorq_attn_cb(struct qed_hwfn *p_hwfn) +{ + p_hwfn->db_recovery_info.dorq_attn = true; + qed_dorq_attn_overflow(p_hwfn); + + return qed_dorq_attn_int_sts(p_hwfn); +} + static void qed_dorq_attn_handler(struct qed_hwfn *p_hwfn) { if (p_hwfn->db_recovery_info.dorq_attn) -- cgit v1.2.3 From 2f5fb19341883bb6e37da351bc3700489d8506a7 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sun, 14 Apr 2019 19:51:06 +0200 Subject: x86/speculation: Prevent deadlock on ssb_state::lock Mikhail reported a lockdep splat related to the AMD specific ssb_state lock: CPU0 CPU1 lock(&st->lock); local_irq_disable(); lock(&(&sighand->siglock)->rlock); lock(&st->lock); lock(&(&sighand->siglock)->rlock); *** DEADLOCK *** The connection between sighand->siglock and st->lock comes through seccomp, which takes st->lock while holding sighand->siglock. Make sure interrupts are disabled when __speculation_ctrl_update() is invoked via prctl() -> speculation_ctrl_update(). Add a lockdep assert to catch future offenders. Fixes: 1f50ddb4f418 ("x86/speculation: Handle HT correctly on AMD") Reported-by: Mikhail Gavrilov Signed-off-by: Thomas Gleixner Tested-by: Mikhail Gavrilov Cc: Thomas Lendacky Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/alpine.DEB.2.21.1904141948200.4917@nanos.tec.linutronix.de --- arch/x86/kernel/process.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index 58ac7be52c7a..957eae13b370 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -426,6 +426,8 @@ static __always_inline void __speculation_ctrl_update(unsigned long tifp, u64 msr = x86_spec_ctrl_base; bool updmsr = false; + lockdep_assert_irqs_disabled(); + /* * If TIF_SSBD is different, select the proper mitigation * method. Note that if SSBD mitigation is disabled or permanentely @@ -477,10 +479,12 @@ static unsigned long speculation_ctrl_update_tif(struct task_struct *tsk) void speculation_ctrl_update(unsigned long tif) { + unsigned long flags; + /* Forced update. Make sure all relevant TIF flags are different */ - preempt_disable(); + local_irq_save(flags); __speculation_ctrl_update(~tif, tif); - preempt_enable(); + local_irq_restore(flags); } /* Called from seccomp/prctl update */ -- cgit v1.2.3 From 69f23a09daf9790acb801aaef4bc7aea6f69eec1 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sun, 14 Apr 2019 11:02:05 -0700 Subject: rtnetlink: fix rtnl_valid_stats_req() nlmsg_len check Jakub forgot to either use nlmsg_len() or nlmsg_msg_size(), allowing KMSAN to detect a possible uninit-value in rtnl_stats_get BUG: KMSAN: uninit-value in rtnl_stats_get+0x6d9/0x11d0 net/core/rtnetlink.c:4997 CPU: 0 PID: 10428 Comm: syz-executor034 Not tainted 5.1.0-rc2+ #24 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Call Trace: __dump_stack lib/dump_stack.c:77 [inline] dump_stack+0x173/0x1d0 lib/dump_stack.c:113 kmsan_report+0x131/0x2a0 mm/kmsan/kmsan.c:619 __msan_warning+0x7a/0xf0 mm/kmsan/kmsan_instr.c:310 rtnl_stats_get+0x6d9/0x11d0 net/core/rtnetlink.c:4997 rtnetlink_rcv_msg+0x115b/0x1550 net/core/rtnetlink.c:5192 netlink_rcv_skb+0x431/0x620 net/netlink/af_netlink.c:2485 rtnetlink_rcv+0x50/0x60 net/core/rtnetlink.c:5210 netlink_unicast_kernel net/netlink/af_netlink.c:1310 [inline] netlink_unicast+0xf3e/0x1020 net/netlink/af_netlink.c:1336 netlink_sendmsg+0x127f/0x1300 net/netlink/af_netlink.c:1925 sock_sendmsg_nosec net/socket.c:622 [inline] sock_sendmsg net/socket.c:632 [inline] ___sys_sendmsg+0xdb3/0x1220 net/socket.c:2137 __sys_sendmsg net/socket.c:2175 [inline] __do_sys_sendmsg net/socket.c:2184 [inline] __se_sys_sendmsg+0x305/0x460 net/socket.c:2182 __x64_sys_sendmsg+0x4a/0x70 net/socket.c:2182 do_syscall_64+0xbc/0xf0 arch/x86/entry/common.c:291 entry_SYSCALL_64_after_hwframe+0x63/0xe7 Fixes: 51bc860d4a99 ("rtnetlink: stats: validate attributes in get as well as dumps") Signed-off-by: Eric Dumazet Reported-by: syzbot Cc: Jakub Kicinski Signed-off-by: David S. Miller --- net/core/rtnetlink.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index a51cab95ba64..220c56e93659 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -4948,7 +4948,7 @@ static int rtnl_valid_stats_req(const struct nlmsghdr *nlh, bool strict_check, { struct if_stats_msg *ifsm; - if (nlh->nlmsg_len < sizeof(*ifsm)) { + if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*ifsm))) { NL_SET_ERR_MSG(extack, "Invalid header for stats dump"); return -EINVAL; } -- cgit v1.2.3 From dc4060a5dc2557e6b5aa813bf5b73677299d62d2 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 14 Apr 2019 15:17:41 -0700 Subject: Linux 5.1-rc5 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 15c8251d4d5e..71fd5c2ce067 100644 --- a/Makefile +++ b/Makefile @@ -2,7 +2,7 @@ VERSION = 5 PATCHLEVEL = 1 SUBLEVEL = 0 -EXTRAVERSION = -rc4 +EXTRAVERSION = -rc5 NAME = Shy Crocodile # *DOCUMENTATION* -- cgit v1.2.3 From c238bfe0be9ef7420f7669a69e27c8c8f4d8a568 Mon Sep 17 00:00:00 2001 From: David Francis Date: Fri, 29 Mar 2019 13:23:15 -0400 Subject: drm/amd/display: If one stream full updates, full update all planes [Why] On some compositors, with two monitors attached, VT terminal switch can cause a graphical issue by the following means: There are two streams, one for each monitor. Each stream has one plane current state: M1:S1->P1 M2:S2->P2 The user calls for a terminal switch and a commit is made to change both planes to linear swizzle mode. In atomic check, a new dc_state is constructed with new planes on each stream new state: M1:S1->P3 M2:S2->P4 In commit tail, each stream is committed, one at a time. The first stream (S1) updates properly, triggerring a full update and replacing the state current state: M1:S1->P3 M2:S2->P4 The update for S2 comes in, but dc detects that there is no difference between the stream and plane in the new and current states, and so triggers a fast update. The fast update does not program swizzle, so the second monitor is corrupted [How] Add a flag to dc_plane_state that forces full updates When a stream undergoes a full update, set this flag on all changed planes, then clear it on the current stream Subsequent streams will get full updates as a result Signed-off-by: David Francis Signed-off-by: Nicholas Kazlauskas Reviewed-by: Roman Li Acked-by: Bhawanpreet Lakha Acked-by: Nicholas Kazlauskas Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/core/dc.c | 19 +++++++++++++++++++ drivers/gpu/drm/amd/display/dc/dc.h | 3 +++ 2 files changed, 22 insertions(+) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc.c b/drivers/gpu/drm/amd/display/dc/core/dc.c index c68fbd55db3c..a6cda201c964 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc.c @@ -1377,6 +1377,11 @@ static enum surface_update_type det_surface_update(const struct dc *dc, return UPDATE_TYPE_FULL; } + if (u->surface->force_full_update) { + update_flags->bits.full_update = 1; + return UPDATE_TYPE_FULL; + } + type = get_plane_info_update_type(u); elevate_update_type(&overall_type, type); @@ -1802,6 +1807,14 @@ void dc_commit_updates_for_stream(struct dc *dc, } dc_resource_state_copy_construct(state, context); + + for (i = 0; i < dc->res_pool->pipe_count; i++) { + struct pipe_ctx *new_pipe = &context->res_ctx.pipe_ctx[i]; + struct pipe_ctx *old_pipe = &dc->current_state->res_ctx.pipe_ctx[i]; + + if (new_pipe->plane_state && new_pipe->plane_state != old_pipe->plane_state) + new_pipe->plane_state->force_full_update = true; + } } @@ -1838,6 +1851,12 @@ void dc_commit_updates_for_stream(struct dc *dc, dc->current_state = context; dc_release_state(old); + for (i = 0; i < dc->res_pool->pipe_count; i++) { + struct pipe_ctx *pipe_ctx = &context->res_ctx.pipe_ctx[i]; + + if (pipe_ctx->plane_state && pipe_ctx->stream == stream) + pipe_ctx->plane_state->force_full_update = false; + } } /*let's use current_state to update watermark etc*/ if (update_type >= UPDATE_TYPE_FULL) diff --git a/drivers/gpu/drm/amd/display/dc/dc.h b/drivers/gpu/drm/amd/display/dc/dc.h index 1a7fd6aa77eb..0515095574e7 100644 --- a/drivers/gpu/drm/amd/display/dc/dc.h +++ b/drivers/gpu/drm/amd/display/dc/dc.h @@ -503,6 +503,9 @@ struct dc_plane_state { struct dc_plane_status status; struct dc_context *ctx; + /* HACK: Workaround for forcing full reprogramming under some conditions */ + bool force_full_update; + /* private to dc_surface.c */ enum dc_irq_source irq_source; struct kref refcount; -- cgit v1.2.3 From 3c79107631db1f7fd32cf3f7368e4672004a3010 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Mon, 1 Apr 2019 13:08:54 +0200 Subject: netfilter: ctnetlink: don't use conntrack/expect object addresses as id else, we leak the addresses to userspace via ctnetlink events and dumps. Compute an ID on demand based on the immutable parts of nf_conn struct. Another advantage compared to using an address is that there is no immediate re-use of the same ID in case the conntrack entry is freed and reallocated again immediately. Fixes: 3583240249ef ("[NETFILTER]: nf_conntrack_expect: kill unique ID") Fixes: 7f85f914721f ("[NETFILTER]: nf_conntrack: kill unique ID") Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_conntrack.h | 2 ++ net/netfilter/nf_conntrack_core.c | 35 +++++++++++++++++++++++++++++++++++ net/netfilter/nf_conntrack_netlink.c | 34 +++++++++++++++++++++++++++++----- 3 files changed, 66 insertions(+), 5 deletions(-) diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h index 5ee7b30b4917..d2bc733a2ef1 100644 --- a/include/net/netfilter/nf_conntrack.h +++ b/include/net/netfilter/nf_conntrack.h @@ -316,6 +316,8 @@ struct nf_conn *nf_ct_tmpl_alloc(struct net *net, gfp_t flags); void nf_ct_tmpl_free(struct nf_conn *tmpl); +u32 nf_ct_get_id(const struct nf_conn *ct); + static inline void nf_ct_set(struct sk_buff *skb, struct nf_conn *ct, enum ip_conntrack_info info) { diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index a137d4e7f218..3c48d44d6fff 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -25,6 +25,7 @@ #include #include #include +#include #include #include #include @@ -449,6 +450,40 @@ nf_ct_invert_tuple(struct nf_conntrack_tuple *inverse, } EXPORT_SYMBOL_GPL(nf_ct_invert_tuple); +/* Generate a almost-unique pseudo-id for a given conntrack. + * + * intentionally doesn't re-use any of the seeds used for hash + * table location, we assume id gets exposed to userspace. + * + * Following nf_conn items do not change throughout lifetime + * of the nf_conn after it has been committed to main hash table: + * + * 1. nf_conn address + * 2. nf_conn->ext address + * 3. nf_conn->master address (normally NULL) + * 4. tuple + * 5. the associated net namespace + */ +u32 nf_ct_get_id(const struct nf_conn *ct) +{ + static __read_mostly siphash_key_t ct_id_seed; + unsigned long a, b, c, d; + + net_get_random_once(&ct_id_seed, sizeof(ct_id_seed)); + + a = (unsigned long)ct; + b = (unsigned long)ct->master ^ net_hash_mix(nf_ct_net(ct)); + c = (unsigned long)ct->ext; + d = (unsigned long)siphash(&ct->tuplehash, sizeof(ct->tuplehash), + &ct_id_seed); +#ifdef CONFIG_64BIT + return siphash_4u64((u64)a, (u64)b, (u64)c, (u64)d, &ct_id_seed); +#else + return siphash_4u32((u32)a, (u32)b, (u32)c, (u32)d, &ct_id_seed); +#endif +} +EXPORT_SYMBOL_GPL(nf_ct_get_id); + static void clean_from_lists(struct nf_conn *ct) { diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index 66c596d287a5..d7f61b0547c6 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -29,6 +29,7 @@ #include #include #include +#include #include #include @@ -485,7 +486,9 @@ nla_put_failure: static int ctnetlink_dump_id(struct sk_buff *skb, const struct nf_conn *ct) { - if (nla_put_be32(skb, CTA_ID, htonl((unsigned long)ct))) + __be32 id = (__force __be32)nf_ct_get_id(ct); + + if (nla_put_be32(skb, CTA_ID, id)) goto nla_put_failure; return 0; @@ -1286,8 +1289,9 @@ static int ctnetlink_del_conntrack(struct net *net, struct sock *ctnl, } if (cda[CTA_ID]) { - u_int32_t id = ntohl(nla_get_be32(cda[CTA_ID])); - if (id != (u32)(unsigned long)ct) { + __be32 id = nla_get_be32(cda[CTA_ID]); + + if (id != (__force __be32)nf_ct_get_id(ct)) { nf_ct_put(ct); return -ENOENT; } @@ -2692,6 +2696,25 @@ nla_put_failure: static const union nf_inet_addr any_addr; +static __be32 nf_expect_get_id(const struct nf_conntrack_expect *exp) +{ + static __read_mostly siphash_key_t exp_id_seed; + unsigned long a, b, c, d; + + net_get_random_once(&exp_id_seed, sizeof(exp_id_seed)); + + a = (unsigned long)exp; + b = (unsigned long)exp->helper; + c = (unsigned long)exp->master; + d = (unsigned long)siphash(&exp->tuple, sizeof(exp->tuple), &exp_id_seed); + +#ifdef CONFIG_64BIT + return (__force __be32)siphash_4u64((u64)a, (u64)b, (u64)c, (u64)d, &exp_id_seed); +#else + return (__force __be32)siphash_4u32((u32)a, (u32)b, (u32)c, (u32)d, &exp_id_seed); +#endif +} + static int ctnetlink_exp_dump_expect(struct sk_buff *skb, const struct nf_conntrack_expect *exp) @@ -2739,7 +2762,7 @@ ctnetlink_exp_dump_expect(struct sk_buff *skb, } #endif if (nla_put_be32(skb, CTA_EXPECT_TIMEOUT, htonl(timeout)) || - nla_put_be32(skb, CTA_EXPECT_ID, htonl((unsigned long)exp)) || + nla_put_be32(skb, CTA_EXPECT_ID, nf_expect_get_id(exp)) || nla_put_be32(skb, CTA_EXPECT_FLAGS, htonl(exp->flags)) || nla_put_be32(skb, CTA_EXPECT_CLASS, htonl(exp->class))) goto nla_put_failure; @@ -3044,7 +3067,8 @@ static int ctnetlink_get_expect(struct net *net, struct sock *ctnl, if (cda[CTA_EXPECT_ID]) { __be32 id = nla_get_be32(cda[CTA_EXPECT_ID]); - if (ntohl(id) != (u32)(unsigned long)exp) { + + if (id != nf_expect_get_id(exp)) { nf_ct_expect_put(exp); return -ENOENT; } -- cgit v1.2.3 From 33d1c018179d0a30c39cc5f1682b77867282694b Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Sat, 6 Apr 2019 08:26:52 +0300 Subject: netfilter: nf_tables: prevent shift wrap in nft_chain_parse_hook() I believe that "hook->num" can be up to UINT_MAX. Shifting more than 31 bits would is undefined in C but in practice it would lead to shift wrapping. That would lead to an array overflow in nf_tables_addchain(): ops->hook = hook.type->hooks[ops->hooknum]; Fixes: fe19c04ca137 ("netfilter: nf_tables: remove nhooks field from struct nft_af_info") Signed-off-by: Dan Carpenter Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_tables_api.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index ef7772e976cc..1606eaa5ae0d 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -1545,7 +1545,7 @@ static int nft_chain_parse_hook(struct net *net, if (IS_ERR(type)) return PTR_ERR(type); } - if (!(type->hook_mask & (1 << hook->num))) + if (hook->num > NF_MAX_HOOKS || !(type->hook_mask & (1 << hook->num))) return -EOPNOTSUPP; if (type->type == NFT_CHAIN_T_NAT && -- cgit v1.2.3 From 5bdac418f33f60b07a34e01e722889140ee8fac9 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Tue, 9 Apr 2019 14:45:20 +0200 Subject: netfilter: nat: fix icmp id randomization Sven Auhagen reported that a 2nd ping request will fail if 'fully-random' mode is used. Reason is that if no proto information is given, min/max are both 0, so we set the icmp id to 0 instead of chosing a random value between 0 and 65535. Update test case as well to catch this, without fix this yields: [..] ERROR: cannot ping ns1 from ns2 with ip masquerade fully-random (attempt 2) ERROR: cannot ping ns1 from ns2 with ipv6 masquerade fully-random (attempt 2) ... becaus 2nd ping clashes with existing 'id 0' icmp conntrack and gets dropped. Fixes: 203f2e78200c27e ("netfilter: nat: remove l4proto->unique_tuple") Reported-by: Sven Auhagen Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_nat_core.c | 11 ++++++--- tools/testing/selftests/netfilter/nft_nat.sh | 36 +++++++++++++++++++++------- 2 files changed, 35 insertions(+), 12 deletions(-) diff --git a/net/netfilter/nf_nat_core.c b/net/netfilter/nf_nat_core.c index af7dc6537758..000952719adf 100644 --- a/net/netfilter/nf_nat_core.c +++ b/net/netfilter/nf_nat_core.c @@ -415,9 +415,14 @@ static void nf_nat_l4proto_unique_tuple(struct nf_conntrack_tuple *tuple, case IPPROTO_ICMPV6: /* id is same for either direction... */ keyptr = &tuple->src.u.icmp.id; - min = range->min_proto.icmp.id; - range_size = ntohs(range->max_proto.icmp.id) - - ntohs(range->min_proto.icmp.id) + 1; + if (!(range->flags & NF_NAT_RANGE_PROTO_SPECIFIED)) { + min = 0; + range_size = 65536; + } else { + min = ntohs(range->min_proto.icmp.id); + range_size = ntohs(range->max_proto.icmp.id) - + ntohs(range->min_proto.icmp.id) + 1; + } goto find_free_id; #if IS_ENABLED(CONFIG_NF_CT_PROTO_GRE) case IPPROTO_GRE: diff --git a/tools/testing/selftests/netfilter/nft_nat.sh b/tools/testing/selftests/netfilter/nft_nat.sh index 8ec76681605c..3194007cf8d1 100755 --- a/tools/testing/selftests/netfilter/nft_nat.sh +++ b/tools/testing/selftests/netfilter/nft_nat.sh @@ -321,6 +321,7 @@ EOF test_masquerade6() { + local natflags=$1 local lret=0 ip netns exec ns0 sysctl net.ipv6.conf.all.forwarding=1 > /dev/null @@ -354,13 +355,13 @@ ip netns exec ns0 nft -f - < /dev/null # ping ns2->ns1 if [ $? -ne 0 ] ; then - echo "ERROR: cannot ping ns1 from ns2 with active ipv6 masquerading" + echo "ERROR: cannot ping ns1 from ns2 with active ipv6 masquerade $natflags" lret=1 fi @@ -397,19 +398,26 @@ EOF fi done + ip netns exec ns2 ping -q -c 1 dead:1::99 > /dev/null # ping ns2->ns1 + if [ $? -ne 0 ] ; then + echo "ERROR: cannot ping ns1 from ns2 with active ipv6 masquerade $natflags (attempt 2)" + lret=1 + fi + ip netns exec ns0 nft flush chain ip6 nat postrouting if [ $? -ne 0 ]; then echo "ERROR: Could not flush ip6 nat postrouting" 1>&2 lret=1 fi - test $lret -eq 0 && echo "PASS: IPv6 masquerade for ns2" + test $lret -eq 0 && echo "PASS: IPv6 masquerade $natflags for ns2" return $lret } test_masquerade() { + local natflags=$1 local lret=0 ip netns exec ns0 sysctl net.ipv4.conf.veth0.forwarding=1 > /dev/null @@ -417,7 +425,7 @@ test_masquerade() ip netns exec ns2 ping -q -c 1 10.0.1.99 > /dev/null # ping ns2->ns1 if [ $? -ne 0 ] ; then - echo "ERROR: canot ping ns1 from ns2" + echo "ERROR: cannot ping ns1 from ns2 $natflags" lret=1 fi @@ -443,13 +451,13 @@ ip netns exec ns0 nft -f - < /dev/null # ping ns2->ns1 if [ $? -ne 0 ] ; then - echo "ERROR: cannot ping ns1 from ns2 with active ip masquerading" + echo "ERROR: cannot ping ns1 from ns2 with active ip masquere $natflags" lret=1 fi @@ -485,13 +493,19 @@ EOF fi done + ip netns exec ns2 ping -q -c 1 10.0.1.99 > /dev/null # ping ns2->ns1 + if [ $? -ne 0 ] ; then + echo "ERROR: cannot ping ns1 from ns2 with active ip masquerade $natflags (attempt 2)" + lret=1 + fi + ip netns exec ns0 nft flush chain ip nat postrouting if [ $? -ne 0 ]; then echo "ERROR: Could not flush nat postrouting" 1>&2 lret=1 fi - test $lret -eq 0 && echo "PASS: IP masquerade for ns2" + test $lret -eq 0 && echo "PASS: IP masquerade $natflags for ns2" return $lret } @@ -750,8 +764,12 @@ test_local_dnat test_local_dnat6 reset_counters -test_masquerade -test_masquerade6 +test_masquerade "" +test_masquerade6 "" + +reset_counters +test_masquerade "fully-random" +test_masquerade6 "fully-random" reset_counters test_redirect -- cgit v1.2.3 From 6b1f16ba730d4c0cda1247568c3a1bf4fa3a2f2f Mon Sep 17 00:00:00 2001 From: Harald Freudenberger Date: Fri, 12 Apr 2019 11:04:50 +0200 Subject: s390/pkey: add one more argument space for debug feature entry The debug feature entries have been used with up to 5 arguents (including the pointer to the format string) but there was only space reserved for 4 arguemnts. So now the registration does reserve space for 5 times a long value. This fixes a sometime appearing weired value as the last value of an debug feature entry like this: ... pkey_sec2protkey zcrypt_send_cprb (cardnr=10 domain=12) failed with errno -2143346254 Signed-off-by: Harald Freudenberger Reported-by: Christian Rund Signed-off-by: Martin Schwidefsky --- drivers/s390/crypto/pkey_api.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/s390/crypto/pkey_api.c b/drivers/s390/crypto/pkey_api.c index 3e85d665c572..45eb0c14b880 100644 --- a/drivers/s390/crypto/pkey_api.c +++ b/drivers/s390/crypto/pkey_api.c @@ -51,7 +51,8 @@ static debug_info_t *debug_info; static void __init pkey_debug_init(void) { - debug_info = debug_register("pkey", 1, 1, 4 * sizeof(long)); + /* 5 arguments per dbf entry (including the format string ptr) */ + debug_info = debug_register("pkey", 1, 1, 5 * sizeof(long)); debug_register_view(debug_info, &debug_sprintf_view); debug_set_level(debug_info, 3); } -- cgit v1.2.3 From 2aae471d66c108b78493be1147e707bca6331e50 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 19 Mar 2019 15:51:56 +0100 Subject: drivers: power: supply: goldfish_battery: Fix bogus SPDX identifier spdxcheck.py complains: drivers/power/supply/goldfish_battery.c: 1:28 Invalid License ID: GPL which is correct because GPL is not a valid identifier. Of course this could have been caught by checkpatch.pl _before_ submitting or merging the patch. WARNING: 'SPDX-License-Identifier: GPL' is not supported in LICENSES/... #19: FILE: drivers/power/supply/goldfish_battery.c:1: +// SPDX-License-Identifier: GPL Which is absolutely hillarious as the commit introducing this wreckage says in the changelog: There was a checkpatch complain: "Missing or malformed SPDX-License-Identifier tag". Oh well. Replacing a checkpatch warning by a different checkpatch warning is a really useful exercise. Use the proper GPL-2.0 identifier which is what the boiler plate in the file had originally. Fixes: e75e3a125b40 ("drivers: power: supply: goldfish_battery: Put an SPDX tag") Signed-off-by: Thomas Gleixner Signed-off-by: Greg Kroah-Hartman --- drivers/power/supply/goldfish_battery.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/power/supply/goldfish_battery.c b/drivers/power/supply/goldfish_battery.c index ad969d9fc981..c2644a9fe80f 100644 --- a/drivers/power/supply/goldfish_battery.c +++ b/drivers/power/supply/goldfish_battery.c @@ -1,4 +1,4 @@ -// SPDX-License-Identifier: GPL +// SPDX-License-Identifier: GPL-2.0 /* * Power supply driver for the goldfish emulator * -- cgit v1.2.3 From cfd32acf7875d9dd83f82e1940098e88abeea439 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Fri, 12 Apr 2019 19:55:41 -0700 Subject: KVM: x86/mmu: Fix an inverted list_empty() check when zapping sptes A recently introduced helper for handling zap vs. remote flush incorrectly bails early, effectively leaking defunct shadow pages. Manifests as a slab BUG when exiting KVM due to the shadow pages being alive when their associated cache is destroyed. ========================================================================== BUG kvm_mmu_page_header: Objects remaining in kvm_mmu_page_header on ... -------------------------------------------------------------------------- Disabling lock debugging due to kernel taint INFO: Slab 0x00000000fc436387 objects=26 used=23 fp=0x00000000d023caee ... CPU: 6 PID: 4315 Comm: rmmod Tainted: G B 5.1.0-rc2+ #19 Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 0.0.0 02/06/2015 Call Trace: dump_stack+0x46/0x5b slab_err+0xad/0xd0 ? on_each_cpu_mask+0x3c/0x50 ? ksm_migrate_page+0x60/0x60 ? on_each_cpu_cond_mask+0x7c/0xa0 ? __kmalloc+0x1ca/0x1e0 __kmem_cache_shutdown+0x13a/0x310 shutdown_cache+0xf/0x130 kmem_cache_destroy+0x1d5/0x200 kvm_mmu_module_exit+0xa/0x30 [kvm] kvm_arch_exit+0x45/0x60 [kvm] kvm_exit+0x6f/0x80 [kvm] vmx_exit+0x1a/0x50 [kvm_intel] __x64_sys_delete_module+0x153/0x1f0 ? exit_to_usermode_loop+0x88/0xc0 do_syscall_64+0x4f/0x100 entry_SYSCALL_64_after_hwframe+0x44/0xa9 Fixes: a21136345cb6f ("KVM: x86/mmu: Split remote_flush+zap case out of kvm_mmu_flush_or_zap()") Signed-off-by: Sean Christopherson Signed-off-by: Paolo Bonzini --- arch/x86/kvm/mmu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index eee455a8a612..85f753728953 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -2238,7 +2238,7 @@ static bool kvm_mmu_remote_flush_or_zap(struct kvm *kvm, struct list_head *invalid_list, bool remote_flush) { - if (!remote_flush && !list_empty(invalid_list)) + if (!remote_flush && list_empty(invalid_list)) return false; if (!list_empty(invalid_list)) -- cgit v1.2.3 From 39036cd2727395c3369b1051005da74059a85317 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Thu, 28 Feb 2019 13:59:19 +0100 Subject: arch: add pidfd and io_uring syscalls everywhere Add the io_uring and pidfd_send_signal system calls to all architectures. These system calls are designed to handle both native and compat tasks, so all entries are the same across architectures, only arm-compat and the generic tale still use an old format. Acked-by: Michael Ellerman (powerpc) Acked-by: Heiko Carstens (s390) Acked-by: Geert Uytterhoeven Signed-off-by: Arnd Bergmann --- arch/alpha/kernel/syscalls/syscall.tbl | 4 ++++ arch/arm/tools/syscall.tbl | 4 ++++ arch/arm64/include/asm/unistd.h | 2 +- arch/arm64/include/asm/unistd32.h | 8 ++++++++ arch/ia64/kernel/syscalls/syscall.tbl | 4 ++++ arch/m68k/kernel/syscalls/syscall.tbl | 4 ++++ arch/microblaze/kernel/syscalls/syscall.tbl | 4 ++++ arch/mips/kernel/syscalls/syscall_n32.tbl | 4 ++++ arch/mips/kernel/syscalls/syscall_n64.tbl | 4 ++++ arch/mips/kernel/syscalls/syscall_o32.tbl | 4 ++++ arch/parisc/kernel/syscalls/syscall.tbl | 4 ++++ arch/powerpc/kernel/syscalls/syscall.tbl | 4 ++++ arch/s390/kernel/syscalls/syscall.tbl | 4 ++++ arch/sh/kernel/syscalls/syscall.tbl | 4 ++++ arch/sparc/kernel/syscalls/syscall.tbl | 4 ++++ arch/xtensa/kernel/syscalls/syscall.tbl | 4 ++++ 16 files changed, 65 insertions(+), 1 deletion(-) diff --git a/arch/alpha/kernel/syscalls/syscall.tbl b/arch/alpha/kernel/syscalls/syscall.tbl index 63ed39cbd3bd..165f268beafc 100644 --- a/arch/alpha/kernel/syscalls/syscall.tbl +++ b/arch/alpha/kernel/syscalls/syscall.tbl @@ -463,3 +463,7 @@ 532 common getppid sys_getppid # all other architectures have common numbers for new syscall, alpha # is the exception. +534 common pidfd_send_signal sys_pidfd_send_signal +535 common io_uring_setup sys_io_uring_setup +536 common io_uring_enter sys_io_uring_enter +537 common io_uring_register sys_io_uring_register diff --git a/arch/arm/tools/syscall.tbl b/arch/arm/tools/syscall.tbl index 9016f4081bb9..0393917eaa57 100644 --- a/arch/arm/tools/syscall.tbl +++ b/arch/arm/tools/syscall.tbl @@ -437,3 +437,7 @@ 421 common rt_sigtimedwait_time64 sys_rt_sigtimedwait 422 common futex_time64 sys_futex 423 common sched_rr_get_interval_time64 sys_sched_rr_get_interval +424 common pidfd_send_signal sys_pidfd_send_signal +425 common io_uring_setup sys_io_uring_setup +426 common io_uring_enter sys_io_uring_enter +427 common io_uring_register sys_io_uring_register diff --git a/arch/arm64/include/asm/unistd.h b/arch/arm64/include/asm/unistd.h index d1dd93436e1e..f2a83ff6b73c 100644 --- a/arch/arm64/include/asm/unistd.h +++ b/arch/arm64/include/asm/unistd.h @@ -44,7 +44,7 @@ #define __ARM_NR_compat_set_tls (__ARM_NR_COMPAT_BASE + 5) #define __ARM_NR_COMPAT_END (__ARM_NR_COMPAT_BASE + 0x800) -#define __NR_compat_syscalls 424 +#define __NR_compat_syscalls 428 #endif #define __ARCH_WANT_SYS_CLONE diff --git a/arch/arm64/include/asm/unistd32.h b/arch/arm64/include/asm/unistd32.h index 5590f2623690..23f1a44acada 100644 --- a/arch/arm64/include/asm/unistd32.h +++ b/arch/arm64/include/asm/unistd32.h @@ -866,6 +866,14 @@ __SYSCALL(__NR_rt_sigtimedwait_time64, compat_sys_rt_sigtimedwait_time64) __SYSCALL(__NR_futex_time64, sys_futex) #define __NR_sched_rr_get_interval_time64 423 __SYSCALL(__NR_sched_rr_get_interval_time64, sys_sched_rr_get_interval) +#define __NR_pidfd_send_signal 424 +__SYSCALL(__NR_pidfd_send_signal, sys_pidfd_send_signal) +#define __NR_io_uring_setup 425 +__SYSCALL(__NR_io_uring_setup, sys_io_uring_setup) +#define __NR_io_uring_enter 426 +__SYSCALL(__NR_io_uring_enter, sys_io_uring_enter) +#define __NR_io_uring_register 427 +__SYSCALL(__NR_io_uring_register, sys_io_uring_register) /* * Please add new compat syscalls above this comment and update diff --git a/arch/ia64/kernel/syscalls/syscall.tbl b/arch/ia64/kernel/syscalls/syscall.tbl index ab9cda5f6136..56e3d0b685e1 100644 --- a/arch/ia64/kernel/syscalls/syscall.tbl +++ b/arch/ia64/kernel/syscalls/syscall.tbl @@ -344,3 +344,7 @@ 332 common pkey_free sys_pkey_free 333 common rseq sys_rseq # 334 through 423 are reserved to sync up with other architectures +424 common pidfd_send_signal sys_pidfd_send_signal +425 common io_uring_setup sys_io_uring_setup +426 common io_uring_enter sys_io_uring_enter +427 common io_uring_register sys_io_uring_register diff --git a/arch/m68k/kernel/syscalls/syscall.tbl b/arch/m68k/kernel/syscalls/syscall.tbl index 125c14178979..df4ec3ec71d1 100644 --- a/arch/m68k/kernel/syscalls/syscall.tbl +++ b/arch/m68k/kernel/syscalls/syscall.tbl @@ -423,3 +423,7 @@ 421 common rt_sigtimedwait_time64 sys_rt_sigtimedwait 422 common futex_time64 sys_futex 423 common sched_rr_get_interval_time64 sys_sched_rr_get_interval +424 common pidfd_send_signal sys_pidfd_send_signal +425 common io_uring_setup sys_io_uring_setup +426 common io_uring_enter sys_io_uring_enter +427 common io_uring_register sys_io_uring_register diff --git a/arch/microblaze/kernel/syscalls/syscall.tbl b/arch/microblaze/kernel/syscalls/syscall.tbl index 8ee3a8c18498..4964947732af 100644 --- a/arch/microblaze/kernel/syscalls/syscall.tbl +++ b/arch/microblaze/kernel/syscalls/syscall.tbl @@ -429,3 +429,7 @@ 421 common rt_sigtimedwait_time64 sys_rt_sigtimedwait 422 common futex_time64 sys_futex 423 common sched_rr_get_interval_time64 sys_sched_rr_get_interval +424 common pidfd_send_signal sys_pidfd_send_signal +425 common io_uring_setup sys_io_uring_setup +426 common io_uring_enter sys_io_uring_enter +427 common io_uring_register sys_io_uring_register diff --git a/arch/mips/kernel/syscalls/syscall_n32.tbl b/arch/mips/kernel/syscalls/syscall_n32.tbl index 15f4117900ee..9392dfe33f97 100644 --- a/arch/mips/kernel/syscalls/syscall_n32.tbl +++ b/arch/mips/kernel/syscalls/syscall_n32.tbl @@ -362,3 +362,7 @@ 421 n32 rt_sigtimedwait_time64 compat_sys_rt_sigtimedwait_time64 422 n32 futex_time64 sys_futex 423 n32 sched_rr_get_interval_time64 sys_sched_rr_get_interval +424 n32 pidfd_send_signal sys_pidfd_send_signal +425 n32 io_uring_setup sys_io_uring_setup +426 n32 io_uring_enter sys_io_uring_enter +427 n32 io_uring_register sys_io_uring_register diff --git a/arch/mips/kernel/syscalls/syscall_n64.tbl b/arch/mips/kernel/syscalls/syscall_n64.tbl index c85502e67b44..cd0c8aa21fba 100644 --- a/arch/mips/kernel/syscalls/syscall_n64.tbl +++ b/arch/mips/kernel/syscalls/syscall_n64.tbl @@ -338,3 +338,7 @@ 327 n64 rseq sys_rseq 328 n64 io_pgetevents sys_io_pgetevents # 329 through 423 are reserved to sync up with other architectures +424 n64 pidfd_send_signal sys_pidfd_send_signal +425 n64 io_uring_setup sys_io_uring_setup +426 n64 io_uring_enter sys_io_uring_enter +427 n64 io_uring_register sys_io_uring_register diff --git a/arch/mips/kernel/syscalls/syscall_o32.tbl b/arch/mips/kernel/syscalls/syscall_o32.tbl index 2e063d0f837e..e849e8ffe4a2 100644 --- a/arch/mips/kernel/syscalls/syscall_o32.tbl +++ b/arch/mips/kernel/syscalls/syscall_o32.tbl @@ -411,3 +411,7 @@ 421 o32 rt_sigtimedwait_time64 sys_rt_sigtimedwait compat_sys_rt_sigtimedwait_time64 422 o32 futex_time64 sys_futex sys_futex 423 o32 sched_rr_get_interval_time64 sys_sched_rr_get_interval sys_sched_rr_get_interval +424 o32 pidfd_send_signal sys_pidfd_send_signal +425 o32 io_uring_setup sys_io_uring_setup +426 o32 io_uring_enter sys_io_uring_enter +427 o32 io_uring_register sys_io_uring_register diff --git a/arch/parisc/kernel/syscalls/syscall.tbl b/arch/parisc/kernel/syscalls/syscall.tbl index b26766c6647d..fe8ca623add8 100644 --- a/arch/parisc/kernel/syscalls/syscall.tbl +++ b/arch/parisc/kernel/syscalls/syscall.tbl @@ -420,3 +420,7 @@ 421 32 rt_sigtimedwait_time64 sys_rt_sigtimedwait compat_sys_rt_sigtimedwait_time64 422 32 futex_time64 sys_futex sys_futex 423 32 sched_rr_get_interval_time64 sys_sched_rr_get_interval sys_sched_rr_get_interval +424 common pidfd_send_signal sys_pidfd_send_signal +425 common io_uring_setup sys_io_uring_setup +426 common io_uring_enter sys_io_uring_enter +427 common io_uring_register sys_io_uring_register diff --git a/arch/powerpc/kernel/syscalls/syscall.tbl b/arch/powerpc/kernel/syscalls/syscall.tbl index b18abb0c3dae..00f5a63c8d9a 100644 --- a/arch/powerpc/kernel/syscalls/syscall.tbl +++ b/arch/powerpc/kernel/syscalls/syscall.tbl @@ -505,3 +505,7 @@ 421 32 rt_sigtimedwait_time64 sys_rt_sigtimedwait compat_sys_rt_sigtimedwait_time64 422 32 futex_time64 sys_futex sys_futex 423 32 sched_rr_get_interval_time64 sys_sched_rr_get_interval sys_sched_rr_get_interval +424 common pidfd_send_signal sys_pidfd_send_signal +425 common io_uring_setup sys_io_uring_setup +426 common io_uring_enter sys_io_uring_enter +427 common io_uring_register sys_io_uring_register diff --git a/arch/s390/kernel/syscalls/syscall.tbl b/arch/s390/kernel/syscalls/syscall.tbl index 02579f95f391..061418f787c3 100644 --- a/arch/s390/kernel/syscalls/syscall.tbl +++ b/arch/s390/kernel/syscalls/syscall.tbl @@ -426,3 +426,7 @@ 421 32 rt_sigtimedwait_time64 - compat_sys_rt_sigtimedwait_time64 422 32 futex_time64 - sys_futex 423 32 sched_rr_get_interval_time64 - sys_sched_rr_get_interval +424 common pidfd_send_signal sys_pidfd_send_signal sys_pidfd_send_signal +425 common io_uring_setup sys_io_uring_setup sys_io_uring_setup +426 common io_uring_enter sys_io_uring_enter sys_io_uring_enter +427 common io_uring_register sys_io_uring_register sys_io_uring_register diff --git a/arch/sh/kernel/syscalls/syscall.tbl b/arch/sh/kernel/syscalls/syscall.tbl index bfda678576e4..480b057556ee 100644 --- a/arch/sh/kernel/syscalls/syscall.tbl +++ b/arch/sh/kernel/syscalls/syscall.tbl @@ -426,3 +426,7 @@ 421 common rt_sigtimedwait_time64 sys_rt_sigtimedwait 422 common futex_time64 sys_futex 423 common sched_rr_get_interval_time64 sys_sched_rr_get_interval +424 common pidfd_send_signal sys_pidfd_send_signal +425 common io_uring_setup sys_io_uring_setup +426 common io_uring_enter sys_io_uring_enter +427 common io_uring_register sys_io_uring_register diff --git a/arch/sparc/kernel/syscalls/syscall.tbl b/arch/sparc/kernel/syscalls/syscall.tbl index b9a5a04b2d2c..a1dd24307b00 100644 --- a/arch/sparc/kernel/syscalls/syscall.tbl +++ b/arch/sparc/kernel/syscalls/syscall.tbl @@ -469,3 +469,7 @@ 421 32 rt_sigtimedwait_time64 sys_rt_sigtimedwait compat_sys_rt_sigtimedwait_time64 422 32 futex_time64 sys_futex sys_futex 423 32 sched_rr_get_interval_time64 sys_sched_rr_get_interval sys_sched_rr_get_interval +424 common pidfd_send_signal sys_pidfd_send_signal +425 common io_uring_setup sys_io_uring_setup +426 common io_uring_enter sys_io_uring_enter +427 common io_uring_register sys_io_uring_register diff --git a/arch/xtensa/kernel/syscalls/syscall.tbl b/arch/xtensa/kernel/syscalls/syscall.tbl index 6af49929de85..30084eaf8422 100644 --- a/arch/xtensa/kernel/syscalls/syscall.tbl +++ b/arch/xtensa/kernel/syscalls/syscall.tbl @@ -394,3 +394,7 @@ 421 common rt_sigtimedwait_time64 sys_rt_sigtimedwait 422 common futex_time64 sys_futex 423 common sched_rr_get_interval_time64 sys_sched_rr_get_interval +424 common pidfd_send_signal sys_pidfd_send_signal +425 common io_uring_setup sys_io_uring_setup +426 common io_uring_enter sys_io_uring_enter +427 common io_uring_register sys_io_uring_register -- cgit v1.2.3 From 5aae7832d1b4ec614996ea0f4fafc4d9855ec0b0 Mon Sep 17 00:00:00 2001 From: Ville Syrjälä Date: Tue, 26 Mar 2019 16:49:02 +0200 Subject: drm/i915: Do not enable FEC without DSC MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Currently we enable FEC even when DSC is no used. While that is theoretically valid supposedly there isn't much of a benefit from this. But more importantly we do not account for the FEC link bandwidth overhead (2.4%) in the non-DSC link bandwidth computations. So the code may think we have enough bandwidth when we in fact do not. Cc: stable@vger.kernel.org Cc: Anusha Srivatsa Cc: Manasi Navare Fixes: 240999cf339f ("i915/dp/fec: Add fec_enable to the crtc state.") Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20190326144903.6617-1-ville.syrjala@linux.intel.com Reviewed-by: Manasi Navare (cherry picked from commit 6fd3134ae3551d4802a04669c0f39f2f5c56f77d) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/i915/intel_dp.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_dp.c b/drivers/gpu/drm/i915/intel_dp.c index 8891f29a8c7f..48da4a969a0a 100644 --- a/drivers/gpu/drm/i915/intel_dp.c +++ b/drivers/gpu/drm/i915/intel_dp.c @@ -1886,6 +1886,9 @@ static int intel_dp_dsc_compute_config(struct intel_dp *intel_dp, int pipe_bpp; int ret; + pipe_config->fec_enable = !intel_dp_is_edp(intel_dp) && + intel_dp_supports_fec(intel_dp, pipe_config); + if (!intel_dp_supports_dsc(intel_dp, pipe_config)) return -EINVAL; @@ -2116,9 +2119,6 @@ intel_dp_compute_config(struct intel_encoder *encoder, if (adjusted_mode->flags & DRM_MODE_FLAG_DBLCLK) return -EINVAL; - pipe_config->fec_enable = !intel_dp_is_edp(intel_dp) && - intel_dp_supports_fec(intel_dp, pipe_config); - ret = intel_dp_compute_link_config(encoder, pipe_config, conn_state); if (ret < 0) return ret; -- cgit v1.2.3 From f5c58ba18ab8ea2169670ed880e4d31ed772ad10 Mon Sep 17 00:00:00 2001 From: Ville Syrjälä Date: Thu, 11 Apr 2019 19:49:25 +0300 Subject: drm/i915: Restore correct bxt_ddi_phy_calc_lane_lat_optim_mask() calculation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We are no longer calling bxt_ddi_phy_calc_lane_lat_optim_mask() when intel{hdmi,dp}_compute_config() succeeds, and instead only call it when those fail. This is fallout from the bool->int .compute_config() conversion which failed to invert the return value check before calling bxt_ddi_phy_calc_lane_lat_optim_mask(). Let's just replace it with an early bailout so that it's harder to miss. This restores the correct latency optim setting calculation (which could fix some real failures), and avoids the MISSING_CASE() from bxt_ddi_phy_calc_lane_lat_optim_mask() after intel{hdmi,dp}_compute_config() has failed. Cc: Lyude Paul Fixes: 204474a6b859 ("drm/i915: Pass down rc in intel_encoder->compute_config()") Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=109373 Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20190411164925.28491-1-ville.syrjala@linux.intel.com Reviewed-by: Lyude Paul (cherry picked from commit 7a412b8f60cd57ab7dcb72ab701fde2bf81752eb) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/i915/intel_ddi.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_ddi.c b/drivers/gpu/drm/i915/intel_ddi.c index ab4e60dfd6a3..98cea1f4b3bf 100644 --- a/drivers/gpu/drm/i915/intel_ddi.c +++ b/drivers/gpu/drm/i915/intel_ddi.c @@ -3862,14 +3862,16 @@ static int intel_ddi_compute_config(struct intel_encoder *encoder, ret = intel_hdmi_compute_config(encoder, pipe_config, conn_state); else ret = intel_dp_compute_config(encoder, pipe_config, conn_state); + if (ret) + return ret; - if (IS_GEN9_LP(dev_priv) && ret) + if (IS_GEN9_LP(dev_priv)) pipe_config->lane_lat_optim_mask = bxt_ddi_phy_calc_lane_lat_optim_mask(pipe_config->lane_count); intel_ddi_compute_min_voltage_level(dev_priv, pipe_config); - return ret; + return 0; } -- cgit v1.2.3 From b19062a567266ee1f10f6709325f766bbcc07d1c Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Mon, 15 Apr 2019 10:49:38 -0600 Subject: io_uring: fix possible deadlock between io_uring_{enter,register} If we have multiple threads, one doing io_uring_enter() while the other is doing io_uring_register(), we can run into a deadlock between the two. io_uring_register() must wait for existing users of the io_uring instance to exit. But it does so while holding the io_uring mutex. Callers of io_uring_enter() may need this mutex to make progress (and eventually exit). If we wait for users to exit in io_uring_register(), we can't do so with the io_uring mutex held without potentially risking a deadlock. Drop the io_uring mutex while waiting for existing callers to exit. This is safe and guaranteed to make forward progress, since we already killed the percpu ref before doing so. Hence later callers of io_uring_enter() will be rejected. Reported-by: syzbot+16dc03452dee970a0c3e@syzkaller.appspotmail.com Signed-off-by: Jens Axboe --- fs/io_uring.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/fs/io_uring.c b/fs/io_uring.c index f4ddb9d23241..b35300e4c9a7 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -2929,11 +2929,23 @@ SYSCALL_DEFINE2(io_uring_setup, u32, entries, static int __io_uring_register(struct io_ring_ctx *ctx, unsigned opcode, void __user *arg, unsigned nr_args) + __releases(ctx->uring_lock) + __acquires(ctx->uring_lock) { int ret; percpu_ref_kill(&ctx->refs); + + /* + * Drop uring mutex before waiting for references to exit. If another + * thread is currently inside io_uring_enter() it might need to grab + * the uring_lock to make progress. If we hold it here across the drain + * wait, then we can deadlock. It's safe to drop the mutex here, since + * no new references will come in after we've killed the percpu ref. + */ + mutex_unlock(&ctx->uring_lock); wait_for_completion(&ctx->ctx_done); + mutex_lock(&ctx->uring_lock); switch (opcode) { case IORING_REGISTER_BUFFERS: -- cgit v1.2.3 From 79b4a9cf0e2ea8203ce777c8d5cfa86c71eae86e Mon Sep 17 00:00:00 2001 From: Aurelien Jarno Date: Tue, 9 Apr 2019 16:53:55 +0200 Subject: MIPS: scall64-o32: Fix indirect syscall number load MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Commit 4c21b8fd8f14 (MIPS: seccomp: Handle indirect system calls (o32)) added indirect syscall detection for O32 processes running on MIPS64, but it did not work correctly for big endian kernel/processes. The reason is that the syscall number is loaded from ARG1 using the lw instruction while this is a 64-bit value, so zero is loaded instead of the syscall number. Fix the code by using the ld instruction instead. When running a 32-bit processes on a 64 bit CPU, the values are properly sign-extended, so it ensures the value passed to syscall_trace_enter is correct. Recent systemd versions with seccomp enabled whitelist the getpid syscall for their internal processes (e.g. systemd-journald), but call it through syscall(SYS_getpid). This fix therefore allows O32 big endian systems with a 64-bit kernel to run recent systemd versions. Signed-off-by: Aurelien Jarno Cc: # v3.15+ Reviewed-by: Philippe Mathieu-Daudé Signed-off-by: Paul Burton Cc: Ralf Baechle Cc: James Hogan Cc: linux-mips@vger.kernel.org Cc: linux-kernel@vger.kernel.org --- arch/mips/kernel/scall64-o32.S | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/mips/kernel/scall64-o32.S b/arch/mips/kernel/scall64-o32.S index f158c5894a9a..feb2653490df 100644 --- a/arch/mips/kernel/scall64-o32.S +++ b/arch/mips/kernel/scall64-o32.S @@ -125,7 +125,7 @@ trace_a_syscall: subu t1, v0, __NR_O32_Linux move a1, v0 bnez t1, 1f /* __NR_syscall at offset 0 */ - lw a1, PT_R4(sp) /* Arg1 for __NR_syscall case */ + ld a1, PT_R4(sp) /* Arg1 for __NR_syscall case */ .set pop 1: jal syscall_trace_enter -- cgit v1.2.3 From 8ed633b9baf9ec7d593ebb8e256312ff1c70ab37 Mon Sep 17 00:00:00 2001 From: Wang Hai Date: Fri, 12 Apr 2019 16:36:33 -0400 Subject: Revert "net-sysfs: Fix memory leak in netdev_register_kobject" This reverts commit 6b70fc94afd165342876e53fc4b2f7d085009945. The reverted bugfix will cause another issue. Reported by syzbot+6024817a931b2830bc93@syzkaller.appspotmail.com. See https://syzkaller.appspot.com/x/log.txt?x=1737671b200000 for details. Signed-off-by: Wang Hai Acked-by: Andy Shevchenko Signed-off-by: David S. Miller --- net/core/net-sysfs.c | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c index f8f94303a1f5..8f8b7b6c2945 100644 --- a/net/core/net-sysfs.c +++ b/net/core/net-sysfs.c @@ -1747,20 +1747,16 @@ int netdev_register_kobject(struct net_device *ndev) error = device_add(dev); if (error) - goto error_put_device; + return error; error = register_queue_kobjects(ndev); - if (error) - goto error_device_del; + if (error) { + device_del(dev); + return error; + } pm_runtime_set_memalloc_noio(dev, true); - return 0; - -error_device_del: - device_del(dev); -error_put_device: - put_device(dev); return error; } -- cgit v1.2.3 From 92480b3977fd3884649d404cbbaf839b70035699 Mon Sep 17 00:00:00 2001 From: Sabrina Dubroca Date: Fri, 12 Apr 2019 15:04:10 +0200 Subject: bonding: fix event handling for stacked bonds When a bond is enslaved to another bond, bond_netdev_event() only handles the event as if the bond is a master, and skips treating the bond as a slave. This leads to a refcount leak on the slave, since we don't remove the adjacency to its master and the master holds a reference on the slave. Reproducer: ip link add bondL type bond ip link add bondU type bond ip link set bondL master bondU ip link del bondL No "Fixes:" tag, this code is older than git history. Signed-off-by: Sabrina Dubroca Signed-off-by: David S. Miller --- drivers/net/bonding/bond_main.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index b59708c35faf..ee610721098e 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -3213,8 +3213,12 @@ static int bond_netdev_event(struct notifier_block *this, return NOTIFY_DONE; if (event_dev->flags & IFF_MASTER) { + int ret; + netdev_dbg(event_dev, "IFF_MASTER\n"); - return bond_master_netdev_event(event, event_dev); + ret = bond_master_netdev_event(event, event_dev); + if (ret != NOTIFY_DONE) + return ret; } if (event_dev->flags & IFF_SLAVE) { -- cgit v1.2.3 From 789445b960c16baf626c050f762126189b45b82d Mon Sep 17 00:00:00 2001 From: Lukas Bulwahn Date: Sat, 13 Apr 2019 09:52:15 +0200 Subject: MAINTAINERS: normalize Woojung Huh's email address MAINTAINERS contains a lower-case and upper-case variant of Woojung Huh' s email address. Only keep the lower-case variant in MAINTAINERS. Signed-off-by: Lukas Bulwahn Acked-by: Woojung Huh Signed-off-by: David S. Miller --- MAINTAINERS | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index 27b0de13506c..601679a213f6 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -10145,7 +10145,7 @@ F: drivers/spi/spi-at91-usart.c F: Documentation/devicetree/bindings/mfd/atmel-usart.txt MICROCHIP KSZ SERIES ETHERNET SWITCH DRIVER -M: Woojung Huh +M: Woojung Huh M: Microchip Linux Driver Support L: netdev@vger.kernel.org S: Maintained -- cgit v1.2.3 From 9c69a13205151c0d801de9f9d83a818e6e8f60ec Mon Sep 17 00:00:00 2001 From: Jonathan Lemon Date: Sun, 14 Apr 2019 14:21:29 -0700 Subject: route: Avoid crash from dereferencing NULL rt->from When __ip6_rt_update_pmtu() is called, rt->from is RCU dereferenced, but is never checked for null - rt6_flush_exceptions() may have removed the entry. [ 1913.989004] RIP: 0010:ip6_rt_cache_alloc+0x13/0x170 [ 1914.209410] Call Trace: [ 1914.214798] [ 1914.219226] __ip6_rt_update_pmtu+0xb0/0x190 [ 1914.228649] ip6_tnl_xmit+0x2c2/0x970 [ip6_tunnel] [ 1914.239223] ? ip6_tnl_parse_tlv_enc_lim+0x32/0x1a0 [ip6_tunnel] [ 1914.252489] ? __gre6_xmit+0x148/0x530 [ip6_gre] [ 1914.262678] ip6gre_tunnel_xmit+0x17e/0x3c7 [ip6_gre] [ 1914.273831] dev_hard_start_xmit+0x8d/0x1f0 [ 1914.283061] sch_direct_xmit+0xfa/0x230 [ 1914.291521] __qdisc_run+0x154/0x4b0 [ 1914.299407] net_tx_action+0x10e/0x1f0 [ 1914.307678] __do_softirq+0xca/0x297 [ 1914.315567] irq_exit+0x96/0xa0 [ 1914.322494] smp_apic_timer_interrupt+0x68/0x130 [ 1914.332683] apic_timer_interrupt+0xf/0x20 [ 1914.341721] Fixes: a68886a69180 ("net/ipv6: Make from in rt6_info rcu protected") Signed-off-by: Jonathan Lemon Reviewed-by: Eric Dumazet Reviewed-by: David Ahern Reviewed-by: Martin KaFai Lau Signed-off-by: David S. Miller --- net/ipv6/route.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 0302e0eb07af..7178e32eb15d 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -2330,6 +2330,10 @@ static void __ip6_rt_update_pmtu(struct dst_entry *dst, const struct sock *sk, rcu_read_lock(); from = rcu_dereference(rt6->from); + if (!from) { + rcu_read_unlock(); + return; + } nrt6 = ip6_rt_cache_alloc(from, daddr, saddr); if (nrt6) { rt6_do_update_pmtu(nrt6, mtu); -- cgit v1.2.3 From 614c70f35cd77a9af8e2ca841dcdb121cec3068f Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Mon, 15 Apr 2019 16:47:03 +0100 Subject: bnx2x: fix spelling mistake "dicline" -> "decline" There is a spelling mistake in a BNX2X_ERR message, fix it. Signed-off-by: Colin Ian King Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bnx2x/bnx2x_vfpf.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_vfpf.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_vfpf.c index a9bdc21873d3..10ff37d6dc78 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_vfpf.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_vfpf.c @@ -957,7 +957,7 @@ int bnx2x_vfpf_update_vlan(struct bnx2x *bp, u16 vid, u8 vf_qid, bool add) bnx2x_sample_bulletin(bp); if (bp->shadow_bulletin.content.valid_bitmap & 1 << VLAN_VALID) { - BNX2X_ERR("Hypervisor will dicline the request, avoiding\n"); + BNX2X_ERR("Hypervisor will decline the request, avoiding\n"); rc = -EINVAL; goto out; } -- cgit v1.2.3 From be549d49115422f846b6d96ee8fd7173a5f7ceb0 Mon Sep 17 00:00:00 2001 From: Jaesoo Lee Date: Tue, 9 Apr 2019 17:02:22 -0700 Subject: scsi: core: set result when the command cannot be dispatched When SCSI blk-mq is enabled, there is a bug in handling errors in scsi_queue_rq. Specifically, the bug is not setting result field of scsi_request correctly when the dispatch of the command has been failed. Since the upper layer code including the sg_io ioctl expects to receive any error status from result field of scsi_request, the error is silently ignored and this could cause data corruptions for some applications. Fixes: d285203cf647 ("scsi: add support for a blk-mq based I/O path.") Cc: Signed-off-by: Jaesoo Lee Reviewed-by: Hannes Reinecke Reviewed-by: Bart Van Assche Signed-off-by: Martin K. Petersen --- drivers/scsi/scsi_lib.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c index 601b9f1de267..07dfc17d4824 100644 --- a/drivers/scsi/scsi_lib.c +++ b/drivers/scsi/scsi_lib.c @@ -1706,8 +1706,12 @@ out_put_budget: ret = BLK_STS_DEV_RESOURCE; break; default: + if (unlikely(!scsi_device_online(sdev))) + scsi_req(req)->result = DID_NO_CONNECT << 16; + else + scsi_req(req)->result = DID_ERROR << 16; /* - * Make sure to release all allocated ressources when + * Make sure to release all allocated resources when * we hit an error, as we will never see this command * again. */ -- cgit v1.2.3 From 6a03469a1edc94da52b65478f1e00837add869a3 Mon Sep 17 00:00:00 2001 From: Sami Tolvanen Date: Mon, 15 Apr 2019 09:49:56 -0700 Subject: x86/build/lto: Fix truncated .bss with -fdata-sections With CONFIG_LD_DEAD_CODE_DATA_ELIMINATION=y, we compile the kernel with -fdata-sections, which also splits the .bss section. The new section, with a new .bss.* name, which pattern gets missed by the main x86 linker script which only expects the '.bss' name. This results in the discarding of the second part and a too small, truncated .bss section and an unhappy, non-working kernel. Use the common BSS_MAIN macro in the linker script to properly capture and merge all the generated BSS sections. Signed-off-by: Sami Tolvanen Reviewed-by: Nick Desaulniers Reviewed-by: Kees Cook Cc: Borislav Petkov Cc: Kees Cook Cc: Linus Torvalds Cc: Nicholas Piggin Cc: Nick Desaulniers Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/20190415164956.124067-1-samitolvanen@google.com [ Extended the changelog. ] Signed-off-by: Ingo Molnar --- arch/x86/kernel/vmlinux.lds.S | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S index bad8c51fee6e..a5127b2c195f 100644 --- a/arch/x86/kernel/vmlinux.lds.S +++ b/arch/x86/kernel/vmlinux.lds.S @@ -362,7 +362,7 @@ SECTIONS .bss : AT(ADDR(.bss) - LOAD_OFFSET) { __bss_start = .; *(.bss..page_aligned) - *(.bss) + *(BSS_MAIN) BSS_DECRYPTED . = ALIGN(PAGE_SIZE); __bss_stop = .; -- cgit v1.2.3 From 8b39adbee805c539a461dbf208b125b096152b1c Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Mon, 15 Apr 2019 10:05:38 -0700 Subject: locking/lockdep: Make lockdep_unregister_key() honor 'debug_locks' again If lockdep_register_key() and lockdep_unregister_key() are called with debug_locks == false then the following warning is reported: WARNING: CPU: 2 PID: 15145 at kernel/locking/lockdep.c:4920 lockdep_unregister_key+0x1ad/0x240 That warning is reported because lockdep_unregister_key() ignores the value of 'debug_locks' and because the behavior of lockdep_register_key() depends on whether or not 'debug_locks' is set. Fix this inconsistency by making lockdep_unregister_key() take 'debug_locks' again into account. Signed-off-by: Bart Van Assche Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Waiman Long Cc: Will Deacon Cc: shenghui Fixes: 90c1cba2b3b3 ("locking/lockdep: Zap lock classes even with lock debugging disabled") Link: http://lkml.kernel.org/r/20190415170538.23491-1-bvanassche@acm.org Signed-off-by: Ingo Molnar --- kernel/locking/lockdep.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c index e16766ff184b..e221be724fe8 100644 --- a/kernel/locking/lockdep.c +++ b/kernel/locking/lockdep.c @@ -4907,8 +4907,9 @@ void lockdep_unregister_key(struct lock_class_key *key) return; raw_local_irq_save(flags); - arch_spin_lock(&lockdep_lock); - current->lockdep_recursion = 1; + if (!graph_lock()) + goto out_irq; + pf = get_pending_free(); hlist_for_each_entry_rcu(k, hash_head, hash_entry) { if (k == key) { @@ -4920,8 +4921,8 @@ void lockdep_unregister_key(struct lock_class_key *key) WARN_ON_ONCE(!found); __lockdep_free_key_range(pf, key, 1); call_rcu_zapped(pf); - current->lockdep_recursion = 0; - arch_spin_unlock(&lockdep_lock); + graph_unlock(); +out_irq: raw_local_irq_restore(flags); /* Wait until is_dynamic_key() has finished accessing k->hash_entry. */ -- cgit v1.2.3 From 5f843ed415581cfad4ef8fefe31c138a8346ca8a Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Mon, 15 Apr 2019 15:01:25 +0900 Subject: kprobes: Fix error check when reusing optimized probes The following commit introduced a bug in one of our error paths: 819319fc9346 ("kprobes: Return error if we fail to reuse kprobe instead of BUG_ON()") it missed to handle the return value of kprobe_optready() as error-value. In reality, the kprobe_optready() returns a bool result, so "true" case must be passed instead of 0. This causes some errors on kprobe boot-time selftests on ARM: [ ] Beginning kprobe tests... [ ] Probe ARM code [ ] kprobe [ ] kretprobe [ ] ARM instruction simulation [ ] Check decoding tables [ ] Run test cases [ ] FAIL: test_case_handler not run [ ] FAIL: Test andge r10, r11, r14, asr r7 [ ] FAIL: Scenario 11 ... [ ] FAIL: Scenario 7 [ ] Total instruction simulation tests=1631, pass=1433 fail=198 [ ] kprobe tests failed This can happen if an optimized probe is unregistered and next kprobe is registered on same address until the previous probe is not reclaimed. If this happens, a hidden aggregated probe may be kept in memory, and no new kprobe can probe same address. Also, in that case register_kprobe() will return "1" instead of minus error value, which can mislead caller logic. Signed-off-by: Masami Hiramatsu Cc: Anil S Keshavamurthy Cc: David S . Miller Cc: Linus Torvalds Cc: Naveen N . Rao Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: stable@vger.kernel.org # v5.0+ Fixes: 819319fc9346 ("kprobes: Return error if we fail to reuse kprobe instead of BUG_ON()") Link: http://lkml.kernel.org/r/155530808559.32517.539898325433642204.stgit@devnote2 Signed-off-by: Ingo Molnar --- kernel/kprobes.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/kernel/kprobes.c b/kernel/kprobes.c index c83e54727131..b1ea30a5540e 100644 --- a/kernel/kprobes.c +++ b/kernel/kprobes.c @@ -709,7 +709,6 @@ static void unoptimize_kprobe(struct kprobe *p, bool force) static int reuse_unused_kprobe(struct kprobe *ap) { struct optimized_kprobe *op; - int ret; /* * Unused kprobe MUST be on the way of delayed unoptimizing (means @@ -720,9 +719,8 @@ static int reuse_unused_kprobe(struct kprobe *ap) /* Enable the probe again */ ap->flags &= ~KPROBE_FLAG_DISABLED; /* Optimize it again (remove from op->list) */ - ret = kprobe_optready(ap); - if (ret) - return ret; + if (!kprobe_optready(ap)) + return -EINVAL; optimize_kprobe(ap); return 0; -- cgit v1.2.3 From 510bb96fe5b3480b4b22d815786377e54cb701e7 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 15 Apr 2019 10:46:07 +0200 Subject: x86/mm: Prevent bogus warnings with "noexec=off" Xose Vazquez Perez reported boot warnings when NX is disabled on the kernel command line. __early_set_fixmap() triggers this warning: attempted to set unsupported pgprot: 8000000000000163 bits: 8000000000000000 supported: 7fffffffffffffff WARNING: CPU: 0 PID: 0 at arch/x86/include/asm/pgtable.h:537 __early_set_fixmap+0xa2/0xff because it uses __default_kernel_pte_mask to mask out unsupported bits. Use __supported_pte_mask instead. Disabling NX on the command line also triggers the NX warning in the page table mapping check: WARNING: CPU: 1 PID: 1 at arch/x86/mm/dump_pagetables.c:262 note_page+0x2ae/0x650 .... Make the warning depend on NX set in __supported_pte_mask. Reported-by: Xose Vazquez Perez Tested-by: Xose Vazquez Perez Signed-off-by: Thomas Gleixner Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Dave Hansen Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Rik van Riel Link: http://lkml.kernel.org/r/alpine.DEB.2.21.1904151037530.1729@nanos.tec.linutronix.de Signed-off-by: Ingo Molnar --- arch/x86/mm/dump_pagetables.c | 3 ++- arch/x86/mm/ioremap.c | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/arch/x86/mm/dump_pagetables.c b/arch/x86/mm/dump_pagetables.c index ee8f8ab46941..c0309ea9abee 100644 --- a/arch/x86/mm/dump_pagetables.c +++ b/arch/x86/mm/dump_pagetables.c @@ -259,7 +259,8 @@ static void note_wx(struct pg_state *st) #endif /* Account the WX pages */ st->wx_pages += npages; - WARN_ONCE(1, "x86/mm: Found insecure W+X mapping at address %pS\n", + WARN_ONCE(__supported_pte_mask & _PAGE_NX, + "x86/mm: Found insecure W+X mapping at address %pS\n", (void *)st->start_address); } diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c index 0029604af8a4..dd73d5d74393 100644 --- a/arch/x86/mm/ioremap.c +++ b/arch/x86/mm/ioremap.c @@ -825,7 +825,7 @@ void __init __early_set_fixmap(enum fixed_addresses idx, pte = early_ioremap_pte(addr); /* Sanitize 'prot' against any unsupported bits: */ - pgprot_val(flags) &= __default_kernel_pte_mask; + pgprot_val(flags) &= __supported_pte_mask; if (pgprot_val(flags)) set_pte(pte, pfn_pte(phys >> PAGE_SHIFT, flags)); -- cgit v1.2.3 From 0082517fa4bce073e7cf542633439f26538a14cc Mon Sep 17 00:00:00 2001 From: Jian-Hong Pan Date: Fri, 12 Apr 2019 16:01:53 +0800 Subject: x86/reboot, efi: Use EFI reboot for Acer TravelMate X514-51T Upon reboot, the Acer TravelMate X514-51T laptop appears to complete the shutdown process, but then it hangs in BIOS POST with a black screen. The problem is intermittent - at some points it has appeared related to Secure Boot settings or different kernel builds, but ultimately we have not been able to identify the exact conditions that trigger the issue to come and go. Besides, the EFI mode cannot be disabled in the BIOS of this model. However, after extensive testing, we observe that using the EFI reboot method reliably avoids the issue in all cases. So add a boot time quirk to use EFI reboot on such systems. Buglink: https://bugzilla.kernel.org/show_bug.cgi?id=203119 Signed-off-by: Jian-Hong Pan Signed-off-by: Daniel Drake Cc: Ard Biesheuvel Cc: Borislav Petkov Cc: Linus Torvalds Cc: Matt Fleming Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-efi@vger.kernel.org Cc: linux@endlessm.com Link: http://lkml.kernel.org/r/20190412080152.3718-1-jian-hong@endlessm.com [ Fix !CONFIG_EFI build failure, clarify the code and the changelog a bit. ] Signed-off-by: Ingo Molnar --- arch/x86/kernel/reboot.c | 21 +++++++++++++++++++++ include/linux/efi.h | 7 ++++++- 2 files changed, 27 insertions(+), 1 deletion(-) diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c index 725624b6c0c0..8fd3cedd9acc 100644 --- a/arch/x86/kernel/reboot.c +++ b/arch/x86/kernel/reboot.c @@ -81,6 +81,19 @@ static int __init set_bios_reboot(const struct dmi_system_id *d) return 0; } +/* + * Some machines don't handle the default ACPI reboot method and + * require the EFI reboot method: + */ +static int __init set_efi_reboot(const struct dmi_system_id *d) +{ + if (reboot_type != BOOT_EFI && !efi_runtime_disabled()) { + reboot_type = BOOT_EFI; + pr_info("%s series board detected. Selecting EFI-method for reboot.\n", d->ident); + } + return 0; +} + void __noreturn machine_real_restart(unsigned int type) { local_irq_disable(); @@ -166,6 +179,14 @@ static const struct dmi_system_id reboot_dmi_table[] __initconst = { DMI_MATCH(DMI_PRODUCT_NAME, "AOA110"), }, }, + { /* Handle reboot issue on Acer TravelMate X514-51T */ + .callback = set_efi_reboot, + .ident = "Acer TravelMate X514-51T", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Acer"), + DMI_MATCH(DMI_PRODUCT_NAME, "TravelMate X514-51T"), + }, + }, /* Apple */ { /* Handle problems with rebooting on Apple MacBook5 */ diff --git a/include/linux/efi.h b/include/linux/efi.h index 54357a258b35..6ebc2098cfe1 100644 --- a/include/linux/efi.h +++ b/include/linux/efi.h @@ -1611,7 +1611,12 @@ efi_status_t efi_setup_gop(efi_system_table_t *sys_table_arg, struct screen_info *si, efi_guid_t *proto, unsigned long size); -bool efi_runtime_disabled(void); +#ifdef CONFIG_EFI +extern bool efi_runtime_disabled(void); +#else +static inline bool efi_runtime_disabled(void) { return true; } +#endif + extern void efi_call_virt_check_flags(unsigned long flags, const char *call); extern unsigned long efi_call_virt_save_flags(void); -- cgit v1.2.3 From 780e0106d468a2962b16b52fdf42898f2639e0a0 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 16 Apr 2019 10:03:35 +0200 Subject: x86/mm/tlb: Revert "x86/mm: Align TLB invalidation info" Revert the following commit: 515ab7c41306: ("x86/mm: Align TLB invalidation info") I found out (the hard way) that under some .config options (notably L1_CACHE_SHIFT=7) and compiler combinations this on-stack alignment leads to a 320 byte stack usage, which then triggers a KASAN stack warning elsewhere. Using 320 bytes of stack space for a 40 byte structure is ludicrous and clearly not right. Signed-off-by: Peter Zijlstra (Intel) Acked-by: Linus Torvalds Acked-by: Nadav Amit Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Dave Hansen Cc: Peter Zijlstra Cc: Thomas Gleixner Fixes: 515ab7c41306 ("x86/mm: Align TLB invalidation info") Link: http://lkml.kernel.org/r/20190416080335.GM7905@worktop.programming.kicks-ass.net [ Minor changelog edits. ] Signed-off-by: Ingo Molnar --- arch/x86/mm/tlb.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c index bc4bc7b2f075..487b8474c01c 100644 --- a/arch/x86/mm/tlb.c +++ b/arch/x86/mm/tlb.c @@ -728,7 +728,7 @@ void flush_tlb_mm_range(struct mm_struct *mm, unsigned long start, { int cpu; - struct flush_tlb_info info __aligned(SMP_CACHE_BYTES) = { + struct flush_tlb_info info = { .mm = mm, .stride_shift = stride_shift, .freed_tables = freed_tables, -- cgit v1.2.3 From 690908104e39d37947f89d76388c876ce4ec5fda Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Mon, 15 Apr 2019 15:16:17 +0200 Subject: KVM: nVMX: allow tests to use bad virtual-APIC page address As mentioned in the comment, there are some special cases where we can simply clear the TPR shadow bit from the CPU-based execution controls in the vmcs02. Handle them so that we can remove some XFAILs from vmx.flat. Signed-off-by: Paolo Bonzini --- arch/x86/kvm/vmx/nested.c | 25 ++++++++++++++++--------- arch/x86/kvm/vmx/vmx.c | 2 +- arch/x86/kvm/vmx/vmx.h | 2 ++ 3 files changed, 19 insertions(+), 10 deletions(-) diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c index 7ec9bb1dd723..a22af5a85540 100644 --- a/arch/x86/kvm/vmx/nested.c +++ b/arch/x86/kvm/vmx/nested.c @@ -2873,20 +2873,27 @@ static void nested_get_vmcs12_pages(struct kvm_vcpu *vcpu) /* * If translation failed, VM entry will fail because * prepare_vmcs02 set VIRTUAL_APIC_PAGE_ADDR to -1ull. - * Failing the vm entry is _not_ what the processor - * does but it's basically the only possibility we - * have. We could still enter the guest if CR8 load - * exits are enabled, CR8 store exits are enabled, and - * virtualize APIC access is disabled; in this case - * the processor would never use the TPR shadow and we - * could simply clear the bit from the execution - * control. But such a configuration is useless, so - * let's keep the code simple. */ if (!is_error_page(page)) { vmx->nested.virtual_apic_page = page; hpa = page_to_phys(vmx->nested.virtual_apic_page); vmcs_write64(VIRTUAL_APIC_PAGE_ADDR, hpa); + } else if (nested_cpu_has(vmcs12, CPU_BASED_CR8_LOAD_EXITING) && + nested_cpu_has(vmcs12, CPU_BASED_CR8_STORE_EXITING) && + !nested_cpu_has2(vmcs12, SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES)) { + /* + * The processor will never use the TPR shadow, simply + * clear the bit from the execution control. Such a + * configuration is useless, but it happens in tests. + * For any other configuration, failing the vm entry is + * _not_ what the processor does but it's basically the + * only possibility we have. + */ + vmcs_clear_bits(CPU_BASED_VM_EXEC_CONTROL, + CPU_BASED_TPR_SHADOW); + } else { + printk("bad virtual-APIC page address\n"); + dump_vmcs(); } } diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index ab432a930ae8..7a8f75fc6b7e 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -5603,7 +5603,7 @@ static void vmx_dump_dtsel(char *name, uint32_t limit) vmcs_readl(limit + GUEST_GDTR_BASE - GUEST_GDTR_LIMIT)); } -static void dump_vmcs(void) +void dump_vmcs(void) { u32 vmentry_ctl = vmcs_read32(VM_ENTRY_CONTROLS); u32 vmexit_ctl = vmcs_read32(VM_EXIT_CONTROLS); diff --git a/arch/x86/kvm/vmx/vmx.h b/arch/x86/kvm/vmx/vmx.h index a1e00d0a2482..f879529906b4 100644 --- a/arch/x86/kvm/vmx/vmx.h +++ b/arch/x86/kvm/vmx/vmx.h @@ -517,4 +517,6 @@ static inline void decache_tsc_multiplier(struct vcpu_vmx *vmx) vmcs_write64(TSC_MULTIPLIER, vmx->current_tsc_ratio); } +void dump_vmcs(void); + #endif /* __KVM_X86_VMX_H */ -- cgit v1.2.3 From 7c39f7f671d2acc0a1f39ebbbee4303ad499bbfa Mon Sep 17 00:00:00 2001 From: Josh Collier Date: Mon, 15 Apr 2019 11:34:22 -0700 Subject: IB/rdmavt: Fix frwr memory registration Current implementation was not properly handling frwr memory registrations. This was uncovered by commit 27f26cec761das ("xprtrdma: Plant XID in on-the-wire RDMA offset (FRWR)") in which xprtrdma, which is used for NFS over RDMA, started failing as it was the first ULP to modify the ib_mr iova resulting in the NFS server getting REMOTE ACCESS ERROR when attempting to perform RDMA Writes to the client. The fix is to properly capture the true iova, offset, and length in the call to ib_map_mr_sg, and then update the iova when processing the IB_WR_REG_MEM on the send queue. Fixes: a41081aa5936 ("IB/rdmavt: Add support for ib_map_mr_sg") Cc: stable@vger.kernel.org Reviewed-by: Mike Marciniszyn Reviewed-by: Dennis Dalessandro Reviewed-by: Michael J. Ruhl Signed-off-by: Josh Collier Signed-off-by: Dennis Dalessandro Signed-off-by: Jason Gunthorpe --- drivers/infiniband/sw/rdmavt/mr.c | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/drivers/infiniband/sw/rdmavt/mr.c b/drivers/infiniband/sw/rdmavt/mr.c index 728795043496..0bb6e39dd03a 100644 --- a/drivers/infiniband/sw/rdmavt/mr.c +++ b/drivers/infiniband/sw/rdmavt/mr.c @@ -608,11 +608,6 @@ static int rvt_set_page(struct ib_mr *ibmr, u64 addr) if (unlikely(mapped_segs == mr->mr.max_segs)) return -ENOMEM; - if (mr->mr.length == 0) { - mr->mr.user_base = addr; - mr->mr.iova = addr; - } - m = mapped_segs / RVT_SEGSZ; n = mapped_segs % RVT_SEGSZ; mr->mr.map[m]->segs[n].vaddr = (void *)addr; @@ -630,17 +625,24 @@ static int rvt_set_page(struct ib_mr *ibmr, u64 addr) * @sg_nents: number of entries in sg * @sg_offset: offset in bytes into sg * + * Overwrite rvt_mr length with mr length calculated by ib_sg_to_pages. + * * Return: number of sg elements mapped to the memory region */ int rvt_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, int sg_nents, unsigned int *sg_offset) { struct rvt_mr *mr = to_imr(ibmr); + int ret; mr->mr.length = 0; mr->mr.page_shift = PAGE_SHIFT; - return ib_sg_to_pages(ibmr, sg, sg_nents, sg_offset, - rvt_set_page); + ret = ib_sg_to_pages(ibmr, sg, sg_nents, sg_offset, rvt_set_page); + mr->mr.user_base = ibmr->iova; + mr->mr.iova = ibmr->iova; + mr->mr.offset = ibmr->iova - (u64)mr->mr.map[0]->segs[0].vaddr; + mr->mr.length = (size_t)ibmr->length; + return ret; } /** @@ -671,6 +673,7 @@ int rvt_fast_reg_mr(struct rvt_qp *qp, struct ib_mr *ibmr, u32 key, ibmr->rkey = key; mr->mr.lkey = key; mr->mr.access_flags = access; + mr->mr.iova = ibmr->iova; atomic_set(&mr->mr.lkey_invalid, 0); return 0; -- cgit v1.2.3 From 52a44f83fc2d64a5e74d5d685fad2fecc7b7a321 Mon Sep 17 00:00:00 2001 From: Alexander Shishkin Date: Fri, 29 Mar 2019 11:12:12 +0200 Subject: perf/core: Fix the address filtering fix The following recent commit: c60f83b813e5 ("perf, pt, coresight: Fix address filters for vmas with non-zero offset") changes the address filtering logic to communicate filter ranges to the PMU driver via a single address range object, instead of having the driver do the final bit of math. That change forgets to take into account kernel filters, which are not calculated the same way as DSO based filters. Fix that by passing the kernel filters the same way as file-based filters. This doesn't require any additional changes in the drivers. Reported-by: Adrian Hunter Signed-off-by: Alexander Shishkin Signed-off-by: Peter Zijlstra (Intel) Cc: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Thomas Gleixner Cc: Vince Weaver Fixes: c60f83b813e5 ("perf, pt, coresight: Fix address filters for vmas with non-zero offset") Link: https://lkml.kernel.org/r/20190329091212.29870-1-alexander.shishkin@linux.intel.com Signed-off-by: Ingo Molnar --- kernel/events/core.c | 37 +++++++++++++++++++++---------------- 1 file changed, 21 insertions(+), 16 deletions(-) diff --git a/kernel/events/core.c b/kernel/events/core.c index 534e01e7bc36..dc7dead2d2cc 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -9077,26 +9077,29 @@ static void perf_event_addr_filters_apply(struct perf_event *event) if (task == TASK_TOMBSTONE) return; - if (!ifh->nr_file_filters) - return; - - mm = get_task_mm(event->ctx->task); - if (!mm) - goto restart; + if (ifh->nr_file_filters) { + mm = get_task_mm(event->ctx->task); + if (!mm) + goto restart; - down_read(&mm->mmap_sem); + down_read(&mm->mmap_sem); + } raw_spin_lock_irqsave(&ifh->lock, flags); list_for_each_entry(filter, &ifh->list, entry) { - event->addr_filter_ranges[count].start = 0; - event->addr_filter_ranges[count].size = 0; + if (filter->path.dentry) { + /* + * Adjust base offset if the filter is associated to a + * binary that needs to be mapped: + */ + event->addr_filter_ranges[count].start = 0; + event->addr_filter_ranges[count].size = 0; - /* - * Adjust base offset if the filter is associated to a binary - * that needs to be mapped: - */ - if (filter->path.dentry) perf_addr_filter_apply(filter, mm, &event->addr_filter_ranges[count]); + } else { + event->addr_filter_ranges[count].start = filter->offset; + event->addr_filter_ranges[count].size = filter->size; + } count++; } @@ -9104,9 +9107,11 @@ static void perf_event_addr_filters_apply(struct perf_event *event) event->addr_filters_gen++; raw_spin_unlock_irqrestore(&ifh->lock, flags); - up_read(&mm->mmap_sem); + if (ifh->nr_file_filters) { + up_read(&mm->mmap_sem); - mmput(mm); + mmput(mm); + } restart: perf_event_stop(event, 1); -- cgit v1.2.3 From 339bc4183596e1f68c2c98a03b87aa124107c317 Mon Sep 17 00:00:00 2001 From: Alexander Shishkin Date: Fri, 29 Mar 2019 11:13:38 +0200 Subject: perf/ring_buffer: Fix AUX record suppression The following commit: 1627314fb54a33e ("perf: Suppress AUX/OVERWRITE records") has an unintended side-effect of also suppressing all AUX records with no flags and non-zero size, so all the regular records in the full trace mode. This breaks some use cases for people. Fix this by restoring "regular" AUX records. Reported-by: Ben Gainey Tested-by: Ben Gainey Signed-off-by: Alexander Shishkin Signed-off-by: Peter Zijlstra (Intel) Cc: Cc: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Thomas Gleixner Cc: Vince Weaver Fixes: 1627314fb54a33e ("perf: Suppress AUX/OVERWRITE records") Link: https://lkml.kernel.org/r/20190329091338.29999-1-alexander.shishkin@linux.intel.com Signed-off-by: Ingo Molnar --- kernel/events/ring_buffer.c | 33 +++++++++++++++------------------ 1 file changed, 15 insertions(+), 18 deletions(-) diff --git a/kernel/events/ring_buffer.c b/kernel/events/ring_buffer.c index 2545ac08cc77..5eedb49a65ea 100644 --- a/kernel/events/ring_buffer.c +++ b/kernel/events/ring_buffer.c @@ -455,24 +455,21 @@ void perf_aux_output_end(struct perf_output_handle *handle, unsigned long size) rb->aux_head += size; } - if (size || handle->aux_flags) { - /* - * Only send RECORD_AUX if we have something useful to communicate - * - * Note: the OVERWRITE records by themselves are not considered - * useful, as they don't communicate any *new* information, - * aside from the short-lived offset, that becomes history at - * the next event sched-in and therefore isn't useful. - * The userspace that needs to copy out AUX data in overwrite - * mode should know to use user_page::aux_head for the actual - * offset. So, from now on we don't output AUX records that - * have *only* OVERWRITE flag set. - */ - - if (handle->aux_flags & ~(u64)PERF_AUX_FLAG_OVERWRITE) - perf_event_aux_event(handle->event, aux_head, size, - handle->aux_flags); - } + /* + * Only send RECORD_AUX if we have something useful to communicate + * + * Note: the OVERWRITE records by themselves are not considered + * useful, as they don't communicate any *new* information, + * aside from the short-lived offset, that becomes history at + * the next event sched-in and therefore isn't useful. + * The userspace that needs to copy out AUX data in overwrite + * mode should know to use user_page::aux_head for the actual + * offset. So, from now on we don't output AUX records that + * have *only* OVERWRITE flag set. + */ + if (size || (handle->aux_flags & ~(u64)PERF_AUX_FLAG_OVERWRITE)) + perf_event_aux_event(handle->event, aux_head, size, + handle->aux_flags); rb->user_page->aux_head = rb->aux_head; if (rb_need_aux_wakeup(rb)) -- cgit v1.2.3 From 9d5dcc93a6ddfc78124f006ccd3637ce070ef2fc Mon Sep 17 00:00:00 2001 From: Kan Liang Date: Tue, 2 Apr 2019 12:44:58 -0700 Subject: perf/x86: Fix incorrect PEBS_REGS PEBS_REGS used as mask for the supported registers for large PEBS. However, the mask cannot filter the sample_regs_user/sample_regs_intr correctly. (1ULL << PERF_REG_X86_*) should be used to replace PERF_REG_X86_*, which is only the index. Rename PEBS_REGS to PEBS_GP_REGS, because the mask is only for general purpose registers. Signed-off-by: Kan Liang Signed-off-by: Peter Zijlstra (Intel) Cc: Cc: Alexander Shishkin Cc: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Thomas Gleixner Cc: Vince Weaver Cc: acme@kernel.org Cc: jolsa@kernel.org Fixes: 2fe1bc1f501d ("perf/x86: Enable free running PEBS for REGS_USER/INTR") Link: https://lkml.kernel.org/r/20190402194509.2832-2-kan.liang@linux.intel.com [ Renamed it to PEBS_GP_REGS - as 'GPRS' is used elsewhere ;-) ] Signed-off-by: Ingo Molnar --- arch/x86/events/intel/core.c | 2 +- arch/x86/events/perf_event.h | 38 +++++++++++++++++++------------------- 2 files changed, 20 insertions(+), 20 deletions(-) diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c index f61dcbef20ff..f9451566cd9b 100644 --- a/arch/x86/events/intel/core.c +++ b/arch/x86/events/intel/core.c @@ -3131,7 +3131,7 @@ static unsigned long intel_pmu_large_pebs_flags(struct perf_event *event) flags &= ~PERF_SAMPLE_TIME; if (!event->attr.exclude_kernel) flags &= ~PERF_SAMPLE_REGS_USER; - if (event->attr.sample_regs_user & ~PEBS_REGS) + if (event->attr.sample_regs_user & ~PEBS_GP_REGS) flags &= ~(PERF_SAMPLE_REGS_USER | PERF_SAMPLE_REGS_INTR); return flags; } diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h index a75955741c50..1e98a42b560a 100644 --- a/arch/x86/events/perf_event.h +++ b/arch/x86/events/perf_event.h @@ -96,25 +96,25 @@ struct amd_nb { PERF_SAMPLE_REGS_INTR | PERF_SAMPLE_REGS_USER | \ PERF_SAMPLE_PERIOD) -#define PEBS_REGS \ - (PERF_REG_X86_AX | \ - PERF_REG_X86_BX | \ - PERF_REG_X86_CX | \ - PERF_REG_X86_DX | \ - PERF_REG_X86_DI | \ - PERF_REG_X86_SI | \ - PERF_REG_X86_SP | \ - PERF_REG_X86_BP | \ - PERF_REG_X86_IP | \ - PERF_REG_X86_FLAGS | \ - PERF_REG_X86_R8 | \ - PERF_REG_X86_R9 | \ - PERF_REG_X86_R10 | \ - PERF_REG_X86_R11 | \ - PERF_REG_X86_R12 | \ - PERF_REG_X86_R13 | \ - PERF_REG_X86_R14 | \ - PERF_REG_X86_R15) +#define PEBS_GP_REGS \ + ((1ULL << PERF_REG_X86_AX) | \ + (1ULL << PERF_REG_X86_BX) | \ + (1ULL << PERF_REG_X86_CX) | \ + (1ULL << PERF_REG_X86_DX) | \ + (1ULL << PERF_REG_X86_DI) | \ + (1ULL << PERF_REG_X86_SI) | \ + (1ULL << PERF_REG_X86_SP) | \ + (1ULL << PERF_REG_X86_BP) | \ + (1ULL << PERF_REG_X86_IP) | \ + (1ULL << PERF_REG_X86_FLAGS) | \ + (1ULL << PERF_REG_X86_R8) | \ + (1ULL << PERF_REG_X86_R9) | \ + (1ULL << PERF_REG_X86_R10) | \ + (1ULL << PERF_REG_X86_R11) | \ + (1ULL << PERF_REG_X86_R12) | \ + (1ULL << PERF_REG_X86_R13) | \ + (1ULL << PERF_REG_X86_R14) | \ + (1ULL << PERF_REG_X86_R15)) /* * Per register state. -- cgit v1.2.3 From c01c348ecdc66085e44912c97368809612231520 Mon Sep 17 00:00:00 2001 From: Alan Stern Date: Mon, 15 Apr 2019 11:51:38 -0400 Subject: USB: core: Fix unterminated string returned by usb_string() Some drivers (such as the vub300 MMC driver) expect usb_string() to return a properly NUL-terminated string, even when an error occurs. (In fact, vub300's probe routine doesn't bother to check the return code from usb_string().) When the driver goes on to use an unterminated string, it leads to kernel errors such as stack-out-of-bounds, as found by the syzkaller USB fuzzer. An out-of-range string index argument is not at all unlikely, given that some devices don't provide string descriptors and therefore list 0 as the value for their string indexes. This patch makes usb_string() return a properly terminated empty string along with the -EINVAL error code when an out-of-range index is encountered. And since a USB string index is a single-byte value, indexes >= 256 are just as invalid as values of 0 or below. Signed-off-by: Alan Stern Reported-by: syzbot+b75b85111c10b8d680f1@syzkaller.appspotmail.com CC: Signed-off-by: Greg Kroah-Hartman --- drivers/usb/core/message.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/usb/core/message.c b/drivers/usb/core/message.c index 82239f27c4cc..e844bb7b5676 100644 --- a/drivers/usb/core/message.c +++ b/drivers/usb/core/message.c @@ -820,9 +820,11 @@ int usb_string(struct usb_device *dev, int index, char *buf, size_t size) if (dev->state == USB_STATE_SUSPENDED) return -EHOSTUNREACH; - if (size <= 0 || !buf || !index) + if (size <= 0 || !buf) return -EINVAL; buf[0] = 0; + if (index <= 0 || index >= 256) + return -EINVAL; tbuf = kmalloc(256, GFP_NOIO); if (!tbuf) return -ENOMEM; -- cgit v1.2.3 From 08b7c2f9208f0e2a32159e4e7a4831b7adb10a3e Mon Sep 17 00:00:00 2001 From: Ian Abbott Date: Mon, 15 Apr 2019 12:10:14 +0100 Subject: staging: comedi: vmk80xx: Fix use of uninitialized semaphore If `vmk80xx_auto_attach()` returns an error, the core comedi module code will call `vmk80xx_detach()` to clean up. If `vmk80xx_auto_attach()` successfully allocated the comedi device private data, `vmk80xx_detach()` assumes that a `struct semaphore limit_sem` contained in the private data has been initialized and uses it. Unfortunately, there are a couple of places where `vmk80xx_auto_attach()` can return an error after allocating the device private data but before initializing the semaphore, so this assumption is invalid. Fix it by initializing the semaphore just after allocating the private data in `vmk80xx_auto_attach()` before any other errors can be returned. I believe this was the cause of the following syzbot crash report : usb 1-1: config 0 has no interface number 0 usb 1-1: New USB device found, idVendor=10cf, idProduct=8068, bcdDevice=e6.8d usb 1-1: New USB device strings: Mfr=0, Product=0, SerialNumber=0 usb 1-1: config 0 descriptor?? vmk80xx 1-1:0.117: driver 'vmk80xx' failed to auto-configure device. INFO: trying to register non-static key. the code is fine but needs lockdep annotation. turning off the locking correctness validator. CPU: 0 PID: 12 Comm: kworker/0:1 Not tainted 5.1.0-rc4-319354-g9a33b36 #3 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Workqueue: usb_hub_wq hub_event Call Trace: __dump_stack lib/dump_stack.c:77 [inline] dump_stack+0xe8/0x16e lib/dump_stack.c:113 assign_lock_key kernel/locking/lockdep.c:786 [inline] register_lock_class+0x11b8/0x1250 kernel/locking/lockdep.c:1095 __lock_acquire+0xfb/0x37c0 kernel/locking/lockdep.c:3582 lock_acquire+0x10d/0x2f0 kernel/locking/lockdep.c:4211 __raw_spin_lock_irqsave include/linux/spinlock_api_smp.h:110 [inline] _raw_spin_lock_irqsave+0x44/0x60 kernel/locking/spinlock.c:152 down+0x12/0x80 kernel/locking/semaphore.c:58 vmk80xx_detach+0x59/0x100 drivers/staging/comedi/drivers/vmk80xx.c:829 comedi_device_detach+0xed/0x800 drivers/staging/comedi/drivers.c:204 comedi_device_cleanup.part.0+0x68/0x140 drivers/staging/comedi/comedi_fops.c:156 comedi_device_cleanup drivers/staging/comedi/comedi_fops.c:187 [inline] comedi_free_board_dev.part.0+0x16/0x90 drivers/staging/comedi/comedi_fops.c:190 comedi_free_board_dev drivers/staging/comedi/comedi_fops.c:189 [inline] comedi_release_hardware_device+0x111/0x140 drivers/staging/comedi/comedi_fops.c:2880 comedi_auto_config.cold+0x124/0x1b0 drivers/staging/comedi/drivers.c:1068 usb_probe_interface+0x31d/0x820 drivers/usb/core/driver.c:361 really_probe+0x2da/0xb10 drivers/base/dd.c:509 driver_probe_device+0x21d/0x350 drivers/base/dd.c:671 __device_attach_driver+0x1d8/0x290 drivers/base/dd.c:778 bus_for_each_drv+0x163/0x1e0 drivers/base/bus.c:454 __device_attach+0x223/0x3a0 drivers/base/dd.c:844 bus_probe_device+0x1f1/0x2a0 drivers/base/bus.c:514 device_add+0xad2/0x16e0 drivers/base/core.c:2106 usb_set_configuration+0xdf7/0x1740 drivers/usb/core/message.c:2021 generic_probe+0xa2/0xda drivers/usb/core/generic.c:210 usb_probe_device+0xc0/0x150 drivers/usb/core/driver.c:266 really_probe+0x2da/0xb10 drivers/base/dd.c:509 driver_probe_device+0x21d/0x350 drivers/base/dd.c:671 __device_attach_driver+0x1d8/0x290 drivers/base/dd.c:778 bus_for_each_drv+0x163/0x1e0 drivers/base/bus.c:454 __device_attach+0x223/0x3a0 drivers/base/dd.c:844 bus_probe_device+0x1f1/0x2a0 drivers/base/bus.c:514 device_add+0xad2/0x16e0 drivers/base/core.c:2106 usb_new_device.cold+0x537/0xccf drivers/usb/core/hub.c:2534 hub_port_connect drivers/usb/core/hub.c:5089 [inline] hub_port_connect_change drivers/usb/core/hub.c:5204 [inline] port_event drivers/usb/core/hub.c:5350 [inline] hub_event+0x138e/0x3b00 drivers/usb/core/hub.c:5432 process_one_work+0x90f/0x1580 kernel/workqueue.c:2269 worker_thread+0x9b/0xe20 kernel/workqueue.c:2415 kthread+0x313/0x420 kernel/kthread.c:253 ret_from_fork+0x3a/0x50 arch/x86/entry/entry_64.S:352 Reported-by: syzbot+54c2f58f15fe6876b6ad@syzkaller.appspotmail.com Signed-off-by: Ian Abbott Cc: stable Signed-off-by: Greg Kroah-Hartman --- drivers/staging/comedi/drivers/vmk80xx.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/staging/comedi/drivers/vmk80xx.c b/drivers/staging/comedi/drivers/vmk80xx.c index 6234b649d887..b035d662390b 100644 --- a/drivers/staging/comedi/drivers/vmk80xx.c +++ b/drivers/staging/comedi/drivers/vmk80xx.c @@ -800,6 +800,8 @@ static int vmk80xx_auto_attach(struct comedi_device *dev, devpriv->model = board->model; + sema_init(&devpriv->limit_sem, 8); + ret = vmk80xx_find_usb_endpoints(dev); if (ret) return ret; @@ -808,8 +810,6 @@ static int vmk80xx_auto_attach(struct comedi_device *dev, if (ret) return ret; - sema_init(&devpriv->limit_sem, 8); - usb_set_intfdata(intf, devpriv); if (devpriv->model == VMK8055_MODEL) -- cgit v1.2.3 From 663d294b4768bfd89e529e069bffa544a830b5bf Mon Sep 17 00:00:00 2001 From: Ian Abbott Date: Mon, 15 Apr 2019 12:52:30 +0100 Subject: staging: comedi: vmk80xx: Fix possible double-free of ->usb_rx_buf `vmk80xx_alloc_usb_buffers()` is called from `vmk80xx_auto_attach()` to allocate RX and TX buffers for USB transfers. It allocates `devpriv->usb_rx_buf` followed by `devpriv->usb_tx_buf`. If the allocation of `devpriv->usb_tx_buf` fails, it frees `devpriv->usb_rx_buf`, leaving the pointer set dangling, and returns an error. Later, `vmk80xx_detach()` will be called from the core comedi module code to clean up. `vmk80xx_detach()` also frees both `devpriv->usb_rx_buf` and `devpriv->usb_tx_buf`, but `devpriv->usb_rx_buf` may have already been freed, leading to a double-free error. Fix it by removing the call to `kfree(devpriv->usb_rx_buf)` from `vmk80xx_alloc_usb_buffers()`, relying on `vmk80xx_detach()` to free the memory. Signed-off-by: Ian Abbott Cc: stable Signed-off-by: Greg Kroah-Hartman --- drivers/staging/comedi/drivers/vmk80xx.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/staging/comedi/drivers/vmk80xx.c b/drivers/staging/comedi/drivers/vmk80xx.c index b035d662390b..65dc6c51037e 100644 --- a/drivers/staging/comedi/drivers/vmk80xx.c +++ b/drivers/staging/comedi/drivers/vmk80xx.c @@ -682,10 +682,8 @@ static int vmk80xx_alloc_usb_buffers(struct comedi_device *dev) size = usb_endpoint_maxp(devpriv->ep_tx); devpriv->usb_tx_buf = kzalloc(size, GFP_KERNEL); - if (!devpriv->usb_tx_buf) { - kfree(devpriv->usb_rx_buf); + if (!devpriv->usb_tx_buf) return -ENOMEM; - } return 0; } -- cgit v1.2.3 From a943245adc9ae31942af752e879fbbc182166573 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Tue, 16 Apr 2019 11:57:51 +0100 Subject: x86/Kconfig: Fix spelling mistake "effectivness" -> "effectiveness" The Kconfig text contains a spelling mistake, fix it. Signed-off-by: Colin Ian King Cc: Borislav Petkov Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: kernel-janitors@vger.kernel.org Link: http://lkml.kernel.org/r/20190416105751.18899-1-colin.king@canonical.com Signed-off-by: Ingo Molnar --- arch/x86/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index c1f9b3cf437c..01dbb05bc498 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -1499,7 +1499,7 @@ config X86_CPA_STATISTICS depends on DEBUG_FS ---help--- Expose statistics about the Change Page Attribute mechanims, which - helps to determine the effectivness of preserving large and huge + helps to determine the effectiveness of preserving large and huge page mappings when mapping protections are changed. config ARCH_HAS_MEM_ENCRYPT -- cgit v1.2.3 From f4e97f5d4c9ece362b9379d3158cf5e4c02404dc Mon Sep 17 00:00:00 2001 From: Gao Xiang Date: Fri, 12 Apr 2019 17:53:14 +0800 Subject: staging: erofs: fix unexpected out-of-bound data access Unexpected out-of-bound data will be read in erofs_read_raw_page after commit 07173c3ec276 ("block: enable multipage bvecs") since one iovec could have multiple pages. Let's fix as what Ming's pointed out in the previous email [1]. [1] https://lore.kernel.org/lkml/20190411080953.GE421@ming.t460p/ Suggested-by: Ming Lei Reviewed-by: Chao Yu Signed-off-by: Gao Xiang Fixes: 07173c3ec276 ("block: enable multipage bvecs") Signed-off-by: Greg Kroah-Hartman --- drivers/staging/erofs/data.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/staging/erofs/data.c b/drivers/staging/erofs/data.c index 526e0dbea5b5..81af768e7248 100644 --- a/drivers/staging/erofs/data.c +++ b/drivers/staging/erofs/data.c @@ -298,7 +298,7 @@ submit_bio_retry: *last_block = current_block; /* shift in advance in case of it followed by too many gaps */ - if (unlikely(bio->bi_vcnt >= bio->bi_max_vecs)) { + if (bio->bi_iter.bi_size >= bio->bi_max_vecs * PAGE_SIZE) { /* err should reassign to 0 after submitting */ err = 0; goto submit_bio_out; -- cgit v1.2.3 From 4d86c9f73c5a9a7c3c0661e922509c2c51801671 Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Tue, 26 Mar 2019 22:01:27 -0700 Subject: clocksource/drivers/timer-ti-dm: Remove omap_dm_timer_set_load_start Commit 008258d995a6 ("clocksource/drivers/timer-ti-dm: Make omap_dm_timer_set_load_start() static") made omap_dm_time_set_load_start static because its prototype was not defined in a header. Unfortunately, this causes a build warning on multi_v7_defconfig because this function is not used anywhere in this translation unit: drivers/clocksource/timer-ti-dm.c:589:12: error: unused function 'omap_dm_timer_set_load_start' [-Werror,-Wunused-function] In fact, omap_dm_timer_set_load_start hasn't been used anywhere since commit f190be7f39a5 ("staging: tidspbridge: remove driver") and the prototype was removed in commit 592ea6bd1fad ("clocksource: timer-ti-dm: Make unexported functions static"), which is probably where this should have happened. Fixes: 592ea6bd1fad ("clocksource: timer-ti-dm: Make unexported functions static") Fixes: 008258d995a6 ("clocksource/drivers/timer-ti-dm: Make omap_dm_timer_set_load_start() static") Signed-off-by: Nathan Chancellor Acked-by: Tony Lindgren Signed-off-by: Daniel Lezcano --- drivers/clocksource/timer-ti-dm.c | 28 ---------------------------- 1 file changed, 28 deletions(-) diff --git a/drivers/clocksource/timer-ti-dm.c b/drivers/clocksource/timer-ti-dm.c index 3352da6ed61f..ee8ec5a8cb16 100644 --- a/drivers/clocksource/timer-ti-dm.c +++ b/drivers/clocksource/timer-ti-dm.c @@ -585,34 +585,6 @@ static int omap_dm_timer_set_load(struct omap_dm_timer *timer, int autoreload, return 0; } -/* Optimized set_load which removes costly spin wait in timer_start */ -static int omap_dm_timer_set_load_start(struct omap_dm_timer *timer, - int autoreload, unsigned int load) -{ - u32 l; - - if (unlikely(!timer)) - return -EINVAL; - - omap_dm_timer_enable(timer); - - l = omap_dm_timer_read_reg(timer, OMAP_TIMER_CTRL_REG); - if (autoreload) { - l |= OMAP_TIMER_CTRL_AR; - omap_dm_timer_write_reg(timer, OMAP_TIMER_LOAD_REG, load); - } else { - l &= ~OMAP_TIMER_CTRL_AR; - } - l |= OMAP_TIMER_CTRL_ST; - - __omap_dm_timer_load_start(timer, l, load, timer->posted); - - /* Save the context */ - timer->context.tclr = l; - timer->context.tldr = load; - timer->context.tcrr = load; - return 0; -} static int omap_dm_timer_set_match(struct omap_dm_timer *timer, int enable, unsigned int match) { -- cgit v1.2.3 From ace965696da2611af759f0284e26342b7b6cec89 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Mon, 1 Apr 2019 13:25:10 +0200 Subject: serial: sh-sci: Fix HSCIF RX sampling point calculation There are several issues with the formula used for calculating the deviation from the intended rate: 1. While min_err and last_stop are signed, srr and baud are unsigned. Hence the signed values are promoted to unsigned, which will lead to a bogus value of deviation if min_err is negative, 2. Srr is the register field value, which is one less than the actual sampling rate factor, 3. The divisions do not use rounding. Fix this by casting unsigned variables to int, adding one to srr, and using a single DIV_ROUND_CLOSEST(). Fixes: 63ba1e00f178a448 ("serial: sh-sci: Support for HSCIF RX sampling point adjustment") Signed-off-by: Geert Uytterhoeven Reviewed-by: Mukesh Ojha Cc: stable Reviewed-by: Ulrich Hecht Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/sh-sci.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/tty/serial/sh-sci.c b/drivers/tty/serial/sh-sci.c index 2d1c626312cd..55ef6e577f46 100644 --- a/drivers/tty/serial/sh-sci.c +++ b/drivers/tty/serial/sh-sci.c @@ -2512,7 +2512,9 @@ done: * center of the last stop bit in sampling clocks. */ int last_stop = bits * 2 - 1; - int deviation = min_err * srr * last_stop / 2 / baud; + int deviation = DIV_ROUND_CLOSEST(min_err * last_stop * + (int)(srr + 1), + 2 * (int)baud); if (abs(deviation) >= 2) { /* At least two sampling clocks off at the -- cgit v1.2.3 From 6b87784b53592a90d21576be8eff688b56d93cce Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Fri, 29 Mar 2019 10:10:26 +0100 Subject: serial: sh-sci: Fix HSCIF RX sampling point adjustment The calculation of the sampling point has min() and max() exchanged. Fix this by using the clamp() helper instead. Fixes: 63ba1e00f178a448 ("serial: sh-sci: Support for HSCIF RX sampling point adjustment") Signed-off-by: Geert Uytterhoeven Reviewed-by: Ulrich Hecht Reviewed-by: Wolfram Sang Acked-by: Dirk Behme Cc: stable Reviewed-by: Simon Horman Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/sh-sci.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/tty/serial/sh-sci.c b/drivers/tty/serial/sh-sci.c index 55ef6e577f46..3cd139752d3f 100644 --- a/drivers/tty/serial/sh-sci.c +++ b/drivers/tty/serial/sh-sci.c @@ -2521,7 +2521,7 @@ done: * last stop bit; we can increase the error * margin by shifting the sampling point. */ - int shift = min(-8, max(7, deviation / 2)); + int shift = clamp(deviation / 2, -8, 7); hssrr |= (shift << HSCIF_SRHP_SHIFT) & HSCIF_SRHP_MASK; -- cgit v1.2.3 From e00164a0f000de893944981f41a568c981aca658 Mon Sep 17 00:00:00 2001 From: Guoqing Jiang Date: Tue, 9 Apr 2019 16:16:38 +0800 Subject: sc16is7xx: move label 'err_spi' to correct section MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit err_spi is used when SERIAL_SC16IS7XX_SPI is enabled, so make the label only available under SERIAL_SC16IS7XX_SPI option. Otherwise, the below warning appears. drivers/tty/serial/sc16is7xx.c:1523:1: warning: label ‘err_spi’ defined but not used [-Wunused-label] err_spi: ^~~~~~~ Signed-off-by: Guoqing Jiang Fixes: ac0cdb3d9901 ("sc16is7xx: missing unregister/delete driver on error in sc16is7xx_init()") Signed-off-by: Arnd Bergmann Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/sc16is7xx.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/tty/serial/sc16is7xx.c b/drivers/tty/serial/sc16is7xx.c index 09a183dfc526..22381a8c72e4 100644 --- a/drivers/tty/serial/sc16is7xx.c +++ b/drivers/tty/serial/sc16is7xx.c @@ -1520,10 +1520,12 @@ static int __init sc16is7xx_init(void) #endif return ret; +#ifdef CONFIG_SERIAL_SC16IS7XX_SPI err_spi: #ifdef CONFIG_SERIAL_SC16IS7XX_I2C i2c_del_driver(&sc16is7xx_i2c_uart_driver); #endif +#endif err_i2c: uart_unregister_driver(&sc16is7xx_uart); return ret; -- cgit v1.2.3 From 2b27924bb1d48e3775f432b70bdad5e6dd4e7798 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Mon, 15 Apr 2019 15:57:19 +0200 Subject: KVM: nVMX: always use early vmcs check when EPT is disabled The remaining failures of vmx.flat when EPT is disabled are caused by incorrectly reflecting VMfails to the L1 hypervisor. What happens is that nested_vmx_restore_host_state corrupts the guest CR3, reloading it with the host's shadow CR3 instead, because it blindly loads GUEST_CR3 from the vmcs01. For simplicity let's just always use hardware VMCS checks when EPT is disabled. This way, nested_vmx_restore_host_state is not reached at all (or at least shouldn't be reached). Signed-off-by: Paolo Bonzini --- arch/x86/include/uapi/asm/vmx.h | 1 + arch/x86/kvm/vmx/nested.c | 22 ++++++++++++++++++++-- 2 files changed, 21 insertions(+), 2 deletions(-) diff --git a/arch/x86/include/uapi/asm/vmx.h b/arch/x86/include/uapi/asm/vmx.h index f0b0c90dd398..d213ec5c3766 100644 --- a/arch/x86/include/uapi/asm/vmx.h +++ b/arch/x86/include/uapi/asm/vmx.h @@ -146,6 +146,7 @@ #define VMX_ABORT_SAVE_GUEST_MSR_FAIL 1 #define VMX_ABORT_LOAD_HOST_PDPTE_FAIL 2 +#define VMX_ABORT_VMCS_CORRUPTED 3 #define VMX_ABORT_LOAD_HOST_MSR_FAIL 4 #endif /* _UAPIVMX_H */ diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c index a22af5a85540..6401eb7ef19c 100644 --- a/arch/x86/kvm/vmx/nested.c +++ b/arch/x86/kvm/vmx/nested.c @@ -3796,8 +3796,18 @@ static void nested_vmx_restore_host_state(struct kvm_vcpu *vcpu) vmx_set_cr4(vcpu, vmcs_readl(CR4_READ_SHADOW)); nested_ept_uninit_mmu_context(vcpu); - vcpu->arch.cr3 = vmcs_readl(GUEST_CR3); - __set_bit(VCPU_EXREG_CR3, (ulong *)&vcpu->arch.regs_avail); + + /* + * This is only valid if EPT is in use, otherwise the vmcs01 GUEST_CR3 + * points to shadow pages! Fortunately we only get here after a WARN_ON + * if EPT is disabled, so a VMabort is perfectly fine. + */ + if (enable_ept) { + vcpu->arch.cr3 = vmcs_readl(GUEST_CR3); + __set_bit(VCPU_EXREG_CR3, (ulong *)&vcpu->arch.regs_avail); + } else { + nested_vmx_abort(vcpu, VMX_ABORT_VMCS_CORRUPTED); + } /* * Use ept_save_pdptrs(vcpu) to load the MMU's cached PDPTRs @@ -5745,6 +5755,14 @@ __init int nested_vmx_hardware_setup(int (*exit_handlers[])(struct kvm_vcpu *)) { int i; + /* + * Without EPT it is not possible to restore L1's CR3 and PDPTR on + * VMfail, because they are not available in vmcs01. Just always + * use hardware checks. + */ + if (!enable_ept) + nested_early_check = 1; + if (!cpu_has_vmx_shadow_vmcs()) enable_shadow_vmcs = 0; if (enable_shadow_vmcs) { -- cgit v1.2.3 From bc8a3d8925a8fa09fa550e0da115d95851ce33c6 Mon Sep 17 00:00:00 2001 From: Ben Gardon Date: Mon, 8 Apr 2019 11:07:30 -0700 Subject: kvm: mmu: Fix overflow on kvm mmu page limit calculation KVM bases its memory usage limits on the total number of guest pages across all memslots. However, those limits, and the calculations to produce them, use 32 bit unsigned integers. This can result in overflow if a VM has more guest pages that can be represented by a u32. As a result of this overflow, KVM can use a low limit on the number of MMU pages it will allocate. This makes KVM unable to map all of guest memory at once, prompting spurious faults. Tested: Ran all kvm-unit-tests on an Intel Haswell machine. This patch introduced no new failures. Signed-off-by: Ben Gardon Cc: stable@vger.kernel.org Signed-off-by: Paolo Bonzini --- arch/x86/include/asm/kvm_host.h | 12 ++++++------ arch/x86/kvm/mmu.c | 13 ++++++------- arch/x86/kvm/mmu.h | 2 +- arch/x86/kvm/x86.c | 4 ++-- 4 files changed, 15 insertions(+), 16 deletions(-) diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 159b5988292f..9b7b731a0032 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -126,7 +126,7 @@ static inline gfn_t gfn_to_index(gfn_t gfn, gfn_t base_gfn, int level) } #define KVM_PERMILLE_MMU_PAGES 20 -#define KVM_MIN_ALLOC_MMU_PAGES 64 +#define KVM_MIN_ALLOC_MMU_PAGES 64UL #define KVM_MMU_HASH_SHIFT 12 #define KVM_NUM_MMU_PAGES (1 << KVM_MMU_HASH_SHIFT) #define KVM_MIN_FREE_MMU_PAGES 5 @@ -844,9 +844,9 @@ enum kvm_irqchip_mode { }; struct kvm_arch { - unsigned int n_used_mmu_pages; - unsigned int n_requested_mmu_pages; - unsigned int n_max_mmu_pages; + unsigned long n_used_mmu_pages; + unsigned long n_requested_mmu_pages; + unsigned long n_max_mmu_pages; unsigned int indirect_shadow_pages; struct hlist_head mmu_page_hash[KVM_NUM_MMU_PAGES]; /* @@ -1256,8 +1256,8 @@ void kvm_mmu_clear_dirty_pt_masked(struct kvm *kvm, gfn_t gfn_offset, unsigned long mask); void kvm_mmu_zap_all(struct kvm *kvm); void kvm_mmu_invalidate_mmio_sptes(struct kvm *kvm, u64 gen); -unsigned int kvm_mmu_calculate_default_mmu_pages(struct kvm *kvm); -void kvm_mmu_change_mmu_pages(struct kvm *kvm, unsigned int kvm_nr_mmu_pages); +unsigned long kvm_mmu_calculate_default_mmu_pages(struct kvm *kvm); +void kvm_mmu_change_mmu_pages(struct kvm *kvm, unsigned long kvm_nr_mmu_pages); int load_pdptrs(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu, unsigned long cr3); bool pdptrs_changed(struct kvm_vcpu *vcpu); diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 85f753728953..e10962dfc203 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -2007,7 +2007,7 @@ static int is_empty_shadow_page(u64 *spt) * aggregate version in order to make the slab shrinker * faster */ -static inline void kvm_mod_used_mmu_pages(struct kvm *kvm, int nr) +static inline void kvm_mod_used_mmu_pages(struct kvm *kvm, unsigned long nr) { kvm->arch.n_used_mmu_pages += nr; percpu_counter_add(&kvm_total_used_mmu_pages, nr); @@ -2763,7 +2763,7 @@ static bool prepare_zap_oldest_mmu_page(struct kvm *kvm, * Changing the number of mmu pages allocated to the vm * Note: if goal_nr_mmu_pages is too small, you will get dead lock */ -void kvm_mmu_change_mmu_pages(struct kvm *kvm, unsigned int goal_nr_mmu_pages) +void kvm_mmu_change_mmu_pages(struct kvm *kvm, unsigned long goal_nr_mmu_pages) { LIST_HEAD(invalid_list); @@ -6031,10 +6031,10 @@ out: /* * Calculate mmu pages needed for kvm. */ -unsigned int kvm_mmu_calculate_default_mmu_pages(struct kvm *kvm) +unsigned long kvm_mmu_calculate_default_mmu_pages(struct kvm *kvm) { - unsigned int nr_mmu_pages; - unsigned int nr_pages = 0; + unsigned long nr_mmu_pages; + unsigned long nr_pages = 0; struct kvm_memslots *slots; struct kvm_memory_slot *memslot; int i; @@ -6047,8 +6047,7 @@ unsigned int kvm_mmu_calculate_default_mmu_pages(struct kvm *kvm) } nr_mmu_pages = nr_pages * KVM_PERMILLE_MMU_PAGES / 1000; - nr_mmu_pages = max(nr_mmu_pages, - (unsigned int) KVM_MIN_ALLOC_MMU_PAGES); + nr_mmu_pages = max(nr_mmu_pages, KVM_MIN_ALLOC_MMU_PAGES); return nr_mmu_pages; } diff --git a/arch/x86/kvm/mmu.h b/arch/x86/kvm/mmu.h index bbdc60f2fae8..54c2a377795b 100644 --- a/arch/x86/kvm/mmu.h +++ b/arch/x86/kvm/mmu.h @@ -64,7 +64,7 @@ bool kvm_can_do_async_pf(struct kvm_vcpu *vcpu); int kvm_handle_page_fault(struct kvm_vcpu *vcpu, u64 error_code, u64 fault_address, char *insn, int insn_len); -static inline unsigned int kvm_mmu_available_pages(struct kvm *kvm) +static inline unsigned long kvm_mmu_available_pages(struct kvm *kvm) { if (kvm->arch.n_max_mmu_pages > kvm->arch.n_used_mmu_pages) return kvm->arch.n_max_mmu_pages - diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 099b851dabaf..455f156f56ed 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -4270,7 +4270,7 @@ static int kvm_vm_ioctl_set_identity_map_addr(struct kvm *kvm, } static int kvm_vm_ioctl_set_nr_mmu_pages(struct kvm *kvm, - u32 kvm_nr_mmu_pages) + unsigned long kvm_nr_mmu_pages) { if (kvm_nr_mmu_pages < KVM_MIN_ALLOC_MMU_PAGES) return -EINVAL; @@ -4284,7 +4284,7 @@ static int kvm_vm_ioctl_set_nr_mmu_pages(struct kvm *kvm, return 0; } -static int kvm_vm_ioctl_get_nr_mmu_pages(struct kvm *kvm) +static unsigned long kvm_vm_ioctl_get_nr_mmu_pages(struct kvm *kvm) { return kvm->arch.n_max_mmu_pages; } -- cgit v1.2.3 From 4a58038b9e420276157785afa0a0bbb4b9bc2265 Mon Sep 17 00:00:00 2001 From: "Suthikulpanit, Suravee" Date: Wed, 20 Mar 2019 08:12:28 +0000 Subject: Revert "svm: Fix AVIC incomplete IPI emulation" MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This reverts commit bb218fbcfaaa3b115d4cd7a43c0ca164f3a96e57. As Oren Twaig pointed out the old discussion: https://patchwork.kernel.org/patch/8292231/ that the change coud potentially cause an extra IPI to be sent to the destination vcpu because the AVIC hardware already set the IRR bit before the incomplete IPI #VMEXIT with id=1 (target vcpu is not running). Since writting to ICR and ICR2 will also set the IRR. If something triggers the destination vcpu to get scheduled before the emulation finishes, then this could result in an additional IPI. Also, the issue mentioned in the commit bb218fbcfaaa was misdiagnosed. Cc: Radim Krčmář Cc: Paolo Bonzini Reported-by: Oren Twaig Signed-off-by: Suravee Suthikulpanit Cc: stable@vger.kernel.org Signed-off-by: Paolo Bonzini --- arch/x86/kvm/svm.c | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index e0a791c3d4fc..d7b14c902052 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -4517,14 +4517,25 @@ static int avic_incomplete_ipi_interception(struct vcpu_svm *svm) kvm_lapic_reg_write(apic, APIC_ICR, icrl); break; case AVIC_IPI_FAILURE_TARGET_NOT_RUNNING: { + int i; + struct kvm_vcpu *vcpu; + struct kvm *kvm = svm->vcpu.kvm; struct kvm_lapic *apic = svm->vcpu.arch.apic; /* - * Update ICR high and low, then emulate sending IPI, - * which is handled when writing APIC_ICR. + * At this point, we expect that the AVIC HW has already + * set the appropriate IRR bits on the valid target + * vcpus. So, we just need to kick the appropriate vcpu. */ - kvm_lapic_reg_write(apic, APIC_ICR2, icrh); - kvm_lapic_reg_write(apic, APIC_ICR, icrl); + kvm_for_each_vcpu(i, vcpu, kvm) { + bool m = kvm_apic_match_dest(vcpu, apic, + icrl & KVM_APIC_SHORT_MASK, + GET_APIC_DEST_FIELD(icrh), + icrl & KVM_APIC_DEST_MASK); + + if (m && !avic_vcpu_is_running(vcpu)) + kvm_vcpu_wake_up(vcpu); + } break; } case AVIC_IPI_FAILURE_INVALID_TARGET: -- cgit v1.2.3 From e44e3eacccfd2294a1ce279f68452b1635d7fa82 Mon Sep 17 00:00:00 2001 From: "Suthikulpanit, Suravee" Date: Tue, 26 Mar 2019 03:57:37 +0000 Subject: svm/avic: Fix invalidate logical APIC id entry Only clear the valid bit when invalidate logical APIC id entry. The current logic clear the valid bit, but also set the rest of the bits (including reserved bits) to 1. Fixes: 98d90582be2e ('svm: Fix AVIC DFR and LDR handling') Signed-off-by: Suravee Suthikulpanit Cc: stable@vger.kernel.org Signed-off-by: Paolo Bonzini --- arch/x86/kvm/svm.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index d7b14c902052..933f19d840fe 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -262,6 +262,7 @@ struct amd_svm_iommu_ir { }; #define AVIC_LOGICAL_ID_ENTRY_GUEST_PHYSICAL_ID_MASK (0xFF) +#define AVIC_LOGICAL_ID_ENTRY_VALID_BIT 31 #define AVIC_LOGICAL_ID_ENTRY_VALID_MASK (1 << 31) #define AVIC_PHYSICAL_ID_ENTRY_HOST_PHYSICAL_ID_MASK (0xFFULL) @@ -4607,7 +4608,7 @@ static void avic_invalidate_logical_id_entry(struct kvm_vcpu *vcpu) u32 *entry = avic_get_logical_id_entry(vcpu, svm->ldr_reg, flat); if (entry) - WRITE_ONCE(*entry, (u32) ~AVIC_LOGICAL_ID_ENTRY_VALID_MASK); + clear_bit(AVIC_LOGICAL_ID_ENTRY_VALID_BIT, (unsigned long *)entry); } static int avic_handle_ldr_update(struct kvm_vcpu *vcpu) -- cgit v1.2.3 From 99c221796a810055974b54c02e8f53297e48d146 Mon Sep 17 00:00:00 2001 From: Vitaly Kuznetsov Date: Wed, 3 Apr 2019 16:06:42 +0200 Subject: KVM: x86: svm: make sure NMI is injected after nmi_singlestep I noticed that apic test from kvm-unit-tests always hangs on my EPYC 7401P, the hanging test nmi-after-sti is trying to deliver 30000 NMIs and tracing shows that we're sometimes able to deliver a few but never all. When we're trying to inject an NMI we may fail to do so immediately for various reasons, however, we still need to inject it so enable_nmi_window() arms nmi_singlestep mode. #DB occurs as expected, but we're not checking for pending NMIs before entering the guest and unless there's a different event to process, the NMI will never get delivered. Make KVM_REQ_EVENT request on the vCPU from db_interception() to make sure pending NMIs are checked and possibly injected. Signed-off-by: Vitaly Kuznetsov Cc: stable@vger.kernel.org Signed-off-by: Paolo Bonzini --- arch/x86/kvm/svm.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 933f19d840fe..c6815aef2cac 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -2693,6 +2693,7 @@ static int npf_interception(struct vcpu_svm *svm) static int db_interception(struct vcpu_svm *svm) { struct kvm_run *kvm_run = svm->vcpu.run; + struct kvm_vcpu *vcpu = &svm->vcpu; if (!(svm->vcpu.guest_debug & (KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP)) && @@ -2703,6 +2704,8 @@ static int db_interception(struct vcpu_svm *svm) if (svm->nmi_singlestep) { disable_nmi_singlestep(svm); + /* Make sure we check for pending NMIs upon entry */ + kvm_make_request(KVM_REQ_EVENT, vcpu); } if (svm->vcpu.guest_debug & -- cgit v1.2.3 From 1811d979c71621aafc7b879477202d286f7e863b Mon Sep 17 00:00:00 2001 From: WANG Chao Date: Fri, 12 Apr 2019 15:55:39 +0800 Subject: x86/kvm: move kvm_load/put_guest_xcr0 into atomic context guest xcr0 could leak into host when MCE happens in guest mode. Because do_machine_check() could schedule out at a few places. For example: kvm_load_guest_xcr0 ... kvm_x86_ops->run(vcpu) { vmx_vcpu_run vmx_complete_atomic_exit kvm_machine_check do_machine_check do_memory_failure memory_failure lock_page In this case, host_xcr0 is 0x2ff, guest vcpu xcr0 is 0xff. After schedule out, host cpu has guest xcr0 loaded (0xff). In __switch_to { switch_fpu_finish copy_kernel_to_fpregs XRSTORS If any bit i in XSTATE_BV[i] == 1 and xcr0[i] == 0, XRSTORS will generate #GP (In this case, bit 9). Then ex_handler_fprestore kicks in and tries to reinitialize fpu by restoring init fpu state. Same story as last #GP, except we get DOUBLE FAULT this time. Cc: stable@vger.kernel.org Signed-off-by: WANG Chao Signed-off-by: Paolo Bonzini --- arch/x86/kvm/svm.c | 2 ++ arch/x86/kvm/vmx/vmx.c | 4 ++++ arch/x86/kvm/x86.c | 10 ++++------ arch/x86/kvm/x86.h | 2 ++ 4 files changed, 12 insertions(+), 6 deletions(-) diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index c6815aef2cac..675cecb3fa9c 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -5636,6 +5636,7 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu) svm->vmcb->save.cr2 = vcpu->arch.cr2; clgi(); + kvm_load_guest_xcr0(vcpu); /* * If this vCPU has touched SPEC_CTRL, restore the guest's value if @@ -5781,6 +5782,7 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu) if (unlikely(svm->vmcb->control.exit_code == SVM_EXIT_NMI)) kvm_before_interrupt(&svm->vcpu); + kvm_put_guest_xcr0(vcpu); stgi(); /* Any pending NMI will happen here */ diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index 7a8f75fc6b7e..88060a621db2 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -6410,6 +6410,8 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu) if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP) vmx_set_interrupt_shadow(vcpu, 0); + kvm_load_guest_xcr0(vcpu); + if (static_cpu_has(X86_FEATURE_PKU) && kvm_read_cr4_bits(vcpu, X86_CR4_PKE) && vcpu->arch.pkru != vmx->host_pkru) @@ -6506,6 +6508,8 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu) __write_pkru(vmx->host_pkru); } + kvm_put_guest_xcr0(vcpu); + vmx->nested.nested_run_pending = 0; vmx->idt_vectoring_info = 0; diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 455f156f56ed..f05891b8df7c 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -800,7 +800,7 @@ void kvm_lmsw(struct kvm_vcpu *vcpu, unsigned long msw) } EXPORT_SYMBOL_GPL(kvm_lmsw); -static void kvm_load_guest_xcr0(struct kvm_vcpu *vcpu) +void kvm_load_guest_xcr0(struct kvm_vcpu *vcpu) { if (kvm_read_cr4_bits(vcpu, X86_CR4_OSXSAVE) && !vcpu->guest_xcr0_loaded) { @@ -810,8 +810,9 @@ static void kvm_load_guest_xcr0(struct kvm_vcpu *vcpu) vcpu->guest_xcr0_loaded = 1; } } +EXPORT_SYMBOL_GPL(kvm_load_guest_xcr0); -static void kvm_put_guest_xcr0(struct kvm_vcpu *vcpu) +void kvm_put_guest_xcr0(struct kvm_vcpu *vcpu) { if (vcpu->guest_xcr0_loaded) { if (vcpu->arch.xcr0 != host_xcr0) @@ -819,6 +820,7 @@ static void kvm_put_guest_xcr0(struct kvm_vcpu *vcpu) vcpu->guest_xcr0_loaded = 0; } } +EXPORT_SYMBOL_GPL(kvm_put_guest_xcr0); static int __kvm_set_xcr(struct kvm_vcpu *vcpu, u32 index, u64 xcr) { @@ -7865,8 +7867,6 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) goto cancel_injection; } - kvm_load_guest_xcr0(vcpu); - if (req_immediate_exit) { kvm_make_request(KVM_REQ_EVENT, vcpu); kvm_x86_ops->request_immediate_exit(vcpu); @@ -7919,8 +7919,6 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) vcpu->mode = OUTSIDE_GUEST_MODE; smp_wmb(); - kvm_put_guest_xcr0(vcpu); - kvm_before_interrupt(vcpu); kvm_x86_ops->handle_external_intr(vcpu); kvm_after_interrupt(vcpu); diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h index 28406aa1136d..aedc5d0d4989 100644 --- a/arch/x86/kvm/x86.h +++ b/arch/x86/kvm/x86.h @@ -347,4 +347,6 @@ static inline void kvm_after_interrupt(struct kvm_vcpu *vcpu) __this_cpu_write(current_vcpu, NULL); } +void kvm_load_guest_xcr0(struct kvm_vcpu *vcpu); +void kvm_put_guest_xcr0(struct kvm_vcpu *vcpu); #endif -- cgit v1.2.3 From 672ff6cff80ca43bf3258410d2b887036969df5f Mon Sep 17 00:00:00 2001 From: Liran Alon Date: Mon, 25 Mar 2019 21:10:17 +0200 Subject: KVM: x86: Raise #GP when guest vCPU do not support PMU Before this change, reading a VMware pseduo PMC will succeed even when PMU is not supported by guest. This can easily be seen by running kvm-unit-test vmware_backdoors with "-cpu host,-pmu" option. Reviewed-by: Mihai Carabas Signed-off-by: Liran Alon Signed-off-by: Paolo Bonzini --- arch/x86/kvm/pmu.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/arch/x86/kvm/pmu.c b/arch/x86/kvm/pmu.c index 58ead7db71a3..e39741997893 100644 --- a/arch/x86/kvm/pmu.c +++ b/arch/x86/kvm/pmu.c @@ -281,9 +281,13 @@ static int kvm_pmu_rdpmc_vmware(struct kvm_vcpu *vcpu, unsigned idx, u64 *data) int kvm_pmu_rdpmc(struct kvm_vcpu *vcpu, unsigned idx, u64 *data) { bool fast_mode = idx & (1u << 31); + struct kvm_pmu *pmu = vcpu_to_pmu(vcpu); struct kvm_pmc *pmc; u64 ctr_val; + if (!pmu->version) + return 1; + if (is_vmware_backdoor_pmc(idx)) return kvm_pmu_rdpmc_vmware(vcpu, idx, data); -- cgit v1.2.3 From e51bfdb68725dc052d16241ace40ea3140f938aa Mon Sep 17 00:00:00 2001 From: Liran Alon Date: Mon, 25 Mar 2019 21:09:17 +0200 Subject: KVM: nVMX: Expose RDPMC-exiting only when guest supports PMU Issue was discovered when running kvm-unit-tests on KVM running as L1 on top of Hyper-V. When vmx_instruction_intercept unit-test attempts to run RDPMC to test RDPMC-exiting, it is intercepted by L1 KVM which it's EXIT_REASON_RDPMC handler raise #GP because vCPU exposed by Hyper-V doesn't support PMU. Instead of unit-test expectation to be reflected with EXIT_REASON_RDPMC. The reason vmx_instruction_intercept unit-test attempts to run RDPMC even though Hyper-V doesn't support PMU is because L1 expose to L2 support for RDPMC-exiting. Which is reasonable to assume that is supported only in case CPU supports PMU to being with. Above issue can easily be simulated by modifying vmx_instruction_intercept config in x86/unittests.cfg to run QEMU with "-cpu host,+vmx,-pmu" and run unit-test. To handle issue, change KVM to expose RDPMC-exiting only when guest supports PMU. Reported-by: Saar Amar Reviewed-by: Mihai Carabas Reviewed-by: Jim Mattson Signed-off-by: Liran Alon Signed-off-by: Paolo Bonzini --- arch/x86/kvm/vmx/vmx.c | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index 88060a621db2..5866e9e9f1e0 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -6856,6 +6856,30 @@ static void nested_vmx_entry_exit_ctls_update(struct kvm_vcpu *vcpu) } } +static bool guest_cpuid_has_pmu(struct kvm_vcpu *vcpu) +{ + struct kvm_cpuid_entry2 *entry; + union cpuid10_eax eax; + + entry = kvm_find_cpuid_entry(vcpu, 0xa, 0); + if (!entry) + return false; + + eax.full = entry->eax; + return (eax.split.version_id > 0); +} + +static void nested_vmx_procbased_ctls_update(struct kvm_vcpu *vcpu) +{ + struct vcpu_vmx *vmx = to_vmx(vcpu); + bool pmu_enabled = guest_cpuid_has_pmu(vcpu); + + if (pmu_enabled) + vmx->nested.msrs.procbased_ctls_high |= CPU_BASED_RDPMC_EXITING; + else + vmx->nested.msrs.procbased_ctls_high &= ~CPU_BASED_RDPMC_EXITING; +} + static void update_intel_pt_cfg(struct kvm_vcpu *vcpu) { struct vcpu_vmx *vmx = to_vmx(vcpu); @@ -6944,6 +6968,7 @@ static void vmx_cpuid_update(struct kvm_vcpu *vcpu) if (nested_vmx_allowed(vcpu)) { nested_vmx_cr_fixed1_bits_update(vcpu); nested_vmx_entry_exit_ctls_update(vcpu); + nested_vmx_procbased_ctls_update(vcpu); } if (boot_cpu_has(X86_FEATURE_INTEL_PT) && -- cgit v1.2.3 From ed19321fb6571214f410b30322e4ad6e6b7c3915 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Tue, 2 Apr 2019 08:03:09 -0700 Subject: KVM: x86: Load SMRAM in a single shot when leaving SMM RSM emulation is currently broken on VMX when the interrupted guest has CR4.VMXE=1. Rather than dance around the issue of HF_SMM_MASK being set when loading SMSTATE into architectural state, ideally RSM emulation itself would be reworked to clear HF_SMM_MASK prior to loading non-SMM architectural state. Ostensibly, the only motivation for having HF_SMM_MASK set throughout the loading of state from the SMRAM save state area is so that the memory accesses from GET_SMSTATE() are tagged with role.smm. Load all of the SMRAM save state area from guest memory at the beginning of RSM emulation, and load state from the buffer instead of reading guest memory one-by-one. This paves the way for clearing HF_SMM_MASK prior to loading state, and also aligns RSM with the enter_smm() behavior, which fills a buffer and writes SMRAM save state in a single go. Signed-off-by: Sean Christopherson Signed-off-by: Paolo Bonzini --- arch/x86/include/asm/kvm_emulate.h | 3 +- arch/x86/include/asm/kvm_host.h | 5 +- arch/x86/kvm/emulate.c | 149 ++++++++++++++++++------------------- arch/x86/kvm/svm.c | 20 ++--- arch/x86/kvm/vmx/vmx.c | 2 +- arch/x86/kvm/x86.c | 5 +- 6 files changed, 92 insertions(+), 92 deletions(-) diff --git a/arch/x86/include/asm/kvm_emulate.h b/arch/x86/include/asm/kvm_emulate.h index 93c4bf598fb0..ec489c432850 100644 --- a/arch/x86/include/asm/kvm_emulate.h +++ b/arch/x86/include/asm/kvm_emulate.h @@ -226,7 +226,8 @@ struct x86_emulate_ops { unsigned (*get_hflags)(struct x86_emulate_ctxt *ctxt); void (*set_hflags)(struct x86_emulate_ctxt *ctxt, unsigned hflags); - int (*pre_leave_smm)(struct x86_emulate_ctxt *ctxt, u64 smbase); + int (*pre_leave_smm)(struct x86_emulate_ctxt *ctxt, + const char *smstate); }; diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 9b7b731a0032..a9d03af34030 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -1182,7 +1182,7 @@ struct kvm_x86_ops { int (*smi_allowed)(struct kvm_vcpu *vcpu); int (*pre_enter_smm)(struct kvm_vcpu *vcpu, char *smstate); - int (*pre_leave_smm)(struct kvm_vcpu *vcpu, u64 smbase); + int (*pre_leave_smm)(struct kvm_vcpu *vcpu, const char *smstate); int (*enable_smi_window)(struct kvm_vcpu *vcpu); int (*mem_enc_op)(struct kvm *kvm, void __user *argp); @@ -1592,4 +1592,7 @@ static inline int kvm_cpu_get_apicid(int mps_cpu) #define put_smstate(type, buf, offset, val) \ *(type *)((buf) + (offset) - 0x7e00) = val +#define GET_SMSTATE(type, buf, offset) \ + (*(type *)((buf) + (offset) - 0x7e00)) + #endif /* _ASM_X86_KVM_HOST_H */ diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index c338984c850d..ae0d289b50fe 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -2339,16 +2339,6 @@ static int emulator_has_longmode(struct x86_emulate_ctxt *ctxt) return edx & bit(X86_FEATURE_LM); } -#define GET_SMSTATE(type, smbase, offset) \ - ({ \ - type __val; \ - int r = ctxt->ops->read_phys(ctxt, smbase + offset, &__val, \ - sizeof(__val)); \ - if (r != X86EMUL_CONTINUE) \ - return X86EMUL_UNHANDLEABLE; \ - __val; \ - }) - static void rsm_set_desc_flags(struct desc_struct *desc, u32 flags) { desc->g = (flags >> 23) & 1; @@ -2361,27 +2351,29 @@ static void rsm_set_desc_flags(struct desc_struct *desc, u32 flags) desc->type = (flags >> 8) & 15; } -static int rsm_load_seg_32(struct x86_emulate_ctxt *ctxt, u64 smbase, int n) +static int rsm_load_seg_32(struct x86_emulate_ctxt *ctxt, const char *smstate, + int n) { struct desc_struct desc; int offset; u16 selector; - selector = GET_SMSTATE(u32, smbase, 0x7fa8 + n * 4); + selector = GET_SMSTATE(u32, smstate, 0x7fa8 + n * 4); if (n < 3) offset = 0x7f84 + n * 12; else offset = 0x7f2c + (n - 3) * 12; - set_desc_base(&desc, GET_SMSTATE(u32, smbase, offset + 8)); - set_desc_limit(&desc, GET_SMSTATE(u32, smbase, offset + 4)); - rsm_set_desc_flags(&desc, GET_SMSTATE(u32, smbase, offset)); + set_desc_base(&desc, GET_SMSTATE(u32, smstate, offset + 8)); + set_desc_limit(&desc, GET_SMSTATE(u32, smstate, offset + 4)); + rsm_set_desc_flags(&desc, GET_SMSTATE(u32, smstate, offset)); ctxt->ops->set_segment(ctxt, selector, &desc, 0, n); return X86EMUL_CONTINUE; } -static int rsm_load_seg_64(struct x86_emulate_ctxt *ctxt, u64 smbase, int n) +static int rsm_load_seg_64(struct x86_emulate_ctxt *ctxt, const char *smstate, + int n) { struct desc_struct desc; int offset; @@ -2390,11 +2382,11 @@ static int rsm_load_seg_64(struct x86_emulate_ctxt *ctxt, u64 smbase, int n) offset = 0x7e00 + n * 16; - selector = GET_SMSTATE(u16, smbase, offset); - rsm_set_desc_flags(&desc, GET_SMSTATE(u16, smbase, offset + 2) << 8); - set_desc_limit(&desc, GET_SMSTATE(u32, smbase, offset + 4)); - set_desc_base(&desc, GET_SMSTATE(u32, smbase, offset + 8)); - base3 = GET_SMSTATE(u32, smbase, offset + 12); + selector = GET_SMSTATE(u16, smstate, offset); + rsm_set_desc_flags(&desc, GET_SMSTATE(u16, smstate, offset + 2) << 8); + set_desc_limit(&desc, GET_SMSTATE(u32, smstate, offset + 4)); + set_desc_base(&desc, GET_SMSTATE(u32, smstate, offset + 8)); + base3 = GET_SMSTATE(u32, smstate, offset + 12); ctxt->ops->set_segment(ctxt, selector, &desc, base3, n); return X86EMUL_CONTINUE; @@ -2445,7 +2437,8 @@ static int rsm_enter_protected_mode(struct x86_emulate_ctxt *ctxt, return X86EMUL_CONTINUE; } -static int rsm_load_state_32(struct x86_emulate_ctxt *ctxt, u64 smbase) +static int rsm_load_state_32(struct x86_emulate_ctxt *ctxt, + const char *smstate) { struct desc_struct desc; struct desc_ptr dt; @@ -2453,53 +2446,54 @@ static int rsm_load_state_32(struct x86_emulate_ctxt *ctxt, u64 smbase) u32 val, cr0, cr3, cr4; int i; - cr0 = GET_SMSTATE(u32, smbase, 0x7ffc); - cr3 = GET_SMSTATE(u32, smbase, 0x7ff8); - ctxt->eflags = GET_SMSTATE(u32, smbase, 0x7ff4) | X86_EFLAGS_FIXED; - ctxt->_eip = GET_SMSTATE(u32, smbase, 0x7ff0); + cr0 = GET_SMSTATE(u32, smstate, 0x7ffc); + cr3 = GET_SMSTATE(u32, smstate, 0x7ff8); + ctxt->eflags = GET_SMSTATE(u32, smstate, 0x7ff4) | X86_EFLAGS_FIXED; + ctxt->_eip = GET_SMSTATE(u32, smstate, 0x7ff0); for (i = 0; i < 8; i++) - *reg_write(ctxt, i) = GET_SMSTATE(u32, smbase, 0x7fd0 + i * 4); + *reg_write(ctxt, i) = GET_SMSTATE(u32, smstate, 0x7fd0 + i * 4); - val = GET_SMSTATE(u32, smbase, 0x7fcc); + val = GET_SMSTATE(u32, smstate, 0x7fcc); ctxt->ops->set_dr(ctxt, 6, (val & DR6_VOLATILE) | DR6_FIXED_1); - val = GET_SMSTATE(u32, smbase, 0x7fc8); + val = GET_SMSTATE(u32, smstate, 0x7fc8); ctxt->ops->set_dr(ctxt, 7, (val & DR7_VOLATILE) | DR7_FIXED_1); - selector = GET_SMSTATE(u32, smbase, 0x7fc4); - set_desc_base(&desc, GET_SMSTATE(u32, smbase, 0x7f64)); - set_desc_limit(&desc, GET_SMSTATE(u32, smbase, 0x7f60)); - rsm_set_desc_flags(&desc, GET_SMSTATE(u32, smbase, 0x7f5c)); + selector = GET_SMSTATE(u32, smstate, 0x7fc4); + set_desc_base(&desc, GET_SMSTATE(u32, smstate, 0x7f64)); + set_desc_limit(&desc, GET_SMSTATE(u32, smstate, 0x7f60)); + rsm_set_desc_flags(&desc, GET_SMSTATE(u32, smstate, 0x7f5c)); ctxt->ops->set_segment(ctxt, selector, &desc, 0, VCPU_SREG_TR); - selector = GET_SMSTATE(u32, smbase, 0x7fc0); - set_desc_base(&desc, GET_SMSTATE(u32, smbase, 0x7f80)); - set_desc_limit(&desc, GET_SMSTATE(u32, smbase, 0x7f7c)); - rsm_set_desc_flags(&desc, GET_SMSTATE(u32, smbase, 0x7f78)); + selector = GET_SMSTATE(u32, smstate, 0x7fc0); + set_desc_base(&desc, GET_SMSTATE(u32, smstate, 0x7f80)); + set_desc_limit(&desc, GET_SMSTATE(u32, smstate, 0x7f7c)); + rsm_set_desc_flags(&desc, GET_SMSTATE(u32, smstate, 0x7f78)); ctxt->ops->set_segment(ctxt, selector, &desc, 0, VCPU_SREG_LDTR); - dt.address = GET_SMSTATE(u32, smbase, 0x7f74); - dt.size = GET_SMSTATE(u32, smbase, 0x7f70); + dt.address = GET_SMSTATE(u32, smstate, 0x7f74); + dt.size = GET_SMSTATE(u32, smstate, 0x7f70); ctxt->ops->set_gdt(ctxt, &dt); - dt.address = GET_SMSTATE(u32, smbase, 0x7f58); - dt.size = GET_SMSTATE(u32, smbase, 0x7f54); + dt.address = GET_SMSTATE(u32, smstate, 0x7f58); + dt.size = GET_SMSTATE(u32, smstate, 0x7f54); ctxt->ops->set_idt(ctxt, &dt); for (i = 0; i < 6; i++) { - int r = rsm_load_seg_32(ctxt, smbase, i); + int r = rsm_load_seg_32(ctxt, smstate, i); if (r != X86EMUL_CONTINUE) return r; } - cr4 = GET_SMSTATE(u32, smbase, 0x7f14); + cr4 = GET_SMSTATE(u32, smstate, 0x7f14); - ctxt->ops->set_smbase(ctxt, GET_SMSTATE(u32, smbase, 0x7ef8)); + ctxt->ops->set_smbase(ctxt, GET_SMSTATE(u32, smstate, 0x7ef8)); return rsm_enter_protected_mode(ctxt, cr0, cr3, cr4); } -static int rsm_load_state_64(struct x86_emulate_ctxt *ctxt, u64 smbase) +static int rsm_load_state_64(struct x86_emulate_ctxt *ctxt, + const char *smstate) { struct desc_struct desc; struct desc_ptr dt; @@ -2509,43 +2503,43 @@ static int rsm_load_state_64(struct x86_emulate_ctxt *ctxt, u64 smbase) int i, r; for (i = 0; i < 16; i++) - *reg_write(ctxt, i) = GET_SMSTATE(u64, smbase, 0x7ff8 - i * 8); + *reg_write(ctxt, i) = GET_SMSTATE(u64, smstate, 0x7ff8 - i * 8); - ctxt->_eip = GET_SMSTATE(u64, smbase, 0x7f78); - ctxt->eflags = GET_SMSTATE(u32, smbase, 0x7f70) | X86_EFLAGS_FIXED; + ctxt->_eip = GET_SMSTATE(u64, smstate, 0x7f78); + ctxt->eflags = GET_SMSTATE(u32, smstate, 0x7f70) | X86_EFLAGS_FIXED; - val = GET_SMSTATE(u32, smbase, 0x7f68); + val = GET_SMSTATE(u32, smstate, 0x7f68); ctxt->ops->set_dr(ctxt, 6, (val & DR6_VOLATILE) | DR6_FIXED_1); - val = GET_SMSTATE(u32, smbase, 0x7f60); + val = GET_SMSTATE(u32, smstate, 0x7f60); ctxt->ops->set_dr(ctxt, 7, (val & DR7_VOLATILE) | DR7_FIXED_1); - cr0 = GET_SMSTATE(u64, smbase, 0x7f58); - cr3 = GET_SMSTATE(u64, smbase, 0x7f50); - cr4 = GET_SMSTATE(u64, smbase, 0x7f48); - ctxt->ops->set_smbase(ctxt, GET_SMSTATE(u32, smbase, 0x7f00)); - val = GET_SMSTATE(u64, smbase, 0x7ed0); + cr0 = GET_SMSTATE(u64, smstate, 0x7f58); + cr3 = GET_SMSTATE(u64, smstate, 0x7f50); + cr4 = GET_SMSTATE(u64, smstate, 0x7f48); + ctxt->ops->set_smbase(ctxt, GET_SMSTATE(u32, smstate, 0x7f00)); + val = GET_SMSTATE(u64, smstate, 0x7ed0); ctxt->ops->set_msr(ctxt, MSR_EFER, val & ~EFER_LMA); - selector = GET_SMSTATE(u32, smbase, 0x7e90); - rsm_set_desc_flags(&desc, GET_SMSTATE(u32, smbase, 0x7e92) << 8); - set_desc_limit(&desc, GET_SMSTATE(u32, smbase, 0x7e94)); - set_desc_base(&desc, GET_SMSTATE(u32, smbase, 0x7e98)); - base3 = GET_SMSTATE(u32, smbase, 0x7e9c); + selector = GET_SMSTATE(u32, smstate, 0x7e90); + rsm_set_desc_flags(&desc, GET_SMSTATE(u32, smstate, 0x7e92) << 8); + set_desc_limit(&desc, GET_SMSTATE(u32, smstate, 0x7e94)); + set_desc_base(&desc, GET_SMSTATE(u32, smstate, 0x7e98)); + base3 = GET_SMSTATE(u32, smstate, 0x7e9c); ctxt->ops->set_segment(ctxt, selector, &desc, base3, VCPU_SREG_TR); - dt.size = GET_SMSTATE(u32, smbase, 0x7e84); - dt.address = GET_SMSTATE(u64, smbase, 0x7e88); + dt.size = GET_SMSTATE(u32, smstate, 0x7e84); + dt.address = GET_SMSTATE(u64, smstate, 0x7e88); ctxt->ops->set_idt(ctxt, &dt); - selector = GET_SMSTATE(u32, smbase, 0x7e70); - rsm_set_desc_flags(&desc, GET_SMSTATE(u32, smbase, 0x7e72) << 8); - set_desc_limit(&desc, GET_SMSTATE(u32, smbase, 0x7e74)); - set_desc_base(&desc, GET_SMSTATE(u32, smbase, 0x7e78)); - base3 = GET_SMSTATE(u32, smbase, 0x7e7c); + selector = GET_SMSTATE(u32, smstate, 0x7e70); + rsm_set_desc_flags(&desc, GET_SMSTATE(u32, smstate, 0x7e72) << 8); + set_desc_limit(&desc, GET_SMSTATE(u32, smstate, 0x7e74)); + set_desc_base(&desc, GET_SMSTATE(u32, smstate, 0x7e78)); + base3 = GET_SMSTATE(u32, smstate, 0x7e7c); ctxt->ops->set_segment(ctxt, selector, &desc, base3, VCPU_SREG_LDTR); - dt.size = GET_SMSTATE(u32, smbase, 0x7e64); - dt.address = GET_SMSTATE(u64, smbase, 0x7e68); + dt.size = GET_SMSTATE(u32, smstate, 0x7e64); + dt.address = GET_SMSTATE(u64, smstate, 0x7e68); ctxt->ops->set_gdt(ctxt, &dt); r = rsm_enter_protected_mode(ctxt, cr0, cr3, cr4); @@ -2553,7 +2547,7 @@ static int rsm_load_state_64(struct x86_emulate_ctxt *ctxt, u64 smbase) return r; for (i = 0; i < 6; i++) { - r = rsm_load_seg_64(ctxt, smbase, i); + r = rsm_load_seg_64(ctxt, smstate, i); if (r != X86EMUL_CONTINUE) return r; } @@ -2564,12 +2558,19 @@ static int rsm_load_state_64(struct x86_emulate_ctxt *ctxt, u64 smbase) static int em_rsm(struct x86_emulate_ctxt *ctxt) { unsigned long cr0, cr4, efer; + char buf[512]; u64 smbase; int ret; if ((ctxt->ops->get_hflags(ctxt) & X86EMUL_SMM_MASK) == 0) return emulate_ud(ctxt); + smbase = ctxt->ops->get_smbase(ctxt); + + ret = ctxt->ops->read_phys(ctxt, smbase + 0xfe00, buf, sizeof(buf)); + if (ret != X86EMUL_CONTINUE) + return X86EMUL_UNHANDLEABLE; + /* * Get back to real mode, to prepare a safe state in which to load * CR0/CR3/CR4/EFER. It's all a bit more complicated if the vCPU @@ -2605,20 +2606,18 @@ static int em_rsm(struct x86_emulate_ctxt *ctxt) efer = 0; ctxt->ops->set_msr(ctxt, MSR_EFER, efer); - smbase = ctxt->ops->get_smbase(ctxt); - /* * Give pre_leave_smm() a chance to make ISA-specific changes to the * vCPU state (e.g. enter guest mode) before loading state from the SMM * state-save area. */ - if (ctxt->ops->pre_leave_smm(ctxt, smbase)) + if (ctxt->ops->pre_leave_smm(ctxt, buf)) return X86EMUL_UNHANDLEABLE; if (emulator_has_longmode(ctxt)) - ret = rsm_load_state_64(ctxt, smbase + 0x8000); + ret = rsm_load_state_64(ctxt, buf); else - ret = rsm_load_state_32(ctxt, smbase + 0x8000); + ret = rsm_load_state_32(ctxt, buf); if (ret != X86EMUL_CONTINUE) { /* FIXME: should triple fault */ diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 675cecb3fa9c..6b1cd73e4053 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -6232,27 +6232,23 @@ static int svm_pre_enter_smm(struct kvm_vcpu *vcpu, char *smstate) return 0; } -static int svm_pre_leave_smm(struct kvm_vcpu *vcpu, u64 smbase) +static int svm_pre_leave_smm(struct kvm_vcpu *vcpu, const char *smstate) { struct vcpu_svm *svm = to_svm(vcpu); struct vmcb *nested_vmcb; struct page *page; - struct { - u64 guest; - u64 vmcb; - } svm_state_save; + u64 guest; + u64 vmcb; int ret; - ret = kvm_vcpu_read_guest(vcpu, smbase + 0xfed8, &svm_state_save, - sizeof(svm_state_save)); - if (ret) - return ret; + guest = GET_SMSTATE(u64, smstate, 0x7ed8); + vmcb = GET_SMSTATE(u64, smstate, 0x7ee0); - if (svm_state_save.guest) { + if (guest) { vcpu->arch.hflags &= ~HF_SMM_MASK; - nested_vmcb = nested_svm_map(svm, svm_state_save.vmcb, &page); + nested_vmcb = nested_svm_map(svm, vmcb, &page); if (nested_vmcb) - enter_svm_guest_mode(svm, svm_state_save.vmcb, nested_vmcb, page); + enter_svm_guest_mode(svm, vmcb, nested_vmcb, page); else ret = 1; vcpu->arch.hflags |= HF_SMM_MASK; diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index 5866e9e9f1e0..14ea25eadde8 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -7398,7 +7398,7 @@ static int vmx_pre_enter_smm(struct kvm_vcpu *vcpu, char *smstate) return 0; } -static int vmx_pre_leave_smm(struct kvm_vcpu *vcpu, u64 smbase) +static int vmx_pre_leave_smm(struct kvm_vcpu *vcpu, const char *smstate) { struct vcpu_vmx *vmx = to_vmx(vcpu); int ret; diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index f05891b8df7c..6ee1f9e5d3fb 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -5963,9 +5963,10 @@ static void emulator_set_hflags(struct x86_emulate_ctxt *ctxt, unsigned emul_fla kvm_set_hflags(emul_to_vcpu(ctxt), emul_flags); } -static int emulator_pre_leave_smm(struct x86_emulate_ctxt *ctxt, u64 smbase) +static int emulator_pre_leave_smm(struct x86_emulate_ctxt *ctxt, + const char *smstate) { - return kvm_x86_ops->pre_leave_smm(emul_to_vcpu(ctxt), smbase); + return kvm_x86_ops->pre_leave_smm(emul_to_vcpu(ctxt), smstate); } static const struct x86_emulate_ops emulate_ops = { -- cgit v1.2.3 From c5833c7a43a66bfe2f36439cb2f1281a588668af Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Tue, 2 Apr 2019 08:03:10 -0700 Subject: KVM: x86: Open code kvm_set_hflags Prepare for clearing HF_SMM_MASK prior to loading state from the SMRAM save state map, i.e. kvm_smm_changed() needs to be called after state has been loaded and so cannot be done automatically when setting hflags from RSM. Signed-off-by: Sean Christopherson Signed-off-by: Paolo Bonzini --- arch/x86/include/asm/kvm_emulate.h | 1 + arch/x86/kvm/emulate.c | 3 +++ arch/x86/kvm/x86.c | 33 +++++++++++++++------------------ 3 files changed, 19 insertions(+), 18 deletions(-) diff --git a/arch/x86/include/asm/kvm_emulate.h b/arch/x86/include/asm/kvm_emulate.h index ec489c432850..feab24cac610 100644 --- a/arch/x86/include/asm/kvm_emulate.h +++ b/arch/x86/include/asm/kvm_emulate.h @@ -228,6 +228,7 @@ struct x86_emulate_ops { void (*set_hflags)(struct x86_emulate_ctxt *ctxt, unsigned hflags); int (*pre_leave_smm)(struct x86_emulate_ctxt *ctxt, const char *smstate); + void (*post_leave_smm)(struct x86_emulate_ctxt *ctxt); }; diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index ae0d289b50fe..a6b282853253 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -2629,6 +2629,9 @@ static int em_rsm(struct x86_emulate_ctxt *ctxt) ctxt->ops->set_hflags(ctxt, ctxt->ops->get_hflags(ctxt) & ~(X86EMUL_SMM_INSIDE_NMI_MASK | X86EMUL_SMM_MASK)); + + ctxt->ops->post_leave_smm(ctxt); + return X86EMUL_CONTINUE; } diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 6ee1f9e5d3fb..472bbbbe153a 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -3530,7 +3530,7 @@ static void kvm_vcpu_ioctl_x86_get_vcpu_events(struct kvm_vcpu *vcpu, memset(&events->reserved, 0, sizeof(events->reserved)); } -static void kvm_set_hflags(struct kvm_vcpu *vcpu, unsigned emul_flags); +static void kvm_smm_changed(struct kvm_vcpu *vcpu); static int kvm_vcpu_ioctl_x86_set_vcpu_events(struct kvm_vcpu *vcpu, struct kvm_vcpu_events *events) @@ -3590,12 +3590,13 @@ static int kvm_vcpu_ioctl_x86_set_vcpu_events(struct kvm_vcpu *vcpu, vcpu->arch.apic->sipi_vector = events->sipi_vector; if (events->flags & KVM_VCPUEVENT_VALID_SMM) { - u32 hflags = vcpu->arch.hflags; - if (events->smi.smm) - hflags |= HF_SMM_MASK; - else - hflags &= ~HF_SMM_MASK; - kvm_set_hflags(vcpu, hflags); + if (!!(vcpu->arch.hflags & HF_SMM_MASK) != events->smi.smm) { + if (events->smi.smm) + vcpu->arch.hflags |= HF_SMM_MASK; + else + vcpu->arch.hflags &= ~HF_SMM_MASK; + kvm_smm_changed(vcpu); + } vcpu->arch.smi_pending = events->smi.pending; @@ -5960,7 +5961,7 @@ static unsigned emulator_get_hflags(struct x86_emulate_ctxt *ctxt) static void emulator_set_hflags(struct x86_emulate_ctxt *ctxt, unsigned emul_flags) { - kvm_set_hflags(emul_to_vcpu(ctxt), emul_flags); + emul_to_vcpu(ctxt)->arch.hflags = emul_flags; } static int emulator_pre_leave_smm(struct x86_emulate_ctxt *ctxt, @@ -5969,6 +5970,11 @@ static int emulator_pre_leave_smm(struct x86_emulate_ctxt *ctxt, return kvm_x86_ops->pre_leave_smm(emul_to_vcpu(ctxt), smstate); } +static void emulator_post_leave_smm(struct x86_emulate_ctxt *ctxt) +{ + kvm_smm_changed(emul_to_vcpu(ctxt)); +} + static const struct x86_emulate_ops emulate_ops = { .read_gpr = emulator_read_gpr, .write_gpr = emulator_write_gpr, @@ -6009,6 +6015,7 @@ static const struct x86_emulate_ops emulate_ops = { .get_hflags = emulator_get_hflags, .set_hflags = emulator_set_hflags, .pre_leave_smm = emulator_pre_leave_smm, + .post_leave_smm = emulator_post_leave_smm, }; static void toggle_interruptibility(struct kvm_vcpu *vcpu, u32 mask) @@ -6250,16 +6257,6 @@ static void kvm_smm_changed(struct kvm_vcpu *vcpu) kvm_mmu_reset_context(vcpu); } -static void kvm_set_hflags(struct kvm_vcpu *vcpu, unsigned emul_flags) -{ - unsigned changed = vcpu->arch.hflags ^ emul_flags; - - vcpu->arch.hflags = emul_flags; - - if (changed & HF_SMM_MASK) - kvm_smm_changed(vcpu); -} - static int kvm_vcpu_check_hw_bp(unsigned long addr, u32 type, u32 dr7, unsigned long *db) { -- cgit v1.2.3 From 9ec19493fb86d6d5fbf9286b94ff21e56ef66376 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Tue, 2 Apr 2019 08:03:11 -0700 Subject: KVM: x86: clear SMM flags before loading state while leaving SMM RSM emulation is currently broken on VMX when the interrupted guest has CR4.VMXE=1. Stop dancing around the issue of HF_SMM_MASK being set when loading SMSTATE into architectural state, e.g. by toggling it for problematic flows, and simply clear HF_SMM_MASK prior to loading architectural state (from SMRAM save state area). Reported-by: Jon Doron Cc: Jim Mattson Cc: Liran Alon Cc: Vitaly Kuznetsov Fixes: 5bea5123cbf0 ("KVM: VMX: check nested state and CR4.VMXE against SMM") Signed-off-by: Sean Christopherson Tested-by: Vitaly Kuznetsov Signed-off-by: Paolo Bonzini --- arch/x86/kvm/emulate.c | 12 ++++++------ arch/x86/kvm/svm.c | 12 ++++-------- arch/x86/kvm/vmx/vmx.c | 2 -- 3 files changed, 10 insertions(+), 16 deletions(-) diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index a6b282853253..f526acee2eed 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -2571,6 +2571,12 @@ static int em_rsm(struct x86_emulate_ctxt *ctxt) if (ret != X86EMUL_CONTINUE) return X86EMUL_UNHANDLEABLE; + if ((ctxt->ops->get_hflags(ctxt) & X86EMUL_SMM_INSIDE_NMI_MASK) == 0) + ctxt->ops->set_nmi_mask(ctxt, false); + + ctxt->ops->set_hflags(ctxt, ctxt->ops->get_hflags(ctxt) & + ~(X86EMUL_SMM_INSIDE_NMI_MASK | X86EMUL_SMM_MASK)); + /* * Get back to real mode, to prepare a safe state in which to load * CR0/CR3/CR4/EFER. It's all a bit more complicated if the vCPU @@ -2624,12 +2630,6 @@ static int em_rsm(struct x86_emulate_ctxt *ctxt) return X86EMUL_UNHANDLEABLE; } - if ((ctxt->ops->get_hflags(ctxt) & X86EMUL_SMM_INSIDE_NMI_MASK) == 0) - ctxt->ops->set_nmi_mask(ctxt, false); - - ctxt->ops->set_hflags(ctxt, ctxt->ops->get_hflags(ctxt) & - ~(X86EMUL_SMM_INSIDE_NMI_MASK | X86EMUL_SMM_MASK)); - ctxt->ops->post_leave_smm(ctxt); return X86EMUL_CONTINUE; diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 6b1cd73e4053..406b558abfef 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -6239,21 +6239,17 @@ static int svm_pre_leave_smm(struct kvm_vcpu *vcpu, const char *smstate) struct page *page; u64 guest; u64 vmcb; - int ret; guest = GET_SMSTATE(u64, smstate, 0x7ed8); vmcb = GET_SMSTATE(u64, smstate, 0x7ee0); if (guest) { - vcpu->arch.hflags &= ~HF_SMM_MASK; nested_vmcb = nested_svm_map(svm, vmcb, &page); - if (nested_vmcb) - enter_svm_guest_mode(svm, vmcb, nested_vmcb, page); - else - ret = 1; - vcpu->arch.hflags |= HF_SMM_MASK; + if (!nested_vmcb) + return 1; + enter_svm_guest_mode(svm, vmcb, nested_vmcb, page); } - return ret; + return 0; } static int enable_smi_window(struct kvm_vcpu *vcpu) diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index 14ea25eadde8..b4e7d645275a 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -7409,9 +7409,7 @@ static int vmx_pre_leave_smm(struct kvm_vcpu *vcpu, const char *smstate) } if (vmx->nested.smm.guest_mode) { - vcpu->arch.hflags &= ~HF_SMM_MASK; ret = nested_vmx_enter_non_root_mode(vcpu, false); - vcpu->arch.hflags |= HF_SMM_MASK; if (ret) return ret; -- cgit v1.2.3 From 8f4dc2e77cdfaf7e644ef29693fa229db29ee1de Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Tue, 2 Apr 2019 08:10:47 -0700 Subject: KVM: x86: Don't clear EFER during SMM transitions for 32-bit vCPU Neither AMD nor Intel CPUs have an EFER field in the legacy SMRAM save state area, i.e. don't save/restore EFER across SMM transitions. KVM somewhat models this, e.g. doesn't clear EFER on entry to SMM if the guest doesn't support long mode. But during RSM, KVM unconditionally clears EFER so that it can get back to pure 32-bit mode in order to start loading CRs with their actual non-SMM values. Clear EFER only when it will be written when loading the non-SMM state so as to preserve bits that can theoretically be set on 32-bit vCPUs, e.g. KVM always emulates EFER_SCE. And because CR4.PAE is cleared only to play nice with EFER, wrap that code in the long mode check as well. Note, this may result in a compiler warning about cr4 being consumed uninitialized. Re-read CR4 even though it's technically unnecessary, as doing so allows for more readable code and RSM emulation is not a performance critical path. Fixes: 660a5d517aaab ("KVM: x86: save/load state on SMM switch") Cc: stable@vger.kernel.org Signed-off-by: Sean Christopherson Signed-off-by: Paolo Bonzini --- arch/x86/kvm/emulate.c | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index f526acee2eed..f3284827c432 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -2582,15 +2582,13 @@ static int em_rsm(struct x86_emulate_ctxt *ctxt) * CR0/CR3/CR4/EFER. It's all a bit more complicated if the vCPU * supports long mode. */ - cr4 = ctxt->ops->get_cr(ctxt, 4); if (emulator_has_longmode(ctxt)) { struct desc_struct cs_desc; /* Zero CR4.PCIDE before CR0.PG. */ - if (cr4 & X86_CR4_PCIDE) { + cr4 = ctxt->ops->get_cr(ctxt, 4); + if (cr4 & X86_CR4_PCIDE) ctxt->ops->set_cr(ctxt, 4, cr4 & ~X86_CR4_PCIDE); - cr4 &= ~X86_CR4_PCIDE; - } /* A 32-bit code segment is required to clear EFER.LMA. */ memset(&cs_desc, 0, sizeof(cs_desc)); @@ -2604,13 +2602,16 @@ static int em_rsm(struct x86_emulate_ctxt *ctxt) if (cr0 & X86_CR0_PE) ctxt->ops->set_cr(ctxt, 0, cr0 & ~(X86_CR0_PG | X86_CR0_PE)); - /* Now clear CR4.PAE (which must be done before clearing EFER.LME). */ - if (cr4 & X86_CR4_PAE) - ctxt->ops->set_cr(ctxt, 4, cr4 & ~X86_CR4_PAE); + if (emulator_has_longmode(ctxt)) { + /* Clear CR4.PAE before clearing EFER.LME. */ + cr4 = ctxt->ops->get_cr(ctxt, 4); + if (cr4 & X86_CR4_PAE) + ctxt->ops->set_cr(ctxt, 4, cr4 & ~X86_CR4_PAE); - /* And finally go back to 32-bit mode. */ - efer = 0; - ctxt->ops->set_msr(ctxt, MSR_EFER, efer); + /* And finally go back to 32-bit mode. */ + efer = 0; + ctxt->ops->set_msr(ctxt, MSR_EFER, efer); + } /* * Give pre_leave_smm() a chance to make ISA-specific changes to the -- cgit v1.2.3 From b68f3cc7d978943fcf85148165b00594c38db776 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Tue, 2 Apr 2019 08:10:48 -0700 Subject: KVM: x86: Always use 32-bit SMRAM save state for 32-bit kernels Invoking the 64-bit variation on a 32-bit kenrel will crash the guest, trigger a WARN, and/or lead to a buffer overrun in the host, e.g. rsm_load_state_64() writes r8-r15 unconditionally, but enum kvm_reg and thus x86_emulate_ctxt._regs only define r8-r15 for CONFIG_X86_64. KVM allows userspace to report long mode support via CPUID, even though the guest is all but guaranteed to crash if it actually tries to enable long mode. But, a pure 32-bit guest that is ignorant of long mode will happily plod along. SMM complicates things as 64-bit CPUs use a different SMRAM save state area. KVM handles this correctly for 64-bit kernels, e.g. uses the legacy save state map if userspace has hid long mode from the guest, but doesn't fare well when userspace reports long mode support on a 32-bit host kernel (32-bit KVM doesn't support 64-bit guests). Since the alternative is to crash the guest, e.g. by not loading state or explicitly requesting shutdown, unconditionally use the legacy SMRAM save state map for 32-bit KVM. If a guest has managed to get far enough to handle SMIs when running under a weird/buggy userspace hypervisor, then don't deliberately crash the guest since there are no downsides (from KVM's perspective) to allow it to continue running. Fixes: 660a5d517aaab ("KVM: x86: save/load state on SMM switch") Cc: stable@vger.kernel.org Signed-off-by: Sean Christopherson Signed-off-by: Paolo Bonzini --- arch/x86/kvm/emulate.c | 10 ++++++++++ arch/x86/kvm/x86.c | 10 ++++++---- 2 files changed, 16 insertions(+), 4 deletions(-) diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index f3284827c432..d0d5dd44b4f4 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -2331,12 +2331,16 @@ static int em_lseg(struct x86_emulate_ctxt *ctxt) static int emulator_has_longmode(struct x86_emulate_ctxt *ctxt) { +#ifdef CONFIG_X86_64 u32 eax, ebx, ecx, edx; eax = 0x80000001; ecx = 0; ctxt->ops->get_cpuid(ctxt, &eax, &ebx, &ecx, &edx, false); return edx & bit(X86_FEATURE_LM); +#else + return false; +#endif } static void rsm_set_desc_flags(struct desc_struct *desc, u32 flags) @@ -2372,6 +2376,7 @@ static int rsm_load_seg_32(struct x86_emulate_ctxt *ctxt, const char *smstate, return X86EMUL_CONTINUE; } +#ifdef CONFIG_X86_64 static int rsm_load_seg_64(struct x86_emulate_ctxt *ctxt, const char *smstate, int n) { @@ -2391,6 +2396,7 @@ static int rsm_load_seg_64(struct x86_emulate_ctxt *ctxt, const char *smstate, ctxt->ops->set_segment(ctxt, selector, &desc, base3, n); return X86EMUL_CONTINUE; } +#endif static int rsm_enter_protected_mode(struct x86_emulate_ctxt *ctxt, u64 cr0, u64 cr3, u64 cr4) @@ -2492,6 +2498,7 @@ static int rsm_load_state_32(struct x86_emulate_ctxt *ctxt, return rsm_enter_protected_mode(ctxt, cr0, cr3, cr4); } +#ifdef CONFIG_X86_64 static int rsm_load_state_64(struct x86_emulate_ctxt *ctxt, const char *smstate) { @@ -2554,6 +2561,7 @@ static int rsm_load_state_64(struct x86_emulate_ctxt *ctxt, return X86EMUL_CONTINUE; } +#endif static int em_rsm(struct x86_emulate_ctxt *ctxt) { @@ -2621,9 +2629,11 @@ static int em_rsm(struct x86_emulate_ctxt *ctxt) if (ctxt->ops->pre_leave_smm(ctxt, buf)) return X86EMUL_UNHANDLEABLE; +#ifdef CONFIG_X86_64 if (emulator_has_longmode(ctxt)) ret = rsm_load_state_64(ctxt, buf); else +#endif ret = rsm_load_state_32(ctxt, buf); if (ret != X86EMUL_CONTINUE) { diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 472bbbbe153a..f10fef561573 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -7441,9 +7441,9 @@ static void enter_smm_save_state_32(struct kvm_vcpu *vcpu, char *buf) put_smstate(u32, buf, 0x7ef8, vcpu->arch.smbase); } +#ifdef CONFIG_X86_64 static void enter_smm_save_state_64(struct kvm_vcpu *vcpu, char *buf) { -#ifdef CONFIG_X86_64 struct desc_ptr dt; struct kvm_segment seg; unsigned long val; @@ -7493,10 +7493,8 @@ static void enter_smm_save_state_64(struct kvm_vcpu *vcpu, char *buf) for (i = 0; i < 6; i++) enter_smm_save_seg_64(vcpu, buf, i); -#else - WARN_ON_ONCE(1); -#endif } +#endif static void enter_smm(struct kvm_vcpu *vcpu) { @@ -7507,9 +7505,11 @@ static void enter_smm(struct kvm_vcpu *vcpu) trace_kvm_enter_smm(vcpu->vcpu_id, vcpu->arch.smbase, true); memset(buf, 0, 512); +#ifdef CONFIG_X86_64 if (guest_cpuid_has(vcpu, X86_FEATURE_LM)) enter_smm_save_state_64(vcpu, buf); else +#endif enter_smm_save_state_32(vcpu, buf); /* @@ -7567,8 +7567,10 @@ static void enter_smm(struct kvm_vcpu *vcpu) kvm_set_segment(vcpu, &ds, VCPU_SREG_GS); kvm_set_segment(vcpu, &ds, VCPU_SREG_SS); +#ifdef CONFIG_X86_64 if (guest_cpuid_has(vcpu, X86_FEATURE_LM)) kvm_x86_ops->set_efer(vcpu, 0); +#endif kvm_update_cpuid(vcpu); kvm_mmu_reset_context(vcpu); -- cgit v1.2.3 From c68c21ca929771a1f128d886359f9229d31cf80c Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Thu, 11 Apr 2019 15:57:14 +0200 Subject: selftests: kvm/evmcs_test: complete I/O before migrating guest state Starting state migration after an IO exit without first completing IO may result in test failures. We already have two tests that need this (this patch in fact fixes evmcs_test, similar to what was fixed for state_test in commit 0f73bbc851ed, "KVM: selftests: complete IO before migrating guest state", 2019-03-13) and a third is coming. So, move the code to vcpu_save_state, and while at it do not access register state until after I/O is complete. Signed-off-by: Paolo Bonzini --- tools/testing/selftests/kvm/lib/kvm_util.c | 5 +++++ tools/testing/selftests/kvm/lib/x86_64/processor.c | 8 ++++++++ tools/testing/selftests/kvm/x86_64/evmcs_test.c | 5 +++-- tools/testing/selftests/kvm/x86_64/state_test.c | 15 +-------------- 4 files changed, 17 insertions(+), 16 deletions(-) diff --git a/tools/testing/selftests/kvm/lib/kvm_util.c b/tools/testing/selftests/kvm/lib/kvm_util.c index efa0aad8b3c6..4ca96b228e46 100644 --- a/tools/testing/selftests/kvm/lib/kvm_util.c +++ b/tools/testing/selftests/kvm/lib/kvm_util.c @@ -91,6 +91,11 @@ static void vm_open(struct kvm_vm *vm, int perm, unsigned long type) if (vm->kvm_fd < 0) exit(KSFT_SKIP); + if (!kvm_check_cap(KVM_CAP_IMMEDIATE_EXIT)) { + fprintf(stderr, "immediate_exit not available, skipping test\n"); + exit(KSFT_SKIP); + } + vm->fd = ioctl(vm->kvm_fd, KVM_CREATE_VM, type); TEST_ASSERT(vm->fd >= 0, "KVM_CREATE_VM ioctl failed, " "rc: %i errno: %i", vm->fd, errno); diff --git a/tools/testing/selftests/kvm/lib/x86_64/processor.c b/tools/testing/selftests/kvm/lib/x86_64/processor.c index f28127f4a3af..b363c9611bd6 100644 --- a/tools/testing/selftests/kvm/lib/x86_64/processor.c +++ b/tools/testing/selftests/kvm/lib/x86_64/processor.c @@ -1030,6 +1030,14 @@ struct kvm_x86_state *vcpu_save_state(struct kvm_vm *vm, uint32_t vcpuid) nested_size, sizeof(state->nested_)); } + /* + * When KVM exits to userspace with KVM_EXIT_IO, KVM guarantees + * guest state is consistent only after userspace re-enters the + * kernel with KVM_RUN. Complete IO prior to migrating state + * to a new VM. + */ + vcpu_run_complete_io(vm, vcpuid); + nmsrs = kvm_get_num_msrs(vm); list = malloc(sizeof(*list) + nmsrs * sizeof(list->indices[0])); list->nmsrs = nmsrs; diff --git a/tools/testing/selftests/kvm/x86_64/evmcs_test.c b/tools/testing/selftests/kvm/x86_64/evmcs_test.c index c49c2a28b0eb..36669684eca5 100644 --- a/tools/testing/selftests/kvm/x86_64/evmcs_test.c +++ b/tools/testing/selftests/kvm/x86_64/evmcs_test.c @@ -123,8 +123,6 @@ int main(int argc, char *argv[]) stage, run->exit_reason, exit_reason_str(run->exit_reason)); - memset(®s1, 0, sizeof(regs1)); - vcpu_regs_get(vm, VCPU_ID, ®s1); switch (get_ucall(vm, VCPU_ID, &uc)) { case UCALL_ABORT: TEST_ASSERT(false, "%s at %s:%d", (const char *)uc.args[0], @@ -144,6 +142,9 @@ int main(int argc, char *argv[]) stage, (ulong)uc.args[1]); state = vcpu_save_state(vm, VCPU_ID); + memset(®s1, 0, sizeof(regs1)); + vcpu_regs_get(vm, VCPU_ID, ®s1); + kvm_vm_release(vm); /* Restore state in a new VM. */ diff --git a/tools/testing/selftests/kvm/x86_64/state_test.c b/tools/testing/selftests/kvm/x86_64/state_test.c index 30f75856cf39..e0a3c0204b7c 100644 --- a/tools/testing/selftests/kvm/x86_64/state_test.c +++ b/tools/testing/selftests/kvm/x86_64/state_test.c @@ -134,11 +134,6 @@ int main(int argc, char *argv[]) struct kvm_cpuid_entry2 *entry = kvm_get_supported_cpuid_entry(1); - if (!kvm_check_cap(KVM_CAP_IMMEDIATE_EXIT)) { - fprintf(stderr, "immediate_exit not available, skipping test\n"); - exit(KSFT_SKIP); - } - /* Create VM */ vm = vm_create_default(VCPU_ID, 0, guest_code); vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid()); @@ -179,18 +174,10 @@ int main(int argc, char *argv[]) uc.args[1] == stage, "Unexpected register values vmexit #%lx, got %lx", stage, (ulong)uc.args[1]); - /* - * When KVM exits to userspace with KVM_EXIT_IO, KVM guarantees - * guest state is consistent only after userspace re-enters the - * kernel with KVM_RUN. Complete IO prior to migrating state - * to a new VM. - */ - vcpu_run_complete_io(vm, VCPU_ID); - + state = vcpu_save_state(vm, VCPU_ID); memset(®s1, 0, sizeof(regs1)); vcpu_regs_get(vm, VCPU_ID, ®s1); - state = vcpu_save_state(vm, VCPU_ID); kvm_vm_release(vm); /* Restore state in a new VM. */ -- cgit v1.2.3 From c2390f16fc5b847a22f442a190d459beba07e86f Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Thu, 11 Apr 2019 15:51:19 +0200 Subject: selftests: kvm: fix for compilers that do not support -no-pie -no-pie was added to GCC at the same time as their configuration option --enable-default-pie. Compilers that were built before do not have -no-pie, but they also do not need it. Detect the option at build time. Signed-off-by: Paolo Bonzini --- tools/testing/selftests/kvm/Makefile | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/tools/testing/selftests/kvm/Makefile b/tools/testing/selftests/kvm/Makefile index 7514fcea91a7..2d2e10b219d5 100644 --- a/tools/testing/selftests/kvm/Makefile +++ b/tools/testing/selftests/kvm/Makefile @@ -1,3 +1,5 @@ +include ../../../../scripts/Kbuild.include + all: top_srcdir = ../../../.. @@ -30,7 +32,11 @@ INSTALL_HDR_PATH = $(top_srcdir)/usr LINUX_HDR_PATH = $(INSTALL_HDR_PATH)/include/ LINUX_TOOL_INCLUDE = $(top_srcdir)/tools/include CFLAGS += -O2 -g -std=gnu99 -fno-stack-protector -fno-PIE -I$(LINUX_TOOL_INCLUDE) -I$(LINUX_HDR_PATH) -Iinclude -I$( Date: Wed, 10 Apr 2019 11:38:33 +0200 Subject: selftests: kvm: add a selftest for SMM Add a simple test for SMM, based on VMX. The test implements its own sync between the guest and the host as using our ucall library seems to be too cumbersome: SMI handler is happening in real-address mode. This patch also fixes KVM_SET_NESTED_STATE to happen after KVM_SET_VCPU_EVENTS, in fact it places it last. This is because KVM needs to know whether the processor is in SMM or not. Signed-off-by: Vitaly Kuznetsov Signed-off-by: Paolo Bonzini --- tools/testing/selftests/kvm/Makefile | 1 + .../selftests/kvm/include/x86_64/processor.h | 27 ++++ tools/testing/selftests/kvm/lib/x86_64/processor.c | 12 +- tools/testing/selftests/kvm/x86_64/smm_test.c | 157 +++++++++++++++++++++ 4 files changed, 191 insertions(+), 6 deletions(-) create mode 100644 tools/testing/selftests/kvm/x86_64/smm_test.c diff --git a/tools/testing/selftests/kvm/Makefile b/tools/testing/selftests/kvm/Makefile index 2d2e10b219d5..f8588cca2bef 100644 --- a/tools/testing/selftests/kvm/Makefile +++ b/tools/testing/selftests/kvm/Makefile @@ -19,6 +19,7 @@ TEST_GEN_PROGS_x86_64 += x86_64/state_test TEST_GEN_PROGS_x86_64 += x86_64/evmcs_test TEST_GEN_PROGS_x86_64 += x86_64/hyperv_cpuid TEST_GEN_PROGS_x86_64 += x86_64/vmx_close_while_nested_test +TEST_GEN_PROGS_x86_64 += x86_64/smm_test TEST_GEN_PROGS_x86_64 += dirty_log_test TEST_GEN_PROGS_x86_64 += clear_dirty_log_test diff --git a/tools/testing/selftests/kvm/include/x86_64/processor.h b/tools/testing/selftests/kvm/include/x86_64/processor.h index e2884c2b81ff..6063d5b2f356 100644 --- a/tools/testing/selftests/kvm/include/x86_64/processor.h +++ b/tools/testing/selftests/kvm/include/x86_64/processor.h @@ -778,6 +778,33 @@ void vcpu_set_msr(struct kvm_vm *vm, uint32_t vcpuid, uint64_t msr_index, #define MSR_IA32_APICBASE_ENABLE (1<<11) #define MSR_IA32_APICBASE_BASE (0xfffff<<12) +#define APIC_BASE_MSR 0x800 +#define X2APIC_ENABLE (1UL << 10) +#define APIC_ICR 0x300 +#define APIC_DEST_SELF 0x40000 +#define APIC_DEST_ALLINC 0x80000 +#define APIC_DEST_ALLBUT 0xC0000 +#define APIC_ICR_RR_MASK 0x30000 +#define APIC_ICR_RR_INVALID 0x00000 +#define APIC_ICR_RR_INPROG 0x10000 +#define APIC_ICR_RR_VALID 0x20000 +#define APIC_INT_LEVELTRIG 0x08000 +#define APIC_INT_ASSERT 0x04000 +#define APIC_ICR_BUSY 0x01000 +#define APIC_DEST_LOGICAL 0x00800 +#define APIC_DEST_PHYSICAL 0x00000 +#define APIC_DM_FIXED 0x00000 +#define APIC_DM_FIXED_MASK 0x00700 +#define APIC_DM_LOWEST 0x00100 +#define APIC_DM_SMI 0x00200 +#define APIC_DM_REMRD 0x00300 +#define APIC_DM_NMI 0x00400 +#define APIC_DM_INIT 0x00500 +#define APIC_DM_STARTUP 0x00600 +#define APIC_DM_EXTINT 0x00700 +#define APIC_VECTOR_MASK 0x000FF +#define APIC_ICR2 0x310 + #define MSR_IA32_TSCDEADLINE 0x000006e0 #define MSR_IA32_UCODE_WRITE 0x00000079 diff --git a/tools/testing/selftests/kvm/lib/x86_64/processor.c b/tools/testing/selftests/kvm/lib/x86_64/processor.c index b363c9611bd6..dc7fae9fa424 100644 --- a/tools/testing/selftests/kvm/lib/x86_64/processor.c +++ b/tools/testing/selftests/kvm/lib/x86_64/processor.c @@ -1101,12 +1101,6 @@ void vcpu_load_state(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_x86_state *s struct vcpu *vcpu = vcpu_find(vm, vcpuid); int r; - if (state->nested.size) { - r = ioctl(vcpu->fd, KVM_SET_NESTED_STATE, &state->nested); - TEST_ASSERT(r == 0, "Unexpected result from KVM_SET_NESTED_STATE, r: %i", - r); - } - r = ioctl(vcpu->fd, KVM_SET_XSAVE, &state->xsave); TEST_ASSERT(r == 0, "Unexpected result from KVM_SET_XSAVE, r: %i", r); @@ -1138,4 +1132,10 @@ void vcpu_load_state(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_x86_state *s r = ioctl(vcpu->fd, KVM_SET_REGS, &state->regs); TEST_ASSERT(r == 0, "Unexpected result from KVM_SET_REGS, r: %i", r); + + if (state->nested.size) { + r = ioctl(vcpu->fd, KVM_SET_NESTED_STATE, &state->nested); + TEST_ASSERT(r == 0, "Unexpected result from KVM_SET_NESTED_STATE, r: %i", + r); + } } diff --git a/tools/testing/selftests/kvm/x86_64/smm_test.c b/tools/testing/selftests/kvm/x86_64/smm_test.c new file mode 100644 index 000000000000..fb8086964d83 --- /dev/null +++ b/tools/testing/selftests/kvm/x86_64/smm_test.c @@ -0,0 +1,157 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2018, Red Hat, Inc. + * + * Tests for SMM. + */ +#define _GNU_SOURCE /* for program_invocation_short_name */ +#include +#include +#include +#include +#include +#include + +#include "test_util.h" + +#include "kvm_util.h" + +#include "vmx.h" + +#define VCPU_ID 1 + +#define PAGE_SIZE 4096 + +#define SMRAM_SIZE 65536 +#define SMRAM_MEMSLOT ((1 << 16) | 1) +#define SMRAM_PAGES (SMRAM_SIZE / PAGE_SIZE) +#define SMRAM_GPA 0x1000000 +#define SMRAM_STAGE 0xfe + +#define STR(x) #x +#define XSTR(s) STR(s) + +#define SYNC_PORT 0xe +#define DONE 0xff + +/* + * This is compiled as normal 64-bit code, however, SMI handler is executed + * in real-address mode. To stay simple we're limiting ourselves to a mode + * independent subset of asm here. + * SMI handler always report back fixed stage SMRAM_STAGE. + */ +uint8_t smi_handler[] = { + 0xb0, SMRAM_STAGE, /* mov $SMRAM_STAGE, %al */ + 0xe4, SYNC_PORT, /* in $SYNC_PORT, %al */ + 0x0f, 0xaa, /* rsm */ +}; + +void sync_with_host(uint64_t phase) +{ + asm volatile("in $" XSTR(SYNC_PORT)", %%al \n" + : : "a" (phase)); +} + +void self_smi(void) +{ + wrmsr(APIC_BASE_MSR + (APIC_ICR >> 4), + APIC_DEST_SELF | APIC_INT_ASSERT | APIC_DM_SMI); +} + +void guest_code(struct vmx_pages *vmx_pages) +{ + uint64_t apicbase = rdmsr(MSR_IA32_APICBASE); + + sync_with_host(1); + + wrmsr(MSR_IA32_APICBASE, apicbase | X2APIC_ENABLE); + + sync_with_host(2); + + self_smi(); + + sync_with_host(4); + + if (vmx_pages) { + GUEST_ASSERT(prepare_for_vmx_operation(vmx_pages)); + + sync_with_host(5); + + self_smi(); + + sync_with_host(7); + } + + sync_with_host(DONE); +} + +int main(int argc, char *argv[]) +{ + struct vmx_pages *vmx_pages = NULL; + vm_vaddr_t vmx_pages_gva = 0; + + struct kvm_regs regs; + struct kvm_vm *vm; + struct kvm_run *run; + struct kvm_x86_state *state; + int stage, stage_reported; + + /* Create VM */ + vm = vm_create_default(VCPU_ID, 0, guest_code); + + vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid()); + + run = vcpu_state(vm, VCPU_ID); + + vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS, SMRAM_GPA, + SMRAM_MEMSLOT, SMRAM_PAGES, 0); + TEST_ASSERT(vm_phy_pages_alloc(vm, SMRAM_PAGES, SMRAM_GPA, SMRAM_MEMSLOT) + == SMRAM_GPA, "could not allocate guest physical addresses?"); + + memset(addr_gpa2hva(vm, SMRAM_GPA), 0x0, SMRAM_SIZE); + memcpy(addr_gpa2hva(vm, SMRAM_GPA) + 0x8000, smi_handler, + sizeof(smi_handler)); + + vcpu_set_msr(vm, VCPU_ID, MSR_IA32_SMBASE, SMRAM_GPA); + + if (kvm_check_cap(KVM_CAP_NESTED_STATE)) { + vmx_pages = vcpu_alloc_vmx(vm, &vmx_pages_gva); + vcpu_args_set(vm, VCPU_ID, 1, vmx_pages_gva); + } else { + printf("will skip SMM test with VMX enabled\n"); + vcpu_args_set(vm, VCPU_ID, 1, 0); + } + + for (stage = 1;; stage++) { + _vcpu_run(vm, VCPU_ID); + TEST_ASSERT(run->exit_reason == KVM_EXIT_IO, + "Stage %d: unexpected exit reason: %u (%s),\n", + stage, run->exit_reason, + exit_reason_str(run->exit_reason)); + + memset(®s, 0, sizeof(regs)); + vcpu_regs_get(vm, VCPU_ID, ®s); + + stage_reported = regs.rax & 0xff; + + if (stage_reported == DONE) + goto done; + + TEST_ASSERT(stage_reported == stage || + stage_reported == SMRAM_STAGE, + "Unexpected stage: #%x, got %x", + stage, stage_reported); + + state = vcpu_save_state(vm, VCPU_ID); + kvm_vm_release(vm); + kvm_vm_restart(vm, O_RDWR); + vm_vcpu_add(vm, VCPU_ID, 0, 0); + vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid()); + vcpu_load_state(vm, VCPU_ID, state); + run = vcpu_state(vm, VCPU_ID); + free(state); + } + +done: + kvm_vm_free(vm); +} -- cgit v1.2.3 From be43c440eb5d0ccfdb0d67d5a4c9d579ff988b75 Mon Sep 17 00:00:00 2001 From: Hariprasad Kelam Date: Sat, 6 Apr 2019 15:06:58 +0530 Subject: KVM: x86: fix warning Using plain integer as NULL pointer Changed passing argument as "0 to NULL" which resolves below sparse warning arch/x86/kvm/x86.c:3096:61: warning: Using plain integer as NULL pointer Signed-off-by: Hariprasad Kelam Signed-off-by: Paolo Bonzini --- arch/x86/kvm/x86.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index f10fef561573..a0d1fc80ac5a 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -3095,7 +3095,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) break; case KVM_CAP_NESTED_STATE: r = kvm_x86_ops->get_nested_state ? - kvm_x86_ops->get_nested_state(NULL, 0, 0) : 0; + kvm_x86_ops->get_nested_state(NULL, NULL, 0) : 0; break; default: break; -- cgit v1.2.3 From 1d487e9bf8ba66a7174c56a0029c54b1eca8f99c Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Thu, 11 Apr 2019 11:16:47 +0200 Subject: KVM: fix spectrev1 gadgets These were found with smatch, and then generalized when applicable. Signed-off-by: Paolo Bonzini --- arch/x86/kvm/lapic.c | 4 +++- include/linux/kvm_host.h | 10 ++++++---- virt/kvm/irqchip.c | 5 +++-- virt/kvm/kvm_main.c | 6 ++++-- 4 files changed, 16 insertions(+), 9 deletions(-) diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index 991fdf7fc17f..9bf70cf84564 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -138,6 +138,7 @@ static inline bool kvm_apic_map_get_logical_dest(struct kvm_apic_map *map, if (offset <= max_apic_id) { u8 cluster_size = min(max_apic_id - offset + 1, 16U); + offset = array_index_nospec(offset, map->max_apic_id + 1); *cluster = &map->phys_map[offset]; *mask = dest_id & (0xffff >> (16 - cluster_size)); } else { @@ -901,7 +902,8 @@ static inline bool kvm_apic_map_get_dest_lapic(struct kvm *kvm, if (irq->dest_id > map->max_apic_id) { *bitmap = 0; } else { - *dst = &map->phys_map[irq->dest_id]; + u32 dest_id = array_index_nospec(irq->dest_id, map->max_apic_id + 1); + *dst = &map->phys_map[dest_id]; *bitmap = 1; } return true; diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 9d55c63db09b..640a03642766 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -28,6 +28,7 @@ #include #include #include +#include #include #include @@ -513,10 +514,10 @@ static inline struct kvm_io_bus *kvm_get_bus(struct kvm *kvm, enum kvm_bus idx) static inline struct kvm_vcpu *kvm_get_vcpu(struct kvm *kvm, int i) { - /* Pairs with smp_wmb() in kvm_vm_ioctl_create_vcpu, in case - * the caller has read kvm->online_vcpus before (as is the case - * for kvm_for_each_vcpu, for example). - */ + int num_vcpus = atomic_read(&kvm->online_vcpus); + i = array_index_nospec(i, num_vcpus); + + /* Pairs with smp_wmb() in kvm_vm_ioctl_create_vcpu. */ smp_rmb(); return kvm->vcpus[i]; } @@ -600,6 +601,7 @@ void kvm_put_kvm(struct kvm *kvm); static inline struct kvm_memslots *__kvm_memslots(struct kvm *kvm, int as_id) { + as_id = array_index_nospec(as_id, KVM_ADDRESS_SPACE_NUM); return srcu_dereference_check(kvm->memslots[as_id], &kvm->srcu, lockdep_is_held(&kvm->slots_lock) || !refcount_read(&kvm->users_count)); diff --git a/virt/kvm/irqchip.c b/virt/kvm/irqchip.c index 3547b0d8c91e..79e59e4fa3dc 100644 --- a/virt/kvm/irqchip.c +++ b/virt/kvm/irqchip.c @@ -144,18 +144,19 @@ static int setup_routing_entry(struct kvm *kvm, { struct kvm_kernel_irq_routing_entry *ei; int r; + u32 gsi = array_index_nospec(ue->gsi, KVM_MAX_IRQ_ROUTES); /* * Do not allow GSI to be mapped to the same irqchip more than once. * Allow only one to one mapping between GSI and non-irqchip routing. */ - hlist_for_each_entry(ei, &rt->map[ue->gsi], link) + hlist_for_each_entry(ei, &rt->map[gsi], link) if (ei->type != KVM_IRQ_ROUTING_IRQCHIP || ue->type != KVM_IRQ_ROUTING_IRQCHIP || ue->u.irqchip.irqchip == ei->irqchip.irqchip) return -EINVAL; - e->gsi = ue->gsi; + e->gsi = gsi; e->type = ue->type; r = kvm_set_routing_entry(kvm, e, ue); if (r) diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 55fe8e20d8fd..dc8edc97ba85 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -2977,12 +2977,14 @@ static int kvm_ioctl_create_device(struct kvm *kvm, struct kvm_device_ops *ops = NULL; struct kvm_device *dev; bool test = cd->flags & KVM_CREATE_DEVICE_TEST; + int type; int ret; if (cd->type >= ARRAY_SIZE(kvm_device_ops_table)) return -ENODEV; - ops = kvm_device_ops_table[cd->type]; + type = array_index_nospec(cd->type, ARRAY_SIZE(kvm_device_ops_table)); + ops = kvm_device_ops_table[type]; if (ops == NULL) return -ENODEV; @@ -2997,7 +2999,7 @@ static int kvm_ioctl_create_device(struct kvm *kvm, dev->kvm = kvm; mutex_lock(&kvm->lock); - ret = ops->create(dev, cd->type); + ret = ops->create(dev, type); if (ret < 0) { mutex_unlock(&kvm->lock); kfree(dev); -- cgit v1.2.3 From 7a223e06b1a411cef6c4cd7a9b9a33c8d225b10e Mon Sep 17 00:00:00 2001 From: Vitaly Kuznetsov Date: Wed, 27 Mar 2019 15:12:20 +0100 Subject: KVM: x86: avoid misreporting level-triggered irqs as edge-triggered in tracing In __apic_accept_irq() interface trig_mode is int and actually on some code paths it is set above u8: kvm_apic_set_irq() extracts it from 'struct kvm_lapic_irq' where trig_mode is u16. This is done on purpose as e.g. kvm_set_msi_irq() sets it to (1 << 15) & e->msi.data kvm_apic_local_deliver sets it to reg & (1 << 15). Fix the immediate issue by making 'tm' into u16. We may also want to adjust __apic_accept_irq() interface and use proper sizes for vector, level, trig_mode but this is not urgent. Signed-off-by: Vitaly Kuznetsov Signed-off-by: Paolo Bonzini --- arch/x86/kvm/trace.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/x86/kvm/trace.h b/arch/x86/kvm/trace.h index 6432d08c7de7..4d47a2631d1f 100644 --- a/arch/x86/kvm/trace.h +++ b/arch/x86/kvm/trace.h @@ -438,13 +438,13 @@ TRACE_EVENT(kvm_apic_ipi, ); TRACE_EVENT(kvm_apic_accept_irq, - TP_PROTO(__u32 apicid, __u16 dm, __u8 tm, __u8 vec), + TP_PROTO(__u32 apicid, __u16 dm, __u16 tm, __u8 vec), TP_ARGS(apicid, dm, tm, vec), TP_STRUCT__entry( __field( __u32, apicid ) __field( __u16, dm ) - __field( __u8, tm ) + __field( __u16, tm ) __field( __u8, vec ) ), -- cgit v1.2.3 From 8c2f870890fd28e023b0fcf49dcee333f2c8bad7 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Tue, 16 Apr 2019 15:25:00 +0200 Subject: ALSA: info: Fix racy addition/deletion of nodes The ALSA proc helper manages the child nodes in a linked list, but its addition and deletion is done without any lock. This leads to a corruption if they are operated concurrently. Usually this isn't a problem because the proc entries are added sequentially in the driver probe procedure itself. But the card registrations are done often asynchronously, and the crash could be actually reproduced with syzkaller. This patch papers over it by protecting the link addition and deletion with the parent's mutex. There is "access" mutex that is used for the file access, and this can be reused for this purpose as well. Reported-by: syzbot+48df349490c36f9f54ab@syzkaller.appspotmail.com Cc: Signed-off-by: Takashi Iwai --- sound/core/info.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/sound/core/info.c b/sound/core/info.c index 96a074019c33..0eb169acc850 100644 --- a/sound/core/info.c +++ b/sound/core/info.c @@ -713,8 +713,11 @@ snd_info_create_entry(const char *name, struct snd_info_entry *parent, INIT_LIST_HEAD(&entry->list); entry->parent = parent; entry->module = module; - if (parent) + if (parent) { + mutex_lock(&parent->access); list_add_tail(&entry->list, &parent->children); + mutex_unlock(&parent->access); + } return entry; } @@ -792,7 +795,12 @@ void snd_info_free_entry(struct snd_info_entry * entry) list_for_each_entry_safe(p, n, &entry->children, list) snd_info_free_entry(p); - list_del(&entry->list); + p = entry->parent; + if (p) { + mutex_lock(&p->access); + list_del(&entry->list); + mutex_unlock(&p->access); + } kfree(entry->name); if (entry->private_free) entry->private_free(entry); -- cgit v1.2.3 From 14c9b31a925a9f5c647523a12e2b575b97c0aa47 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Mon, 8 Apr 2019 12:33:55 -0500 Subject: perf header: Fix lock/unlock imbalances when processing BPF/BTF info Fix lock/unlock imbalances by refactoring the code a bit and adding calls to up_write() before return. Signed-off-by: Gustavo A. R. Silva Acked-by: Song Liu Cc: Alexander Shishkin Cc: Jiri Olsa Cc: Namhyung Kim Cc: Peter Zijlstra Addresses-Coverity-ID: 1444315 ("Missing unlock") Addresses-Coverity-ID: 1444316 ("Missing unlock") Fixes: a70a1123174a ("perf bpf: Save BTF information as headers to perf.data") Fixes: 606f972b1361 ("perf bpf: Save bpf_prog_info information as headers to perf.data") Link: http://lkml.kernel.org/r/20190408173355.GA10501@embeddedor [ Simplified the exit path to have just one up_write() + return ] Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/header.c | 22 +++++++++++++--------- 1 file changed, 13 insertions(+), 9 deletions(-) diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c index b9e693825873..2d2af2ac2b1e 100644 --- a/tools/perf/util/header.c +++ b/tools/perf/util/header.c @@ -2606,6 +2606,7 @@ static int process_bpf_prog_info(struct feat_fd *ff, void *data __maybe_unused) perf_env__insert_bpf_prog_info(env, info_node); } + up_write(&env->bpf_progs.lock); return 0; out: free(info_linear); @@ -2623,7 +2624,9 @@ static int process_bpf_prog_info(struct feat_fd *ff __maybe_unused, void *data _ static int process_bpf_btf(struct feat_fd *ff, void *data __maybe_unused) { struct perf_env *env = &ff->ph->env; + struct btf_node *node = NULL; u32 count, i; + int err = -1; if (ff->ph->needs_swap) { pr_warning("interpreting btf from systems with endianity is not yet supported\n"); @@ -2636,31 +2639,32 @@ static int process_bpf_btf(struct feat_fd *ff, void *data __maybe_unused) down_write(&env->bpf_progs.lock); for (i = 0; i < count; ++i) { - struct btf_node *node; u32 id, data_size; if (do_read_u32(ff, &id)) - return -1; + goto out; if (do_read_u32(ff, &data_size)) - return -1; + goto out; node = malloc(sizeof(struct btf_node) + data_size); if (!node) - return -1; + goto out; node->id = id; node->data_size = data_size; - if (__do_read(ff, node->data, data_size)) { - free(node); - return -1; - } + if (__do_read(ff, node->data, data_size)) + goto out; perf_env__insert_btf(env, node); + node = NULL; } + err = 0; +out: up_write(&env->bpf_progs.lock); - return 0; + free(node); + return err; } struct feature_ops { -- cgit v1.2.3 From 6e4b1cac30d297718218dc268199ed20df074b98 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Tue, 9 Apr 2019 09:25:57 +0300 Subject: perf scripts python: export-to-sqlite.py: Fix use of parent_id in calls_view Fix following error using calls_view: Query failed: ambiguous column name: parent_id Unable to execute statement Signed-off-by: Adrian Hunter Cc: Jiri Olsa Fixes: 8ce9a7251d11 ("perf scripts python: export-to-sqlite.py: Export calls parent_id") Link: http://lkml.kernel.org/r/20190409062557.26138-1-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/scripts/python/export-to-sqlite.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/scripts/python/export-to-sqlite.py b/tools/perf/scripts/python/export-to-sqlite.py index 3b71902a5a21..bf271fbc3a88 100644 --- a/tools/perf/scripts/python/export-to-sqlite.py +++ b/tools/perf/scripts/python/export-to-sqlite.py @@ -331,7 +331,7 @@ if perf_db_export_calls: 'return_id,' 'CASE WHEN flags=0 THEN \'\' WHEN flags=1 THEN \'no call\' WHEN flags=2 THEN \'no return\' WHEN flags=3 THEN \'no call/return\' WHEN flags=6 THEN \'jump\' ELSE flags END AS flags,' 'parent_call_path_id,' - 'parent_id' + 'calls.parent_id' ' FROM calls INNER JOIN call_paths ON call_paths.id = call_path_id') do_query(query, 'CREATE VIEW samples_view AS ' -- cgit v1.2.3 From 8002a63f9ace7e9c958408f77f0a4dd4a8414511 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Tue, 9 Apr 2019 12:01:56 +0200 Subject: perf stat: Disable DIR_FORMAT feature for 'perf stat record' Arnaldo reported assertion in perf stat record: assertion failed at util/header.c:875 There's no support for this in the 'perf state record' command, disable the feature for that case. Reported-by: Arnaldo Carvalho de Melo Signed-off-by: Jiri Olsa Tested-by: Arnaldo Carvalho de Melo Cc: Alexander Shishkin Cc: Namhyung Kim Cc: Peter Zijlstra Fixes: 258031c017c3 ("perf header: Add DIR_FORMAT feature to describe directory data") Link: http://lkml.kernel.org/r/20190409100156.20303-1-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-stat.c | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index 49ee3c2033ec..c3625ec374e0 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -1308,6 +1308,7 @@ static void init_features(struct perf_session *session) for (feat = HEADER_FIRST_FEATURE; feat < HEADER_LAST_FEATURE; feat++) perf_header__set_feat(&session->header, feat); + perf_header__clear_feat(&session->header, HEADER_DIR_FORMAT); perf_header__clear_feat(&session->header, HEADER_BUILD_ID); perf_header__clear_feat(&session->header, HEADER_TRACING_DATA); perf_header__clear_feat(&session->header, HEADER_BRANCH_STACK); -- cgit v1.2.3 From f32c2877bcb068a718bb70094cd59ccc29d4d082 Mon Sep 17 00:00:00 2001 From: Rikard Falkeborn Date: Tue, 9 Apr 2019 11:15:29 +0200 Subject: tools lib traceevent: Fix missing equality check for strcmp There was a missing comparison with 0 when checking if type is "s64" or "u64". Therefore, the body of the if-statement was entered if "type" was "u64" or not "s64", which made the first strcmp() redundant since if type is "u64", it's not "s64". If type is "s64", the body of the if-statement is not entered but since the remainder of the function consists of if-statements which will not be entered if type is "s64", we will just return "val", which is correct, albeit at the cost of a few more calls to strcmp(), i.e., it will behave just as if the if-statement was entered. If type is neither "s64" or "u64", the body of the if-statement will be entered incorrectly and "val" returned. This means that any type that is checked after "s64" and "u64" is handled the same way as "s64" and "u64", i.e., the limiting of "val" to fit in for example "s8" is never reached. This was introduced in the kernel tree when the sources were copied from trace-cmd in commit f7d82350e597 ("tools/events: Add files to create libtraceevent.a"), and in the trace-cmd repo in 1cdbae6035cei ("Implement typecasting in parser") when the function was introduced, i.e., it has always behaved the wrong way. Detected by cppcheck. Signed-off-by: Rikard Falkeborn Reviewed-by: Steven Rostedt (VMware) Cc: Tzvetomir Stoyanov Fixes: f7d82350e597 ("tools/events: Add files to create libtraceevent.a") Link: http://lkml.kernel.org/r/20190409091529.2686-1-rikard.falkeborn@gmail.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/lib/traceevent/event-parse.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/lib/traceevent/event-parse.c b/tools/lib/traceevent/event-parse.c index 87494c7c619d..981c6ce2da2c 100644 --- a/tools/lib/traceevent/event-parse.c +++ b/tools/lib/traceevent/event-parse.c @@ -2233,7 +2233,7 @@ eval_type_str(unsigned long long val, const char *type, int pointer) return val & 0xffffffff; if (strcmp(type, "u64") == 0 || - strcmp(type, "s64")) + strcmp(type, "s64") == 0) return val; if (strcmp(type, "s8") == 0) -- cgit v1.2.3 From 3a5b64f05d7fe36dea0dde26423e3044fbacd482 Mon Sep 17 00:00:00 2001 From: Mao Han Date: Wed, 10 Apr 2019 16:16:43 +0800 Subject: perf evsel: Use hweight64() instead of hweight_long(attr.sample_regs_user) On 32-bits platform with more than 32 registers, the 64 bits mask is truncate to the lower 32 bits and the return value of hweight_long will always smaller than 32. When kernel outputs more than 32 registers, but the user perf program only counts 32, there will be a data mismatch result to overflow check fail. Signed-off-by: Mao Han Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Jiri Olsa Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Stephane Eranian Fixes: 6a21c0b5c2ab ("perf tools: Add core support for sampling intr machine state regs") Fixes: d03f2170546d ("perf tools: Expand perf_event__synthesize_sample()") Fixes: 0f6a30150ca2 ("perf tools: Support user regs and stack in sample parsing") Link: http://lkml.kernel.org/r/29ad7947dc8fd1ff0abd2093a72cc27a2446be9f.1554883878.git.han_mao@c-sky.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/evsel.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index 66d066f18b5b..966360844fff 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -2368,7 +2368,7 @@ int perf_evsel__parse_sample(struct perf_evsel *evsel, union perf_event *event, if (data->user_regs.abi) { u64 mask = evsel->attr.sample_regs_user; - sz = hweight_long(mask) * sizeof(u64); + sz = hweight64(mask) * sizeof(u64); OVERFLOW_CHECK(array, sz, max_size); data->user_regs.mask = mask; data->user_regs.regs = (u64 *)array; @@ -2424,7 +2424,7 @@ int perf_evsel__parse_sample(struct perf_evsel *evsel, union perf_event *event, if (data->intr_regs.abi != PERF_SAMPLE_REGS_ABI_NONE) { u64 mask = evsel->attr.sample_regs_intr; - sz = hweight_long(mask) * sizeof(u64); + sz = hweight64(mask) * sizeof(u64); OVERFLOW_CHECK(array, sz, max_size); data->intr_regs.mask = mask; data->intr_regs.regs = (u64 *)array; @@ -2552,7 +2552,7 @@ size_t perf_event__sample_event_size(const struct perf_sample *sample, u64 type, if (type & PERF_SAMPLE_REGS_USER) { if (sample->user_regs.abi) { result += sizeof(u64); - sz = hweight_long(sample->user_regs.mask) * sizeof(u64); + sz = hweight64(sample->user_regs.mask) * sizeof(u64); result += sz; } else { result += sizeof(u64); @@ -2580,7 +2580,7 @@ size_t perf_event__sample_event_size(const struct perf_sample *sample, u64 type, if (type & PERF_SAMPLE_REGS_INTR) { if (sample->intr_regs.abi) { result += sizeof(u64); - sz = hweight_long(sample->intr_regs.mask) * sizeof(u64); + sz = hweight64(sample->intr_regs.mask) * sizeof(u64); result += sz; } else { result += sizeof(u64); @@ -2710,7 +2710,7 @@ int perf_event__synthesize_sample(union perf_event *event, u64 type, if (type & PERF_SAMPLE_REGS_USER) { if (sample->user_regs.abi) { *array++ = sample->user_regs.abi; - sz = hweight_long(sample->user_regs.mask) * sizeof(u64); + sz = hweight64(sample->user_regs.mask) * sizeof(u64); memcpy(array, sample->user_regs.regs, sz); array = (void *)array + sz; } else { @@ -2746,7 +2746,7 @@ int perf_event__synthesize_sample(union perf_event *event, u64 type, if (type & PERF_SAMPLE_REGS_INTR) { if (sample->intr_regs.abi) { *array++ = sample->intr_regs.abi; - sz = hweight_long(sample->intr_regs.mask) * sizeof(u64); + sz = hweight64(sample->intr_regs.mask) * sizeof(u64); memcpy(array, sample->intr_regs.regs, sz); array = (void *)array + sz; } else { -- cgit v1.2.3 From 6a3eb3360667170988f8a6477f6686242061488a Mon Sep 17 00:00:00 2001 From: ZhangXiaoxu Date: Sat, 6 Apr 2019 15:47:38 +0800 Subject: cifs: Fix use-after-free in SMB2_write There is a KASAN use-after-free: BUG: KASAN: use-after-free in SMB2_write+0x1342/0x1580 Read of size 8 at addr ffff8880b6a8e450 by task ln/4196 Should not release the 'req' because it will use in the trace. Fixes: eccb4422cf97 ("smb3: Add ftrace tracepoints for improved SMB3 debugging") Signed-off-by: ZhangXiaoxu Signed-off-by: Steve French CC: Stable 4.18+ Reviewed-by: Pavel Shilovsky --- fs/cifs/smb2pdu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c index 21ad01d55ab2..5d1f8d2d44e4 100644 --- a/fs/cifs/smb2pdu.c +++ b/fs/cifs/smb2pdu.c @@ -3769,7 +3769,6 @@ SMB2_write(const unsigned int xid, struct cifs_io_parms *io_parms, rc = cifs_send_recv(xid, io_parms->tcon->ses, &rqst, &resp_buftype, flags, &rsp_iov); - cifs_small_buf_release(req); rsp = (struct smb2_write_rsp *)rsp_iov.iov_base; if (rc) { @@ -3787,6 +3786,7 @@ SMB2_write(const unsigned int xid, struct cifs_io_parms *io_parms, io_parms->offset, *nbytes); } + cifs_small_buf_release(req); free_rsp_buf(resp_buftype, rsp); return rc; } -- cgit v1.2.3 From 088aaf17aa79300cab14dbee2569c58cfafd7d6e Mon Sep 17 00:00:00 2001 From: ZhangXiaoxu Date: Sat, 6 Apr 2019 15:47:39 +0800 Subject: cifs: Fix use-after-free in SMB2_read There is a KASAN use-after-free: BUG: KASAN: use-after-free in SMB2_read+0x1136/0x1190 Read of size 8 at addr ffff8880b4e45e50 by task ln/1009 Should not release the 'req' because it will use in the trace. Fixes: eccb4422cf97 ("smb3: Add ftrace tracepoints for improved SMB3 debugging") Signed-off-by: ZhangXiaoxu Signed-off-by: Steve French CC: Stable 4.18+ Reviewed-by: Pavel Shilovsky --- fs/cifs/smb2pdu.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c index 5d1f8d2d44e4..5d6adc63ad62 100644 --- a/fs/cifs/smb2pdu.c +++ b/fs/cifs/smb2pdu.c @@ -3448,8 +3448,6 @@ SMB2_read(const unsigned int xid, struct cifs_io_parms *io_parms, rqst.rq_nvec = 1; rc = cifs_send_recv(xid, ses, &rqst, &resp_buftype, flags, &rsp_iov); - cifs_small_buf_release(req); - rsp = (struct smb2_read_rsp *)rsp_iov.iov_base; if (rc) { @@ -3471,6 +3469,8 @@ SMB2_read(const unsigned int xid, struct cifs_io_parms *io_parms, io_parms->tcon->tid, ses->Suid, io_parms->offset, io_parms->length); + cifs_small_buf_release(req); + *nbytes = le32_to_cpu(rsp->DataLength); if ((*nbytes > CIFS_MAX_MSGSIZE) || (*nbytes > io_parms->length)) { -- cgit v1.2.3 From b57a55e2200ede754e4dc9cce4ba9402544b9365 Mon Sep 17 00:00:00 2001 From: ZhangXiaoxu Date: Sat, 6 Apr 2019 15:30:38 +0800 Subject: cifs: Fix lease buffer length error There is a KASAN slab-out-of-bounds: BUG: KASAN: slab-out-of-bounds in _copy_from_iter_full+0x783/0xaa0 Read of size 80 at addr ffff88810c35e180 by task mount.cifs/539 CPU: 1 PID: 539 Comm: mount.cifs Not tainted 4.19 #10 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.12.0-0-ga698c8995f-prebuilt.qemu.org 04/01/2014 Call Trace: dump_stack+0xdd/0x12a print_address_description+0xa7/0x540 kasan_report+0x1ff/0x550 check_memory_region+0x2f1/0x310 memcpy+0x2f/0x80 _copy_from_iter_full+0x783/0xaa0 tcp_sendmsg_locked+0x1840/0x4140 tcp_sendmsg+0x37/0x60 inet_sendmsg+0x18c/0x490 sock_sendmsg+0xae/0x130 smb_send_kvec+0x29c/0x520 __smb_send_rqst+0x3ef/0xc60 smb_send_rqst+0x25a/0x2e0 compound_send_recv+0x9e8/0x2af0 cifs_send_recv+0x24/0x30 SMB2_open+0x35e/0x1620 open_shroot+0x27b/0x490 smb2_open_op_close+0x4e1/0x590 smb2_query_path_info+0x2ac/0x650 cifs_get_inode_info+0x1058/0x28f0 cifs_root_iget+0x3bb/0xf80 cifs_smb3_do_mount+0xe00/0x14c0 cifs_do_mount+0x15/0x20 mount_fs+0x5e/0x290 vfs_kern_mount+0x88/0x460 do_mount+0x398/0x31e0 ksys_mount+0xc6/0x150 __x64_sys_mount+0xea/0x190 do_syscall_64+0x122/0x590 entry_SYSCALL_64_after_hwframe+0x44/0xa9 It can be reproduced by the following step: 1. samba configured with: server max protocol = SMB2_10 2. mount -o vers=default When parse the mount version parameter, the 'ops' and 'vals' was setted to smb30, if negotiate result is smb21, just update the 'ops' to smb21, but the 'vals' is still smb30. When add lease context, the iov_base is allocated with smb21 ops, but the iov_len is initiallited with the smb30. Because the iov_len is longer than iov_base, when send the message, copy array out of bounds. we need to keep the 'ops' and 'vals' consistent. Fixes: 9764c02fcbad ("SMB3: Add support for multidialect negotiate (SMB2.1 and later)") Fixes: d5c7076b772a ("smb3: add smb3.1.1 to default dialect list") Signed-off-by: ZhangXiaoxu Signed-off-by: Steve French CC: Stable Reviewed-by: Pavel Shilovsky --- fs/cifs/smb2pdu.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c index 5d6adc63ad62..b8f7262ac354 100644 --- a/fs/cifs/smb2pdu.c +++ b/fs/cifs/smb2pdu.c @@ -832,8 +832,11 @@ SMB2_negotiate(const unsigned int xid, struct cifs_ses *ses) } else if (rsp->DialectRevision == cpu_to_le16(SMB21_PROT_ID)) { /* ops set to 3.0 by default for default so update */ ses->server->ops = &smb21_operations; - } else if (rsp->DialectRevision == cpu_to_le16(SMB311_PROT_ID)) + ses->server->vals = &smb21_values; + } else if (rsp->DialectRevision == cpu_to_le16(SMB311_PROT_ID)) { ses->server->ops = &smb311_operations; + ses->server->vals = &smb311_values; + } } else if (le16_to_cpu(rsp->DialectRevision) != ses->server->vals->protocol_id) { /* if requested single dialect ensure returned dialect matched */ -- cgit v1.2.3 From e6d0fb7b34f264f72c33053558a360a6a734905e Mon Sep 17 00:00:00 2001 From: Ronnie Sahlberg Date: Wed, 10 Apr 2019 07:47:22 +1000 Subject: cifs: fix handle leak in smb2_query_symlink() If we enter smb2_query_symlink() for something that is not a symlink and where the SMB2_open() would succeed we would never end up closing this handle and would thus leak a handle on the server. Fix this by immediately calling SMB2_close() on successfull open. Signed-off-by: Ronnie Sahlberg CC: Stable Signed-off-by: Steve French Reviewed-by: Pavel Shilovsky --- fs/cifs/smb2ops.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c index 00225e699d03..c36ff0d1fe2a 100644 --- a/fs/cifs/smb2ops.c +++ b/fs/cifs/smb2ops.c @@ -2389,6 +2389,8 @@ smb2_query_symlink(const unsigned int xid, struct cifs_tcon *tcon, rc = SMB2_open(xid, &oparms, utf16_path, &oplock, NULL, &err_iov, &resp_buftype); + if (!rc) + SMB2_close(xid, tcon, fid.persistent_fid, fid.volatile_fid); if (!rc || !err_iov.iov_base) { rc = -ENOENT; goto free_path; -- cgit v1.2.3 From b98749cac4a695f084a5ff076f4510b23e353ecd Mon Sep 17 00:00:00 2001 From: Aurelien Aptel Date: Fri, 29 Mar 2019 10:49:12 +0100 Subject: CIFS: keep FileInfo handle live during oplock break In the oplock break handler, writing pending changes from pages puts the FileInfo handle. If the refcount reaches zero it closes the handle and waits for any oplock break handler to return, thus causing a deadlock. To prevent this situation: * We add a wait flag to cifsFileInfo_put() to decide whether we should wait for running/pending oplock break handlers * We keep an additionnal reference of the SMB FileInfo handle so that for the rest of the handler putting the handle won't close it. - The ref is bumped everytime we queue the handler via the cifs_queue_oplock_break() helper. - The ref is decremented at the end of the handler This bug was triggered by xfstest 464. Also important fix to address the various reports of oops in smb2_push_mandatory_locks Signed-off-by: Aurelien Aptel Signed-off-by: Steve French Reviewed-by: Pavel Shilovsky CC: Stable --- fs/cifs/cifsglob.h | 2 ++ fs/cifs/file.c | 30 +++++++++++++++++++++++++----- fs/cifs/misc.c | 25 +++++++++++++++++++++++-- fs/cifs/smb2misc.c | 6 +++--- 4 files changed, 53 insertions(+), 10 deletions(-) diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h index 5b18d4585740..585ad3207cb1 100644 --- a/fs/cifs/cifsglob.h +++ b/fs/cifs/cifsglob.h @@ -1333,6 +1333,7 @@ cifsFileInfo_get_locked(struct cifsFileInfo *cifs_file) } struct cifsFileInfo *cifsFileInfo_get(struct cifsFileInfo *cifs_file); +void _cifsFileInfo_put(struct cifsFileInfo *cifs_file, bool wait_oplock_hdlr); void cifsFileInfo_put(struct cifsFileInfo *cifs_file); #define CIFS_CACHE_READ_FLG 1 @@ -1855,6 +1856,7 @@ GLOBAL_EXTERN spinlock_t gidsidlock; #endif /* CONFIG_CIFS_ACL */ void cifs_oplock_break(struct work_struct *work); +void cifs_queue_oplock_break(struct cifsFileInfo *cfile); extern const struct slow_work_ops cifs_oplock_break_ops; extern struct workqueue_struct *cifsiod_wq; diff --git a/fs/cifs/file.c b/fs/cifs/file.c index 89006e044973..9c0ccc06d172 100644 --- a/fs/cifs/file.c +++ b/fs/cifs/file.c @@ -360,12 +360,30 @@ cifsFileInfo_get(struct cifsFileInfo *cifs_file) return cifs_file; } -/* - * Release a reference on the file private data. This may involve closing - * the filehandle out on the server. Must be called without holding - * tcon->open_file_lock and cifs_file->file_info_lock. +/** + * cifsFileInfo_put - release a reference of file priv data + * + * Always potentially wait for oplock handler. See _cifsFileInfo_put(). */ void cifsFileInfo_put(struct cifsFileInfo *cifs_file) +{ + _cifsFileInfo_put(cifs_file, true); +} + +/** + * _cifsFileInfo_put - release a reference of file priv data + * + * This may involve closing the filehandle @cifs_file out on the + * server. Must be called without holding tcon->open_file_lock and + * cifs_file->file_info_lock. + * + * If @wait_for_oplock_handler is true and we are releasing the last + * reference, wait for any running oplock break handler of the file + * and cancel any pending one. If calling this function from the + * oplock break handler, you need to pass false. + * + */ +void _cifsFileInfo_put(struct cifsFileInfo *cifs_file, bool wait_oplock_handler) { struct inode *inode = d_inode(cifs_file->dentry); struct cifs_tcon *tcon = tlink_tcon(cifs_file->tlink); @@ -414,7 +432,8 @@ void cifsFileInfo_put(struct cifsFileInfo *cifs_file) spin_unlock(&tcon->open_file_lock); - oplock_break_cancelled = cancel_work_sync(&cifs_file->oplock_break); + oplock_break_cancelled = wait_oplock_handler ? + cancel_work_sync(&cifs_file->oplock_break) : false; if (!tcon->need_reconnect && !cifs_file->invalidHandle) { struct TCP_Server_Info *server = tcon->ses->server; @@ -4603,6 +4622,7 @@ void cifs_oplock_break(struct work_struct *work) cinode); cifs_dbg(FYI, "Oplock release rc = %d\n", rc); } + _cifsFileInfo_put(cfile, false /* do not wait for ourself */); cifs_done_oplock_break(cinode); } diff --git a/fs/cifs/misc.c b/fs/cifs/misc.c index bee203055b30..1e1626a2cfc3 100644 --- a/fs/cifs/misc.c +++ b/fs/cifs/misc.c @@ -501,8 +501,7 @@ is_valid_oplock_break(char *buffer, struct TCP_Server_Info *srv) CIFS_INODE_DOWNGRADE_OPLOCK_TO_L2, &pCifsInode->flags); - queue_work(cifsoplockd_wq, - &netfile->oplock_break); + cifs_queue_oplock_break(netfile); netfile->oplock_break_cancelled = false; spin_unlock(&tcon->open_file_lock); @@ -607,6 +606,28 @@ void cifs_put_writer(struct cifsInodeInfo *cinode) spin_unlock(&cinode->writers_lock); } +/** + * cifs_queue_oplock_break - queue the oplock break handler for cfile + * + * This function is called from the demultiplex thread when it + * receives an oplock break for @cfile. + * + * Assumes the tcon->open_file_lock is held. + * Assumes cfile->file_info_lock is NOT held. + */ +void cifs_queue_oplock_break(struct cifsFileInfo *cfile) +{ + /* + * Bump the handle refcount now while we hold the + * open_file_lock to enforce the validity of it for the oplock + * break handler. The matching put is done at the end of the + * handler. + */ + cifsFileInfo_get(cfile); + + queue_work(cifsoplockd_wq, &cfile->oplock_break); +} + void cifs_done_oplock_break(struct cifsInodeInfo *cinode) { clear_bit(CIFS_INODE_PENDING_OPLOCK_BREAK, &cinode->flags); diff --git a/fs/cifs/smb2misc.c b/fs/cifs/smb2misc.c index 0e3570e40ff8..e311f58dc1c8 100644 --- a/fs/cifs/smb2misc.c +++ b/fs/cifs/smb2misc.c @@ -555,7 +555,7 @@ smb2_tcon_has_lease(struct cifs_tcon *tcon, struct smb2_lease_break *rsp, clear_bit(CIFS_INODE_DOWNGRADE_OPLOCK_TO_L2, &cinode->flags); - queue_work(cifsoplockd_wq, &cfile->oplock_break); + cifs_queue_oplock_break(cfile); kfree(lw); return true; } @@ -712,8 +712,8 @@ smb2_is_valid_oplock_break(char *buffer, struct TCP_Server_Info *server) CIFS_INODE_DOWNGRADE_OPLOCK_TO_L2, &cinode->flags); spin_unlock(&cfile->file_info_lock); - queue_work(cifsoplockd_wq, - &cfile->oplock_break); + + cifs_queue_oplock_break(cfile); spin_unlock(&tcon->open_file_lock); spin_unlock(&cifs_tcp_ses_lock); -- cgit v1.2.3 From 2e8e19226398db8265a8e675fcc0118b9e80c9e8 Mon Sep 17 00:00:00 2001 From: Phil Auld Date: Tue, 19 Mar 2019 09:00:05 -0400 Subject: sched/fair: Limit sched_cfs_period_timer() loop to avoid hard lockup With extremely short cfs_period_us setting on a parent task group with a large number of children the for loop in sched_cfs_period_timer() can run until the watchdog fires. There is no guarantee that the call to hrtimer_forward_now() will ever return 0. The large number of children can make do_sched_cfs_period_timer() take longer than the period. NMI watchdog: Watchdog detected hard LOCKUP on cpu 24 RIP: 0010:tg_nop+0x0/0x10 walk_tg_tree_from+0x29/0xb0 unthrottle_cfs_rq+0xe0/0x1a0 distribute_cfs_runtime+0xd3/0xf0 sched_cfs_period_timer+0xcb/0x160 ? sched_cfs_slack_timer+0xd0/0xd0 __hrtimer_run_queues+0xfb/0x270 hrtimer_interrupt+0x122/0x270 smp_apic_timer_interrupt+0x6a/0x140 apic_timer_interrupt+0xf/0x20 To prevent this we add protection to the loop that detects when the loop has run too many times and scales the period and quota up, proportionally, so that the timer can complete before then next period expires. This preserves the relative runtime quota while preventing the hard lockup. A warning is issued reporting this state and the new values. Signed-off-by: Phil Auld Signed-off-by: Peter Zijlstra (Intel) Cc: Cc: Anton Blanchard Cc: Ben Segall Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: https://lkml.kernel.org/r/20190319130005.25492-1-pauld@redhat.com Signed-off-by: Ingo Molnar --- kernel/sched/fair.c | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 40bd1e27b1b7..a4d9e14bf138 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -4885,6 +4885,8 @@ static enum hrtimer_restart sched_cfs_slack_timer(struct hrtimer *timer) return HRTIMER_NORESTART; } +extern const u64 max_cfs_quota_period; + static enum hrtimer_restart sched_cfs_period_timer(struct hrtimer *timer) { struct cfs_bandwidth *cfs_b = @@ -4892,6 +4894,7 @@ static enum hrtimer_restart sched_cfs_period_timer(struct hrtimer *timer) unsigned long flags; int overrun; int idle = 0; + int count = 0; raw_spin_lock_irqsave(&cfs_b->lock, flags); for (;;) { @@ -4899,6 +4902,28 @@ static enum hrtimer_restart sched_cfs_period_timer(struct hrtimer *timer) if (!overrun) break; + if (++count > 3) { + u64 new, old = ktime_to_ns(cfs_b->period); + + new = (old * 147) / 128; /* ~115% */ + new = min(new, max_cfs_quota_period); + + cfs_b->period = ns_to_ktime(new); + + /* since max is 1s, this is limited to 1e9^2, which fits in u64 */ + cfs_b->quota *= new; + cfs_b->quota = div64_u64(cfs_b->quota, old); + + pr_warn_ratelimited( + "cfs_period_timer[cpu%d]: period too short, scaling up (new cfs_period_us %lld, cfs_quota_us = %lld)\n", + smp_processor_id(), + div_u64(new, NSEC_PER_USEC), + div_u64(cfs_b->quota, NSEC_PER_USEC)); + + /* reset count so we don't come right back in here */ + count = 0; + } + idle = do_sched_cfs_period_timer(cfs_b, overrun, flags); } if (idle) -- cgit v1.2.3 From 1b02cd6a2d7f3e2a6a5262887d2cb2912083e42f Mon Sep 17 00:00:00 2001 From: luca abeni Date: Mon, 25 Mar 2019 14:15:30 +0100 Subject: sched/deadline: Correctly handle active 0-lag timers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit syzbot reported the following warning: [ ] WARNING: CPU: 4 PID: 17089 at kernel/sched/deadline.c:255 task_non_contending+0xae0/0x1950 line 255 of deadline.c is: WARN_ON(hrtimer_active(&dl_se->inactive_timer)); in task_non_contending(). Unfortunately, in some cases (for example, a deadline task continuosly blocking and waking immediately) it can happen that a task blocks (and task_non_contending() is called) while the 0-lag timer is still active. In this case, the safest thing to do is to immediately decrease the running bandwidth of the task, without trying to re-arm the 0-lag timer. Signed-off-by: luca abeni Signed-off-by: Peter Zijlstra (Intel) Acked-by: Juri Lelli Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: chengjian (D) Link: https://lkml.kernel.org/r/20190325131530.34706-1-luca.abeni@santannapisa.it Signed-off-by: Ingo Molnar --- kernel/sched/deadline.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c index 6a73e41a2016..43901fa3f269 100644 --- a/kernel/sched/deadline.c +++ b/kernel/sched/deadline.c @@ -252,7 +252,6 @@ static void task_non_contending(struct task_struct *p) if (dl_entity_is_special(dl_se)) return; - WARN_ON(hrtimer_active(&dl_se->inactive_timer)); WARN_ON(dl_se->dl_non_contending); zerolag_time = dl_se->deadline - @@ -269,7 +268,7 @@ static void task_non_contending(struct task_struct *p) * If the "0-lag time" already passed, decrease the active * utilization now, instead of starting a timer */ - if (zerolag_time < 0) { + if ((zerolag_time < 0) || hrtimer_active(&dl_se->inactive_timer)) { if (dl_task(p)) sub_running_bw(dl_se, dl_rq); if (!dl_task(p) || p->state == TASK_DEAD) { -- cgit v1.2.3 From 2a3f7221acddfe1caa9ff09b3a8158c39b2fdeac Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Tue, 16 Apr 2019 17:06:33 +0200 Subject: ALSA: core: Fix card races between register and disconnect There is a small race window in the card disconnection code that allows the registration of another card with the very same card id. This leads to a warning in procfs creation as caught by syzkaller. The problem is that we delete snd_cards and snd_cards_lock entries at the very beginning of the disconnection procedure. This makes the slot available to be assigned for another card object while the disconnection procedure is being processed. Then it becomes possible to issue a procfs registration with the existing file name although we check the conflict beforehand. The fix is simply to move the snd_cards and snd_cards_lock clearances at the end of the disconnection procedure. The references to these entries are merely either from the global proc files like /proc/asound/cards or from the card registration / disconnection, so it should be fine to shift at the very end. Reported-by: syzbot+48df349490c36f9f54ab@syzkaller.appspotmail.com Cc: Signed-off-by: Takashi Iwai --- sound/core/init.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/sound/core/init.c b/sound/core/init.c index 0c4dc40376a7..079c12d64b0e 100644 --- a/sound/core/init.c +++ b/sound/core/init.c @@ -382,14 +382,7 @@ int snd_card_disconnect(struct snd_card *card) card->shutdown = 1; spin_unlock(&card->files_lock); - /* phase 1: disable fops (user space) operations for ALSA API */ - mutex_lock(&snd_card_mutex); - snd_cards[card->number] = NULL; - clear_bit(card->number, snd_cards_lock); - mutex_unlock(&snd_card_mutex); - - /* phase 2: replace file->f_op with special dummy operations */ - + /* replace file->f_op with special dummy operations */ spin_lock(&card->files_lock); list_for_each_entry(mfile, &card->files_list, list) { /* it's critical part, use endless loop */ @@ -405,7 +398,7 @@ int snd_card_disconnect(struct snd_card *card) } spin_unlock(&card->files_lock); - /* phase 3: notify all connected devices about disconnection */ + /* notify all connected devices about disconnection */ /* at this point, they cannot respond to any calls except release() */ #if IS_ENABLED(CONFIG_SND_MIXER_OSS) @@ -421,6 +414,13 @@ int snd_card_disconnect(struct snd_card *card) device_del(&card->card_dev); card->registered = false; } + + /* disable fops (user space) operations for ALSA API */ + mutex_lock(&snd_card_mutex); + snd_cards[card->number] = NULL; + clear_bit(card->number, snd_cards_lock); + mutex_unlock(&snd_card_mutex); + #ifdef CONFIG_PM wake_up(&card->power_sleep); #endif -- cgit v1.2.3 From 1e6db2ee86e6a4399fc0ae5689e55e0fd1c43caf Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Mon, 15 Apr 2019 14:53:33 +0200 Subject: perf top: Always sample time to satisfy needs of use of ordered queuing Bastian reported broken 'perf top -p PID' command, it won't display any data. The problem is that for -p option we monitor single thread, so we don't enable time in samples, because it's not needed. However since commit 16c66bc167cc we use ordered queues to stash data plus later commits added logic for dropping samples in case there's big load and we don't keep up. All this needs timestamp for sample. Enabling it unconditionally for perf top. Reported-by: Bastian Beischer Signed-off-by: Jiri Olsa Tested-by: Arnaldo Carvalho de Melo Cc: Alexander Shishkin Cc: Namhyung Kim Cc: Peter Zijlstra Cc: bastian beischer Fixes: 16c66bc167cc ("perf top: Add processing thread") Link: http://lkml.kernel.org/r/20190415125333.27160-1-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-top.c | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c index 1999d6533d12..fbbb0da43abb 100644 --- a/tools/perf/builtin-top.c +++ b/tools/perf/builtin-top.c @@ -1377,6 +1377,7 @@ int cmd_top(int argc, const char **argv) * */ .overwrite = 0, .sample_time = true, + .sample_time_set = true, }, .max_stack = sysctl__max_stack(), .annotation_opts = annotation__default_options, -- cgit v1.2.3 From 30e4c574969ccb624940f1f2f7886596f8fc60ad Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 16 Apr 2019 11:30:15 -0300 Subject: tools include uapi: Sync sound/asound.h copy Picking the changes from: Fixes: b5bdbb6ccd11 ("ALSA: uapi: #include in asound.h") Which entails no changes in the tooling side. To silence this perf tools build warning: Warning: Kernel ABI header at 'tools/include/uapi/sound/asound.h' differs from latest version at 'include/uapi/sound/asound.h' diff -u tools/include/uapi/sound/asound.h include/uapi/sound/asound.h Cc: Adrian Hunter Cc: Daniel Mentz Cc: Jiri Olsa Cc: Namhyung Kim Cc: Takashi Iwai Link: https://lkml.kernel.org/n/tip-15o4twfkbn6nny9aus90dyzx@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/include/uapi/sound/asound.h | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/include/uapi/sound/asound.h b/tools/include/uapi/sound/asound.h index 404d4b9ffe76..df1153cea0b7 100644 --- a/tools/include/uapi/sound/asound.h +++ b/tools/include/uapi/sound/asound.h @@ -32,6 +32,7 @@ #ifndef __KERNEL__ #include +#include #endif /* -- cgit v1.2.3 From bd2e98b351b668fa914b46cc77040fdb2a817c06 Mon Sep 17 00:00:00 2001 From: Corey Minyard Date: Tue, 16 Apr 2019 16:26:45 -0500 Subject: ipmi: Fix failure on SMBIOS specified devices An extra memset was put into a place that cleared the interface type. Reported-by: Tony Camuso Fixes: 3cd83bac481dc4 ("ipmi: Consolidate the adding of platform devices") Signed-off-by: Corey Minyard --- drivers/char/ipmi/ipmi_dmi.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/char/ipmi/ipmi_dmi.c b/drivers/char/ipmi/ipmi_dmi.c index ff0b199be472..f2411468f33f 100644 --- a/drivers/char/ipmi/ipmi_dmi.c +++ b/drivers/char/ipmi/ipmi_dmi.c @@ -66,7 +66,6 @@ static void __init dmi_add_platform_ipmi(unsigned long base_addr, return; } - memset(&p, 0, sizeof(p)); p.addr = base_addr; p.space = space; p.regspacing = offset; -- cgit v1.2.3 From a885bcfd152f97b25005298ab2d6b741aed9b49c Mon Sep 17 00:00:00 2001 From: Tony Camuso Date: Tue, 9 Apr 2019 15:20:03 -0400 Subject: ipmi: ipmi_si_hardcode.c: init si_type array to fix a crash The intended behavior of function ipmi_hardcode_init_one() is to default to kcs interface when no type argument is presented when initializing ipmi with hard coded addresses. However, the array of char pointers allocated on the stack by function ipmi_hardcode_init() was not inited to zeroes, so it contained stack debris. Consequently, passing the cruft stored in this array to function ipmi_hardcode_init_one() caused a crash when it was unable to detect that the char * being passed was nonsense and tried to access the address specified by the bogus pointer. The fix is simply to initialize the si_type array to zeroes, so if there were no type argument given to at the command line, function ipmi_hardcode_init_one() could properly default to the kcs interface. Signed-off-by: Tony Camuso Message-Id: <1554837603-40299-1-git-send-email-tcamuso@redhat.com> Signed-off-by: Corey Minyard --- drivers/char/ipmi/ipmi_si_hardcode.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/char/ipmi/ipmi_si_hardcode.c b/drivers/char/ipmi/ipmi_si_hardcode.c index 01946cad3d13..682221eebd66 100644 --- a/drivers/char/ipmi/ipmi_si_hardcode.c +++ b/drivers/char/ipmi/ipmi_si_hardcode.c @@ -118,6 +118,8 @@ void __init ipmi_hardcode_init(void) char *str; char *si_type[SI_MAX_PARMS]; + memset(si_type, 0, sizeof(si_type)); + /* Parse out the si_type string into its components. */ str = si_type_str; if (*str != '\0') { -- cgit v1.2.3 From a1e8783db8e0d58891681bc1e6d9ada66eae8e20 Mon Sep 17 00:00:00 2001 From: Petr Štetiar Date: Fri, 12 Apr 2019 23:08:32 +0200 Subject: MIPS: perf: ath79: Fix perfcount IRQ assignment MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Currently it's not possible to use perf on ath79 due to genirq flags mismatch happening on static virtual IRQ 13 which is used for performance counters hardware IRQ 5. On TP-Link Archer C7v5: CPU0 2: 0 MIPS 2 ath9k 4: 318 MIPS 4 19000000.eth 7: 55034 MIPS 7 timer 8: 1236 MISC 3 ttyS0 12: 0 INTC 1 ehci_hcd:usb1 13: 0 gpio-ath79 2 keys 14: 0 gpio-ath79 5 keys 15: 31 AR724X PCI 1 ath10k_pci $ perf top genirq: Flags mismatch irq 13. 00014c83 (mips_perf_pmu) vs. 00002003 (keys) On TP-Link Archer C7v4: CPU0 4: 0 MIPS 4 19000000.eth 5: 7135 MIPS 5 1a000000.eth 7: 98379 MIPS 7 timer 8: 30 MISC 3 ttyS0 12: 90028 INTC 0 ath9k 13: 5520 INTC 1 ehci_hcd:usb1 14: 4623 INTC 2 ehci_hcd:usb2 15: 32844 AR724X PCI 1 ath10k_pci 16: 0 gpio-ath79 16 keys 23: 0 gpio-ath79 23 keys $ perf top genirq: Flags mismatch irq 13. 00014c80 (mips_perf_pmu) vs. 00000080 (ehci_hcd:usb1) This problem is happening, because currently statically assigned virtual IRQ 13 for performance counters is not claimed during the initialization of MIPS PMU during the bootup, so the IRQ subsystem doesn't know, that this interrupt isn't available for further use. So this patch fixes the issue by simply booking hardware IRQ 5 for MIPS PMU. Tested-by: Kevin 'ldir' Darbyshire-Bryant Signed-off-by: Petr Štetiar Acked-by: John Crispin Acked-by: Marc Zyngier Signed-off-by: Paul Burton Cc: linux-mips@vger.kernel.org Cc: Ralf Baechle Cc: James Hogan Cc: Thomas Gleixner Cc: Jason Cooper --- arch/mips/ath79/setup.c | 6 ------ drivers/irqchip/irq-ath79-misc.c | 11 +++++++++++ 2 files changed, 11 insertions(+), 6 deletions(-) diff --git a/arch/mips/ath79/setup.c b/arch/mips/ath79/setup.c index 4a70c5de8c92..25a57895a3a3 100644 --- a/arch/mips/ath79/setup.c +++ b/arch/mips/ath79/setup.c @@ -210,12 +210,6 @@ const char *get_system_type(void) return ath79_sys_type; } -int get_c0_perfcount_int(void) -{ - return ATH79_MISC_IRQ(5); -} -EXPORT_SYMBOL_GPL(get_c0_perfcount_int); - unsigned int get_c0_compare_int(void) { return CP0_LEGACY_COMPARE_IRQ; diff --git a/drivers/irqchip/irq-ath79-misc.c b/drivers/irqchip/irq-ath79-misc.c index aa7290784636..0390603170b4 100644 --- a/drivers/irqchip/irq-ath79-misc.c +++ b/drivers/irqchip/irq-ath79-misc.c @@ -22,6 +22,15 @@ #define AR71XX_RESET_REG_MISC_INT_ENABLE 4 #define ATH79_MISC_IRQ_COUNT 32 +#define ATH79_MISC_PERF_IRQ 5 + +static int ath79_perfcount_irq; + +int get_c0_perfcount_int(void) +{ + return ath79_perfcount_irq; +} +EXPORT_SYMBOL_GPL(get_c0_perfcount_int); static void ath79_misc_irq_handler(struct irq_desc *desc) { @@ -113,6 +122,8 @@ static void __init ath79_misc_intc_domain_init( { void __iomem *base = domain->host_data; + ath79_perfcount_irq = irq_create_mapping(domain, ATH79_MISC_PERF_IRQ); + /* Disable and clear all interrupts */ __raw_writel(0, base + AR71XX_RESET_REG_MISC_INT_ENABLE); __raw_writel(0, base + AR71XX_RESET_REG_MISC_INT_STATUS); -- cgit v1.2.3 From 3b2e2904deb314cc77a2192f506f2fd44e3d10d0 Mon Sep 17 00:00:00 2001 From: Nikolay Aleksandrov Date: Thu, 11 Apr 2019 13:56:39 +0300 Subject: net: bridge: fix per-port af_packet sockets When the commit below was introduced it changed two visible things: - the skb was no longer passed through the protocol handlers with the original device - the skb was passed up the stack with skb->dev = bridge The first change broke af_packet sockets on bridge ports. For example we use them for hostapd which listens for ETH_P_PAE packets on the ports. We discussed two possible fixes: - create a clone and pass it through NF_HOOK(), act on the original skb based on the result - somehow signal to the caller from the okfn() that it was called, meaning the skb is ok to be passed, which this patch is trying to implement via returning 1 from the bridge link-local okfn() Note that we rely on the fact that NF_QUEUE/STOLEN would return 0 and drop/error would return < 0 thus the okfn() is called only when the return was 1, so we signal to the caller that it was called by preserving the return value from nf_hook(). Fixes: 8626c56c8279 ("bridge: fix potential use-after-free when hook returns QUEUE or STOLEN verdict") Signed-off-by: Nikolay Aleksandrov Signed-off-by: David S. Miller --- net/bridge/br_input.c | 23 ++++++++++++++--------- 1 file changed, 14 insertions(+), 9 deletions(-) diff --git a/net/bridge/br_input.c b/net/bridge/br_input.c index 5ea7e56119c1..ba303ee99b9b 100644 --- a/net/bridge/br_input.c +++ b/net/bridge/br_input.c @@ -197,13 +197,10 @@ static void __br_handle_local_finish(struct sk_buff *skb) /* note: already called with rcu_read_lock */ static int br_handle_local_finish(struct net *net, struct sock *sk, struct sk_buff *skb) { - struct net_bridge_port *p = br_port_get_rcu(skb->dev); - __br_handle_local_finish(skb); - BR_INPUT_SKB_CB(skb)->brdev = p->br->dev; - br_pass_frame_up(skb); - return 0; + /* return 1 to signal the okfn() was called so it's ok to use the skb */ + return 1; } /* @@ -280,10 +277,18 @@ rx_handler_result_t br_handle_frame(struct sk_buff **pskb) goto forward; } - /* Deliver packet to local host only */ - NF_HOOK(NFPROTO_BRIDGE, NF_BR_LOCAL_IN, dev_net(skb->dev), - NULL, skb, skb->dev, NULL, br_handle_local_finish); - return RX_HANDLER_CONSUMED; + /* The else clause should be hit when nf_hook(): + * - returns < 0 (drop/error) + * - returns = 0 (stolen/nf_queue) + * Thus return 1 from the okfn() to signal the skb is ok to pass + */ + if (NF_HOOK(NFPROTO_BRIDGE, NF_BR_LOCAL_IN, + dev_net(skb->dev), NULL, skb, skb->dev, NULL, + br_handle_local_finish) == 1) { + return RX_HANDLER_PASS; + } else { + return RX_HANDLER_CONSUMED; + } } forward: -- cgit v1.2.3 From ad910c7c01269f229a97c335f2dc669fff750f65 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Mon, 15 Apr 2019 19:14:45 +0200 Subject: net/core: work around section mismatch warning for ptp_classifier The routine ptp_classifier_init() uses an initializer for an automatic struct type variable which refers to an __initdata symbol. This is perfectly legal, but may trigger a section mismatch warning when running the compiler in -fpic mode, due to the fact that the initializer may be emitted into an anonymous .data section thats lack the __init annotation. So work around it by using assignments instead. Signed-off-by: Ard Biesheuvel Signed-off-by: Gerald Schaefer Signed-off-by: David S. Miller --- net/core/ptp_classifier.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/net/core/ptp_classifier.c b/net/core/ptp_classifier.c index 703cf76aa7c2..7109c168b5e0 100644 --- a/net/core/ptp_classifier.c +++ b/net/core/ptp_classifier.c @@ -185,9 +185,10 @@ void __init ptp_classifier_init(void) { 0x16, 0, 0, 0x00000000 }, { 0x06, 0, 0, 0x00000000 }, }; - struct sock_fprog_kern ptp_prog = { - .len = ARRAY_SIZE(ptp_filter), .filter = ptp_filter, - }; + struct sock_fprog_kern ptp_prog; + + ptp_prog.len = ARRAY_SIZE(ptp_filter); + ptp_prog.filter = ptp_filter; BUG_ON(bpf_prog_create(&ptp_insns, &ptp_prog)); } -- cgit v1.2.3 From 899537b73557aafbdd11050b501cf54b4f5c45af Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Mon, 15 Apr 2019 15:57:23 -0500 Subject: net: atm: Fix potential Spectre v1 vulnerabilities arg is controlled by user-space, hence leading to a potential exploitation of the Spectre variant 1 vulnerability. This issue was detected with the help of Smatch: net/atm/lec.c:715 lec_mcast_attach() warn: potential spectre issue 'dev_lec' [r] (local cap) Fix this by sanitizing arg before using it to index dev_lec. Notice that given that speculation windows are large, the policy is to kill the speculation on the first load and not worry if it can be completed with a dependent load/store [1]. [1] https://lore.kernel.org/lkml/20180423164740.GY17484@dhcp22.suse.cz/ Signed-off-by: Gustavo A. R. Silva Signed-off-by: David S. Miller --- net/atm/lec.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/net/atm/lec.c b/net/atm/lec.c index d7f5cf5b7594..ad4f829193f0 100644 --- a/net/atm/lec.c +++ b/net/atm/lec.c @@ -710,7 +710,10 @@ static int lec_vcc_attach(struct atm_vcc *vcc, void __user *arg) static int lec_mcast_attach(struct atm_vcc *vcc, int arg) { - if (arg < 0 || arg >= MAX_LEC_ITF || !dev_lec[arg]) + if (arg < 0 || arg >= MAX_LEC_ITF) + return -EINVAL; + arg = array_index_nospec(arg, MAX_LEC_ITF); + if (!dev_lec[arg]) return -EINVAL; vcc->proto_data = dev_lec[arg]; return lec_mcast_make(netdev_priv(dev_lec[arg]), vcc); @@ -728,6 +731,7 @@ static int lecd_attach(struct atm_vcc *vcc, int arg) i = arg; if (arg >= MAX_LEC_ITF) return -EINVAL; + i = array_index_nospec(arg, MAX_LEC_ITF); if (!dev_lec[i]) { int size; -- cgit v1.2.3 From d85e8be2a5a02869f815dd0ac2d743deb4cd7957 Mon Sep 17 00:00:00 2001 From: Yuya Kusakabe Date: Tue, 16 Apr 2019 10:22:28 +0900 Subject: net: Fix missing meta data in skb with vlan packet skb_reorder_vlan_header() should move XDP meta data with ethernet header if XDP meta data exists. Fixes: de8f3a83b0a0 ("bpf: add meta pointer for direct access") Signed-off-by: Yuya Kusakabe Signed-off-by: Takeru Hayasaka Co-developed-by: Takeru Hayasaka Reviewed-by: Toshiaki Makita Signed-off-by: David S. Miller --- net/core/skbuff.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/net/core/skbuff.c b/net/core/skbuff.c index ef2cd5712098..40796b8bf820 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -5083,7 +5083,8 @@ EXPORT_SYMBOL_GPL(skb_gso_validate_mac_len); static struct sk_buff *skb_reorder_vlan_header(struct sk_buff *skb) { - int mac_len; + int mac_len, meta_len; + void *meta; if (skb_cow(skb, skb_headroom(skb)) < 0) { kfree_skb(skb); @@ -5095,6 +5096,13 @@ static struct sk_buff *skb_reorder_vlan_header(struct sk_buff *skb) memmove(skb_mac_header(skb) + VLAN_HLEN, skb_mac_header(skb), mac_len - VLAN_HLEN - ETH_TLEN); } + + meta_len = skb_metadata_len(skb); + if (meta_len) { + meta = skb_metadata_end(skb) - meta_len; + memmove(meta + VLAN_HLEN, meta, meta_len); + } + skb->mac_header += VLAN_HLEN; return skb; } -- cgit v1.2.3 From f7a937801b9f8788519a23b12cb4d6c2c84d84be Mon Sep 17 00:00:00 2001 From: Tuong Lien Date: Tue, 16 Apr 2019 10:48:07 +0700 Subject: tipc: fix link established but not in session According to the link FSM, when a link endpoint got RESET_MSG (- a traditional one without the stopping bit) from its peer, it moves to PEER_RESET state and raises a LINK_DOWN event which then resets the link itself. Its state will become ESTABLISHING after the reset event and the link will be re-established soon after this endpoint starts to send ACTIVATE_MSG to the peer. There is no problem with this mechanism, however the link resetting has cleared the link 'in_session' flag (along with the other important link data such as: the link 'mtu') that was correctly set up at the 1st step (i.e. when this endpoint received the peer RESET_MSG). As a result, the link will become ESTABLISHED, but the 'in_session' flag is not set, and all STATE_MSG from its peer will be dropped at the link_validate_msg(). It means the link not synced and will sooner or later face a failure. Since the link reset action is obviously needed for a new link session (this is also true in the other situations), the problem here is that the link is re-established a bit too early when the link endpoints are not really in-sync yet. The commit forces a resync as already done in the previous commit 91986ee166cf ("tipc: fix link session and re-establish issues") by simply varying the link 'peer_session' value at the link_reset(). Acked-by: Jon Maloy Signed-off-by: Tuong Lien Signed-off-by: David S. Miller --- net/tipc/link.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/tipc/link.c b/net/tipc/link.c index 341ecd796aa4..131aa2f0fd27 100644 --- a/net/tipc/link.c +++ b/net/tipc/link.c @@ -869,6 +869,8 @@ void tipc_link_reset(struct tipc_link *l) __skb_queue_head_init(&list); l->in_session = false; + /* Force re-synch of peer session number before establishing */ + l->peer_session--; l->session++; l->mtu = l->advertised_mtu; -- cgit v1.2.3 From 4bcd4ec1017205644a2697bccbc3b5143f522f5f Mon Sep 17 00:00:00 2001 From: Jie Liu Date: Tue, 16 Apr 2019 13:10:09 +0800 Subject: tipc: set sysctl_tipc_rmem and named_timeout right range We find that sysctl_tipc_rmem and named_timeout do not have the right minimum setting. sysctl_tipc_rmem should be larger than zero, like sysctl_tcp_rmem. And named_timeout as a timeout setting should be not less than zero. Fixes: cc79dd1ba9c10 ("tipc: change socket buffer overflow control to respect sk_rcvbuf") Fixes: a5325ae5b8bff ("tipc: add name distributor resiliency queue") Signed-off-by: Jie Liu Reported-by: Qiang Ning Reviewed-by: Zhiqiang Liu Reviewed-by: Miaohe Lin Signed-off-by: David S. Miller --- net/tipc/sysctl.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/net/tipc/sysctl.c b/net/tipc/sysctl.c index 3481e4906bd6..9df82a573aa7 100644 --- a/net/tipc/sysctl.c +++ b/net/tipc/sysctl.c @@ -38,6 +38,8 @@ #include +static int zero; +static int one = 1; static struct ctl_table_header *tipc_ctl_hdr; static struct ctl_table tipc_table[] = { @@ -46,14 +48,16 @@ static struct ctl_table tipc_table[] = { .data = &sysctl_tipc_rmem, .maxlen = sizeof(sysctl_tipc_rmem), .mode = 0644, - .proc_handler = proc_dointvec, + .proc_handler = proc_dointvec_minmax, + .extra1 = &one, }, { .procname = "named_timeout", .data = &sysctl_tipc_named_timeout, .maxlen = sizeof(sysctl_tipc_named_timeout), .mode = 0644, - .proc_handler = proc_dointvec, + .proc_handler = proc_dointvec_minmax, + .extra1 = &zero, }, { .procname = "sk_filter", -- cgit v1.2.3 From 3321b6c23fb330e690ed5cf1336c827e07843200 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Tue, 16 Apr 2019 12:43:17 +0100 Subject: qed: fix spelling mistake "faspath" -> "fastpath" There is a spelling mistake in a DP_INFO message, fix it. Signed-off-by: Colin Ian King Reviewed-by: Mukesh Ojha Signed-off-by: David S. Miller --- drivers/net/ethernet/qlogic/qed/qed_sriov.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/qlogic/qed/qed_sriov.c b/drivers/net/ethernet/qlogic/qed/qed_sriov.c index 9faaa6df78ed..2f318aaf2b05 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_sriov.c +++ b/drivers/net/ethernet/qlogic/qed/qed_sriov.c @@ -1591,7 +1591,7 @@ static void qed_iov_vf_mbx_acquire(struct qed_hwfn *p_hwfn, p_vfdev->eth_fp_hsi_minor = ETH_HSI_VER_NO_PKT_LEN_TUNN; } else { DP_INFO(p_hwfn, - "VF[%d] needs fastpath HSI %02x.%02x, which is incompatible with loaded FW's faspath HSI %02x.%02x\n", + "VF[%d] needs fastpath HSI %02x.%02x, which is incompatible with loaded FW's fastpath HSI %02x.%02x\n", vf->abs_vf_id, req->vfdev_info.eth_fp_hsi_major, req->vfdev_info.eth_fp_hsi_minor, -- cgit v1.2.3 From 600bea7dba1a72874ae0cd9bc66bf2abfe43b49d Mon Sep 17 00:00:00 2001 From: Nikolay Aleksandrov Date: Tue, 16 Apr 2019 16:15:56 +0300 Subject: net: bridge: fix netlink export of vlan_stats_per_port option Since the introduction of the vlan_stats_per_port option the netlink export of it has been broken since I made a typo and used the ifla attribute instead of the bridge option to retrieve its state. Sysfs export is fine, only netlink export has been affected. Fixes: 9163a0fc1f0c0 ("net: bridge: add support for per-port vlan stats") Signed-off-by: Nikolay Aleksandrov Signed-off-by: David S. Miller --- net/bridge/br_netlink.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c index 9c07591b0232..7104cf13da84 100644 --- a/net/bridge/br_netlink.c +++ b/net/bridge/br_netlink.c @@ -1441,7 +1441,7 @@ static int br_fill_info(struct sk_buff *skb, const struct net_device *brdev) nla_put_u8(skb, IFLA_BR_VLAN_STATS_ENABLED, br_opt_get(br, BROPT_VLAN_STATS_ENABLED)) || nla_put_u8(skb, IFLA_BR_VLAN_STATS_PER_PORT, - br_opt_get(br, IFLA_BR_VLAN_STATS_PER_PORT))) + br_opt_get(br, BROPT_VLAN_STATS_PER_PORT))) return -EMSGSIZE; #endif #ifdef CONFIG_BRIDGE_IGMP_SNOOPING -- cgit v1.2.3 From a8fd48b50deaa20808bbf0f6685f6f1acba6a64c Mon Sep 17 00:00:00 2001 From: Claudiu Manoil Date: Tue, 16 Apr 2019 17:51:58 +0300 Subject: ocelot: Don't sleep in atomic context (irqs_disabled()) Preemption disabled at: [] dev_set_rx_mode+0x1c/0x38 Call trace: [] dump_backtrace+0x0/0x3d0 [] show_stack+0x14/0x20 [] dump_stack+0xac/0xe4 [] ___might_sleep+0x164/0x238 [] __might_sleep+0x50/0x88 [] kmem_cache_alloc+0x17c/0x1d0 [] ocelot_set_rx_mode+0x108/0x188 [mscc_ocelot_common] [] __dev_set_rx_mode+0x58/0xa0 [] dev_set_rx_mode+0x24/0x38 Fixes: a556c76adc05 ("net: mscc: Add initial Ocelot switch support") Signed-off-by: Claudiu Manoil Signed-off-by: David S. Miller --- drivers/net/ethernet/mscc/ocelot.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mscc/ocelot.c b/drivers/net/ethernet/mscc/ocelot.c index a1d0d6e42533..6cb2f03b67e6 100644 --- a/drivers/net/ethernet/mscc/ocelot.c +++ b/drivers/net/ethernet/mscc/ocelot.c @@ -613,7 +613,7 @@ static int ocelot_mact_mc_add(struct ocelot_port *port, struct netdev_hw_addr *hw_addr) { struct ocelot *ocelot = port->ocelot; - struct netdev_hw_addr *ha = kzalloc(sizeof(*ha), GFP_KERNEL); + struct netdev_hw_addr *ha = kzalloc(sizeof(*ha), GFP_ATOMIC); if (!ha) return -ENOMEM; -- cgit v1.2.3 From 1e1caa9735f90b5452fc48685c23552e56aa1920 Mon Sep 17 00:00:00 2001 From: Claudiu Manoil Date: Tue, 16 Apr 2019 17:51:59 +0300 Subject: ocelot: Clean up stats update deferred work This is preventive cleanup that may save troubles later. No need to cancel repeateadly queued work if code is properly refactored. Don't let the ethtool -s process interfere with the stat workqueue scheduling. Signed-off-by: Claudiu Manoil Signed-off-by: David S. Miller --- drivers/net/ethernet/mscc/ocelot.c | 22 ++++++++++++++-------- 1 file changed, 14 insertions(+), 8 deletions(-) diff --git a/drivers/net/ethernet/mscc/ocelot.c b/drivers/net/ethernet/mscc/ocelot.c index 6cb2f03b67e6..d715ef4fc92f 100644 --- a/drivers/net/ethernet/mscc/ocelot.c +++ b/drivers/net/ethernet/mscc/ocelot.c @@ -959,10 +959,8 @@ static void ocelot_get_strings(struct net_device *netdev, u32 sset, u8 *data) ETH_GSTRING_LEN); } -static void ocelot_check_stats(struct work_struct *work) +static void ocelot_update_stats(struct ocelot *ocelot) { - struct delayed_work *del_work = to_delayed_work(work); - struct ocelot *ocelot = container_of(del_work, struct ocelot, stats_work); int i, j; mutex_lock(&ocelot->stats_lock); @@ -986,11 +984,19 @@ static void ocelot_check_stats(struct work_struct *work) } } - cancel_delayed_work(&ocelot->stats_work); + mutex_unlock(&ocelot->stats_lock); +} + +static void ocelot_check_stats_work(struct work_struct *work) +{ + struct delayed_work *del_work = to_delayed_work(work); + struct ocelot *ocelot = container_of(del_work, struct ocelot, + stats_work); + + ocelot_update_stats(ocelot); + queue_delayed_work(ocelot->stats_queue, &ocelot->stats_work, OCELOT_STATS_CHECK_DELAY); - - mutex_unlock(&ocelot->stats_lock); } static void ocelot_get_ethtool_stats(struct net_device *dev, @@ -1001,7 +1007,7 @@ static void ocelot_get_ethtool_stats(struct net_device *dev, int i; /* check and update now */ - ocelot_check_stats(&ocelot->stats_work.work); + ocelot_update_stats(ocelot); /* Copy all counters */ for (i = 0; i < ocelot->num_stats; i++) @@ -1809,7 +1815,7 @@ int ocelot_init(struct ocelot *ocelot) ANA_CPUQ_8021_CFG_CPUQ_BPDU_VAL(6), ANA_CPUQ_8021_CFG, i); - INIT_DELAYED_WORK(&ocelot->stats_work, ocelot_check_stats); + INIT_DELAYED_WORK(&ocelot->stats_work, ocelot_check_stats_work); queue_delayed_work(ocelot->stats_queue, &ocelot->stats_work, OCELOT_STATS_CHECK_DELAY); return 0; -- cgit v1.2.3 From 50ce163a72d817a99e8974222dcf2886d5deb1ae Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 16 Apr 2019 10:55:20 -0700 Subject: tcp: tcp_grow_window() needs to respect tcp_space() For some reason, tcp_grow_window() correctly tests if enough room is present before attempting to increase tp->rcv_ssthresh, but does not prevent it to grow past tcp_space() This is causing hard to debug issues, like failing the (__tcp_select_window(sk) >= tp->rcv_wnd) test in __tcp_ack_snd_check(), causing ACK delays and possibly slow flows. Depending on tcp_rmem[2], MTU, skb->len/skb->truesize ratio, we can see the problem happening on "netperf -t TCP_RR -- -r 2000,2000" after about 60 round trips, when the active side no longer sends immediate acks. This bug predates git history. Signed-off-by: Eric Dumazet Acked-by: Soheil Hassas Yeganeh Acked-by: Neal Cardwell Acked-by: Wei Wang Signed-off-by: David S. Miller --- net/ipv4/tcp_input.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 5def3c48870e..731d3045b50a 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -402,11 +402,12 @@ static int __tcp_grow_window(const struct sock *sk, const struct sk_buff *skb) static void tcp_grow_window(struct sock *sk, const struct sk_buff *skb) { struct tcp_sock *tp = tcp_sk(sk); + int room; + + room = min_t(int, tp->window_clamp, tcp_space(sk)) - tp->rcv_ssthresh; /* Check #1 */ - if (tp->rcv_ssthresh < tp->window_clamp && - (int)tp->rcv_ssthresh < tcp_space(sk) && - !tcp_under_memory_pressure(sk)) { + if (room > 0 && !tcp_under_memory_pressure(sk)) { int incr; /* Check #2. Increase window, if skb with such overhead @@ -419,8 +420,7 @@ static void tcp_grow_window(struct sock *sk, const struct sk_buff *skb) if (incr) { incr = max_t(int, incr, 2 * skb->len); - tp->rcv_ssthresh = min(tp->rcv_ssthresh + incr, - tp->window_clamp); + tp->rcv_ssthresh += min(room, incr); inet_csk(sk)->icsk_ack.quick |= 1; } } -- cgit v1.2.3 From e6986423d28362aafe64d3757bbbc493f2687f8f Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 16 Apr 2019 22:31:14 +0200 Subject: socket: fix compat SO_RCVTIMEO_NEW/SO_SNDTIMEO_NEW It looks like the new socket options only work correctly for native execution, but in case of compat mode fall back to the old behavior as we ignore the 'old_timeval' flag. Rework so we treat SO_RCVTIMEO_NEW/SO_SNDTIMEO_NEW the same way in compat and native 32-bit mode. Cc: Deepa Dinamani Fixes: a9beb86ae6e5 ("sock: Add SO_RCVTIMEO_NEW and SO_SNDTIMEO_NEW") Signed-off-by: Arnd Bergmann Acked-by: Deepa Dinamani Signed-off-by: David S. Miller --- net/core/sock.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/core/sock.c b/net/core/sock.c index 782343bb925b..067878a1e4c5 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -348,7 +348,7 @@ static int sock_get_timeout(long timeo, void *optval, bool old_timeval) tv.tv_usec = ((timeo % HZ) * USEC_PER_SEC) / HZ; } - if (in_compat_syscall() && !COMPAT_USE_64BIT_TIME) { + if (old_timeval && in_compat_syscall() && !COMPAT_USE_64BIT_TIME) { struct old_timeval32 tv32 = { tv.tv_sec, tv.tv_usec }; *(struct old_timeval32 *)optval = tv32; return sizeof(tv32); @@ -372,7 +372,7 @@ static int sock_set_timeout(long *timeo_p, char __user *optval, int optlen, bool { struct __kernel_sock_timeval tv; - if (in_compat_syscall() && !COMPAT_USE_64BIT_TIME) { + if (old_timeval && in_compat_syscall() && !COMPAT_USE_64BIT_TIME) { struct old_timeval32 tv32; if (optlen < sizeof(tv32)) -- cgit v1.2.3 From 83f8bf4b837b0e3417f0e5c717a43fcf71ecc992 Mon Sep 17 00:00:00 2001 From: Thierry Reding Date: Tue, 16 Apr 2019 14:43:26 +0200 Subject: drm/tegra: hdmi: Setup audio only if configured The audio configuration is only valid if the HDMI codec has been properly set up. Do not attempt to set up audio before that happens because it causes a division by zero. Note that this is only problematic on Tegra20 and Tegra30. Later chips implement the division instructions which return zero when dividing by zero and don't throw an exception. Fixes: db5adf4d6dce ("drm/tegra: hdmi: Fix audio to work with any pixel clock rate") Reported-by: Marcel Ziswiler Tested-by: Dmitry Osipenko Signed-off-by: Thierry Reding --- drivers/gpu/drm/tegra/hdmi.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/tegra/hdmi.c b/drivers/gpu/drm/tegra/hdmi.c index 47c55974756d..d23c4bfde790 100644 --- a/drivers/gpu/drm/tegra/hdmi.c +++ b/drivers/gpu/drm/tegra/hdmi.c @@ -1260,9 +1260,15 @@ static void tegra_hdmi_encoder_enable(struct drm_encoder *encoder) hdmi->dvi = !tegra_output_is_hdmi(output); if (!hdmi->dvi) { - err = tegra_hdmi_setup_audio(hdmi); - if (err < 0) - hdmi->dvi = true; + /* + * Make sure that the audio format has been configured before + * enabling audio, otherwise we may try to divide by zero. + */ + if (hdmi->format.sample_rate > 0) { + err = tegra_hdmi_setup_audio(hdmi); + if (err < 0) + hdmi->dvi = true; + } } if (hdmi->config->has_hda) -- cgit v1.2.3 From 35af0d469c6694c05f06e75c5d75caee9be66122 Mon Sep 17 00:00:00 2001 From: Vasily Gorbik Date: Mon, 15 Apr 2019 12:41:08 +0200 Subject: s390: correct some inline assembly constraints Inline assembly code changed in this patch should really use "Q" constraint "Memory reference without index register and with short displacement". The kernel build with kasan instrumentation enabled might occasionally break otherwise (due to stack instrumentation). Signed-off-by: Vasily Gorbik Signed-off-by: Martin Schwidefsky --- arch/s390/kernel/fpu.c | 2 +- arch/s390/kernel/vtime.c | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/arch/s390/kernel/fpu.c b/arch/s390/kernel/fpu.c index 594464f2129d..0da378e2eb25 100644 --- a/arch/s390/kernel/fpu.c +++ b/arch/s390/kernel/fpu.c @@ -23,7 +23,7 @@ void __kernel_fpu_begin(struct kernel_fpu *state, u32 flags) if (flags & KERNEL_FPC) /* Save floating point control */ - asm volatile("stfpc %0" : "=m" (state->fpc)); + asm volatile("stfpc %0" : "=Q" (state->fpc)); if (!MACHINE_HAS_VX) { if (flags & KERNEL_VXR_V0V7) { diff --git a/arch/s390/kernel/vtime.c b/arch/s390/kernel/vtime.c index a69a0911ed0e..c475ca49cfc6 100644 --- a/arch/s390/kernel/vtime.c +++ b/arch/s390/kernel/vtime.c @@ -37,7 +37,7 @@ static inline u64 get_vtimer(void) { u64 timer; - asm volatile("stpt %0" : "=m" (timer)); + asm volatile("stpt %0" : "=Q" (timer)); return timer; } @@ -48,7 +48,7 @@ static inline void set_vtimer(u64 expires) asm volatile( " stpt %0\n" /* Store current cpu timer value */ " spt %1" /* Set new value imm. afterwards */ - : "=m" (timer) : "m" (expires)); + : "=Q" (timer) : "Q" (expires)); S390_lowcore.system_timer += S390_lowcore.last_update_timer - timer; S390_lowcore.last_update_timer = expires; } @@ -135,8 +135,8 @@ static int do_account_vtime(struct task_struct *tsk) #else " stck %1" /* Store current tod clock value */ #endif - : "=m" (S390_lowcore.last_update_timer), - "=m" (S390_lowcore.last_update_clock)); + : "=Q" (S390_lowcore.last_update_timer), + "=Q" (S390_lowcore.last_update_clock)); clock = S390_lowcore.last_update_clock - clock; timer -= S390_lowcore.last_update_timer; -- cgit v1.2.3 From b26e36b7ef36a8a3a147b1609b2505f8a4ecf511 Mon Sep 17 00:00:00 2001 From: Hui Wang Date: Wed, 17 Apr 2019 16:10:32 +0800 Subject: ALSA: hda/realtek - add two more pin configuration sets to quirk table We have two Dell laptops which have the codec 10ec0236 and 10ec0256 respectively, the headset mic on them can't work, need to apply the quirk of ALC255_FIXUP_DELL1_MIC_NO_PRESENCE. So adding their pin configurations in the pin quirk table. Cc: Signed-off-by: Hui Wang Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_realtek.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 810479766090..f5b510f119ed 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -7266,6 +7266,8 @@ static const struct snd_hda_pin_quirk alc269_pin_fixup_tbl[] = { {0x12, 0x90a60140}, {0x14, 0x90170150}, {0x21, 0x02211020}), + SND_HDA_PIN_QUIRK(0x10ec0236, 0x1028, "Dell", ALC255_FIXUP_DELL1_MIC_NO_PRESENCE, + {0x21, 0x02211020}), SND_HDA_PIN_QUIRK(0x10ec0255, 0x1028, "Dell", ALC255_FIXUP_DELL2_MIC_NO_PRESENCE, {0x14, 0x90170110}, {0x21, 0x02211020}), @@ -7376,6 +7378,10 @@ static const struct snd_hda_pin_quirk alc269_pin_fixup_tbl[] = { {0x21, 0x0221101f}), SND_HDA_PIN_QUIRK(0x10ec0256, 0x1028, "Dell", ALC255_FIXUP_DELL1_MIC_NO_PRESENCE, ALC256_STANDARD_PINS), + SND_HDA_PIN_QUIRK(0x10ec0256, 0x1028, "Dell", ALC255_FIXUP_DELL1_MIC_NO_PRESENCE, + {0x14, 0x90170110}, + {0x1b, 0x01011020}, + {0x21, 0x0221101f}), SND_HDA_PIN_QUIRK(0x10ec0256, 0x1043, "ASUS", ALC256_FIXUP_ASUS_MIC, {0x14, 0x90170110}, {0x1b, 0x90a70130}, -- cgit v1.2.3 From 8adddf349fda0d3de2f6bb41ddf838cbf36a8ad2 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Tue, 16 Apr 2019 23:59:02 +1000 Subject: powerpc/mm/radix: Make Radix require HUGETLB_PAGE Joel reported weird crashes using skiroot_defconfig, in his case we jumped into an NX page: kernel tried to execute exec-protected page (c000000002bff4f0) - exploit attempt? (uid: 0) BUG: Unable to handle kernel instruction fetch Faulting instruction address: 0xc000000002bff4f0 Looking at the disassembly, we had simply branched to that address: c000000000c001bc 49fff335 bl c000000002bff4f0 But that didn't match the original kernel image: c000000000c001bc 4bfff335 bl c000000000bff4f0 When STRICT_KERNEL_RWX is enabled, and we're using the radix MMU, we call radix__change_memory_range() late in boot to change page protections. We do that both to mark rodata read only and also to mark init text no-execute. That involves walking the kernel page tables, and clearing _PAGE_WRITE or _PAGE_EXEC respectively. With radix we may use hugepages for the linear mapping, so the code in radix__change_memory_range() uses eg. pmd_huge() to test if it has found a huge mapping, and if so it stops the page table walk and changes the PMD permissions. However if the kernel is built without HUGETLBFS support, pmd_huge() is just a #define that always returns 0. That causes the code in radix__change_memory_range() to incorrectly interpret the PMD value as a pointer to a PTE page rather than as a PTE at the PMD level. We can see this using `dv` in xmon which also uses pmd_huge(): 0:mon> dv c000000000000000 pgd @ 0xc000000001740000 pgdp @ 0xc000000001740000 = 0x80000000ffffb009 pudp @ 0xc0000000ffffb000 = 0x80000000ffffa009 pmdp @ 0xc0000000ffffa000 = 0xc00000000000018f <- this is a PTE ptep @ 0xc000000000000100 = 0xa64bb17da64ab07d <- kernel text The end result is we treat the value at 0xc000000000000100 as a PTE and clear _PAGE_WRITE or _PAGE_EXEC, potentially corrupting the code at that address. In Joel's specific case we cleared the sign bit in the offset of the branch, causing a backward branch to turn into a forward branch which caused us to branch into a non-executable page. However the exact nature of the crash depends on kernel version, compiler version, and other factors. We need to fix radix__change_memory_range() to not use accessors that depend on HUGETLBFS, but we also have radix memory hotplug code that uses pmd_huge() etc that will also need fixing. So for now just disallow the broken combination of Radix with HUGETLBFS disabled. The only defconfig we have that is affected is skiroot_defconfig, so turn on HUGETLBFS there so that it still gets Radix. Fixes: 566ca99af026 ("powerpc/mm/radix: Add dummy radix_enabled()") Cc: stable@vger.kernel.org # v4.7+ Reported-by: Joel Stanley Signed-off-by: Michael Ellerman --- arch/powerpc/configs/skiroot_defconfig | 1 + arch/powerpc/platforms/Kconfig.cputype | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/configs/skiroot_defconfig b/arch/powerpc/configs/skiroot_defconfig index 5ba131c30f6b..1bcd468ab422 100644 --- a/arch/powerpc/configs/skiroot_defconfig +++ b/arch/powerpc/configs/skiroot_defconfig @@ -266,6 +266,7 @@ CONFIG_UDF_FS=m CONFIG_MSDOS_FS=m CONFIG_VFAT_FS=m CONFIG_PROC_KCORE=y +CONFIG_HUGETLBFS=y # CONFIG_MISC_FILESYSTEMS is not set # CONFIG_NETWORK_FILESYSTEMS is not set CONFIG_NLS=y diff --git a/arch/powerpc/platforms/Kconfig.cputype b/arch/powerpc/platforms/Kconfig.cputype index 842b2c7e156a..50cd09b4e05d 100644 --- a/arch/powerpc/platforms/Kconfig.cputype +++ b/arch/powerpc/platforms/Kconfig.cputype @@ -324,7 +324,7 @@ config ARCH_ENABLE_SPLIT_PMD_PTLOCK config PPC_RADIX_MMU bool "Radix MMU Support" - depends on PPC_BOOK3S_64 + depends on PPC_BOOK3S_64 && HUGETLB_PAGE select ARCH_HAS_GIGANTIC_PAGE if (MEMORY_ISOLATION && COMPACTION) || CMA default y help -- cgit v1.2.3 From 660cf4ce9d0f3497cc7456eaa6d74c8b71d6282c Mon Sep 17 00:00:00 2001 From: Ian Abbott Date: Mon, 15 Apr 2019 12:43:01 +0100 Subject: staging: comedi: ni_usb6501: Fix use of uninitialized mutex If `ni6501_auto_attach()` returns an error, the core comedi module code will call `ni6501_detach()` to clean up. If `ni6501_auto_attach()` successfully allocated the comedi device private data, `ni6501_detach()` assumes that a `struct mutex mut` contained in the private data has been initialized and uses it. Unfortunately, there are a couple of places where `ni6501_auto_attach()` can return an error after allocating the device private data but before initializing the mutex, so this assumption is invalid. Fix it by initializing the mutex just after allocating the private data in `ni6501_auto_attach()` before any other errors can be retturned. Also move the call to `usb_set_intfdata()` just to keep the code a bit neater (either position for the call is fine). I believe this was the cause of the following syzbot crash report : usb 1-1: New USB device strings: Mfr=0, Product=0, SerialNumber=0 usb 1-1: config 0 descriptor?? usb 1-1: string descriptor 0 read error: -71 comedi comedi0: Wrong number of endpoints ni6501 1-1:0.233: driver 'ni6501' failed to auto-configure device. INFO: trying to register non-static key. the code is fine but needs lockdep annotation. turning off the locking correctness validator. CPU: 0 PID: 585 Comm: kworker/0:3 Not tainted 5.1.0-rc4-319354-g9a33b36 #3 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Workqueue: usb_hub_wq hub_event Call Trace: __dump_stack lib/dump_stack.c:77 [inline] dump_stack+0xe8/0x16e lib/dump_stack.c:113 assign_lock_key kernel/locking/lockdep.c:786 [inline] register_lock_class+0x11b8/0x1250 kernel/locking/lockdep.c:1095 __lock_acquire+0xfb/0x37c0 kernel/locking/lockdep.c:3582 lock_acquire+0x10d/0x2f0 kernel/locking/lockdep.c:4211 __mutex_lock_common kernel/locking/mutex.c:925 [inline] __mutex_lock+0xfe/0x12b0 kernel/locking/mutex.c:1072 ni6501_detach+0x5b/0x110 drivers/staging/comedi/drivers/ni_usb6501.c:567 comedi_device_detach+0xed/0x800 drivers/staging/comedi/drivers.c:204 comedi_device_cleanup.part.0+0x68/0x140 drivers/staging/comedi/comedi_fops.c:156 comedi_device_cleanup drivers/staging/comedi/comedi_fops.c:187 [inline] comedi_free_board_dev.part.0+0x16/0x90 drivers/staging/comedi/comedi_fops.c:190 comedi_free_board_dev drivers/staging/comedi/comedi_fops.c:189 [inline] comedi_release_hardware_device+0x111/0x140 drivers/staging/comedi/comedi_fops.c:2880 comedi_auto_config.cold+0x124/0x1b0 drivers/staging/comedi/drivers.c:1068 usb_probe_interface+0x31d/0x820 drivers/usb/core/driver.c:361 really_probe+0x2da/0xb10 drivers/base/dd.c:509 driver_probe_device+0x21d/0x350 drivers/base/dd.c:671 __device_attach_driver+0x1d8/0x290 drivers/base/dd.c:778 bus_for_each_drv+0x163/0x1e0 drivers/base/bus.c:454 __device_attach+0x223/0x3a0 drivers/base/dd.c:844 bus_probe_device+0x1f1/0x2a0 drivers/base/bus.c:514 device_add+0xad2/0x16e0 drivers/base/core.c:2106 usb_set_configuration+0xdf7/0x1740 drivers/usb/core/message.c:2021 generic_probe+0xa2/0xda drivers/usb/core/generic.c:210 usb_probe_device+0xc0/0x150 drivers/usb/core/driver.c:266 really_probe+0x2da/0xb10 drivers/base/dd.c:509 driver_probe_device+0x21d/0x350 drivers/base/dd.c:671 __device_attach_driver+0x1d8/0x290 drivers/base/dd.c:778 bus_for_each_drv+0x163/0x1e0 drivers/base/bus.c:454 __device_attach+0x223/0x3a0 drivers/base/dd.c:844 bus_probe_device+0x1f1/0x2a0 drivers/base/bus.c:514 device_add+0xad2/0x16e0 drivers/base/core.c:2106 usb_new_device.cold+0x537/0xccf drivers/usb/core/hub.c:2534 hub_port_connect drivers/usb/core/hub.c:5089 [inline] hub_port_connect_change drivers/usb/core/hub.c:5204 [inline] port_event drivers/usb/core/hub.c:5350 [inline] hub_event+0x138e/0x3b00 drivers/usb/core/hub.c:5432 process_one_work+0x90f/0x1580 kernel/workqueue.c:2269 worker_thread+0x9b/0xe20 kernel/workqueue.c:2415 kthread+0x313/0x420 kernel/kthread.c:253 ret_from_fork+0x3a/0x50 arch/x86/entry/entry_64.S:352 Reported-by: syzbot+cf4f2b6c24aff0a3edf6@syzkaller.appspotmail.com Signed-off-by: Ian Abbott Cc: stable Signed-off-by: Greg Kroah-Hartman --- drivers/staging/comedi/drivers/ni_usb6501.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/staging/comedi/drivers/ni_usb6501.c b/drivers/staging/comedi/drivers/ni_usb6501.c index 808ed92ed66f..ed5e42655821 100644 --- a/drivers/staging/comedi/drivers/ni_usb6501.c +++ b/drivers/staging/comedi/drivers/ni_usb6501.c @@ -518,6 +518,9 @@ static int ni6501_auto_attach(struct comedi_device *dev, if (!devpriv) return -ENOMEM; + mutex_init(&devpriv->mut); + usb_set_intfdata(intf, devpriv); + ret = ni6501_find_endpoints(dev); if (ret) return ret; @@ -526,9 +529,6 @@ static int ni6501_auto_attach(struct comedi_device *dev, if (ret) return ret; - mutex_init(&devpriv->mut); - usb_set_intfdata(intf, devpriv); - ret = comedi_alloc_subdevices(dev, 2); if (ret) return ret; -- cgit v1.2.3 From af4b54a2e5ba18259ff9aac445bf546dd60d037e Mon Sep 17 00:00:00 2001 From: Ian Abbott Date: Mon, 15 Apr 2019 12:43:02 +0100 Subject: staging: comedi: ni_usb6501: Fix possible double-free of ->usb_rx_buf `ni6501_alloc_usb_buffers()` is called from `ni6501_auto_attach()` to allocate RX and TX buffers for USB transfers. It allocates `devpriv->usb_rx_buf` followed by `devpriv->usb_tx_buf`. If the allocation of `devpriv->usb_tx_buf` fails, it frees `devpriv->usb_rx_buf`, leaving the pointer set dangling, and returns an error. Later, `ni6501_detach()` will be called from the core comedi module code to clean up. `ni6501_detach()` also frees both `devpriv->usb_rx_buf` and `devpriv->usb_tx_buf`, but `devpriv->usb_rx_buf` may have already beed freed, leading to a double-free error. Fix it bu removing the call to `kfree(devpriv->usb_rx_buf)` from `ni6501_alloc_usb_buffers()`, relying on `ni6501_detach()` to free the memory. Signed-off-by: Ian Abbott Cc: stable Signed-off-by: Greg Kroah-Hartman --- drivers/staging/comedi/drivers/ni_usb6501.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/staging/comedi/drivers/ni_usb6501.c b/drivers/staging/comedi/drivers/ni_usb6501.c index ed5e42655821..1bb1cb651349 100644 --- a/drivers/staging/comedi/drivers/ni_usb6501.c +++ b/drivers/staging/comedi/drivers/ni_usb6501.c @@ -463,10 +463,8 @@ static int ni6501_alloc_usb_buffers(struct comedi_device *dev) size = usb_endpoint_maxp(devpriv->ep_tx); devpriv->usb_tx_buf = kzalloc(size, GFP_KERNEL); - if (!devpriv->usb_tx_buf) { - kfree(devpriv->usb_rx_buf); + if (!devpriv->usb_tx_buf) return -ENOMEM; - } return 0; } -- cgit v1.2.3 From eb9d7a62c38628ab0ba6e59d22d7cb7930e415d1 Mon Sep 17 00:00:00 2001 From: Alexey Kardashevskiy Date: Wed, 3 Apr 2019 15:12:32 +1100 Subject: powerpc/mm_iommu: Fix potential deadlock Currently mm_iommu_do_alloc() is called in 2 cases: - VFIO_IOMMU_SPAPR_REGISTER_MEMORY ioctl() for normal memory: this locks &mem_list_mutex and then locks mm::mmap_sem several times when adjusting locked_vm or pinning pages; - vfio_pci_nvgpu_regops::mmap() for GPU memory: this is called with mm::mmap_sem held already and it locks &mem_list_mutex. So one can craft a userspace program to do special ioctl and mmap in 2 threads concurrently and cause a deadlock which lockdep warns about (below). We did not hit this yet because QEMU constructs the machine in a single thread. This moves the overlap check next to where the new entry is added and reduces the amount of time spent with &mem_list_mutex held. This moves locked_vm adjustment from under &mem_list_mutex. This relies on mm_iommu_adjust_locked_vm() doing nothing when entries==0. This is one of the lockdep warnings: ====================================================== WARNING: possible circular locking dependency detected 5.1.0-rc2-le_nv2_aikATfstn1-p1 #363 Not tainted ------------------------------------------------------ qemu-system-ppc/8038 is trying to acquire lock: 000000002ec6c453 (mem_list_mutex){+.+.}, at: mm_iommu_do_alloc+0x70/0x490 but task is already holding lock: 00000000fd7da97f (&mm->mmap_sem){++++}, at: vm_mmap_pgoff+0xf0/0x160 which lock already depends on the new lock. the existing dependency chain (in reverse order) is: -> #1 (&mm->mmap_sem){++++}: lock_acquire+0xf8/0x260 down_write+0x44/0xa0 mm_iommu_adjust_locked_vm.part.1+0x4c/0x190 mm_iommu_do_alloc+0x310/0x490 tce_iommu_ioctl.part.9+0xb84/0x1150 [vfio_iommu_spapr_tce] vfio_fops_unl_ioctl+0x94/0x430 [vfio] do_vfs_ioctl+0xe4/0x930 ksys_ioctl+0xc4/0x110 sys_ioctl+0x28/0x80 system_call+0x5c/0x70 -> #0 (mem_list_mutex){+.+.}: __lock_acquire+0x1484/0x1900 lock_acquire+0xf8/0x260 __mutex_lock+0x88/0xa70 mm_iommu_do_alloc+0x70/0x490 vfio_pci_nvgpu_mmap+0xc0/0x130 [vfio_pci] vfio_pci_mmap+0x198/0x2a0 [vfio_pci] vfio_device_fops_mmap+0x44/0x70 [vfio] mmap_region+0x5d4/0x770 do_mmap+0x42c/0x650 vm_mmap_pgoff+0x124/0x160 ksys_mmap_pgoff+0xdc/0x2f0 sys_mmap+0x40/0x80 system_call+0x5c/0x70 other info that might help us debug this: Possible unsafe locking scenario: CPU0 CPU1 ---- ---- lock(&mm->mmap_sem); lock(mem_list_mutex); lock(&mm->mmap_sem); lock(mem_list_mutex); *** DEADLOCK *** 1 lock held by qemu-system-ppc/8038: #0: 00000000fd7da97f (&mm->mmap_sem){++++}, at: vm_mmap_pgoff+0xf0/0x160 Fixes: c10c21efa4bc ("powerpc/vfio/iommu/kvm: Do not pin device memory", 2018-12-19) Signed-off-by: Alexey Kardashevskiy Signed-off-by: Michael Ellerman --- arch/powerpc/mm/mmu_context_iommu.c | 75 +++++++++++++++++++------------------ 1 file changed, 39 insertions(+), 36 deletions(-) diff --git a/arch/powerpc/mm/mmu_context_iommu.c b/arch/powerpc/mm/mmu_context_iommu.c index e7a9c4f6bfca..9d9be850f8c2 100644 --- a/arch/powerpc/mm/mmu_context_iommu.c +++ b/arch/powerpc/mm/mmu_context_iommu.c @@ -95,28 +95,14 @@ static long mm_iommu_do_alloc(struct mm_struct *mm, unsigned long ua, unsigned long entries, unsigned long dev_hpa, struct mm_iommu_table_group_mem_t **pmem) { - struct mm_iommu_table_group_mem_t *mem; - long i, ret, locked_entries = 0; + struct mm_iommu_table_group_mem_t *mem, *mem2; + long i, ret, locked_entries = 0, pinned = 0; unsigned int pageshift; - mutex_lock(&mem_list_mutex); - - list_for_each_entry_rcu(mem, &mm->context.iommu_group_mem_list, - next) { - /* Overlap? */ - if ((mem->ua < (ua + (entries << PAGE_SHIFT))) && - (ua < (mem->ua + - (mem->entries << PAGE_SHIFT)))) { - ret = -EINVAL; - goto unlock_exit; - } - - } - if (dev_hpa == MM_IOMMU_TABLE_INVALID_HPA) { ret = mm_iommu_adjust_locked_vm(mm, entries, true); if (ret) - goto unlock_exit; + return ret; locked_entries = entries; } @@ -150,15 +136,10 @@ static long mm_iommu_do_alloc(struct mm_struct *mm, unsigned long ua, down_read(&mm->mmap_sem); ret = get_user_pages_longterm(ua, entries, FOLL_WRITE, mem->hpages, NULL); up_read(&mm->mmap_sem); + pinned = ret > 0 ? ret : 0; if (ret != entries) { - /* free the reference taken */ - for (i = 0; i < ret; i++) - put_page(mem->hpages[i]); - - vfree(mem->hpas); - kfree(mem); ret = -EFAULT; - goto unlock_exit; + goto free_exit; } pageshift = PAGE_SHIFT; @@ -183,21 +164,43 @@ static long mm_iommu_do_alloc(struct mm_struct *mm, unsigned long ua, } good_exit: - ret = 0; atomic64_set(&mem->mapped, 1); mem->used = 1; mem->ua = ua; mem->entries = entries; - *pmem = mem; - list_add_rcu(&mem->next, &mm->context.iommu_group_mem_list); + mutex_lock(&mem_list_mutex); -unlock_exit: - if (locked_entries && ret) - mm_iommu_adjust_locked_vm(mm, locked_entries, false); + list_for_each_entry_rcu(mem2, &mm->context.iommu_group_mem_list, next) { + /* Overlap? */ + if ((mem2->ua < (ua + (entries << PAGE_SHIFT))) && + (ua < (mem2->ua + + (mem2->entries << PAGE_SHIFT)))) { + ret = -EINVAL; + mutex_unlock(&mem_list_mutex); + goto free_exit; + } + } + + list_add_rcu(&mem->next, &mm->context.iommu_group_mem_list); mutex_unlock(&mem_list_mutex); + *pmem = mem; + + return 0; + +free_exit: + /* free the reference taken */ + for (i = 0; i < pinned; i++) + put_page(mem->hpages[i]); + + vfree(mem->hpas); + kfree(mem); + +unlock_exit: + mm_iommu_adjust_locked_vm(mm, locked_entries, false); + return ret; } @@ -266,7 +269,7 @@ static void mm_iommu_release(struct mm_iommu_table_group_mem_t *mem) long mm_iommu_put(struct mm_struct *mm, struct mm_iommu_table_group_mem_t *mem) { long ret = 0; - unsigned long entries, dev_hpa; + unsigned long unlock_entries = 0; mutex_lock(&mem_list_mutex); @@ -287,17 +290,17 @@ long mm_iommu_put(struct mm_struct *mm, struct mm_iommu_table_group_mem_t *mem) goto unlock_exit; } + if (mem->dev_hpa == MM_IOMMU_TABLE_INVALID_HPA) + unlock_entries = mem->entries; + /* @mapped became 0 so now mappings are disabled, release the region */ - entries = mem->entries; - dev_hpa = mem->dev_hpa; mm_iommu_release(mem); - if (dev_hpa == MM_IOMMU_TABLE_INVALID_HPA) - mm_iommu_adjust_locked_vm(mm, entries, false); - unlock_exit: mutex_unlock(&mem_list_mutex); + mm_iommu_adjust_locked_vm(mm, unlock_entries, false); + return ret; } EXPORT_SYMBOL_GPL(mm_iommu_put); -- cgit v1.2.3 From 7a3a4d763837d3aa654cd1059030950410c04d77 Mon Sep 17 00:00:00 2001 From: Alexey Kardashevskiy Date: Wed, 3 Apr 2019 15:12:33 +1100 Subject: powerpc/mm_iommu: Allow pinning large regions When called with vmas_arg==NULL, get_user_pages_longterm() allocates an array of nr_pages*8 which can easily get greater that the max order, for example, registering memory for a 256GB guest does this and fails in __alloc_pages_nodemask(). This adds a loop over chunks of entries to fit the max order limit. Fixes: 678e174c4c16 ("powerpc/mm/iommu: allow migration of cma allocated pages during mm_iommu_do_alloc", 2019-03-05) Signed-off-by: Alexey Kardashevskiy Signed-off-by: Michael Ellerman --- arch/powerpc/mm/mmu_context_iommu.c | 24 ++++++++++++++++++++---- 1 file changed, 20 insertions(+), 4 deletions(-) diff --git a/arch/powerpc/mm/mmu_context_iommu.c b/arch/powerpc/mm/mmu_context_iommu.c index 9d9be850f8c2..8330f135294f 100644 --- a/arch/powerpc/mm/mmu_context_iommu.c +++ b/arch/powerpc/mm/mmu_context_iommu.c @@ -98,6 +98,7 @@ static long mm_iommu_do_alloc(struct mm_struct *mm, unsigned long ua, struct mm_iommu_table_group_mem_t *mem, *mem2; long i, ret, locked_entries = 0, pinned = 0; unsigned int pageshift; + unsigned long entry, chunk; if (dev_hpa == MM_IOMMU_TABLE_INVALID_HPA) { ret = mm_iommu_adjust_locked_vm(mm, entries, true); @@ -134,11 +135,26 @@ static long mm_iommu_do_alloc(struct mm_struct *mm, unsigned long ua, } down_read(&mm->mmap_sem); - ret = get_user_pages_longterm(ua, entries, FOLL_WRITE, mem->hpages, NULL); + chunk = (1UL << (PAGE_SHIFT + MAX_ORDER - 1)) / + sizeof(struct vm_area_struct *); + chunk = min(chunk, entries); + for (entry = 0; entry < entries; entry += chunk) { + unsigned long n = min(entries - entry, chunk); + + ret = get_user_pages_longterm(ua + (entry << PAGE_SHIFT), n, + FOLL_WRITE, mem->hpages + entry, NULL); + if (ret == n) { + pinned += n; + continue; + } + if (ret > 0) + pinned += ret; + break; + } up_read(&mm->mmap_sem); - pinned = ret > 0 ? ret : 0; - if (ret != entries) { - ret = -EFAULT; + if (pinned != entries) { + if (!ret) + ret = -EFAULT; goto free_exit; } -- cgit v1.2.3 From b2ecf00631362a83744e5ec249947620db5e240c Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Thu, 4 Apr 2019 20:53:28 -0400 Subject: vt: fix cursor when clearing the screen The patch a6dbe4427559 ("vt: perform safe console erase in the right order") introduced a bug. The conditional do_update_region() was replaced by a call to update_region() that does contain the conditional already, but with unwanted extra side effects such as restoring the cursor drawing. In order to reproduce the bug: - use framebuffer console with the AMDGPU driver - type "links" to start the console www browser - press 'q' and space to exit links Now the cursor will be permanently visible in the center of the screen. It will stay there until something overwrites it. The bug goes away if we change update_region() back to the conditional do_update_region(). [ nico: reworded changelog ] Signed-off-by: Mikulas Patocka Reviewed-by: Nicolas Pitre Cc: stable@vger.kernel.org Fixes: a6dbe4427559 ("vt: perform safe console erase in the right order") Signed-off-by: Greg Kroah-Hartman --- drivers/tty/vt/vt.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/tty/vt/vt.c b/drivers/tty/vt/vt.c index d34984aa646d..650c66886c80 100644 --- a/drivers/tty/vt/vt.c +++ b/drivers/tty/vt/vt.c @@ -1520,7 +1520,8 @@ static void csi_J(struct vc_data *vc, int vpar) return; } scr_memsetw(start, vc->vc_video_erase_char, 2 * count); - update_region(vc, (unsigned long) start, count); + if (con_should_update(vc)) + do_update_region(vc, (unsigned long) start, count); vc->vc_need_wrap = 0; } -- cgit v1.2.3 From 3b9a907223d7f6b9d1dadea29436842ae9bcd76d Mon Sep 17 00:00:00 2001 From: Corey Minyard Date: Wed, 3 Apr 2019 15:58:16 -0500 Subject: ipmi: fix sleep-in-atomic in free_user at cleanup SRCU user->release_barrier free_user() could be called in atomic context. This patch pushed the free operation off into a workqueue. Example: BUG: sleeping function called from invalid context at kernel/workqueue.c:2856 in_atomic(): 1, irqs_disabled(): 0, pid: 177, name: ksoftirqd/27 CPU: 27 PID: 177 Comm: ksoftirqd/27 Not tainted 4.19.25-3 #1 Hardware name: AIC 1S-HV26-08/MB-DPSB04-06, BIOS IVYBV060 10/21/2015 Call Trace: dump_stack+0x5c/0x7b ___might_sleep+0xec/0x110 __flush_work+0x48/0x1f0 ? try_to_del_timer_sync+0x4d/0x80 _cleanup_srcu_struct+0x104/0x140 free_user+0x18/0x30 [ipmi_msghandler] ipmi_free_recv_msg+0x3a/0x50 [ipmi_msghandler] deliver_response+0xbd/0xd0 [ipmi_msghandler] deliver_local_response+0xe/0x30 [ipmi_msghandler] handle_one_recv_msg+0x163/0xc80 [ipmi_msghandler] ? dequeue_entity+0xa0/0x960 handle_new_recv_msgs+0x15c/0x1f0 [ipmi_msghandler] tasklet_action_common.isra.22+0x103/0x120 __do_softirq+0xf8/0x2d7 run_ksoftirqd+0x26/0x50 smpboot_thread_fn+0x11d/0x1e0 kthread+0x103/0x140 ? sort_range+0x20/0x20 ? kthread_destroy_worker+0x40/0x40 ret_from_fork+0x1f/0x40 Fixes: 77f8269606bf ("ipmi: fix use-after-free of user->release_barrier.rda") Reported-by: Konstantin Khlebnikov Signed-off-by: Corey Minyard Cc: stable@vger.kernel.org # 5.0 Cc: Yang Yingliang --- drivers/char/ipmi/ipmi_msghandler.c | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) diff --git a/drivers/char/ipmi/ipmi_msghandler.c b/drivers/char/ipmi/ipmi_msghandler.c index e8ba67834746..00bf4b17edbf 100644 --- a/drivers/char/ipmi/ipmi_msghandler.c +++ b/drivers/char/ipmi/ipmi_msghandler.c @@ -214,6 +214,9 @@ struct ipmi_user { /* Does this interface receive IPMI events? */ bool gets_events; + + /* Free must run in process context for RCU cleanup. */ + struct work_struct remove_work; }; static struct ipmi_user *acquire_ipmi_user(struct ipmi_user *user, int *index) @@ -1157,6 +1160,15 @@ static int intf_err_seq(struct ipmi_smi *intf, return rv; } +static void free_user_work(struct work_struct *work) +{ + struct ipmi_user *user = container_of(work, struct ipmi_user, + remove_work); + + cleanup_srcu_struct(&user->release_barrier); + kfree(user); +} + int ipmi_create_user(unsigned int if_num, const struct ipmi_user_hndl *handler, void *handler_data, @@ -1200,6 +1212,8 @@ int ipmi_create_user(unsigned int if_num, goto out_kfree; found: + INIT_WORK(&new_user->remove_work, free_user_work); + rv = init_srcu_struct(&new_user->release_barrier); if (rv) goto out_kfree; @@ -1260,8 +1274,9 @@ EXPORT_SYMBOL(ipmi_get_smi_info); static void free_user(struct kref *ref) { struct ipmi_user *user = container_of(ref, struct ipmi_user, refcount); - cleanup_srcu_struct(&user->release_barrier); - kfree(user); + + /* SRCU cleanup must happen in task context. */ + schedule_work(&user->remove_work); } static void _ipmi_destroy_user(struct ipmi_user *user) -- cgit v1.2.3 From aa52660231410b13d237299e691c43e346e3a73f Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Tue, 16 Apr 2019 15:41:51 +0200 Subject: perf bpf: Return NULL when RB tree lookup fails in perf_env__find_bpf_prog_info() We currently don't return NULL in case we don't find the bpf_prog_info_node, fixing that. Signed-off-by: Jiri Olsa Acked-by: Song Liu Cc: Alexander Shishkin Cc: Namhyung Kim Cc: Peter Zijlstra Fixes: e4378f0cb90b ("perf bpf: Save bpf_prog_info in a rbtree in perf_env") Link: http://lkml.kernel.org/r/20190416134151.15282-1-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/env.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tools/perf/util/env.c b/tools/perf/util/env.c index c6351b557bb0..34a363f2e71b 100644 --- a/tools/perf/util/env.c +++ b/tools/perf/util/env.c @@ -57,9 +57,11 @@ struct bpf_prog_info_node *perf_env__find_bpf_prog_info(struct perf_env *env, else if (prog_id > node->info_linear->info.id) n = n->rb_right; else - break; + goto out; } + node = NULL; +out: up_read(&env->bpf_progs.lock); return node; } -- cgit v1.2.3 From a93e0b2365e81e5a5b61f25e269b5dc73d242cba Mon Sep 17 00:00:00 2001 From: Song Liu Date: Tue, 16 Apr 2019 18:01:22 +0200 Subject: perf tools: Check maps for bpf programs As reported by Jiri Olsa in: "[BUG] perf: intel_pt won't display kernel function" https://lore.kernel.org/lkml/20190403143738.GB32001@krava Recent changes to support PERF_RECORD_KSYMBOL and PERF_RECORD_BPF_EVENT broke --kallsyms option. This is because it broke test __map__is_kmodule. This patch fixes this by adding check for bpf program, so that these maps are not mistaken as kernel modules. Signed-off-by: Song Liu Reported-by: Jiri Olsa Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Alexei Starovoitov Cc: Andi Kleen Cc: Andrii Nakryiko Cc: Daniel Borkmann Cc: Martin KaFai Lau Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Yonghong Song Link: http://lkml.kernel.org/r/20190416160127.30203-8-jolsa@kernel.org Fixes: 76193a94522f ("perf, bpf: Introduce PERF_RECORD_KSYMBOL") Signed-off-by: Jiri Olsa Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/map.c | 16 ++++++++++++++++ tools/perf/util/map.h | 4 +++- 2 files changed, 19 insertions(+), 1 deletion(-) diff --git a/tools/perf/util/map.c b/tools/perf/util/map.c index e32628cd20a7..28d484ef74ae 100644 --- a/tools/perf/util/map.c +++ b/tools/perf/util/map.c @@ -261,6 +261,22 @@ bool __map__is_extra_kernel_map(const struct map *map) return kmap && kmap->name[0]; } +bool __map__is_bpf_prog(const struct map *map) +{ + const char *name; + + if (map->dso->binary_type == DSO_BINARY_TYPE__BPF_PROG_INFO) + return true; + + /* + * If PERF_RECORD_BPF_EVENT is not included, the dso will not have + * type of DSO_BINARY_TYPE__BPF_PROG_INFO. In such cases, we can + * guess the type based on name. + */ + name = map->dso->short_name; + return name && (strstr(name, "bpf_prog_") == name); +} + bool map__has_symbols(const struct map *map) { return dso__has_symbols(map->dso); diff --git a/tools/perf/util/map.h b/tools/perf/util/map.h index 0e20749f2c55..dc93787c74f0 100644 --- a/tools/perf/util/map.h +++ b/tools/perf/util/map.h @@ -159,10 +159,12 @@ int map__set_kallsyms_ref_reloc_sym(struct map *map, const char *symbol_name, bool __map__is_kernel(const struct map *map); bool __map__is_extra_kernel_map(const struct map *map); +bool __map__is_bpf_prog(const struct map *map); static inline bool __map__is_kmodule(const struct map *map) { - return !__map__is_kernel(map) && !__map__is_extra_kernel_map(map); + return !__map__is_kernel(map) && !__map__is_extra_kernel_map(map) && + !__map__is_bpf_prog(map); } bool map__has_symbols(const struct map *map); -- cgit v1.2.3 From adc6257c4a6f23ff97dca8314fcd33828e2d8db5 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Tue, 16 Apr 2019 18:01:23 +0200 Subject: perf evlist: Fix side band thread draining Current perf_evlist__poll_thread() code could finish without draining the data. Adding the logic that makes sure we won't finish before the drain. Signed-off-by: Jiri Olsa Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Alexei Starovoitov Cc: Andi Kleen Cc: Daniel Borkmann Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Song Liu Fixes: 657ee5531903 ("perf evlist: Introduce side band thread") Link: http://lkml.kernel.org/r/20190416160127.30203-9-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/evlist.c | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index 6689378ee577..51ead577533f 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -1868,12 +1868,12 @@ static void *perf_evlist__poll_thread(void *arg) { struct perf_evlist *evlist = arg; bool draining = false; - int i; + int i, done = 0; + + while (!done) { + bool got_data = false; - while (draining || !(evlist->thread.done)) { - if (draining) - draining = false; - else if (evlist->thread.done) + if (evlist->thread.done) draining = true; if (!draining) @@ -1894,9 +1894,13 @@ static void *perf_evlist__poll_thread(void *arg) pr_warning("cannot locate proper evsel for the side band event\n"); perf_mmap__consume(map); + got_data = true; } perf_mmap__read_done(map); } + + if (draining && !got_data) + break; } return NULL; } -- cgit v1.2.3 From b9abbdfa88024d52c8084d8f46ea4f161606c692 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Tue, 16 Apr 2019 18:01:24 +0200 Subject: perf tools: Fix map reference counting By calling maps__insert() we assume to get 2 references on the map, which we relese within maps__remove call. However if there's already same map name, we currently don't bump the reference and can crash, like: Program received signal SIGABRT, Aborted. 0x00007ffff75e60f5 in raise () from /lib64/libc.so.6 (gdb) bt #0 0x00007ffff75e60f5 in raise () from /lib64/libc.so.6 #1 0x00007ffff75d0895 in abort () from /lib64/libc.so.6 #2 0x00007ffff75d0769 in __assert_fail_base.cold () from /lib64/libc.so.6 #3 0x00007ffff75de596 in __assert_fail () from /lib64/libc.so.6 #4 0x00000000004fc006 in refcount_sub_and_test (i=1, r=0x1224e88) at tools/include/linux/refcount.h:131 #5 refcount_dec_and_test (r=0x1224e88) at tools/include/linux/refcount.h:148 #6 map__put (map=0x1224df0) at util/map.c:299 #7 0x00000000004fdb95 in __maps__remove (map=0x1224df0, maps=0xb17d80) at util/map.c:953 #8 maps__remove (maps=0xb17d80, map=0x1224df0) at util/map.c:959 #9 0x00000000004f7d8a in map_groups__remove (map=, mg=) at util/map_groups.h:65 #10 machine__process_ksymbol_unregister (sample=, event=0x7ffff7279670, machine=) at util/machine.c:728 #11 machine__process_ksymbol (machine=, event=0x7ffff7279670, sample=) at util/machine.c:741 #12 0x00000000004fffbb in perf_session__deliver_event (session=0xb11390, event=0x7ffff7279670, tool=0x7fffffffc7b0, file_offset=13936) at util/session.c:1362 #13 0x00000000005039bb in do_flush (show_progress=false, oe=0xb17e80) at util/ordered-events.c:243 #14 __ordered_events__flush (oe=0xb17e80, how=OE_FLUSH__ROUND, timestamp=) at util/ordered-events.c:322 #15 0x00000000005005e4 in perf_session__process_user_event (session=session@entry=0xb11390, event=event@entry=0x7ffff72a4af8, ... Add the map to the list and getting the reference event if we find the map with same name. Signed-off-by: Jiri Olsa Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Alexei Starovoitov Cc: Andi Kleen Cc: Daniel Borkmann Cc: Eric Saint-Etienne Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Song Liu Fixes: 1e6285699b30 ("perf symbols: Fix slowness due to -ffunction-section") Link: http://lkml.kernel.org/r/20190416160127.30203-10-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/map.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/tools/perf/util/map.c b/tools/perf/util/map.c index 28d484ef74ae..ee71efb9db62 100644 --- a/tools/perf/util/map.c +++ b/tools/perf/util/map.c @@ -926,10 +926,8 @@ static void __maps__insert_name(struct maps *maps, struct map *map) rc = strcmp(m->dso->short_name, map->dso->short_name); if (rc < 0) p = &(*p)->rb_left; - else if (rc > 0) - p = &(*p)->rb_right; else - return; + p = &(*p)->rb_right; } rb_link_node(&map->rb_node_name, parent, p); rb_insert_color(&map->rb_node_name, &maps->names); -- cgit v1.2.3 From 2db7b1e0bd49d2b0e7d16949e167b1cfaf5c07cf Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Wed, 17 Apr 2019 16:55:39 +0200 Subject: perf bpf: Return NULL when RB tree lookup fails in perf_env__find_btf() We don't return NULL when we don't find the bpf_prog_info_node, fix that. Signed-off-by: Jiri Olsa Reported-by: Song Liu Acked-by: Song Liu Cc: Alexander Shishkin Cc: Namhyung Kim Cc: Peter Zijlstra Fixes: 3792cb2ff43b ("perf bpf: Save BTF in a rbtree in perf_env") Link: http://lkml.kernel.org/r/20190417145539.11669-1-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/env.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tools/perf/util/env.c b/tools/perf/util/env.c index 34a363f2e71b..9494f9dc61ec 100644 --- a/tools/perf/util/env.c +++ b/tools/perf/util/env.c @@ -111,10 +111,12 @@ struct btf_node *perf_env__find_btf(struct perf_env *env, __u32 btf_id) else if (btf_id > node->id) n = n->rb_right; else - break; + goto out; } + node = NULL; up_read(&env->bpf_progs.lock); +out: return node; } -- cgit v1.2.3 From 74f464e97044da33b25aaed00213914b0edf1f2e Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Wed, 17 Apr 2019 08:57:48 -0600 Subject: io_uring: fix CQ overflow condition MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This is a leftover from when the rings initially were not free flowing, and hence a test for tail + 1 == head would indicate full. Since we now let them wrap instead of mask them with the size, we need to check if they drift more than the ring size from each other. This fixes a case where we'd overwrite CQ ring entries, if the user failed to reap completions. Both cases would ultimately result in lost completions as the application violated the depth it asked for. The only difference is that before this fix we'd return invalid entries for the overflowed completions, instead of properly flagging it in the cq_ring->overflow variable. Reported-by: Stefan Bühler Signed-off-by: Jens Axboe --- fs/io_uring.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index b35300e4c9a7..f65f85d89217 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -338,7 +338,7 @@ static struct io_uring_cqe *io_get_cqring(struct io_ring_ctx *ctx) tail = ctx->cached_cq_tail; /* See comment at the top of the file */ smp_rmb(); - if (tail + 1 == READ_ONCE(ring->r.head)) + if (tail - READ_ONCE(ring->r.head) == ring->ring_entries) return NULL; ctx->cached_cq_tail++; -- cgit v1.2.3 From a7b1a4839ff979b4dd4fb6c1ccd31af11de9ca87 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 15 Apr 2019 11:54:13 -0400 Subject: SUNRPC: Ignore queue transmission errors on successful transmission If a request transmission fails due to write space or slot unavailability errors, but the queued task then gets transmitted before it has time to process the error in call_transmit_status() or call_bc_transmit_status(), we need to suppress the transmission error code to prevent it from leaking out of the RPC layer. Reported-by: Chuck Lever Signed-off-by: Trond Myklebust Tested-by: Chuck Lever --- net/sunrpc/clnt.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index 1d0395ef62c9..8ff11dc98d7f 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -2081,8 +2081,8 @@ call_transmit_status(struct rpc_task *task) * test first. */ if (rpc_task_transmitted(task)) { - if (task->tk_status == 0) - xprt_request_wait_receive(task); + task->tk_status = 0; + xprt_request_wait_receive(task); return; } @@ -2167,6 +2167,9 @@ call_bc_transmit_status(struct rpc_task *task) { struct rpc_rqst *req = task->tk_rqstp; + if (rpc_task_transmitted(task)) + task->tk_status = 0; + dprint_status(task); switch (task->tk_status) { -- cgit v1.2.3 From 19fad20d15a6494f47f85d869f00b11343ee5c78 Mon Sep 17 00:00:00 2001 From: ZhangXiaoxu Date: Tue, 16 Apr 2019 09:47:24 +0800 Subject: ipv4: set the tcp_min_rtt_wlen range from 0 to one day There is a UBSAN report as below: UBSAN: Undefined behaviour in net/ipv4/tcp_input.c:2877:56 signed integer overflow: 2147483647 * 1000 cannot be represented in type 'int' CPU: 3 PID: 0 Comm: swapper/3 Not tainted 5.1.0-rc4-00058-g582549e #1 Call Trace: dump_stack+0x8c/0xba ubsan_epilogue+0x11/0x60 handle_overflow+0x12d/0x170 ? ttwu_do_wakeup+0x21/0x320 __ubsan_handle_mul_overflow+0x12/0x20 tcp_ack_update_rtt+0x76c/0x780 tcp_clean_rtx_queue+0x499/0x14d0 tcp_ack+0x69e/0x1240 ? __wake_up_sync_key+0x2c/0x50 ? update_group_capacity+0x50/0x680 tcp_rcv_established+0x4e2/0xe10 tcp_v4_do_rcv+0x22b/0x420 tcp_v4_rcv+0xfe8/0x1190 ip_protocol_deliver_rcu+0x36/0x180 ip_local_deliver+0x15b/0x1a0 ip_rcv+0xac/0xd0 __netif_receive_skb_one_core+0x7f/0xb0 __netif_receive_skb+0x33/0xc0 netif_receive_skb_internal+0x84/0x1c0 napi_gro_receive+0x2a0/0x300 receive_buf+0x3d4/0x2350 ? detach_buf_split+0x159/0x390 virtnet_poll+0x198/0x840 ? reweight_entity+0x243/0x4b0 net_rx_action+0x25c/0x770 __do_softirq+0x19b/0x66d irq_exit+0x1eb/0x230 do_IRQ+0x7a/0x150 common_interrupt+0xf/0xf It can be reproduced by: echo 2147483647 > /proc/sys/net/ipv4/tcp_min_rtt_wlen Fixes: f672258391b42 ("tcp: track min RTT using windowed min-filter") Signed-off-by: ZhangXiaoxu Signed-off-by: David S. Miller --- Documentation/networking/ip-sysctl.txt | 1 + net/ipv4/sysctl_net_ipv4.c | 5 ++++- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt index acdfb5d2bcaa..e2142fe40cda 100644 --- a/Documentation/networking/ip-sysctl.txt +++ b/Documentation/networking/ip-sysctl.txt @@ -422,6 +422,7 @@ tcp_min_rtt_wlen - INTEGER minimum RTT when it is moved to a longer path (e.g., due to traffic engineering). A longer window makes the filter more resistant to RTT inflations such as transient congestion. The unit is seconds. + Possible values: 0 - 86400 (1 day) Default: 300 tcp_moderate_rcvbuf - BOOLEAN diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index ba0fc4b18465..eeb4041fa5f9 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -49,6 +49,7 @@ static int ip_ping_group_range_min[] = { 0, 0 }; static int ip_ping_group_range_max[] = { GID_T_MAX, GID_T_MAX }; static int comp_sack_nr_max = 255; static u32 u32_max_div_HZ = UINT_MAX / HZ; +static int one_day_secs = 24 * 3600; /* obsolete */ static int sysctl_tcp_low_latency __read_mostly; @@ -1151,7 +1152,9 @@ static struct ctl_table ipv4_net_table[] = { .data = &init_net.ipv4.sysctl_tcp_min_rtt_wlen, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = proc_dointvec + .proc_handler = proc_dointvec_minmax, + .extra1 = &zero, + .extra2 = &one_day_secs }, { .procname = "tcp_autocorking", -- cgit v1.2.3 From 7249c8ea227a582c14f63e9e8853eb7369122f10 Mon Sep 17 00:00:00 2001 From: Guy Levi Date: Wed, 10 Apr 2019 10:59:45 +0300 Subject: IB/mlx5: Fix scatter to CQE in DCT QP creation When scatter to CQE is enabled on a DCT QP it corrupts the mailbox command since it tried to treat it as as QP create mailbox command instead of a DCT create command. The corrupted mailbox command causes userspace to malfunction as the device doesn't create the QP as expected. A new mlx5 capability is exposed to user-space which ensures that it will not enable the feature on DCT without this fix in the kernel. Fixes: 5d6ff1babe78 ("IB/mlx5: Support scatter to CQE for DC transport type") Signed-off-by: Guy Levi Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/mlx5/main.c | 2 ++ drivers/infiniband/hw/mlx5/qp.c | 11 +++++++---- include/uapi/rdma/mlx5-abi.h | 1 + 3 files changed, 10 insertions(+), 4 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index 531ff20b32ad..241d9ead79eb 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -1119,6 +1119,8 @@ static int mlx5_ib_query_device(struct ib_device *ibdev, if (MLX5_CAP_GEN(mdev, qp_packet_based)) resp.flags |= MLX5_IB_QUERY_DEV_RESP_PACKET_BASED_CREDIT_MODE; + + resp.flags |= MLX5_IB_QUERY_DEV_RESP_FLAGS_SCAT2CQE_DCT; } if (field_avail(typeof(resp), sw_parsing_caps, diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c index 7cd006da1dae..8870c350fda0 100644 --- a/drivers/infiniband/hw/mlx5/qp.c +++ b/drivers/infiniband/hw/mlx5/qp.c @@ -1818,13 +1818,16 @@ static void configure_responder_scat_cqe(struct ib_qp_init_attr *init_attr, rcqe_sz = mlx5_ib_get_cqe_size(init_attr->recv_cq); - if (rcqe_sz == 128) { - MLX5_SET(qpc, qpc, cs_res, MLX5_RES_SCAT_DATA64_CQE); + if (init_attr->qp_type == MLX5_IB_QPT_DCT) { + if (rcqe_sz == 128) + MLX5_SET(dctc, qpc, cs_res, MLX5_RES_SCAT_DATA64_CQE); + return; } - if (init_attr->qp_type != MLX5_IB_QPT_DCT) - MLX5_SET(qpc, qpc, cs_res, MLX5_RES_SCAT_DATA32_CQE); + MLX5_SET(qpc, qpc, cs_res, + rcqe_sz == 128 ? MLX5_RES_SCAT_DATA64_CQE : + MLX5_RES_SCAT_DATA32_CQE); } static void configure_requester_scat_cqe(struct mlx5_ib_dev *dev, diff --git a/include/uapi/rdma/mlx5-abi.h b/include/uapi/rdma/mlx5-abi.h index 87b3198f4b5d..f4d4010b7e3e 100644 --- a/include/uapi/rdma/mlx5-abi.h +++ b/include/uapi/rdma/mlx5-abi.h @@ -238,6 +238,7 @@ enum mlx5_ib_query_dev_resp_flags { MLX5_IB_QUERY_DEV_RESP_FLAGS_CQE_128B_COMP = 1 << 0, MLX5_IB_QUERY_DEV_RESP_FLAGS_CQE_128B_PAD = 1 << 1, MLX5_IB_QUERY_DEV_RESP_PACKET_BASED_CREDIT_MODE = 1 << 2, + MLX5_IB_QUERY_DEV_RESP_FLAGS_SCAT2CQE_DCT = 1 << 3, }; enum mlx5_ib_tunnel_offloads { -- cgit v1.2.3 From f87db4dbd52f2f8a170a2b51cb0926221ca7c9e2 Mon Sep 17 00:00:00 2001 From: YueHaibing Date: Wed, 17 Apr 2019 09:49:39 +0800 Subject: net: stmmac: Use bfsize1 in ndesc_init_rx_desc gcc warn this: drivers/net/ethernet/stmicro/stmmac/norm_desc.c: In function ndesc_init_rx_desc: drivers/net/ethernet/stmicro/stmmac/norm_desc.c:138:6: warning: variable 'bfsize1' set but not used [-Wunused-but-set-variable] Like enh_desc_init_rx_desc, we should use bfsize1 in ndesc_init_rx_desc to calculate 'p->des1' Fixes: 583e63614149 ("net: stmmac: use correct DMA buffer size in the RX descriptor") Signed-off-by: YueHaibing Reviewed-by: Aaro Koskinen Signed-off-by: David S. Miller --- drivers/net/ethernet/stmicro/stmmac/norm_desc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/norm_desc.c b/drivers/net/ethernet/stmicro/stmmac/norm_desc.c index b7dd4e3c760d..6d690678c20e 100644 --- a/drivers/net/ethernet/stmicro/stmmac/norm_desc.c +++ b/drivers/net/ethernet/stmicro/stmmac/norm_desc.c @@ -140,7 +140,7 @@ static void ndesc_init_rx_desc(struct dma_desc *p, int disable_rx_ic, int mode, p->des0 |= cpu_to_le32(RDES0_OWN); bfsize1 = min(bfsize, BUF_SIZE_2KiB - 1); - p->des1 |= cpu_to_le32(bfsize & RDES1_BUFFER1_SIZE_MASK); + p->des1 |= cpu_to_le32(bfsize1 & RDES1_BUFFER1_SIZE_MASK); if (mode == STMMAC_CHAIN_MODE) ndesc_rx_set_on_chain(p, end); -- cgit v1.2.3 From d003d772e64df08af04ee63609d47169ee82ae0e Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Wed, 17 Apr 2019 14:15:00 +0100 Subject: nfp: abm: fix spelling mistake "offseting" -> "offsetting" There are a couple of spelling mistakes in NL_SET_ERR_MSG_MOD error messages. Fix these. Signed-off-by: Colin Ian King Acked-by: Jakub Kicinski Reviewed-by: Mukesh Ojha Signed-off-by: David S. Miller --- drivers/net/ethernet/netronome/nfp/abm/cls.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/netronome/nfp/abm/cls.c b/drivers/net/ethernet/netronome/nfp/abm/cls.c index 9852080cf454..ff3913085665 100644 --- a/drivers/net/ethernet/netronome/nfp/abm/cls.c +++ b/drivers/net/ethernet/netronome/nfp/abm/cls.c @@ -39,7 +39,7 @@ nfp_abm_u32_check_knode(struct nfp_abm *abm, struct tc_cls_u32_knode *knode, } if (knode->sel->off || knode->sel->offshift || knode->sel->offmask || knode->sel->offoff || knode->fshift) { - NL_SET_ERR_MSG_MOD(extack, "variable offseting not supported"); + NL_SET_ERR_MSG_MOD(extack, "variable offsetting not supported"); return false; } if (knode->sel->hoff || knode->sel->hmask) { @@ -78,7 +78,7 @@ nfp_abm_u32_check_knode(struct nfp_abm *abm, struct tc_cls_u32_knode *knode, k = &knode->sel->keys[0]; if (k->offmask) { - NL_SET_ERR_MSG_MOD(extack, "offset mask - variable offseting not supported"); + NL_SET_ERR_MSG_MOD(extack, "offset mask - variable offsetting not supported"); return false; } if (k->off) { -- cgit v1.2.3 From 27b141fc234a3670d21bd742c35d7205d03cbb3a Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 17 Apr 2019 18:29:13 +0200 Subject: s390: ctcm: fix ctcm_new_device error return code clang points out that the return code from this function is undefined for one of the error paths: ../drivers/s390/net/ctcm_main.c:1595:7: warning: variable 'result' is used uninitialized whenever 'if' condition is true [-Wsometimes-uninitialized] if (priv->channel[direction] == NULL) { ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ../drivers/s390/net/ctcm_main.c:1638:9: note: uninitialized use occurs here return result; ^~~~~~ ../drivers/s390/net/ctcm_main.c:1595:3: note: remove the 'if' if its condition is always false if (priv->channel[direction] == NULL) { ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ../drivers/s390/net/ctcm_main.c:1539:12: note: initialize the variable 'result' to silence this warning int result; ^ Make it return -ENODEV here, as in the related failure cases. gcc has a known bug in underreporting some of these warnings when it has already eliminated the assignment of the return code based on some earlier optimization step. Reviewed-by: Nathan Chancellor Signed-off-by: Arnd Bergmann Signed-off-by: Julian Wiedmann Signed-off-by: David S. Miller --- drivers/s390/net/ctcm_main.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/s390/net/ctcm_main.c b/drivers/s390/net/ctcm_main.c index 7617d21cb296..f63c5c871d3d 100644 --- a/drivers/s390/net/ctcm_main.c +++ b/drivers/s390/net/ctcm_main.c @@ -1595,6 +1595,7 @@ static int ctcm_new_device(struct ccwgroup_device *cgdev) if (priv->channel[direction] == NULL) { if (direction == CTCM_WRITE) channel_free(priv->channel[CTCM_READ]); + result = -ENODEV; goto out_dev; } priv->channel[direction]->netdev = dev; -- cgit v1.2.3 From ec3937107ab43f3e8b2bc9dad95710043c462ff7 Mon Sep 17 00:00:00 2001 From: Baoquan He Date: Thu, 4 Apr 2019 10:03:13 +0800 Subject: x86/mm/KASLR: Fix the size of the direct mapping section kernel_randomize_memory() uses __PHYSICAL_MASK_SHIFT to calculate the maximum amount of system RAM supported. The size of the direct mapping section is obtained from the smaller one of the below two values: (actual system RAM size + padding size) vs (max system RAM size supported) This calculation is wrong since commit b83ce5ee9147 ("x86/mm/64: Make __PHYSICAL_MASK_SHIFT always 52"). In it, __PHYSICAL_MASK_SHIFT was changed to be 52, regardless of whether the kernel is using 4-level or 5-level page tables. Thus, it will always use 4 PB as the maximum amount of system RAM, even in 4-level paging mode where it should actually be 64 TB. Thus, the size of the direct mapping section will always be the sum of the actual system RAM size plus the padding size. Even when the amount of system RAM is 64 TB, the following layout will still be used. Obviously KALSR will be weakened significantly. |____|_______actual RAM_______|_padding_|______the rest_______| 0 64TB ~120TB Instead, it should be like this: |____|_______actual RAM_______|_________the rest______________| 0 64TB ~120TB The size of padding region is controlled by CONFIG_RANDOMIZE_MEMORY_PHYSICAL_PADDING, which is 10 TB by default. The above issue only exists when CONFIG_RANDOMIZE_MEMORY_PHYSICAL_PADDING is set to a non-zero value, which is the case when CONFIG_MEMORY_HOTPLUG is enabled. Otherwise, using __PHYSICAL_MASK_SHIFT doesn't affect KASLR. Fix it by replacing __PHYSICAL_MASK_SHIFT with MAX_PHYSMEM_BITS. [ bp: Massage commit message. ] Fixes: b83ce5ee9147 ("x86/mm/64: Make __PHYSICAL_MASK_SHIFT always 52") Signed-off-by: Baoquan He Signed-off-by: Borislav Petkov Reviewed-by: Thomas Garnier Acked-by: Kirill A. Shutemov Cc: "H. Peter Anvin" Cc: Andy Lutomirski Cc: Dave Hansen Cc: Ingo Molnar Cc: Kees Cook Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: frank.ramsay@hpe.com Cc: herbert@gondor.apana.org.au Cc: kirill@shutemov.name Cc: mike.travis@hpe.com Cc: thgarnie@google.com Cc: x86-ml Cc: yamada.masahiro@socionext.com Link: https://lkml.kernel.org/r/20190417083536.GE7065@MiWiFi-R3L-srv --- arch/x86/mm/kaslr.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/mm/kaslr.c b/arch/x86/mm/kaslr.c index 3f452ffed7e9..d669c5e797e0 100644 --- a/arch/x86/mm/kaslr.c +++ b/arch/x86/mm/kaslr.c @@ -94,7 +94,7 @@ void __init kernel_randomize_memory(void) if (!kaslr_memory_enabled()) return; - kaslr_regions[0].size_tb = 1 << (__PHYSICAL_MASK_SHIFT - TB_SHIFT); + kaslr_regions[0].size_tb = 1 << (MAX_PHYSMEM_BITS - TB_SHIFT); kaslr_regions[1].size_tb = VMALLOC_SIZE_TB; /* -- cgit v1.2.3 From 1c6bca6d75bca2cc47b5eafb9f7f16e368ffbeca Mon Sep 17 00:00:00 2001 From: Shahar S Matityahu Date: Mon, 15 Apr 2019 14:43:04 +0300 Subject: iwlwifi: don't panic in error path on non-msix systems The driver uses msix causes-register to handle both msix and non msix interrupts when performing sync nmi. On devices that do not support msix this register is unmapped and accessing it causes a kernel panic. Solve this by differentiating the two cases and accessing the proper causes-register in each case. Reported-by: Michal Hocko Signed-off-by: Shahar S Matityahu Signed-off-by: Luca Coelho --- drivers/net/wireless/intel/iwlwifi/pcie/trans.c | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/trans.c b/drivers/net/wireless/intel/iwlwifi/pcie/trans.c index 79c1dc05f948..c4375b868901 100644 --- a/drivers/net/wireless/intel/iwlwifi/pcie/trans.c +++ b/drivers/net/wireless/intel/iwlwifi/pcie/trans.c @@ -3644,20 +3644,27 @@ out_no_pci: void iwl_trans_pcie_sync_nmi(struct iwl_trans *trans) { + struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans); unsigned long timeout = jiffies + IWL_TRANS_NMI_TIMEOUT; + u32 inta_addr, sw_err_bit; + + if (trans_pcie->msix_enabled) { + inta_addr = CSR_MSIX_HW_INT_CAUSES_AD; + sw_err_bit = MSIX_HW_INT_CAUSES_REG_SW_ERR; + } else { + inta_addr = CSR_INT; + sw_err_bit = CSR_INT_BIT_SW_ERR; + } iwl_disable_interrupts(trans); iwl_force_nmi(trans); while (time_after(timeout, jiffies)) { - u32 inta_hw = iwl_read32(trans, - CSR_MSIX_HW_INT_CAUSES_AD); + u32 inta_hw = iwl_read32(trans, inta_addr); /* Error detected by uCode */ - if (inta_hw & MSIX_HW_INT_CAUSES_REG_SW_ERR) { + if (inta_hw & sw_err_bit) { /* Clear causes register */ - iwl_write32(trans, CSR_MSIX_HW_INT_CAUSES_AD, - inta_hw & - MSIX_HW_INT_CAUSES_REG_SW_ERR); + iwl_write32(trans, inta_addr, inta_hw & sw_err_bit); break; } -- cgit v1.2.3 From 72d3c7bbc9b581e5f2a455e6f399c75626653945 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 25 Mar 2019 14:11:52 +0100 Subject: iwlwifi: mvm: don't attempt debug collection in rfkill If we fail to initialize because rfkill is enabled, then trying to do debug collection currently just fails. Prevent that in the high-level code, although we should probably also fix the lower level code to do things more carefully. It's not 100% clear that it fixes this commit, as the original dump code at the time might've been more careful. In any case, we don't really need to dump anything in this expected scenario. Signed-off-by: Johannes Berg Fixes: 7125648074e8 ("iwlwifi: add fw dump upon RT ucode start failure") Signed-off-by: Luca Coelho --- drivers/net/wireless/intel/iwlwifi/mvm/fw.c | 4 +++- drivers/net/wireless/intel/iwlwifi/mvm/ops.c | 2 +- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/fw.c b/drivers/net/wireless/intel/iwlwifi/mvm/fw.c index 00a47f6f1d81..ab68b5d53ec9 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/fw.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/fw.c @@ -1121,7 +1121,9 @@ int iwl_mvm_up(struct iwl_mvm *mvm) ret = iwl_mvm_load_rt_fw(mvm); if (ret) { IWL_ERR(mvm, "Failed to start RT ucode: %d\n", ret); - iwl_fw_dbg_error_collect(&mvm->fwrt, FW_DBG_TRIGGER_DRIVER); + if (ret != -ERFKILL) + iwl_fw_dbg_error_collect(&mvm->fwrt, + FW_DBG_TRIGGER_DRIVER); goto error; } diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/ops.c b/drivers/net/wireless/intel/iwlwifi/mvm/ops.c index ba27dce4c2bb..13681b03c10e 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/ops.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/ops.c @@ -834,7 +834,7 @@ iwl_op_mode_mvm_start(struct iwl_trans *trans, const struct iwl_cfg *cfg, mutex_lock(&mvm->mutex); iwl_mvm_ref(mvm, IWL_MVM_REF_INIT_UCODE); err = iwl_run_init_mvm_ucode(mvm, true); - if (err) + if (err && err != -ERFKILL) iwl_fw_dbg_error_collect(&mvm->fwrt, FW_DBG_TRIGGER_DRIVER); if (!iwlmvm_mod_params.init_dbg || !err) iwl_mvm_stop_device(mvm); -- cgit v1.2.3 From b35f63972c5c67fc0f908286f7fc624137788876 Mon Sep 17 00:00:00 2001 From: Shahar S Matityahu Date: Wed, 20 Mar 2019 17:41:16 +0200 Subject: iwlwifi: dbg_ini: check debug TLV type explicitly In ini debug TLVs bit 24 is set. The driver relies on it in the memory allocation for the debug configuration. This implementation is problematic in case of a new debug TLV that is not supported yet is added and uses bit 24. In such a scenario the driver allocate space without using it which causes errors in the apply point enabling flow. Solve it by explicitly checking if a given TLV is part of the list of the supported ini debug TLVs. Signed-off-by: Shahar S Matityahu Fixes: f14cda6f3b31 ("iwlwifi: trans: parse and store debug ini TLVs") Signed-off-by: Luca Coelho --- drivers/net/wireless/intel/iwlwifi/fw/file.h | 15 +++++++++------ drivers/net/wireless/intel/iwlwifi/iwl-dbg-tlv.c | 3 ++- 2 files changed, 11 insertions(+), 7 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/fw/file.h b/drivers/net/wireless/intel/iwlwifi/fw/file.h index 641c95d03b15..e06407dc088b 100644 --- a/drivers/net/wireless/intel/iwlwifi/fw/file.h +++ b/drivers/net/wireless/intel/iwlwifi/fw/file.h @@ -93,7 +93,7 @@ struct iwl_ucode_header { } u; }; -#define IWL_UCODE_INI_TLV_GROUP BIT(24) +#define IWL_UCODE_INI_TLV_GROUP 0x1000000 /* * new TLV uCode file layout @@ -148,11 +148,14 @@ enum iwl_ucode_tlv_type { IWL_UCODE_TLV_UMAC_DEBUG_ADDRS = 54, IWL_UCODE_TLV_LMAC_DEBUG_ADDRS = 55, IWL_UCODE_TLV_FW_RECOVERY_INFO = 57, - IWL_UCODE_TLV_TYPE_BUFFER_ALLOCATION = IWL_UCODE_INI_TLV_GROUP | 0x1, - IWL_UCODE_TLV_TYPE_HCMD = IWL_UCODE_INI_TLV_GROUP | 0x2, - IWL_UCODE_TLV_TYPE_REGIONS = IWL_UCODE_INI_TLV_GROUP | 0x3, - IWL_UCODE_TLV_TYPE_TRIGGERS = IWL_UCODE_INI_TLV_GROUP | 0x4, - IWL_UCODE_TLV_TYPE_DEBUG_FLOW = IWL_UCODE_INI_TLV_GROUP | 0x5, + + IWL_UCODE_TLV_TYPE_BUFFER_ALLOCATION = IWL_UCODE_INI_TLV_GROUP + 0x1, + IWL_UCODE_TLV_DEBUG_BASE = IWL_UCODE_TLV_TYPE_BUFFER_ALLOCATION, + IWL_UCODE_TLV_TYPE_HCMD = IWL_UCODE_INI_TLV_GROUP + 0x2, + IWL_UCODE_TLV_TYPE_REGIONS = IWL_UCODE_INI_TLV_GROUP + 0x3, + IWL_UCODE_TLV_TYPE_TRIGGERS = IWL_UCODE_INI_TLV_GROUP + 0x4, + IWL_UCODE_TLV_TYPE_DEBUG_FLOW = IWL_UCODE_INI_TLV_GROUP + 0x5, + IWL_UCODE_TLV_DEBUG_MAX = IWL_UCODE_TLV_TYPE_DEBUG_FLOW, /* TLVs 0x1000-0x2000 are for internal driver usage */ IWL_UCODE_TLV_FW_DBG_DUMP_LST = 0x1000, diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-dbg-tlv.c b/drivers/net/wireless/intel/iwlwifi/iwl-dbg-tlv.c index 5798f434f68f..c7070760a10a 100644 --- a/drivers/net/wireless/intel/iwlwifi/iwl-dbg-tlv.c +++ b/drivers/net/wireless/intel/iwlwifi/iwl-dbg-tlv.c @@ -126,7 +126,8 @@ void iwl_alloc_dbg_tlv(struct iwl_trans *trans, size_t len, const u8 *data, len -= ALIGN(tlv_len, 4); data += sizeof(*tlv) + ALIGN(tlv_len, 4); - if (!(tlv_type & IWL_UCODE_INI_TLV_GROUP)) + if (tlv_type < IWL_UCODE_TLV_DEBUG_BASE || + tlv_type > IWL_UCODE_TLV_DEBUG_MAX) continue; hdr = (void *)&tlv->data[0]; -- cgit v1.2.3 From 154d4899e4111ae24e68d6ba955f46856cb046bc Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Sat, 30 Mar 2019 10:31:52 +0100 Subject: iwlwifi: mvm: properly check debugfs dentry before using it debugfs can now report an error code if something went wrong instead of just NULL. So if the return value is to be used as a "real" dentry, it needs to be checked if it is an error before dereferencing it. This is now happening because of ff9fb72bc077 ("debugfs: return error values, not NULL"). If multiple iwlwifi devices are in the system, this can cause problems when the driver attempts to create the main debugfs directory again. Later on in the code we fail horribly by trying to dereference a pointer that is an error value. Reported-by: Laura Abbott Reported-by: Gabriel Ramirez Cc: Johannes Berg Cc: Emmanuel Grumbach Cc: Luca Coelho Cc: Intel Linux Wireless Cc: Kalle Valo Cc: stable # 5.0 Signed-off-by: Greg Kroah-Hartman Signed-off-by: Luca Coelho --- drivers/net/wireless/intel/iwlwifi/mvm/debugfs-vif.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/debugfs-vif.c b/drivers/net/wireless/intel/iwlwifi/mvm/debugfs-vif.c index 2453ceabf00d..738eddb2e7ac 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/debugfs-vif.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/debugfs-vif.c @@ -774,6 +774,11 @@ void iwl_mvm_vif_dbgfs_register(struct iwl_mvm *mvm, struct ieee80211_vif *vif) return; mvmvif->dbgfs_dir = debugfs_create_dir("iwlmvm", dbgfs_dir); + if (IS_ERR_OR_NULL(mvmvif->dbgfs_dir)) { + IWL_ERR(mvm, "Failed to create debugfs directory under %pd\n", + dbgfs_dir); + return; + } if (!mvmvif->dbgfs_dir) { IWL_ERR(mvm, "Failed to create debugfs directory under %pd\n", -- cgit v1.2.3 From c537e07b000bc00c9a5ac9d119ed2c8456a99b6e Mon Sep 17 00:00:00 2001 From: Shaul Triebitz Date: Tue, 16 Apr 2019 21:02:52 +0300 Subject: iwlwifi: cfg: use family 22560 based_params for AX210 family Specifically, max_tfd_queue_size should be 0x10000 like in 22560 family and not 0x100 like in 22000 family. Signed-off-by: Shaul Triebitz Signed-off-by: Luca Coelho --- drivers/net/wireless/intel/iwlwifi/cfg/22000.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/intel/iwlwifi/cfg/22000.c b/drivers/net/wireless/intel/iwlwifi/cfg/22000.c index eb6defb6d0cd..0a87d87fbb4f 100644 --- a/drivers/net/wireless/intel/iwlwifi/cfg/22000.c +++ b/drivers/net/wireless/intel/iwlwifi/cfg/22000.c @@ -201,7 +201,7 @@ static const struct iwl_ht_params iwl_22000_ht_params = { #define IWL_DEVICE_AX210 \ IWL_DEVICE_AX200_COMMON, \ .device_family = IWL_DEVICE_FAMILY_AX210, \ - .base_params = &iwl_22000_base_params, \ + .base_params = &iwl_22560_base_params, \ .csr = &iwl_csr_v1, \ .min_txq_size = 128 -- cgit v1.2.3 From 3fe3331bb285700ab2253dbb07f8e478fcea2f1b Mon Sep 17 00:00:00 2001 From: Kim Phillips Date: Thu, 21 Mar 2019 21:15:22 +0000 Subject: perf/x86/amd: Add event map for AMD Family 17h MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Family 17h differs from prior families by: - Does not support an L2 cache miss event - It has re-enumerated PMC counters for: - L2 cache references - front & back end stalled cycles So we add a new amd_f17h_perfmon_event_map[] so that the generic perf event names will resolve to the correct h/w events on family 17h and above processors. Reference sections 2.1.13.3.3 (stalls) and 2.1.13.3.6 (L2): https://www.amd.com/system/files/TechDocs/54945_PPR_Family_17h_Models_00h-0Fh.pdf Signed-off-by: Kim Phillips Cc: # v4.9+ Cc: Alexander Shishkin Cc: Arnaldo Carvalho de Melo Cc: Borislav Petkov Cc: H. Peter Anvin Cc: Janakarajan Natarajan Cc: Jiri Olsa Cc: Linus Torvalds Cc: Martin Liška Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Pu Wen Cc: Suravee Suthikulpanit Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Fixes: e40ed1542dd7 ("perf/x86: Add perf support for AMD family-17h processors") [ Improved the formatting a bit. ] Signed-off-by: Ingo Molnar --- arch/x86/events/amd/core.c | 35 ++++++++++++++++++++++++++--------- 1 file changed, 26 insertions(+), 9 deletions(-) diff --git a/arch/x86/events/amd/core.c b/arch/x86/events/amd/core.c index 0ecfac84ba91..d45f3fbd232e 100644 --- a/arch/x86/events/amd/core.c +++ b/arch/x86/events/amd/core.c @@ -117,22 +117,39 @@ static __initconst const u64 amd_hw_cache_event_ids }; /* - * AMD Performance Monitor K7 and later. + * AMD Performance Monitor K7 and later, up to and including Family 16h: */ static const u64 amd_perfmon_event_map[PERF_COUNT_HW_MAX] = { - [PERF_COUNT_HW_CPU_CYCLES] = 0x0076, - [PERF_COUNT_HW_INSTRUCTIONS] = 0x00c0, - [PERF_COUNT_HW_CACHE_REFERENCES] = 0x077d, - [PERF_COUNT_HW_CACHE_MISSES] = 0x077e, - [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x00c2, - [PERF_COUNT_HW_BRANCH_MISSES] = 0x00c3, - [PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = 0x00d0, /* "Decoder empty" event */ - [PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = 0x00d1, /* "Dispatch stalls" event */ + [PERF_COUNT_HW_CPU_CYCLES] = 0x0076, + [PERF_COUNT_HW_INSTRUCTIONS] = 0x00c0, + [PERF_COUNT_HW_CACHE_REFERENCES] = 0x077d, + [PERF_COUNT_HW_CACHE_MISSES] = 0x077e, + [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x00c2, + [PERF_COUNT_HW_BRANCH_MISSES] = 0x00c3, + [PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = 0x00d0, /* "Decoder empty" event */ + [PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = 0x00d1, /* "Dispatch stalls" event */ +}; + +/* + * AMD Performance Monitor Family 17h and later: + */ +static const u64 amd_f17h_perfmon_event_map[PERF_COUNT_HW_MAX] = +{ + [PERF_COUNT_HW_CPU_CYCLES] = 0x0076, + [PERF_COUNT_HW_INSTRUCTIONS] = 0x00c0, + [PERF_COUNT_HW_CACHE_REFERENCES] = 0xff60, + [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x00c2, + [PERF_COUNT_HW_BRANCH_MISSES] = 0x00c3, + [PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = 0x0287, + [PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = 0x0187, }; static u64 amd_pmu_event_map(int hw_event) { + if (boot_cpu_data.x86 >= 0x17) + return amd_f17h_perfmon_event_map[hw_event]; + return amd_perfmon_event_map[hw_event]; } -- cgit v1.2.3 From 3f2552f7e9c5abef2775c53f7af66532f8bf65bc Mon Sep 17 00:00:00 2001 From: Chang-An Chen Date: Fri, 29 Mar 2019 10:59:09 +0800 Subject: timers/sched_clock: Prevent generic sched_clock wrap caused by tick_freeze() tick_freeze() introduced by suspend-to-idle in commit 124cf9117c5f ("PM / sleep: Make it possible to quiesce timers during suspend-to-idle") uses timekeeping_suspend() instead of syscore_suspend() during suspend-to-idle. As a consequence generic sched_clock will keep going because sched_clock_suspend() and sched_clock_resume() are not invoked during suspend-to-idle which can result in a generic sched_clock wrap. On a ARM system with suspend-to-idle enabled, sched_clock is registered as "56 bits at 13MHz, resolution 76ns, wraps every 4398046511101ns", which means the real wrapping duration is 8796093022202ns. [ 134.551779] suspend-to-idle suspend (timekeeping_suspend()) [ 1204.912239] suspend-to-idle resume (timekeeping_resume()) ...... [ 1206.912239] suspend-to-idle suspend (timekeeping_suspend()) [ 5880.502807] suspend-to-idle resume (timekeeping_resume()) ...... [ 6000.403724] suspend-to-idle suspend (timekeeping_suspend()) [ 8035.753167] suspend-to-idle resume (timekeeping_resume()) ...... [ 8795.786684] (2)[321:charger_thread]...... [ 8795.788387] (2)[321:charger_thread]...... [ 0.057226] (0)[0:swapper/0]...... [ 0.061447] (2)[0:swapper/2]...... sched_clock was not stopped during suspend-to-idle, and sched_clock_poll hrtimer was not expired because timekeeping_suspend() was invoked during suspend-to-idle. It makes sched_clock wrap at kernel time 8796s. To prevent this, invoke sched_clock_suspend() and sched_clock_resume() in tick_freeze() together with timekeeping_suspend() and timekeeping_resume(). Fixes: 124cf9117c5f (PM / sleep: Make it possible to quiesce timers during suspend-to-idle) Signed-off-by: Chang-An Chen Signed-off-by: Thomas Gleixner Cc: Frederic Weisbecker Cc: Matthias Brugger Cc: John Stultz Cc: Kees Cook Cc: Corey Minyard Cc: Cc: Cc: Stanley Chu Cc: Cc: Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/1553828349-8914-1-git-send-email-chang-an.chen@mediatek.com --- kernel/time/sched_clock.c | 4 ++-- kernel/time/tick-common.c | 2 ++ kernel/time/timekeeping.h | 7 +++++++ 3 files changed, 11 insertions(+), 2 deletions(-) diff --git a/kernel/time/sched_clock.c b/kernel/time/sched_clock.c index 094b82ca95e5..930113b9799a 100644 --- a/kernel/time/sched_clock.c +++ b/kernel/time/sched_clock.c @@ -272,7 +272,7 @@ static u64 notrace suspended_sched_clock_read(void) return cd.read_data[seq & 1].epoch_cyc; } -static int sched_clock_suspend(void) +int sched_clock_suspend(void) { struct clock_read_data *rd = &cd.read_data[0]; @@ -283,7 +283,7 @@ static int sched_clock_suspend(void) return 0; } -static void sched_clock_resume(void) +void sched_clock_resume(void) { struct clock_read_data *rd = &cd.read_data[0]; diff --git a/kernel/time/tick-common.c b/kernel/time/tick-common.c index 529143b4c8d2..df401463a191 100644 --- a/kernel/time/tick-common.c +++ b/kernel/time/tick-common.c @@ -487,6 +487,7 @@ void tick_freeze(void) trace_suspend_resume(TPS("timekeeping_freeze"), smp_processor_id(), true); system_state = SYSTEM_SUSPEND; + sched_clock_suspend(); timekeeping_suspend(); } else { tick_suspend_local(); @@ -510,6 +511,7 @@ void tick_unfreeze(void) if (tick_freeze_depth == num_online_cpus()) { timekeeping_resume(); + sched_clock_resume(); system_state = SYSTEM_RUNNING; trace_suspend_resume(TPS("timekeeping_freeze"), smp_processor_id(), false); diff --git a/kernel/time/timekeeping.h b/kernel/time/timekeeping.h index 7a9b4eb7a1d5..141ab3ab0354 100644 --- a/kernel/time/timekeeping.h +++ b/kernel/time/timekeeping.h @@ -14,6 +14,13 @@ extern u64 timekeeping_max_deferment(void); extern void timekeeping_warp_clock(void); extern int timekeeping_suspend(void); extern void timekeeping_resume(void); +#ifdef CONFIG_GENERIC_SCHED_CLOCK +extern int sched_clock_suspend(void); +extern void sched_clock_resume(void); +#else +static inline int sched_clock_suspend(void) { return 0; } +static inline void sched_clock_resume(void) { } +#endif extern void do_timer(unsigned long ticks); extern void update_wall_time(void); -- cgit v1.2.3 From 44427c0fbc09b448b22410978a4ef6ee37599d25 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Mon, 15 Apr 2019 14:35:19 +0800 Subject: crypto: xts - Fix atomic sleep when walking skcipher When we perform a walk in the completion function, we need to ensure that it is atomic. Reported-by: syzbot+6f72c20560060c98b566@syzkaller.appspotmail.com Fixes: 78105c7e769b ("crypto: xts - Drop use of auxiliary buffer") Cc: Signed-off-by: Herbert Xu Acked-by: Ondrej Mosnacek Signed-off-by: Herbert Xu --- crypto/xts.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/crypto/xts.c b/crypto/xts.c index 847f54f76789..2f948328cabb 100644 --- a/crypto/xts.c +++ b/crypto/xts.c @@ -137,8 +137,12 @@ static void crypt_done(struct crypto_async_request *areq, int err) { struct skcipher_request *req = areq->data; - if (!err) + if (!err) { + struct rctx *rctx = skcipher_request_ctx(req); + + rctx->subreq.base.flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; err = xor_tweak_post(req); + } skcipher_request_complete(req, err); } -- cgit v1.2.3 From b257b48cd5830c5b1d0c347eb281f9c28056f881 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Mon, 15 Apr 2019 14:37:34 +0800 Subject: crypto: lrw - Fix atomic sleep when walking skcipher When we perform a walk in the completion function, we need to ensure that it is atomic. Fixes: ac3c8f36c31d ("crypto: lrw - Do not use auxiliary buffer") Cc: Signed-off-by: Herbert Xu Acked-by: Ondrej Mosnacek Signed-off-by: Herbert Xu --- crypto/lrw.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/crypto/lrw.c b/crypto/lrw.c index 0430ccd08728..08a0e458bc3e 100644 --- a/crypto/lrw.c +++ b/crypto/lrw.c @@ -212,8 +212,12 @@ static void crypt_done(struct crypto_async_request *areq, int err) { struct skcipher_request *req = areq->data; - if (!err) + if (!err) { + struct rctx *rctx = skcipher_request_ctx(req); + + rctx->subreq.base.flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; err = xor_tweak_post(req); + } skcipher_request_complete(req, err); } -- cgit v1.2.3 From 71adf60f0a925c0e0c7dc2d0311fe40b825be737 Mon Sep 17 00:00:00 2001 From: Paul Kocialkowski Date: Thu, 18 Apr 2019 15:27:25 +0200 Subject: drm/sun4i: Add missing drm_atomic_helper_shutdown at driver unbind A call to drm_atomic_helper_shutdown is required to properly release the internal references taken by the core and avoid warnings about leaking objects. Add it since it was missing. Fixes: 9026e0d122ac ("drm: Add Allwinner A10 Display Engine support") Signed-off-by: Paul Kocialkowski Signed-off-by: Maxime Ripard Link: https://patchwork.freedesktop.org/patch/msgid/20190418132727.5128-2-paul.kocialkowski@bootlin.com --- drivers/gpu/drm/sun4i/sun4i_drv.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/sun4i/sun4i_drv.c b/drivers/gpu/drm/sun4i/sun4i_drv.c index 3ebd9f5e2719..c5871b15714f 100644 --- a/drivers/gpu/drm/sun4i/sun4i_drv.c +++ b/drivers/gpu/drm/sun4i/sun4i_drv.c @@ -16,6 +16,7 @@ #include #include +#include #include #include #include @@ -144,6 +145,7 @@ static void sun4i_drv_unbind(struct device *dev) drm_dev_unregister(drm); drm_kms_helper_poll_fini(drm); + drm_atomic_helper_shutdown(drm); drm_mode_config_cleanup(drm); of_reserved_mem_device_release(dev); drm_dev_put(drm); -- cgit v1.2.3 From 02b92adbe33e6dbd15dc6e32540b22f47c4ff0a2 Mon Sep 17 00:00:00 2001 From: Paul Kocialkowski Date: Thu, 18 Apr 2019 15:27:26 +0200 Subject: drm/sun4i: Set device driver data at bind time for use in unbind Our sun4i_drv_unbind gets the drm device using dev_get_drvdata. However, that driver data is never set in sun4i_drv_bind. Set it there to avoid getting a NULL pointer at unbind time. Fixes: 9026e0d122ac ("drm: Add Allwinner A10 Display Engine support") Signed-off-by: Paul Kocialkowski Signed-off-by: Maxime Ripard Link: https://patchwork.freedesktop.org/patch/msgid/20190418132727.5128-3-paul.kocialkowski@bootlin.com --- drivers/gpu/drm/sun4i/sun4i_drv.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/sun4i/sun4i_drv.c b/drivers/gpu/drm/sun4i/sun4i_drv.c index c5871b15714f..8abaabde08ab 100644 --- a/drivers/gpu/drm/sun4i/sun4i_drv.c +++ b/drivers/gpu/drm/sun4i/sun4i_drv.c @@ -86,6 +86,8 @@ static int sun4i_drv_bind(struct device *dev) ret = -ENOMEM; goto free_drm; } + + dev_set_drvdata(dev, drm); drm->dev_private = drv; INIT_LIST_HEAD(&drv->frontend_list); INIT_LIST_HEAD(&drv->engine_list); -- cgit v1.2.3 From f5a9ed867c83875546c9aadd4ed8e785e9adcc3c Mon Sep 17 00:00:00 2001 From: Paul Kocialkowski Date: Thu, 18 Apr 2019 15:27:27 +0200 Subject: drm/sun4i: Fix component unbinding and component master deletion For our component-backed driver to be properly removed, we need to delete the component master in sun4i_drv_remove and make sure to call component_unbind_all in the master's unbind so that all components are unbound when the master is. Fixes: 9026e0d122ac ("drm: Add Allwinner A10 Display Engine support") Signed-off-by: Paul Kocialkowski Signed-off-by: Maxime Ripard Link: https://patchwork.freedesktop.org/patch/msgid/20190418132727.5128-4-paul.kocialkowski@bootlin.com --- drivers/gpu/drm/sun4i/sun4i_drv.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/gpu/drm/sun4i/sun4i_drv.c b/drivers/gpu/drm/sun4i/sun4i_drv.c index 8abaabde08ab..843b86661833 100644 --- a/drivers/gpu/drm/sun4i/sun4i_drv.c +++ b/drivers/gpu/drm/sun4i/sun4i_drv.c @@ -151,6 +151,8 @@ static void sun4i_drv_unbind(struct device *dev) drm_mode_config_cleanup(drm); of_reserved_mem_device_release(dev); drm_dev_put(drm); + + component_unbind_all(dev, NULL); } static const struct component_master_ops sun4i_drv_master_ops = { @@ -399,6 +401,8 @@ static int sun4i_drv_probe(struct platform_device *pdev) static int sun4i_drv_remove(struct platform_device *pdev) { + component_master_del(&pdev->dev, &sun4i_drv_master_ops); + return 0; } -- cgit v1.2.3 From 738a7832d21e3d911fcddab98ce260b79010b461 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Thu, 18 Apr 2019 12:18:39 +0200 Subject: signal: use fdget() since we don't allow O_PATH As stated in the original commit for pidfd_send_signal() we don't allow to signal processes through O_PATH file descriptors since it is semantically equivalent to a write on the pidfd. We already correctly error out right now and return EBADF if an O_PATH fd is passed. This is because we use file->f_op to detect whether a pidfd is passed and O_PATH fds have their file->f_op set to empty_fops in do_dentry_open() and thus fail the test. Thus, there is no regression. It's just semantically correct to use fdget() and return an error right from there instead of taking a reference and returning an error later. Signed-off-by: Christian Brauner Acked-by: Oleg Nesterov Cc: Arnd Bergmann Cc: "Eric W. Biederman" Cc: Kees Cook Cc: Thomas Gleixner Cc: Jann Horn Cc: David Howells Cc: "Michael Kerrisk (man-pages)" Cc: Andy Lutomirsky Cc: Andrew Morton Cc: Oleg Nesterov Cc: Aleksa Sarai Cc: Al Viro Signed-off-by: Linus Torvalds --- kernel/signal.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/signal.c b/kernel/signal.c index f98448cf2def..227ba170298e 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -3581,7 +3581,7 @@ SYSCALL_DEFINE4(pidfd_send_signal, int, pidfd, int, sig, if (flags) return -EINVAL; - f = fdget_raw(pidfd); + f = fdget(pidfd); if (!f.file) return -EBADF; -- cgit v1.2.3 From ff8acf929014b7f87315588e0daf8597c8aa9d1c Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Wed, 17 Apr 2019 00:21:21 -0700 Subject: arm64: futex: Restore oldval initialization to work around buggy compilers Commit 045afc24124d ("arm64: futex: Fix FUTEX_WAKE_OP atomic ops with non-zero result value") removed oldval's zero initialization in arch_futex_atomic_op_inuser because it is not necessary. Unfortunately, Android's arm64 GCC 4.9.4 [1] does not agree: ../kernel/futex.c: In function 'do_futex': ../kernel/futex.c:1658:17: warning: 'oldval' may be used uninitialized in this function [-Wmaybe-uninitialized] return oldval == cmparg; ^ In file included from ../kernel/futex.c:73:0: ../arch/arm64/include/asm/futex.h:53:6: note: 'oldval' was declared here int oldval, ret, tmp; ^ GCC fails to follow that when ret is non-zero, futex_atomic_op_inuser returns right away, avoiding the uninitialized use that it claims. Restoring the zero initialization works around this issue. [1]: https://android.googlesource.com/platform/prebuilts/gcc/linux-x86/aarch64/aarch64-linux-android-4.9/ Cc: stable@vger.kernel.org Fixes: 045afc24124d ("arm64: futex: Fix FUTEX_WAKE_OP atomic ops with non-zero result value") Reviewed-by: Greg Kroah-Hartman Signed-off-by: Nathan Chancellor Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/futex.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/include/asm/futex.h b/arch/arm64/include/asm/futex.h index e1d95f08f8e1..c7e1a7837706 100644 --- a/arch/arm64/include/asm/futex.h +++ b/arch/arm64/include/asm/futex.h @@ -50,7 +50,7 @@ do { \ static inline int arch_futex_atomic_op_inuser(int op, int oparg, int *oval, u32 __user *_uaddr) { - int oldval, ret, tmp; + int oldval = 0, ret, tmp; u32 __user *uaddr = __uaccess_mask_ptr(_uaddr); pagefault_disable(); -- cgit v1.2.3 From f476b3f809fa02f47af6333ed63715058c3fc348 Mon Sep 17 00:00:00 2001 From: Petr Machata Date: Thu, 18 Apr 2019 07:14:13 +0000 Subject: mlxsw: spectrum: Put MC TCs into DWRR mode Both Spectrum-1 and Spectrum-2 chips are currently configured such that pairs of TC n (which is used for UC traffic) and TC n+8 (which is used for MC traffic) are feeding into the same subgroup. Strict prioritization is configured between the two TCs, and by enabling MC-aware mode on the switch, the lower-numbered (UC) TCs are favored over the higher-numbered (MC) TCs. On Spectrum-2 however, there is an issue in configuration of the MC-aware mode. As a result, MC traffic is prioritized over UC traffic. To work around the issue, configure the MC TCs with DWRR mode (while keeping the UC TCs in strict mode). With this patch, the multicast-unicast arbitration results in the same behavior on both Spectrum-1 and Spectrum-2 chips. Fixes: 7b8195306694 ("mlxsw: spectrum: Configure MC-aware mode on mlxsw ports") Signed-off-by: Petr Machata Signed-off-by: Ido Schimmel Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlxsw/spectrum.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c index 9eb63300c1d3..3745ea194632 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c @@ -3316,7 +3316,7 @@ static int mlxsw_sp_port_ets_init(struct mlxsw_sp_port *mlxsw_sp_port) err = mlxsw_sp_port_ets_set(mlxsw_sp_port, MLXSW_REG_QEEC_HIERARCY_TC, i + 8, i, - false, 0); + true, 100); if (err) return err; } -- cgit v1.2.3 From 1ab3030193d25878b3b1409060e1e0a879800c95 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Thu, 18 Apr 2019 07:14:14 +0000 Subject: mlxsw: pci: Reincrease PCI reset timeout During driver initialization the driver sends a reset to the device and waits for the firmware to signal that it is ready to continue. Commit d2f372ba0914 ("mlxsw: pci: Increase PCI SW reset timeout") increased the timeout to 13 seconds due to longer PHY calibration in Spectrum-2 compared to Spectrum-1. Recently it became apparent that this timeout is too short and therefore this patch increases it again to a safer limit that will be reduced in the future. Fixes: c3ab435466d5 ("mlxsw: spectrum: Extend to support Spectrum-2 ASIC") Fixes: d2f372ba0914 ("mlxsw: pci: Increase PCI SW reset timeout") Signed-off-by: Ido Schimmel Acked-by: Jiri Pirko Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlxsw/pci_hw.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/pci_hw.h b/drivers/net/ethernet/mellanox/mlxsw/pci_hw.h index ffee38e36ce8..8648ca171254 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/pci_hw.h +++ b/drivers/net/ethernet/mellanox/mlxsw/pci_hw.h @@ -27,7 +27,7 @@ #define MLXSW_PCI_SW_RESET 0xF0010 #define MLXSW_PCI_SW_RESET_RST_BIT BIT(0) -#define MLXSW_PCI_SW_RESET_TIMEOUT_MSECS 13000 +#define MLXSW_PCI_SW_RESET_TIMEOUT_MSECS 20000 #define MLXSW_PCI_SW_RESET_WAIT_MSECS 100 #define MLXSW_PCI_FW_READY 0xA1844 #define MLXSW_PCI_FW_READY_MASK 0xFFFF -- cgit v1.2.3 From 151f0dddbbfe4c35c9c5b64873115aafd436af9d Mon Sep 17 00:00:00 2001 From: Amit Cohen Date: Thu, 18 Apr 2019 07:14:16 +0000 Subject: mlxsw: spectrum: Fix autoneg status in ethtool If link is down and autoneg is set to on/off, the status in ethtool does not change. The reason is when the link is down the function returns with zero before changing autoneg value. Move the checking of link state (up/down) to be performed after setting autoneg value, in order to be sure that autoneg will change in any case. Fixes: 56ade8fe3fe1 ("mlxsw: spectrum: Add initial support for Spectrum ASIC") Signed-off-by: Amit Cohen Signed-off-by: Ido Schimmel Acked-by: Jiri Pirko Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlxsw/spectrum.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c index 3745ea194632..6b8aa3761899 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c @@ -3126,11 +3126,11 @@ mlxsw_sp_port_set_link_ksettings(struct net_device *dev, if (err) return err; + mlxsw_sp_port->link.autoneg = autoneg; + if (!netif_running(dev)) return 0; - mlxsw_sp_port->link.autoneg = autoneg; - mlxsw_sp_port_admin_status_set(mlxsw_sp_port, false); mlxsw_sp_port_admin_status_set(mlxsw_sp_port, true); -- cgit v1.2.3 From d5f6db353829fe3867bbf9cd73fd8d631e2346f1 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Thu, 18 Apr 2019 11:39:18 +0100 Subject: net: ipv6: addrlabel: fix spelling mistake "requewst" -> "request" There is a spelling mistake in a NL_SET_ERR_MSG_MOD error message, fix it. Signed-off-by: Colin Ian King Reviewed-by: Mukesh Ojha Signed-off-by: David S. Miller --- net/ipv6/addrlabel.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv6/addrlabel.c b/net/ipv6/addrlabel.c index d43d076c98f5..1766325423b5 100644 --- a/net/ipv6/addrlabel.c +++ b/net/ipv6/addrlabel.c @@ -476,7 +476,7 @@ static int ip6addrlbl_valid_dump_req(const struct nlmsghdr *nlh, } if (nlmsg_attrlen(nlh, sizeof(*ifal))) { - NL_SET_ERR_MSG_MOD(extack, "Invalid data after header for address label dump requewst"); + NL_SET_ERR_MSG_MOD(extack, "Invalid data after header for address label dump request"); return -EINVAL; } -- cgit v1.2.3 From a7cf2cc3cd3622eae9d5619cdde56b4462398c7f Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Thu, 18 Apr 2019 18:03:50 +0100 Subject: firestream: fix spelling mistake "tramsitted" -> "transmitted" There is a spelling mistake in a debug message. Fix it. Signed-off-by: Colin Ian King Signed-off-by: David S. Miller --- drivers/atm/firestream.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/atm/firestream.c b/drivers/atm/firestream.c index 11e1663bdc4d..b2c06da4f62e 100644 --- a/drivers/atm/firestream.c +++ b/drivers/atm/firestream.c @@ -1646,7 +1646,7 @@ static irqreturn_t fs_irq (int irq, void *dev_id) } if (status & ISR_TBRQ_W) { - fs_dprintk (FS_DEBUG_IRQ, "Data tramsitted!\n"); + fs_dprintk (FS_DEBUG_IRQ, "Data transmitted!\n"); process_txdone_queue (dev, &dev->tx_relq); } -- cgit v1.2.3 From e0c1d14a1a3211dccf0540a6703ffbd5d2a75bdb Mon Sep 17 00:00:00 2001 From: Su Bao Cheng Date: Thu, 18 Apr 2019 11:14:56 +0200 Subject: stmmac: pci: Adjust IOT2000 matching Since there are more IOT2040 variants with identical hardware but different asset tags, the asset tag matching should be adjusted to support them. For the board name "SIMATIC IOT2000", currently there are 2 types of hardware, IOT2020 and IOT2040. The IOT2020 is identified by its unique asset tag. Match on it first. If we then match on the board name only, we will catch all IOT2040 variants. In the future there will be no other devices with the "SIMATIC IOT2000" DMI board name but different hardware. Signed-off-by: Su Bao Cheng Reviewed-by: Jan Kiszka Signed-off-by: David S. Miller --- drivers/net/ethernet/stmicro/stmmac/stmmac_pci.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_pci.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_pci.c index d819e8eaba12..cc1e887e47b5 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_pci.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_pci.c @@ -159,6 +159,12 @@ static const struct dmi_system_id quark_pci_dmi[] = { }, .driver_data = (void *)&galileo_stmmac_dmi_data, }, + /* + * There are 2 types of SIMATIC IOT2000: IOT20202 and IOT2040. + * The asset tag "6ES7647-0AA00-0YA2" is only for IOT2020 which + * has only one pci network device while other asset tags are + * for IOT2040 which has two. + */ { .matches = { DMI_EXACT_MATCH(DMI_BOARD_NAME, "SIMATIC IOT2000"), @@ -170,8 +176,6 @@ static const struct dmi_system_id quark_pci_dmi[] = { { .matches = { DMI_EXACT_MATCH(DMI_BOARD_NAME, "SIMATIC IOT2000"), - DMI_EXACT_MATCH(DMI_BOARD_ASSET_TAG, - "6ES7647-0AA00-1YA2"), }, .driver_data = (void *)&iot2040_stmmac_dmi_data, }, -- cgit v1.2.3 From 9188d5ca454fd665145904267e726e9e8d122f5c Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 17 Apr 2019 10:51:19 -0700 Subject: net/tls: fix refcount adjustment in fallback Unlike atomic_add(), refcount_add() does not deal well with a negative argument. TLS fallback code reallocates the skb and is very likely to shrink the truesize, leading to: [ 189.513254] WARNING: CPU: 5 PID: 0 at lib/refcount.c:81 refcount_add_not_zero_checked+0x15c/0x180 Call Trace: refcount_add_checked+0x6/0x40 tls_enc_skb+0xb93/0x13e0 [tls] Once wmem_allocated count saturates the application can no longer send data on the socket. This is similar to Eric's fixes for GSO, TCP: commit 7ec318feeed1 ("tcp: gso: avoid refcount_t warning from tcp_gso_segment()") and UDP: commit 575b65bc5bff ("udp: avoid refcount_t saturation in __udp_gso_segment()"). Unlike the GSO case, for TLS fallback it's likely that the skb has shrunk, so the "likely" annotation is the other way around (likely branch being "sub"). Fixes: e8f69799810c ("net/tls: Add generic NIC offload infrastructure") Signed-off-by: Jakub Kicinski Reviewed-by: John Hurley Signed-off-by: David S. Miller --- net/tls/tls_device_fallback.c | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/net/tls/tls_device_fallback.c b/net/tls/tls_device_fallback.c index 54c3a758f2a7..a3ebd4b02714 100644 --- a/net/tls/tls_device_fallback.c +++ b/net/tls/tls_device_fallback.c @@ -194,6 +194,9 @@ static void update_chksum(struct sk_buff *skb, int headln) static void complete_skb(struct sk_buff *nskb, struct sk_buff *skb, int headln) { + struct sock *sk = skb->sk; + int delta; + skb_copy_header(nskb, skb); skb_put(nskb, skb->len); @@ -201,11 +204,15 @@ static void complete_skb(struct sk_buff *nskb, struct sk_buff *skb, int headln) update_chksum(nskb, headln); nskb->destructor = skb->destructor; - nskb->sk = skb->sk; + nskb->sk = sk; skb->destructor = NULL; skb->sk = NULL; - refcount_add(nskb->truesize - skb->truesize, - &nskb->sk->sk_wmem_alloc); + + delta = nskb->truesize - skb->truesize; + if (likely(delta < 0)) + WARN_ON_ONCE(refcount_sub_and_test(-delta, &sk->sk_wmem_alloc)); + else if (delta) + refcount_add(delta, &sk->sk_wmem_alloc); } /* This function may be called after the user socket is already -- cgit v1.2.3 From 0228034d8e5915b98c33db35a98f5e909e848ae9 Mon Sep 17 00:00:00 2001 From: Saurav Kashyap Date: Thu, 18 Apr 2019 03:40:12 -0700 Subject: Revert "scsi: fcoe: clear FC_RP_STARTED flags when receiving a LOGO" This patch clears FC_RP_STARTED flag during logoff, because of this re-login(flogi) didn't happen to the switch. This reverts commit 1550ec458e0cf1a40a170ab1f4c46e3f52860f65. Fixes: 1550ec458e0c ("scsi: fcoe: clear FC_RP_STARTED flags when receiving a LOGO") Cc: # v4.18+ Signed-off-by: Saurav Kashyap Reviewed-by: Hannes Reinecke Signed-off-by: Martin K. Petersen --- drivers/scsi/libfc/fc_rport.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/scsi/libfc/fc_rport.c b/drivers/scsi/libfc/fc_rport.c index dfba4921b265..5bf61431434b 100644 --- a/drivers/scsi/libfc/fc_rport.c +++ b/drivers/scsi/libfc/fc_rport.c @@ -2162,7 +2162,6 @@ static void fc_rport_recv_logo_req(struct fc_lport *lport, struct fc_frame *fp) FC_RPORT_DBG(rdata, "Received LOGO request while in state %s\n", fc_rport_state(rdata)); - rdata->flags &= ~FC_RP_STARTED; fc_rport_enter_delete(rdata, RPORT_EV_STOP); mutex_unlock(&rdata->rp_mutex); kref_put(&rdata->kref, fc_rport_destroy); -- cgit v1.2.3 From 144ec97493af34efdb77c5aba146e9c7de8d0a06 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 18 Apr 2019 18:13:58 +0200 Subject: scsi: aic7xxx: fix EISA support Instead of relying on the now removed NULL argument to pci_alloc_consistent, switch to the generic DMA API, and store the struct device so that we can pass it. Fixes: 4167b2ad5182 ("PCI: Remove NULL device handling from PCI DMA API") Reported-by: Matthew Whitehead Signed-off-by: Christoph Hellwig Tested-by: Matthew Whitehead Signed-off-by: Martin K. Petersen --- drivers/scsi/aic7xxx/aic7770_osm.c | 1 + drivers/scsi/aic7xxx/aic7xxx.h | 1 + drivers/scsi/aic7xxx/aic7xxx_osm.c | 10 ++++------ drivers/scsi/aic7xxx/aic7xxx_osm_pci.c | 1 + 4 files changed, 7 insertions(+), 6 deletions(-) diff --git a/drivers/scsi/aic7xxx/aic7770_osm.c b/drivers/scsi/aic7xxx/aic7770_osm.c index 3d401d02c019..bdd177e3d762 100644 --- a/drivers/scsi/aic7xxx/aic7770_osm.c +++ b/drivers/scsi/aic7xxx/aic7770_osm.c @@ -91,6 +91,7 @@ aic7770_probe(struct device *dev) ahc = ahc_alloc(&aic7xxx_driver_template, name); if (ahc == NULL) return (ENOMEM); + ahc->dev = dev; error = aic7770_config(ahc, aic7770_ident_table + edev->id.driver_data, eisaBase); if (error != 0) { diff --git a/drivers/scsi/aic7xxx/aic7xxx.h b/drivers/scsi/aic7xxx/aic7xxx.h index 5614921b4041..88b90f9806c9 100644 --- a/drivers/scsi/aic7xxx/aic7xxx.h +++ b/drivers/scsi/aic7xxx/aic7xxx.h @@ -943,6 +943,7 @@ struct ahc_softc { * Platform specific device information. */ ahc_dev_softc_t dev_softc; + struct device *dev; /* * Bus specific device information. diff --git a/drivers/scsi/aic7xxx/aic7xxx_osm.c b/drivers/scsi/aic7xxx/aic7xxx_osm.c index 3c9c17450bb3..d5c4a0d23706 100644 --- a/drivers/scsi/aic7xxx/aic7xxx_osm.c +++ b/drivers/scsi/aic7xxx/aic7xxx_osm.c @@ -860,8 +860,8 @@ int ahc_dmamem_alloc(struct ahc_softc *ahc, bus_dma_tag_t dmat, void** vaddr, int flags, bus_dmamap_t *mapp) { - *vaddr = pci_alloc_consistent(ahc->dev_softc, - dmat->maxsize, mapp); + /* XXX: check if we really need the GFP_ATOMIC and unwind this mess! */ + *vaddr = dma_alloc_coherent(ahc->dev, dmat->maxsize, mapp, GFP_ATOMIC); if (*vaddr == NULL) return ENOMEM; return 0; @@ -871,8 +871,7 @@ void ahc_dmamem_free(struct ahc_softc *ahc, bus_dma_tag_t dmat, void* vaddr, bus_dmamap_t map) { - pci_free_consistent(ahc->dev_softc, dmat->maxsize, - vaddr, map); + dma_free_coherent(ahc->dev, dmat->maxsize, vaddr, map); } int @@ -1123,8 +1122,7 @@ ahc_linux_register_host(struct ahc_softc *ahc, struct scsi_host_template *templa host->transportt = ahc_linux_transport_template; - retval = scsi_add_host(host, - (ahc->dev_softc ? &ahc->dev_softc->dev : NULL)); + retval = scsi_add_host(host, ahc->dev); if (retval) { printk(KERN_WARNING "aic7xxx: scsi_add_host failed\n"); scsi_host_put(host); diff --git a/drivers/scsi/aic7xxx/aic7xxx_osm_pci.c b/drivers/scsi/aic7xxx/aic7xxx_osm_pci.c index 0fc14dac7070..717d8d1082ce 100644 --- a/drivers/scsi/aic7xxx/aic7xxx_osm_pci.c +++ b/drivers/scsi/aic7xxx/aic7xxx_osm_pci.c @@ -250,6 +250,7 @@ ahc_linux_pci_dev_probe(struct pci_dev *pdev, const struct pci_device_id *ent) } } ahc->dev_softc = pci; + ahc->dev = &pci->dev; error = ahc_pci_config(ahc, entry); if (error != 0) { ahc_free(ahc); -- cgit v1.2.3 From c53051128bb0e8754e13345d782ca69e5e1ce36d Mon Sep 17 00:00:00 2001 From: Guoqing Jiang Date: Thu, 18 Apr 2019 10:01:55 +0800 Subject: sc16is7xx: put err_spi and err_i2c into correct #ifdef err_spi is only called within SERIAL_SC16IS7XX_SPI while err_i2c is called inside SERIAL_SC16IS7XX_I2C. So we need to put err_spi and err_i2c into each #ifdef accordingly. This change fixes ("sc16is7xx: move label 'err_spi' to correct section"). Signed-off-by: Guoqing Jiang Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/sc16is7xx.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/tty/serial/sc16is7xx.c b/drivers/tty/serial/sc16is7xx.c index 22381a8c72e4..a31db15cd7c0 100644 --- a/drivers/tty/serial/sc16is7xx.c +++ b/drivers/tty/serial/sc16is7xx.c @@ -1522,11 +1522,11 @@ static int __init sc16is7xx_init(void) #ifdef CONFIG_SERIAL_SC16IS7XX_SPI err_spi: +#endif #ifdef CONFIG_SERIAL_SC16IS7XX_I2C i2c_del_driver(&sc16is7xx_i2c_uart_driver); -#endif -#endif err_i2c: +#endif uart_unregister_driver(&sc16is7xx_uart); return ret; } -- cgit v1.2.3 From fc834e607ae3d18e1a20bca3f9a2d7f52ea7a2be Mon Sep 17 00:00:00 2001 From: Alan Stern Date: Thu, 18 Apr 2019 13:12:07 -0400 Subject: USB: dummy-hcd: Fix failure to give back unlinked URBs The syzkaller USB fuzzer identified a failure mode in which dummy-hcd would never give back an unlinked URB. This causes usb_kill_urb() to hang, leading to WARNINGs and unkillable threads. In dummy-hcd, all URBs are given back by the dummy_timer() routine as it scans through the list of pending URBS. Failure to give back URBs can be caused by failure to start or early exit from the scanning loop. The code currently has two such pathways: One is triggered when an unsupported bus transfer speed is encountered, and the other by exhausting the simulated bandwidth for USB transfers during a frame. This patch removes those two paths, thereby allowing all unlinked URBs to be given back in a timely manner. It adds a check for the bus speed when the gadget first starts running, so that dummy_timer() will never thereafter encounter an unsupported speed. And it prevents the loop from exiting as soon as the total bandwidth has been used up (the scanning loop continues, giving back unlinked URBs as they are found, but not transferring any more data). Thanks to Andrey Konovalov for manually running the syzkaller fuzzer to help track down the source of the bug. Signed-off-by: Alan Stern Reported-and-tested-by: syzbot+d919b0f29d7b5a4994b9@syzkaller.appspotmail.com CC: Signed-off-by: Greg Kroah-Hartman --- drivers/usb/gadget/udc/dummy_hcd.c | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) diff --git a/drivers/usb/gadget/udc/dummy_hcd.c b/drivers/usb/gadget/udc/dummy_hcd.c index baf72f95f0f1..213b52508621 100644 --- a/drivers/usb/gadget/udc/dummy_hcd.c +++ b/drivers/usb/gadget/udc/dummy_hcd.c @@ -979,8 +979,18 @@ static int dummy_udc_start(struct usb_gadget *g, struct dummy_hcd *dum_hcd = gadget_to_dummy_hcd(g); struct dummy *dum = dum_hcd->dum; - if (driver->max_speed == USB_SPEED_UNKNOWN) + switch (g->speed) { + /* All the speeds we support */ + case USB_SPEED_LOW: + case USB_SPEED_FULL: + case USB_SPEED_HIGH: + case USB_SPEED_SUPER: + break; + default: + dev_err(dummy_dev(dum_hcd), "Unsupported driver max speed %d\n", + driver->max_speed); return -EINVAL; + } /* * SLAVE side init ... the layer above hardware, which @@ -1784,9 +1794,10 @@ static void dummy_timer(struct timer_list *t) /* Bus speed is 500000 bytes/ms, so use a little less */ total = 490000; break; - default: + default: /* Can't happen */ dev_err(dummy_dev(dum_hcd), "bogus device speed\n"); - return; + total = 0; + break; } /* FIXME if HZ != 1000 this will probably misbehave ... */ @@ -1828,7 +1839,7 @@ restart: /* Used up this frame's bandwidth? */ if (total <= 0) - break; + continue; /* find the gadget's ep for this request (if configured) */ address = usb_pipeendpoint (urb->pipe); -- cgit v1.2.3 From b50776ae011cfd26df3cc2b4af8b2dc3b683e553 Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Wed, 13 Feb 2019 11:59:48 -0800 Subject: locking/atomics: Don't assume that scripts are executable patch(1) doesn't set the x bit on files. So if someone downloads and applies patch-4.21.xz, their kernel won't build. Fix that by executing /bin/sh. Signed-off-by: Andrew Morton Acked-by: Mark Rutland Cc: Boqun Feng Cc: Linus Torvalds Cc: Peter Zijlstra (Intel) Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Will Deacon Signed-off-by: Ingo Molnar --- scripts/atomic/gen-atomics.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/atomic/gen-atomics.sh b/scripts/atomic/gen-atomics.sh index 27400b0cd732..000dc6437893 100644 --- a/scripts/atomic/gen-atomics.sh +++ b/scripts/atomic/gen-atomics.sh @@ -13,7 +13,7 @@ gen-atomic-long.sh asm-generic/atomic-long.h gen-atomic-fallback.sh linux/atomic-fallback.h EOF while read script header; do - ${ATOMICDIR}/${script} ${ATOMICTBL} > ${LINUXDIR}/include/${header} + /bin/sh ${ATOMICDIR}/${script} ${ATOMICTBL} > ${LINUXDIR}/include/${header} HASH="$(sha1sum ${LINUXDIR}/include/${header})" HASH="${HASH%% *}" printf "// %s\n" "${HASH}" >> ${LINUXDIR}/include/${header} -- cgit v1.2.3 From 3ff9c075cc767b3060bdac12da72fc94dd7da1b8 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Sun, 24 Feb 2019 01:49:52 +0900 Subject: x86/kprobes: Verify stack frame on kretprobe Verify the stack frame pointer on kretprobe trampoline handler, If the stack frame pointer does not match, it skips the wrong entry and tries to find correct one. This can happen if user puts the kretprobe on the function which can be used in the path of ftrace user-function call. Such functions should not be probed, so this adds a warning message that reports which function should be blacklisted. Tested-by: Andrea Righi Signed-off-by: Masami Hiramatsu Acked-by: Steven Rostedt Cc: Linus Torvalds Cc: Mathieu Desnoyers Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: stable@vger.kernel.org Link: http://lkml.kernel.org/r/155094059185.6137.15527904013362842072.stgit@devbox Signed-off-by: Ingo Molnar --- arch/x86/kernel/kprobes/core.c | 26 ++++++++++++++++++++++++++ include/linux/kprobes.h | 1 + 2 files changed, 27 insertions(+) diff --git a/arch/x86/kernel/kprobes/core.c b/arch/x86/kernel/kprobes/core.c index a034cb808e7e..18fbe9be2d68 100644 --- a/arch/x86/kernel/kprobes/core.c +++ b/arch/x86/kernel/kprobes/core.c @@ -569,6 +569,7 @@ void arch_prepare_kretprobe(struct kretprobe_instance *ri, struct pt_regs *regs) unsigned long *sara = stack_addr(regs); ri->ret_addr = (kprobe_opcode_t *) *sara; + ri->fp = sara; /* Replace the return addr with trampoline addr */ *sara = (unsigned long) &kretprobe_trampoline; @@ -759,15 +760,21 @@ static __used void *trampoline_handler(struct pt_regs *regs) unsigned long flags, orig_ret_address = 0; unsigned long trampoline_address = (unsigned long)&kretprobe_trampoline; kprobe_opcode_t *correct_ret_addr = NULL; + void *frame_pointer; + bool skipped = false; INIT_HLIST_HEAD(&empty_rp); kretprobe_hash_lock(current, &head, &flags); /* fixup registers */ #ifdef CONFIG_X86_64 regs->cs = __KERNEL_CS; + /* On x86-64, we use pt_regs->sp for return address holder. */ + frame_pointer = ®s->sp; #else regs->cs = __KERNEL_CS | get_kernel_rpl(); regs->gs = 0; + /* On x86-32, we use pt_regs->flags for return address holder. */ + frame_pointer = ®s->flags; #endif regs->ip = trampoline_address; regs->orig_ax = ~0UL; @@ -789,8 +796,25 @@ static __used void *trampoline_handler(struct pt_regs *regs) if (ri->task != current) /* another task is sharing our hash bucket */ continue; + /* + * Return probes must be pushed on this hash list correct + * order (same as return order) so that it can be poped + * correctly. However, if we find it is pushed it incorrect + * order, this means we find a function which should not be + * probed, because the wrong order entry is pushed on the + * path of processing other kretprobe itself. + */ + if (ri->fp != frame_pointer) { + if (!skipped) + pr_warn("kretprobe is stacked incorrectly. Trying to fixup.\n"); + skipped = true; + continue; + } orig_ret_address = (unsigned long)ri->ret_addr; + if (skipped) + pr_warn("%ps must be blacklisted because of incorrect kretprobe order\n", + ri->rp->kp.addr); if (orig_ret_address != trampoline_address) /* @@ -808,6 +832,8 @@ static __used void *trampoline_handler(struct pt_regs *regs) if (ri->task != current) /* another task is sharing our hash bucket */ continue; + if (ri->fp != frame_pointer) + continue; orig_ret_address = (unsigned long)ri->ret_addr; if (ri->rp && ri->rp->handler) { diff --git a/include/linux/kprobes.h b/include/linux/kprobes.h index 201f0f2683f2..9a897256e481 100644 --- a/include/linux/kprobes.h +++ b/include/linux/kprobes.h @@ -173,6 +173,7 @@ struct kretprobe_instance { struct kretprobe *rp; kprobe_opcode_t *ret_addr; struct task_struct *task; + void *fp; char data[0]; }; -- cgit v1.2.3 From fabe38ab6b2bd9418350284c63825f13b8a6abba Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Sun, 24 Feb 2019 01:50:20 +0900 Subject: kprobes: Mark ftrace mcount handler functions nokprobe Mark ftrace mcount handler functions nokprobe since probing on these functions with kretprobe pushes return address incorrectly on kretprobe shadow stack. Reported-by: Francis Deslauriers Tested-by: Andrea Righi Signed-off-by: Masami Hiramatsu Acked-by: Steven Rostedt Acked-by: Steven Rostedt (VMware) Cc: Linus Torvalds Cc: Mathieu Desnoyers Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: stable@vger.kernel.org Link: http://lkml.kernel.org/r/155094062044.6137.6419622920568680640.stgit@devbox Signed-off-by: Ingo Molnar --- kernel/trace/ftrace.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 26c8ca9bd06b..b920358dd8f7 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -33,6 +33,7 @@ #include #include #include +#include #include @@ -6246,7 +6247,7 @@ void ftrace_reset_array_ops(struct trace_array *tr) tr->ops->func = ftrace_stub; } -static inline void +static nokprobe_inline void __ftrace_ops_list_func(unsigned long ip, unsigned long parent_ip, struct ftrace_ops *ignored, struct pt_regs *regs) { @@ -6306,11 +6307,13 @@ static void ftrace_ops_list_func(unsigned long ip, unsigned long parent_ip, { __ftrace_ops_list_func(ip, parent_ip, NULL, regs); } +NOKPROBE_SYMBOL(ftrace_ops_list_func); #else static void ftrace_ops_no_ops(unsigned long ip, unsigned long parent_ip) { __ftrace_ops_list_func(ip, parent_ip, NULL, NULL); } +NOKPROBE_SYMBOL(ftrace_ops_no_ops); #endif /* @@ -6337,6 +6340,7 @@ static void ftrace_ops_assist_func(unsigned long ip, unsigned long parent_ip, preempt_enable_notrace(); trace_clear_recursion(bit); } +NOKPROBE_SYMBOL(ftrace_ops_assist_func); /** * ftrace_ops_get_func - get the function a trampoline should call -- cgit v1.2.3 From b191fa96ea6dc00d331dcc28c1f7db5e075693a0 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Sun, 24 Feb 2019 01:50:49 +0900 Subject: x86/kprobes: Avoid kretprobe recursion bug Avoid kretprobe recursion loop bg by setting a dummy kprobes to current_kprobe per-CPU variable. This bug has been introduced with the asm-coded trampoline code, since previously it used another kprobe for hooking the function return placeholder (which only has a nop) and trampoline handler was called from that kprobe. This revives the old lost kprobe again. With this fix, we don't see deadlock anymore. And you can see that all inner-called kretprobe are skipped. event_1 235 0 event_2 19375 19612 The 1st column is recorded count and the 2nd is missed count. Above shows (event_1 rec) + (event_2 rec) ~= (event_2 missed) (some difference are here because the counter is racy) Reported-by: Andrea Righi Tested-by: Andrea Righi Signed-off-by: Masami Hiramatsu Acked-by: Steven Rostedt Cc: Linus Torvalds Cc: Mathieu Desnoyers Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: stable@vger.kernel.org Fixes: c9becf58d935 ("[PATCH] kretprobe: kretprobe-booster") Link: http://lkml.kernel.org/r/155094064889.6137.972160690963039.stgit@devbox Signed-off-by: Ingo Molnar --- arch/x86/kernel/kprobes/core.c | 22 ++++++++++++++++++++-- 1 file changed, 20 insertions(+), 2 deletions(-) diff --git a/arch/x86/kernel/kprobes/core.c b/arch/x86/kernel/kprobes/core.c index 18fbe9be2d68..fed46ddb1eef 100644 --- a/arch/x86/kernel/kprobes/core.c +++ b/arch/x86/kernel/kprobes/core.c @@ -749,11 +749,16 @@ asm( NOKPROBE_SYMBOL(kretprobe_trampoline); STACK_FRAME_NON_STANDARD(kretprobe_trampoline); +static struct kprobe kretprobe_kprobe = { + .addr = (void *)kretprobe_trampoline, +}; + /* * Called from kretprobe_trampoline */ static __used void *trampoline_handler(struct pt_regs *regs) { + struct kprobe_ctlblk *kcb; struct kretprobe_instance *ri = NULL; struct hlist_head *head, empty_rp; struct hlist_node *tmp; @@ -763,6 +768,17 @@ static __used void *trampoline_handler(struct pt_regs *regs) void *frame_pointer; bool skipped = false; + preempt_disable(); + + /* + * Set a dummy kprobe for avoiding kretprobe recursion. + * Since kretprobe never run in kprobe handler, kprobe must not + * be running at this point. + */ + kcb = get_kprobe_ctlblk(); + __this_cpu_write(current_kprobe, &kretprobe_kprobe); + kcb->kprobe_status = KPROBE_HIT_ACTIVE; + INIT_HLIST_HEAD(&empty_rp); kretprobe_hash_lock(current, &head, &flags); /* fixup registers */ @@ -838,10 +854,9 @@ static __used void *trampoline_handler(struct pt_regs *regs) orig_ret_address = (unsigned long)ri->ret_addr; if (ri->rp && ri->rp->handler) { __this_cpu_write(current_kprobe, &ri->rp->kp); - get_kprobe_ctlblk()->kprobe_status = KPROBE_HIT_ACTIVE; ri->ret_addr = correct_ret_addr; ri->rp->handler(ri, regs); - __this_cpu_write(current_kprobe, NULL); + __this_cpu_write(current_kprobe, &kretprobe_kprobe); } recycle_rp_inst(ri, &empty_rp); @@ -857,6 +872,9 @@ static __used void *trampoline_handler(struct pt_regs *regs) kretprobe_hash_unlock(current, &flags); + __this_cpu_write(current_kprobe, NULL); + preempt_enable(); + hlist_for_each_entry_safe(ri, tmp, &empty_rp, hlist) { hlist_del(&ri->hlist); kfree(ri); -- cgit v1.2.3 From 1de7edbb59c8f1b46071f66c5c97b8a59569eb51 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Fri, 29 Mar 2019 17:47:43 -0700 Subject: x86/cpu/bugs: Use __initconst for 'const' init data Some of the recently added const tables use __initdata which causes section attribute conflicts. Use __initconst instead. Fixes: fa1202ef2243 ("x86/speculation: Add command line control") Signed-off-by: Andi Kleen Signed-off-by: Thomas Gleixner Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/20190330004743.29541-9-andi@firstfloor.org --- arch/x86/kernel/cpu/bugs.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 2da82eff0eb4..b91b3bfa5cfb 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -275,7 +275,7 @@ static const struct { const char *option; enum spectre_v2_user_cmd cmd; bool secure; -} v2_user_options[] __initdata = { +} v2_user_options[] __initconst = { { "auto", SPECTRE_V2_USER_CMD_AUTO, false }, { "off", SPECTRE_V2_USER_CMD_NONE, false }, { "on", SPECTRE_V2_USER_CMD_FORCE, true }, @@ -419,7 +419,7 @@ static const struct { const char *option; enum spectre_v2_mitigation_cmd cmd; bool secure; -} mitigation_options[] __initdata = { +} mitigation_options[] __initconst = { { "off", SPECTRE_V2_CMD_NONE, false }, { "on", SPECTRE_V2_CMD_FORCE, true }, { "retpoline", SPECTRE_V2_CMD_RETPOLINE, false }, @@ -658,7 +658,7 @@ static const char * const ssb_strings[] = { static const struct { const char *option; enum ssb_mitigation_cmd cmd; -} ssb_mitigation_options[] __initdata = { +} ssb_mitigation_options[] __initconst = { { "auto", SPEC_STORE_BYPASS_CMD_AUTO }, /* Platform decides */ { "on", SPEC_STORE_BYPASS_CMD_ON }, /* Disable Speculative Store Bypass */ { "off", SPEC_STORE_BYPASS_CMD_NONE }, /* Don't touch Speculative Store Bypass */ -- cgit v1.2.3 From bd4264112f93045704731850c5e4d85db981cd85 Mon Sep 17 00:00:00 2001 From: Christian König Date: Tue, 16 Apr 2019 11:49:17 +0200 Subject: drm/ttm: fix re-init of global structures MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When a driver unloads without unloading TTM we don't correctly clear the global structures leading to errors on re-init. Next step should probably be to remove the global structures and kobjs all together, but this is tricky since we need to maintain backward compatibility. Signed-off-by: Christian König Reviewed-by: Karol Herbst Tested-by: Karol Herbst CC: stable@vger.kernel.org # 5.0.x Signed-off-by: Alex Deucher --- drivers/gpu/drm/ttm/ttm_bo.c | 10 +++++----- drivers/gpu/drm/ttm/ttm_memory.c | 5 +++-- include/drm/ttm/ttm_bo_driver.h | 1 - 3 files changed, 8 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c index 0fa5034b9f9e..1a01669b159a 100644 --- a/drivers/gpu/drm/ttm/ttm_bo.c +++ b/drivers/gpu/drm/ttm/ttm_bo.c @@ -49,9 +49,8 @@ static void ttm_bo_global_kobj_release(struct kobject *kobj); * ttm_global_mutex - protecting the global BO state */ DEFINE_MUTEX(ttm_global_mutex); -struct ttm_bo_global ttm_bo_glob = { - .use_count = 0 -}; +unsigned ttm_bo_glob_use_count; +struct ttm_bo_global ttm_bo_glob; static struct attribute ttm_bo_count = { .name = "bo_count", @@ -1531,12 +1530,13 @@ static void ttm_bo_global_release(void) struct ttm_bo_global *glob = &ttm_bo_glob; mutex_lock(&ttm_global_mutex); - if (--glob->use_count > 0) + if (--ttm_bo_glob_use_count > 0) goto out; kobject_del(&glob->kobj); kobject_put(&glob->kobj); ttm_mem_global_release(&ttm_mem_glob); + memset(glob, 0, sizeof(*glob)); out: mutex_unlock(&ttm_global_mutex); } @@ -1548,7 +1548,7 @@ static int ttm_bo_global_init(void) unsigned i; mutex_lock(&ttm_global_mutex); - if (++glob->use_count > 1) + if (++ttm_bo_glob_use_count > 1) goto out; ret = ttm_mem_global_init(&ttm_mem_glob); diff --git a/drivers/gpu/drm/ttm/ttm_memory.c b/drivers/gpu/drm/ttm/ttm_memory.c index f1567c353b54..9a0909decb36 100644 --- a/drivers/gpu/drm/ttm/ttm_memory.c +++ b/drivers/gpu/drm/ttm/ttm_memory.c @@ -461,8 +461,8 @@ out_no_zone: void ttm_mem_global_release(struct ttm_mem_global *glob) { - unsigned int i; struct ttm_mem_zone *zone; + unsigned int i; /* let the page allocator first stop the shrink work. */ ttm_page_alloc_fini(); @@ -475,9 +475,10 @@ void ttm_mem_global_release(struct ttm_mem_global *glob) zone = glob->zones[i]; kobject_del(&zone->kobj); kobject_put(&zone->kobj); - } + } kobject_del(&glob->kobj); kobject_put(&glob->kobj); + memset(glob, 0, sizeof(*glob)); } static void ttm_check_swapping(struct ttm_mem_global *glob) diff --git a/include/drm/ttm/ttm_bo_driver.h b/include/drm/ttm/ttm_bo_driver.h index cbf3180cb612..668ad971cd7b 100644 --- a/include/drm/ttm/ttm_bo_driver.h +++ b/include/drm/ttm/ttm_bo_driver.h @@ -420,7 +420,6 @@ extern struct ttm_bo_global { /** * Protected by ttm_global_mutex. */ - unsigned int use_count; struct list_head device_list; /** -- cgit v1.2.3 From 1a62b18d51e5c5ecc0345c85bb9fef870ab721ed Mon Sep 17 00:00:00 2001 From: Qian Cai Date: Thu, 18 Apr 2019 17:49:55 -0700 Subject: slab: store tagged freelist for off-slab slabmgmt Commit 51dedad06b5f ("kasan, slab: make freelist stored without tags") calls kasan_reset_tag() for off-slab slab management object leading to freelist being stored non-tagged. However, cache_grow_begin() calls alloc_slabmgmt() which calls kmem_cache_alloc_node() assigns a tag for the address and stores it in the shadow address. As the result, it causes endless errors below during boot due to drain_freelist() -> slab_destroy() -> kasan_slab_free() which compares already untagged freelist against the stored tag in the shadow address. Since off-slab slab management object freelist is such a special case, just store it tagged. Non-off-slab management object freelist is still stored untagged which has not been assigned a tag and should not cause any other troubles with this inconsistency. BUG: KASAN: double-free or invalid-free in slab_destroy+0x84/0x88 Pointer tag: [ff], memory tag: [99] CPU: 0 PID: 1376 Comm: kworker/0:4 Tainted: G W 5.1.0-rc3+ #8 Hardware name: HPE Apollo 70 /C01_APACHE_MB , BIOS L50_5.13_1.0.6 07/10/2018 Workqueue: cgroup_destroy css_killed_work_fn Call trace: print_address_description+0x74/0x2a4 kasan_report_invalid_free+0x80/0xc0 __kasan_slab_free+0x204/0x208 kasan_slab_free+0xc/0x18 kmem_cache_free+0xe4/0x254 slab_destroy+0x84/0x88 drain_freelist+0xd0/0x104 __kmem_cache_shrink+0x1ac/0x224 __kmemcg_cache_deactivate+0x1c/0x28 memcg_deactivate_kmem_caches+0xa0/0xe8 memcg_offline_kmem+0x8c/0x3d4 mem_cgroup_css_offline+0x24c/0x290 css_killed_work_fn+0x154/0x618 process_one_work+0x9cc/0x183c worker_thread+0x9b0/0xe38 kthread+0x374/0x390 ret_from_fork+0x10/0x18 Allocated by task 1625: __kasan_kmalloc+0x168/0x240 kasan_slab_alloc+0x18/0x20 kmem_cache_alloc_node+0x1f8/0x3a0 cache_grow_begin+0x4fc/0xa24 cache_alloc_refill+0x2f8/0x3e8 kmem_cache_alloc+0x1bc/0x3bc sock_alloc_inode+0x58/0x334 alloc_inode+0xb8/0x164 new_inode_pseudo+0x20/0xec sock_alloc+0x74/0x284 __sock_create+0xb0/0x58c sock_create+0x98/0xb8 __sys_socket+0x60/0x138 __arm64_sys_socket+0xa4/0x110 el0_svc_handler+0x2c0/0x47c el0_svc+0x8/0xc Freed by task 1625: __kasan_slab_free+0x114/0x208 kasan_slab_free+0xc/0x18 kfree+0x1a8/0x1e0 single_release+0x7c/0x9c close_pdeo+0x13c/0x43c proc_reg_release+0xec/0x108 __fput+0x2f8/0x784 ____fput+0x1c/0x28 task_work_run+0xc0/0x1b0 do_notify_resume+0xb44/0x1278 work_pending+0x8/0x10 The buggy address belongs to the object at ffff809681b89e00 which belongs to the cache kmalloc-128 of size 128 The buggy address is located 0 bytes inside of 128-byte region [ffff809681b89e00, ffff809681b89e80) The buggy address belongs to the page: page:ffff7fe025a06e00 count:1 mapcount:0 mapping:01ff80082000fb00 index:0xffff809681b8fe04 flags: 0x17ffffffc000200(slab) raw: 017ffffffc000200 ffff7fe025a06d08 ffff7fe022ef7b88 01ff80082000fb00 raw: ffff809681b8fe04 ffff809681b80000 00000001000000e0 0000000000000000 page dumped because: kasan: bad access detected page allocated via order 0, migratetype Unmovable, gfp_mask 0x2420c0(__GFP_IO|__GFP_FS|__GFP_NOWARN|__GFP_COMP|__GFP_THISNODE) prep_new_page+0x4e0/0x5e0 get_page_from_freelist+0x4ce8/0x50d4 __alloc_pages_nodemask+0x738/0x38b8 cache_grow_begin+0xd8/0xa24 ____cache_alloc_node+0x14c/0x268 __kmalloc+0x1c8/0x3fc ftrace_free_mem+0x408/0x1284 ftrace_free_init_mem+0x20/0x28 kernel_init+0x24/0x548 ret_from_fork+0x10/0x18 Memory state around the buggy address: ffff809681b89c00: fe fe fe fe fe fe fe fe fe fe fe fe fe fe fe fe ffff809681b89d00: fe fe fe fe fe fe fe fe fe fe fe fe fe fe fe fe >ffff809681b89e00: 99 99 99 99 99 99 99 99 fe fe fe fe fe fe fe fe ^ ffff809681b89f00: 43 43 43 43 43 fe fe fe fe fe fe fe fe fe fe fe ffff809681b8a000: 6d fe fe fe fe fe fe fe fe fe fe fe fe fe fe fe Link: http://lkml.kernel.org/r/20190403022858.97584-1-cai@lca.pw Fixes: 51dedad06b5f ("kasan, slab: make freelist stored without tags") Signed-off-by: Qian Cai Reviewed-by: Andrey Konovalov Cc: Christoph Lameter Cc: Pekka Enberg Cc: David Rientjes Cc: Joonsoo Kim Cc: Andrey Ryabinin Cc: Alexander Potapenko Cc: Dmitry Vyukov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/slab.c | 1 - 1 file changed, 1 deletion(-) diff --git a/mm/slab.c b/mm/slab.c index 47a380a486ee..9142ee992493 100644 --- a/mm/slab.c +++ b/mm/slab.c @@ -2374,7 +2374,6 @@ static void *alloc_slabmgmt(struct kmem_cache *cachep, /* Slab management obj is off-slab. */ freelist = kmem_cache_alloc_node(cachep->freelist_cache, local_flags, nodeid); - freelist = kasan_reset_tag(freelist); if (!freelist) return NULL; } else { -- cgit v1.2.3 From 87039546544479d4bedb19d0ea525270c43c1c9b Mon Sep 17 00:00:00 2001 From: Hugh Dickins Date: Thu, 18 Apr 2019 17:49:58 -0700 Subject: mm: swapoff: shmem_find_swap_entries() filter out other types Swapfile "type" was passed all the way down to shmem_unuse_inode(), but then forgotten from shmem_find_swap_entries(): with the result that removing one swapfile would try to free up all the swap from shmem - no problem when only one swapfile anyway, but counter-productive when more, causing swapoff to be unnecessarily OOM-killed when it should succeed. Link: http://lkml.kernel.org/r/alpine.LSU.2.11.1904081254470.1523@eggly.anvils Fixes: b56a2d8af914 ("mm: rid swapoff of quadratic complexity") Signed-off-by: Hugh Dickins Cc: Konstantin Khlebnikov Cc: "Alex Xu (Hello71)" Cc: Vineeth Pillai Cc: Kelley Nielsen Cc: Rik van Riel Cc: Huang Ying Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/shmem.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/mm/shmem.c b/mm/shmem.c index b3db3779a30a..859e8628071f 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -1099,10 +1099,11 @@ extern struct swap_info_struct *swap_info[]; static int shmem_find_swap_entries(struct address_space *mapping, pgoff_t start, unsigned int nr_entries, struct page **entries, pgoff_t *indices, - bool frontswap) + unsigned int type, bool frontswap) { XA_STATE(xas, &mapping->i_pages, start); struct page *page; + swp_entry_t entry; unsigned int ret = 0; if (!nr_entries) @@ -1116,13 +1117,12 @@ static int shmem_find_swap_entries(struct address_space *mapping, if (!xa_is_value(page)) continue; - if (frontswap) { - swp_entry_t entry = radix_to_swp_entry(page); - - if (!frontswap_test(swap_info[swp_type(entry)], - swp_offset(entry))) - continue; - } + entry = radix_to_swp_entry(page); + if (swp_type(entry) != type) + continue; + if (frontswap && + !frontswap_test(swap_info[type], swp_offset(entry))) + continue; indices[ret] = xas.xa_index; entries[ret] = page; @@ -1194,7 +1194,7 @@ static int shmem_unuse_inode(struct inode *inode, unsigned int type, pvec.nr = shmem_find_swap_entries(mapping, start, nr_entries, pvec.pages, indices, - frontswap); + type, frontswap); if (pvec.nr == 0) { ret = 0; break; -- cgit v1.2.3 From dd862deb151aad2548e72b077a82ad3aa91b715f Mon Sep 17 00:00:00 2001 From: Hugh Dickins Date: Thu, 18 Apr 2019 17:50:02 -0700 Subject: mm: swapoff: remove too limiting SWAP_UNUSE_MAX_TRIES SWAP_UNUSE_MAX_TRIES 3 appeared to work well in earlier testing, but further testing has proved it to be a source of unnecessary swapoff EBUSY failures (which can then be followed by unmount EBUSY failures). When mmget_not_zero() or shmem's igrab() fails, there is an mm exiting or inode being evicted, freeing up swap independent of try_to_unuse(). Those typically completed much sooner than the old quadratic swapoff, but now it's more common that swapoff may need to wait for them. It's possible to move those cases from init_mm.mmlist and shmem_swaplist to separate "exiting" swaplists, and try_to_unuse() then wait for those lists to be emptied; but we've not bothered with that in the past, and don't want to risk missing some other forgotten case. So just revert to cycling around until the swap is gone, without any retries limit. Link: http://lkml.kernel.org/r/alpine.LSU.2.11.1904081256170.1523@eggly.anvils Fixes: b56a2d8af914 ("mm: rid swapoff of quadratic complexity") Signed-off-by: Hugh Dickins Cc: "Alex Xu (Hello71)" Cc: Huang Ying Cc: Kelley Nielsen Cc: Konstantin Khlebnikov Cc: Rik van Riel Cc: Vineeth Pillai Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/swapfile.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/mm/swapfile.c b/mm/swapfile.c index 2b8d9c3fbb47..bf4ef2e40f23 100644 --- a/mm/swapfile.c +++ b/mm/swapfile.c @@ -2023,7 +2023,6 @@ static unsigned int find_next_to_unuse(struct swap_info_struct *si, * If the boolean frontswap is true, only unuse pages_to_unuse pages; * pages_to_unuse==0 means all pages; ignored if frontswap is false */ -#define SWAP_UNUSE_MAX_TRIES 3 int try_to_unuse(unsigned int type, bool frontswap, unsigned long pages_to_unuse) { @@ -2035,7 +2034,6 @@ int try_to_unuse(unsigned int type, bool frontswap, struct page *page; swp_entry_t entry; unsigned int i; - int retries = 0; if (!si->inuse_pages) return 0; @@ -2117,14 +2115,16 @@ retry: * If yes, we would need to do retry the unuse logic again. * Under global memory pressure, swap entries can be reinserted back * into process space after the mmlist loop above passes over them. - * Its not worth continuosuly retrying to unuse the swap in this case. - * So we try SWAP_UNUSE_MAX_TRIES times. + * + * Limit the number of retries? No: when shmem_unuse()'s igrab() fails, + * a shmem inode using swap is being evicted; and when mmget_not_zero() + * above fails, that mm is likely to be freeing swap from exit_mmap(). + * Both proceed at their own independent pace: we could move them to + * separate lists, and wait for those lists to be emptied; but it's + * easier and more robust (though cpu-intensive) just to keep retrying. */ - if (++retries >= SWAP_UNUSE_MAX_TRIES) - retval = -EBUSY; - else if (si->inuse_pages) + if (si->inuse_pages) goto retry; - out: return (retval == FRONTSWAP_PAGES_UNUSED) ? 0 : retval; } -- cgit v1.2.3 From 64165b1affc5bc16231ac971e66aae7d68d57f2c Mon Sep 17 00:00:00 2001 From: Hugh Dickins Date: Thu, 18 Apr 2019 17:50:09 -0700 Subject: mm: swapoff: take notice of completion sooner The old try_to_unuse() implementation was driven by find_next_to_unuse(), which terminated as soon as all the swap had been freed. Add inuse_pages checks now (alongside signal_pending()) to stop scanning mms and swap_map once finished. The same ought to be done in shmem_unuse() too, but never was before, and needs a different interface: so leave it as is for now. Link: http://lkml.kernel.org/r/alpine.LSU.2.11.1904081258200.1523@eggly.anvils Fixes: b56a2d8af914 ("mm: rid swapoff of quadratic complexity") Signed-off-by: Hugh Dickins Cc: "Alex Xu (Hello71)" Cc: Huang Ying Cc: Kelley Nielsen Cc: Konstantin Khlebnikov Cc: Rik van Riel Cc: Vineeth Pillai Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/swapfile.c | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/mm/swapfile.c b/mm/swapfile.c index bf4ef2e40f23..71383625a582 100644 --- a/mm/swapfile.c +++ b/mm/swapfile.c @@ -2051,11 +2051,9 @@ retry: spin_lock(&mmlist_lock); p = &init_mm.mmlist; - while ((p = p->next) != &init_mm.mmlist) { - if (signal_pending(current)) { - retval = -EINTR; - break; - } + while (si->inuse_pages && + !signal_pending(current) && + (p = p->next) != &init_mm.mmlist) { mm = list_entry(p, struct mm_struct, mmlist); if (!mmget_not_zero(mm)) @@ -2082,7 +2080,9 @@ retry: mmput(prev_mm); i = 0; - while ((i = find_next_to_unuse(si, i, frontswap)) != 0) { + while (si->inuse_pages && + !signal_pending(current) && + (i = find_next_to_unuse(si, i, frontswap)) != 0) { entry = swp_entry(type, i); page = find_get_page(swap_address_space(entry), i); @@ -2123,8 +2123,11 @@ retry: * separate lists, and wait for those lists to be emptied; but it's * easier and more robust (though cpu-intensive) just to keep retrying. */ - if (si->inuse_pages) - goto retry; + if (si->inuse_pages) { + if (!signal_pending(current)) + goto retry; + retval = -EINTR; + } out: return (retval == FRONTSWAP_PAGES_UNUSED) ? 0 : retval; } -- cgit v1.2.3 From af53d3e9e04024885de5b4fda51e5fa362ae2bd8 Mon Sep 17 00:00:00 2001 From: Hugh Dickins Date: Thu, 18 Apr 2019 17:50:13 -0700 Subject: mm: swapoff: shmem_unuse() stop eviction without igrab() The igrab() in shmem_unuse() looks good, but we forgot that it gives no protection against concurrent unmounting: a point made by Konstantin Khlebnikov eight years ago, and then fixed in 2.6.39 by 778dd893ae78 ("tmpfs: fix race between umount and swapoff"). The current 5.1-rc swapoff is liable to hit "VFS: Busy inodes after unmount of tmpfs. Self-destruct in 5 seconds. Have a nice day..." followed by GPF. Once again, give up on using igrab(); but don't go back to making such heavy-handed use of shmem_swaplist_mutex as last time: that would spoil the new design, and I expect could deadlock inside shmem_swapin_page(). Instead, shmem_unuse() just raise a "stop_eviction" count in the shmem- specific inode, and shmem_evict_inode() wait for that to go down to 0. Call it "stop_eviction" rather than "swapoff_busy" because it can be put to use for others later (huge tmpfs patches expect to use it). That simplifies shmem_unuse(), protecting it from both unlink and unmount; and in practice lets it locate all the swap in its first try. But do not rely on that: there's still a theoretical case, when shmem_writepage() might have been preempted after its get_swap_page(), before making the swap entry visible to swapoff. [hughd@google.com: remove incorrect list_del()] Link: http://lkml.kernel.org/r/alpine.LSU.2.11.1904091133570.1898@eggly.anvils Link: http://lkml.kernel.org/r/alpine.LSU.2.11.1904081259400.1523@eggly.anvils Fixes: b56a2d8af914 ("mm: rid swapoff of quadratic complexity") Signed-off-by: Hugh Dickins Cc: "Alex Xu (Hello71)" Cc: Huang Ying Cc: Kelley Nielsen Cc: Konstantin Khlebnikov Cc: Rik van Riel Cc: Vineeth Pillai Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/shmem_fs.h | 1 + mm/shmem.c | 40 ++++++++++++++++++---------------------- mm/swapfile.c | 11 +++++------ 3 files changed, 24 insertions(+), 28 deletions(-) diff --git a/include/linux/shmem_fs.h b/include/linux/shmem_fs.h index f3fb1edb3526..20d815a33145 100644 --- a/include/linux/shmem_fs.h +++ b/include/linux/shmem_fs.h @@ -21,6 +21,7 @@ struct shmem_inode_info { struct list_head swaplist; /* chain of maybes on swap */ struct shared_policy policy; /* NUMA memory alloc policy */ struct simple_xattrs xattrs; /* list of xattrs */ + atomic_t stop_eviction; /* hold when working on inode */ struct inode vfs_inode; }; diff --git a/mm/shmem.c b/mm/shmem.c index 859e8628071f..2275a0ff7c30 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -1081,9 +1081,14 @@ static void shmem_evict_inode(struct inode *inode) } spin_unlock(&sbinfo->shrinklist_lock); } - if (!list_empty(&info->swaplist)) { + while (!list_empty(&info->swaplist)) { + /* Wait while shmem_unuse() is scanning this inode... */ + wait_var_event(&info->stop_eviction, + !atomic_read(&info->stop_eviction)); mutex_lock(&shmem_swaplist_mutex); - list_del_init(&info->swaplist); + /* ...but beware of the race if we peeked too early */ + if (!atomic_read(&info->stop_eviction)) + list_del_init(&info->swaplist); mutex_unlock(&shmem_swaplist_mutex); } } @@ -1227,36 +1232,27 @@ int shmem_unuse(unsigned int type, bool frontswap, unsigned long *fs_pages_to_unuse) { struct shmem_inode_info *info, *next; - struct inode *inode; - struct inode *prev_inode = NULL; int error = 0; if (list_empty(&shmem_swaplist)) return 0; mutex_lock(&shmem_swaplist_mutex); - - /* - * The extra refcount on the inode is necessary to safely dereference - * p->next after re-acquiring the lock. New shmem inodes with swap - * get added to the end of the list and we will scan them all. - */ list_for_each_entry_safe(info, next, &shmem_swaplist, swaplist) { if (!info->swapped) { list_del_init(&info->swaplist); continue; } - - inode = igrab(&info->vfs_inode); - if (!inode) - continue; - + /* + * Drop the swaplist mutex while searching the inode for swap; + * but before doing so, make sure shmem_evict_inode() will not + * remove placeholder inode from swaplist, nor let it be freed + * (igrab() would protect from unlink, but not from unmount). + */ + atomic_inc(&info->stop_eviction); mutex_unlock(&shmem_swaplist_mutex); - if (prev_inode) - iput(prev_inode); - prev_inode = inode; - error = shmem_unuse_inode(inode, type, frontswap, + error = shmem_unuse_inode(&info->vfs_inode, type, frontswap, fs_pages_to_unuse); cond_resched(); @@ -1264,14 +1260,13 @@ int shmem_unuse(unsigned int type, bool frontswap, next = list_next_entry(info, swaplist); if (!info->swapped) list_del_init(&info->swaplist); + if (atomic_dec_and_test(&info->stop_eviction)) + wake_up_var(&info->stop_eviction); if (error) break; } mutex_unlock(&shmem_swaplist_mutex); - if (prev_inode) - iput(prev_inode); - return error; } @@ -2238,6 +2233,7 @@ static struct inode *shmem_get_inode(struct super_block *sb, const struct inode info = SHMEM_I(inode); memset(info, 0, (char *)inode - (char *)info); spin_lock_init(&info->lock); + atomic_set(&info->stop_eviction, 0); info->seals = F_SEAL_SEAL; info->flags = flags & VM_NORESERVE; INIT_LIST_HEAD(&info->shrinklist); diff --git a/mm/swapfile.c b/mm/swapfile.c index 71383625a582..cf63b5f01adf 100644 --- a/mm/swapfile.c +++ b/mm/swapfile.c @@ -2116,12 +2116,11 @@ retry: * Under global memory pressure, swap entries can be reinserted back * into process space after the mmlist loop above passes over them. * - * Limit the number of retries? No: when shmem_unuse()'s igrab() fails, - * a shmem inode using swap is being evicted; and when mmget_not_zero() - * above fails, that mm is likely to be freeing swap from exit_mmap(). - * Both proceed at their own independent pace: we could move them to - * separate lists, and wait for those lists to be emptied; but it's - * easier and more robust (though cpu-intensive) just to keep retrying. + * Limit the number of retries? No: when mmget_not_zero() above fails, + * that mm is likely to be freeing swap from exit_mmap(), which proceeds + * at its own independent pace; and even shmem_writepage() could have + * been preempted after get_swap_page(), temporarily hiding that swap. + * It's easy and robust (though cpu-intensive) just to keep retrying. */ if (si->inuse_pages) { if (!signal_pending(current)) -- cgit v1.2.3 From 37803841c92d7b327147e0b1be3436423189e1cf Mon Sep 17 00:00:00 2001 From: zhong jiang Date: Thu, 18 Apr 2019 17:50:16 -0700 Subject: mm/memory_hotplug: do not unlock after failing to take the device_hotplug_lock When adding memory by probing a memory block in the sysfs interface, there is an obvious issue where we will unlock the device_hotplug_lock when we failed to takes it. That issue was introduced in 8df1d0e4a265 ("mm/memory_hotplug: make add_memory() take the device_hotplug_lock"). We should drop out in time when failing to take the device_hotplug_lock. Link: http://lkml.kernel.org/r/1554696437-9593-1-git-send-email-zhongjiang@huawei.com Fixes: 8df1d0e4a265 ("mm/memory_hotplug: make add_memory() take the device_hotplug_lock") Signed-off-by: zhong jiang Reported-by: Yang yingliang Acked-by: Michal Hocko Reviewed-by: David Hildenbrand Reviewed-by: Oscar Salvador Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/base/memory.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/base/memory.c b/drivers/base/memory.c index cb8347500ce2..e49028a60429 100644 --- a/drivers/base/memory.c +++ b/drivers/base/memory.c @@ -506,7 +506,7 @@ static ssize_t probe_store(struct device *dev, struct device_attribute *attr, ret = lock_device_hotplug_sysfs(); if (ret) - goto out; + return ret; nid = memory_add_physaddr_to_nid(phys_addr); ret = __add_memory(nid, phys_addr, -- cgit v1.2.3 From e8277b3b52240ec1caad8e6df278863e4bf42eac Mon Sep 17 00:00:00 2001 From: Konstantin Khlebnikov Date: Thu, 18 Apr 2019 17:50:20 -0700 Subject: mm/vmstat.c: fix /proc/vmstat format for CONFIG_DEBUG_TLBFLUSH=y CONFIG_SMP=n Commit 58bc4c34d249 ("mm/vmstat.c: skip NR_TLB_REMOTE_FLUSH* properly") depends on skipping vmstat entries with empty name introduced in 7aaf77272358 ("mm: don't show nr_indirectly_reclaimable in /proc/vmstat") but reverted in b29940c1abd7 ("mm: rename and change semantics of nr_indirectly_reclaimable_bytes"). So skipping no longer works and /proc/vmstat has misformatted lines " 0". This patch simply shows debug counters "nr_tlb_remote_*" for UP. Link: http://lkml.kernel.org/r/155481488468.467.4295519102880913454.stgit@buzz Fixes: 58bc4c34d249 ("mm/vmstat.c: skip NR_TLB_REMOTE_FLUSH* properly") Signed-off-by: Konstantin Khlebnikov Acked-by: Vlastimil Babka Cc: Roman Gushchin Cc: Jann Horn Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/vmstat.c | 5 ----- 1 file changed, 5 deletions(-) diff --git a/mm/vmstat.c b/mm/vmstat.c index 36b56f858f0f..a7d493366a65 100644 --- a/mm/vmstat.c +++ b/mm/vmstat.c @@ -1274,13 +1274,8 @@ const char * const vmstat_text[] = { #endif #endif /* CONFIG_MEMORY_BALLOON */ #ifdef CONFIG_DEBUG_TLBFLUSH -#ifdef CONFIG_SMP "nr_tlb_remote_flush", "nr_tlb_remote_flush_received", -#else - "", /* nr_tlb_remote_flush */ - "", /* nr_tlb_remote_flush_received */ -#endif /* CONFIG_SMP */ "nr_tlb_local_flush_all", "nr_tlb_local_flush_one", #endif /* CONFIG_DEBUG_TLBFLUSH */ -- cgit v1.2.3 From 8cd40d1d41ffc305d9aed77937896e1712b2490c Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Thu, 18 Apr 2019 17:50:23 -0700 Subject: proc: fix map_files test on F29 F29 bans mapping first 64KB even for root making test fail. Iterate from address 0 until mmap() works. Gentoo (root): openat(AT_FDCWD, "/dev/zero", O_RDONLY) = 3 mmap(NULL, 4096, PROT_NONE, MAP_PRIVATE|MAP_FIXED, 3, 0) = 0 Gentoo (non-root): openat(AT_FDCWD, "/dev/zero", O_RDONLY) = 3 mmap(NULL, 4096, PROT_NONE, MAP_PRIVATE|MAP_FIXED, 3, 0) = -1 EPERM (Operation not permitted) mmap(0x1000, 4096, PROT_NONE, MAP_PRIVATE|MAP_FIXED, 3, 0) = 0x1000 F29 (root): openat(AT_FDCWD, "/dev/zero", O_RDONLY) = 3 mmap(NULL, 4096, PROT_NONE, MAP_PRIVATE|MAP_FIXED, 3, 0) = -1 EACCES (Permission denied) mmap(0x1000, 4096, PROT_NONE, MAP_PRIVATE|MAP_FIXED, 3, 0) = -1 EACCES (Permission denied) mmap(0x2000, 4096, PROT_NONE, MAP_PRIVATE|MAP_FIXED, 3, 0) = -1 EACCES (Permission denied) mmap(0x3000, 4096, PROT_NONE, MAP_PRIVATE|MAP_FIXED, 3, 0) = -1 EACCES (Permission denied) mmap(0x4000, 4096, PROT_NONE, MAP_PRIVATE|MAP_FIXED, 3, 0) = -1 EACCES (Permission denied) mmap(0x5000, 4096, PROT_NONE, MAP_PRIVATE|MAP_FIXED, 3, 0) = -1 EACCES (Permission denied) mmap(0x6000, 4096, PROT_NONE, MAP_PRIVATE|MAP_FIXED, 3, 0) = -1 EACCES (Permission denied) mmap(0x7000, 4096, PROT_NONE, MAP_PRIVATE|MAP_FIXED, 3, 0) = -1 EACCES (Permission denied) mmap(0x8000, 4096, PROT_NONE, MAP_PRIVATE|MAP_FIXED, 3, 0) = -1 EACCES (Permission denied) mmap(0x9000, 4096, PROT_NONE, MAP_PRIVATE|MAP_FIXED, 3, 0) = -1 EACCES (Permission denied) mmap(0xa000, 4096, PROT_NONE, MAP_PRIVATE|MAP_FIXED, 3, 0) = -1 EACCES (Permission denied) mmap(0xb000, 4096, PROT_NONE, MAP_PRIVATE|MAP_FIXED, 3, 0) = -1 EACCES (Permission denied) mmap(0xc000, 4096, PROT_NONE, MAP_PRIVATE|MAP_FIXED, 3, 0) = -1 EACCES (Permission denied) mmap(0xd000, 4096, PROT_NONE, MAP_PRIVATE|MAP_FIXED, 3, 0) = -1 EACCES (Permission denied) mmap(0xe000, 4096, PROT_NONE, MAP_PRIVATE|MAP_FIXED, 3, 0) = -1 EACCES (Permission denied) mmap(0xf000, 4096, PROT_NONE, MAP_PRIVATE|MAP_FIXED, 3, 0) = -1 EACCES (Permission denied) mmap(0x10000, 4096, PROT_NONE, MAP_PRIVATE|MAP_FIXED, 3, 0) = 0x10000 Now all proc tests succeed on F29 if run as root, at last! Link: http://lkml.kernel.org/r/20190414123612.GB12971@avx2 Signed-off-by: Alexey Dobriyan Cc: Randy Dunlap Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- .../testing/selftests/proc/proc-self-map-files-002.c | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/tools/testing/selftests/proc/proc-self-map-files-002.c b/tools/testing/selftests/proc/proc-self-map-files-002.c index 762cb01f2ca7..47b7473dedef 100644 --- a/tools/testing/selftests/proc/proc-self-map-files-002.c +++ b/tools/testing/selftests/proc/proc-self-map-files-002.c @@ -46,12 +46,9 @@ static void fail(const char *fmt, unsigned long a, unsigned long b) int main(void) { - const unsigned int PAGE_SIZE = sysconf(_SC_PAGESIZE); -#ifdef __arm__ - unsigned long va = 2 * PAGE_SIZE; -#else - unsigned long va = 0; -#endif + const int PAGE_SIZE = sysconf(_SC_PAGESIZE); + const unsigned long va_max = 1UL << 32; + unsigned long va; void *p; int fd; unsigned long a, b; @@ -60,10 +57,13 @@ int main(void) if (fd == -1) return 1; - p = mmap((void *)va, PAGE_SIZE, PROT_NONE, MAP_PRIVATE|MAP_FILE|MAP_FIXED, fd, 0); - if (p == MAP_FAILED) { - if (errno == EPERM) - return 4; + for (va = 0; va < va_max; va += PAGE_SIZE) { + p = mmap((void *)va, PAGE_SIZE, PROT_NONE, MAP_PRIVATE|MAP_FILE|MAP_FIXED, fd, 0); + if (p == (void *)va) + break; + } + if (va == va_max) { + fprintf(stderr, "error: mmap doesn't like you\n"); return 1; } -- cgit v1.2.3 From 68545aa1cda847c4fdda7e49331807f99f799838 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Thu, 18 Apr 2019 17:50:27 -0700 Subject: proc: fixup proc-pid-vm test Silly sizeof(pointer) vs sizeof(uint8_t[]) bug. Link: http://lkml.kernel.org/r/20190414123009.GA12971@avx2 Fixes: e483b0208784 ("proc: test /proc/*/maps, smaps, smaps_rollup, statm") Signed-off-by: Alexey Dobriyan Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- tools/testing/selftests/proc/proc-pid-vm.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/testing/selftests/proc/proc-pid-vm.c b/tools/testing/selftests/proc/proc-pid-vm.c index 7202bbac976e..853aa164a401 100644 --- a/tools/testing/selftests/proc/proc-pid-vm.c +++ b/tools/testing/selftests/proc/proc-pid-vm.c @@ -187,8 +187,8 @@ static int make_exe(const uint8_t *payload, size_t len) ph.p_offset = 0; ph.p_vaddr = VADDR; ph.p_paddr = 0; - ph.p_filesz = sizeof(struct elf64_hdr) + sizeof(struct elf64_phdr) + sizeof(payload); - ph.p_memsz = sizeof(struct elf64_hdr) + sizeof(struct elf64_phdr) + sizeof(payload); + ph.p_filesz = sizeof(struct elf64_hdr) + sizeof(struct elf64_phdr) + len; + ph.p_memsz = sizeof(struct elf64_hdr) + sizeof(struct elf64_phdr) + len; ph.p_align = 4096; fd = openat(AT_FDCWD, "/tmp", O_WRONLY|O_EXCL|O_TMPFILE, 0700); -- cgit v1.2.3 From 1a9f219157b22d0ffb340a9c5f431afd02cd2cf3 Mon Sep 17 00:00:00 2001 From: Qian Cai Date: Thu, 18 Apr 2019 17:50:30 -0700 Subject: mm/hotplug: treat CMA pages as unmovable has_unmovable_pages() is used by allocating CMA and gigantic pages as well as the memory hotplug. The later doesn't know how to offline CMA pool properly now, but if an unused (free) CMA page is encountered, then has_unmovable_pages() happily considers it as a free memory and propagates this up the call chain. Memory offlining code then frees the page without a proper CMA tear down which leads to an accounting issues. Moreover if the same memory range is onlined again then the memory never gets back to the CMA pool. State after memory offline: # grep cma /proc/vmstat nr_free_cma 205824 # cat /sys/kernel/debug/cma/cma-kvm_cma/count 209920 Also, kmemleak still think those memory address are reserved below but have already been used by the buddy allocator after onlining. This patch fixes the situation by treating CMA pageblocks as unmovable except when has_unmovable_pages() is called as part of CMA allocation. Offlined Pages 4096 kmemleak: Cannot insert 0xc000201f7d040008 into the object search tree (overlaps existing) Call Trace: dump_stack+0xb0/0xf4 (unreliable) create_object+0x344/0x380 __kmalloc_node+0x3ec/0x860 kvmalloc_node+0x58/0x110 seq_read+0x41c/0x620 __vfs_read+0x3c/0x70 vfs_read+0xbc/0x1a0 ksys_read+0x7c/0x140 system_call+0x5c/0x70 kmemleak: Kernel memory leak detector disabled kmemleak: Object 0xc000201cc8000000 (size 13757317120): kmemleak: comm "swapper/0", pid 0, jiffies 4294937297 kmemleak: min_count = -1 kmemleak: count = 0 kmemleak: flags = 0x5 kmemleak: checksum = 0 kmemleak: backtrace: cma_declare_contiguous+0x2a4/0x3b0 kvm_cma_reserve+0x11c/0x134 setup_arch+0x300/0x3f8 start_kernel+0x9c/0x6e8 start_here_common+0x1c/0x4b0 kmemleak: Automatic memory scanning thread ended [cai@lca.pw: use is_migrate_cma_page() and update commit log] Link: http://lkml.kernel.org/r/20190416170510.20048-1-cai@lca.pw Link: http://lkml.kernel.org/r/20190413002623.8967-1-cai@lca.pw Signed-off-by: Qian Cai Acked-by: Michal Hocko Acked-by: Vlastimil Babka Reviewed-by: Oscar Salvador Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/page_alloc.c | 30 ++++++++++++++++++------------ 1 file changed, 18 insertions(+), 12 deletions(-) diff --git a/mm/page_alloc.c b/mm/page_alloc.c index d96ca5bc555b..c6ce20aaf80b 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -8005,7 +8005,10 @@ void *__init alloc_large_system_hash(const char *tablename, bool has_unmovable_pages(struct zone *zone, struct page *page, int count, int migratetype, int flags) { - unsigned long pfn, iter, found; + unsigned long found; + unsigned long iter = 0; + unsigned long pfn = page_to_pfn(page); + const char *reason = "unmovable page"; /* * TODO we could make this much more efficient by not checking every @@ -8015,17 +8018,20 @@ bool has_unmovable_pages(struct zone *zone, struct page *page, int count, * can still lead to having bootmem allocations in zone_movable. */ - /* - * CMA allocations (alloc_contig_range) really need to mark isolate - * CMA pageblocks even when they are not movable in fact so consider - * them movable here. - */ - if (is_migrate_cma(migratetype) && - is_migrate_cma(get_pageblock_migratetype(page))) - return false; + if (is_migrate_cma_page(page)) { + /* + * CMA allocations (alloc_contig_range) really need to mark + * isolate CMA pageblocks even when they are not movable in fact + * so consider them movable here. + */ + if (is_migrate_cma(migratetype)) + return false; + + reason = "CMA page"; + goto unmovable; + } - pfn = page_to_pfn(page); - for (found = 0, iter = 0; iter < pageblock_nr_pages; iter++) { + for (found = 0; iter < pageblock_nr_pages; iter++) { unsigned long check = pfn + iter; if (!pfn_valid_within(check)) @@ -8105,7 +8111,7 @@ bool has_unmovable_pages(struct zone *zone, struct page *page, int count, unmovable: WARN_ON_ONCE(zone_idx(zone) == ZONE_MOVABLE); if (flags & REPORT_FAILURE) - dump_page(pfn_to_page(pfn+iter), "unmovable page"); + dump_page(pfn_to_page(pfn + iter), reason); return true; } -- cgit v1.2.3 From 3b991208b897f52507168374033771a984b947b1 Mon Sep 17 00:00:00 2001 From: Johannes Weiner Date: Thu, 18 Apr 2019 17:50:34 -0700 Subject: mm: fix inactive list balancing between NUMA nodes and cgroups During !CONFIG_CGROUP reclaim, we expand the inactive list size if it's thrashing on the node that is about to be reclaimed. But when cgroups are enabled, we suddenly ignore the node scope and use the cgroup scope only. The result is that pressure bleeds between NUMA nodes depending on whether cgroups are merely compiled into Linux. This behavioral difference is unexpected and undesirable. When the refault adaptivity of the inactive list was first introduced, there were no statistics at the lruvec level - the intersection of node and memcg - so it was better than nothing. But now that we have that infrastructure, use lruvec_page_state() to make the list balancing decision always NUMA aware. [hannes@cmpxchg.org: fix bisection hole] Link: http://lkml.kernel.org/r/20190417155241.GB23013@cmpxchg.org Link: http://lkml.kernel.org/r/20190412144438.2645-1-hannes@cmpxchg.org Fixes: 2a2e48854d70 ("mm: vmscan: fix IO/refault regression in cache workingset transition") Signed-off-by: Johannes Weiner Reviewed-by: Shakeel Butt Cc: Roman Gushchin Cc: Michal Hocko Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/vmscan.c | 29 +++++++++-------------------- 1 file changed, 9 insertions(+), 20 deletions(-) diff --git a/mm/vmscan.c b/mm/vmscan.c index a5ad0b35ab8e..a815f73ee4d5 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -2176,7 +2176,6 @@ static void shrink_active_list(unsigned long nr_to_scan, * 10TB 320 32GB */ static bool inactive_list_is_low(struct lruvec *lruvec, bool file, - struct mem_cgroup *memcg, struct scan_control *sc, bool actual_reclaim) { enum lru_list active_lru = file * LRU_FILE + LRU_ACTIVE; @@ -2197,16 +2196,12 @@ static bool inactive_list_is_low(struct lruvec *lruvec, bool file, inactive = lruvec_lru_size(lruvec, inactive_lru, sc->reclaim_idx); active = lruvec_lru_size(lruvec, active_lru, sc->reclaim_idx); - if (memcg) - refaults = memcg_page_state(memcg, WORKINGSET_ACTIVATE); - else - refaults = node_page_state(pgdat, WORKINGSET_ACTIVATE); - /* * When refaults are being observed, it means a new workingset * is being established. Disable active list protection to get * rid of the stale workingset quickly. */ + refaults = lruvec_page_state(lruvec, WORKINGSET_ACTIVATE); if (file && actual_reclaim && lruvec->refaults != refaults) { inactive_ratio = 0; } else { @@ -2227,12 +2222,10 @@ static bool inactive_list_is_low(struct lruvec *lruvec, bool file, } static unsigned long shrink_list(enum lru_list lru, unsigned long nr_to_scan, - struct lruvec *lruvec, struct mem_cgroup *memcg, - struct scan_control *sc) + struct lruvec *lruvec, struct scan_control *sc) { if (is_active_lru(lru)) { - if (inactive_list_is_low(lruvec, is_file_lru(lru), - memcg, sc, true)) + if (inactive_list_is_low(lruvec, is_file_lru(lru), sc, true)) shrink_active_list(nr_to_scan, lruvec, sc, lru); return 0; } @@ -2332,7 +2325,7 @@ static void get_scan_count(struct lruvec *lruvec, struct mem_cgroup *memcg, * anonymous pages on the LRU in eligible zones. * Otherwise, the small LRU gets thrashed. */ - if (!inactive_list_is_low(lruvec, false, memcg, sc, false) && + if (!inactive_list_is_low(lruvec, false, sc, false) && lruvec_lru_size(lruvec, LRU_INACTIVE_ANON, sc->reclaim_idx) >> sc->priority) { scan_balance = SCAN_ANON; @@ -2350,7 +2343,7 @@ static void get_scan_count(struct lruvec *lruvec, struct mem_cgroup *memcg, * lruvec even if it has plenty of old anonymous pages unless the * system is under heavy pressure. */ - if (!inactive_list_is_low(lruvec, true, memcg, sc, false) && + if (!inactive_list_is_low(lruvec, true, sc, false) && lruvec_lru_size(lruvec, LRU_INACTIVE_FILE, sc->reclaim_idx) >> sc->priority) { scan_balance = SCAN_FILE; goto out; @@ -2503,7 +2496,7 @@ static void shrink_node_memcg(struct pglist_data *pgdat, struct mem_cgroup *memc nr[lru] -= nr_to_scan; nr_reclaimed += shrink_list(lru, nr_to_scan, - lruvec, memcg, sc); + lruvec, sc); } } @@ -2570,7 +2563,7 @@ static void shrink_node_memcg(struct pglist_data *pgdat, struct mem_cgroup *memc * Even if we did not try to evict anon pages at all, we want to * rebalance the anon lru active/inactive ratio. */ - if (inactive_list_is_low(lruvec, false, memcg, sc, true)) + if (inactive_list_is_low(lruvec, false, sc, true)) shrink_active_list(SWAP_CLUSTER_MAX, lruvec, sc, LRU_ACTIVE_ANON); } @@ -2969,12 +2962,8 @@ static void snapshot_refaults(struct mem_cgroup *root_memcg, pg_data_t *pgdat) unsigned long refaults; struct lruvec *lruvec; - if (memcg) - refaults = memcg_page_state(memcg, WORKINGSET_ACTIVATE); - else - refaults = node_page_state(pgdat, WORKINGSET_ACTIVATE); - lruvec = mem_cgroup_lruvec(pgdat, memcg); + refaults = lruvec_page_state(lruvec, WORKINGSET_ACTIVATE); lruvec->refaults = refaults; } while ((memcg = mem_cgroup_iter(root_memcg, memcg, NULL))); } @@ -3339,7 +3328,7 @@ static void age_active_anon(struct pglist_data *pgdat, do { struct lruvec *lruvec = mem_cgroup_lruvec(pgdat, memcg); - if (inactive_list_is_low(lruvec, false, memcg, sc, true)) + if (inactive_list_is_low(lruvec, false, sc, true)) shrink_active_list(SWAP_CLUSTER_MAX, lruvec, sc, LRU_ACTIVE_ANON); -- cgit v1.2.3 From 40453c4f9bb6d166a56a102a8c51dd24b0801557 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Thu, 18 Apr 2019 17:50:37 -0700 Subject: kcov: improve CONFIG_ARCH_HAS_KCOV help text The help text for CONFIG_ARCH_HAS_KCOV is stale, and describes the feature as being enabled only for x86_64, when it is now enabled for several architectures, including arm, arm64, powerpc, and s390. Let's remove that stale help text, and update it along the lines of hat for ARCH_HAS_FORTIFY_SOURCE, better describing when an architecture should select CONFIG_ARCH_HAS_KCOV. Link: http://lkml.kernel.org/r/20190412102733.5154-1-mark.rutland@arm.com Signed-off-by: Mark Rutland Acked-by: Dmitry Vyukov Cc: Kees Cook Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/Kconfig.debug | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index 0d9e81779e37..00dbcdbc9a0d 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -753,9 +753,9 @@ endmenu # "Memory Debugging" config ARCH_HAS_KCOV bool help - KCOV does not have any arch-specific code, but currently it is enabled - only for x86_64. KCOV requires testing on other archs, and most likely - disabling of instrumentation for some early boot code. + An architecture should select this when it can successfully + build and run with CONFIG_KCOV. This typically requires + disabling instrumentation for some early boot code. config CC_HAS_SANCOV_TRACE_PC def_bool $(cc-option,-fsanitize-coverage=trace-pc) -- cgit v1.2.3 From 8f4a8c12cafe54535f334a09be7ec7f236134764 Mon Sep 17 00:00:00 2001 From: Sergey Senozhatsky Date: Thu, 18 Apr 2019 17:50:41 -0700 Subject: kernel/watchdog_hld.c: hard lockup message should end with a newline Separate print_modules() and hard lockup error message. Before the patch: NMI watchdog: Watchdog detected hard LOCKUP on cpu 1Modules linked in: nls_cp437 Link: http://lkml.kernel.org/r/20190412062557.2700-1-sergey.senozhatsky@gmail.com Signed-off-by: Sergey Senozhatsky Cc: Peter Zijlstra Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/watchdog_hld.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/kernel/watchdog_hld.c b/kernel/watchdog_hld.c index 71381168dede..247bf0b1582c 100644 --- a/kernel/watchdog_hld.c +++ b/kernel/watchdog_hld.c @@ -135,7 +135,8 @@ static void watchdog_overflow_callback(struct perf_event *event, if (__this_cpu_read(hard_watchdog_warn) == true) return; - pr_emerg("Watchdog detected hard LOCKUP on cpu %d", this_cpu); + pr_emerg("Watchdog detected hard LOCKUP on cpu %d\n", + this_cpu); print_modules(); print_irqtrace_events(current); if (regs) -- cgit v1.2.3 From 6041186a32585fc7a1d0f6cfe2f138b05fdc3c82 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Thu, 18 Apr 2019 17:50:44 -0700 Subject: init: initialize jump labels before command line option parsing When a module option, or core kernel argument, toggles a static-key it requires jump labels to be initialized early. While x86, PowerPC, and ARM64 arrange for jump_label_init() to be called before parse_args(), ARM does not. Kernel command line: rdinit=/sbin/init page_alloc.shuffle=1 panic=-1 console=ttyAMA0,115200 page_alloc.shuffle=1 ------------[ cut here ]------------ WARNING: CPU: 0 PID: 0 at ./include/linux/jump_label.h:303 page_alloc_shuffle+0x12c/0x1ac static_key_enable(): static key 'page_alloc_shuffle_key+0x0/0x4' used before call to jump_label_init() Modules linked in: CPU: 0 PID: 0 Comm: swapper Not tainted 5.1.0-rc4-next-20190410-00003-g3367c36ce744 #1 Hardware name: ARM Integrator/CP (Device Tree) [] (unwind_backtrace) from [] (show_stack+0x10/0x18) [] (show_stack) from [] (dump_stack+0x18/0x24) [] (dump_stack) from [] (__warn+0xe0/0x108) [] (__warn) from [] (warn_slowpath_fmt+0x44/0x6c) [] (warn_slowpath_fmt) from [] (page_alloc_shuffle+0x12c/0x1ac) [] (page_alloc_shuffle) from [] (shuffle_store+0x28/0x48) [] (shuffle_store) from [] (parse_args+0x1f4/0x350) [] (parse_args) from [] (start_kernel+0x1c0/0x488) Move the fallback call to jump_label_init() to occur before parse_args(). The redundant calls to jump_label_init() in other archs are left intact in case they have static key toggling use cases that are even earlier than option parsing. Link: http://lkml.kernel.org/r/155544804466.1032396.13418949511615676665.stgit@dwillia2-desk3.amr.corp.intel.com Signed-off-by: Dan Williams Reported-by: Guenter Roeck Reviewed-by: Kees Cook Cc: Mathieu Desnoyers Cc: Thomas Gleixner Cc: Mike Rapoport Cc: Russell King Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- init/main.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/init/main.c b/init/main.c index 598e278b46f7..7d4025d665eb 100644 --- a/init/main.c +++ b/init/main.c @@ -582,6 +582,8 @@ asmlinkage __visible void __init start_kernel(void) page_alloc_init(); pr_notice("Kernel command line: %s\n", boot_command_line); + /* parameters may set static keys */ + jump_label_init(); parse_early_param(); after_dashes = parse_args("Booting kernel", static_command_line, __start___param, @@ -591,8 +593,6 @@ asmlinkage __visible void __init start_kernel(void) parse_args("Setting init args", after_dashes, NULL, 0, -1, -1, NULL, set_init_arg); - jump_label_init(); - /* * These use large bootmem allocations and must precede * kmem_cache_init() -- cgit v1.2.3 From dce5b0bdeec61bdbee56121ceb1d014151d5cab1 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Thu, 18 Apr 2019 17:50:48 -0700 Subject: mm/kmemleak.c: fix unused-function warning The only references outside of the #ifdef have been removed, so now we get a warning in non-SMP configurations: mm/kmemleak.c:1404:13: error: unused function 'scan_large_block' [-Werror,-Wunused-function] Add a new #ifdef around it. Link: http://lkml.kernel.org/r/20190416123148.3502045-1-arnd@arndb.de Fixes: 298a32b13208 ("kmemleak: powerpc: skip scanning holes in the .bss section") Signed-off-by: Arnd Bergmann Acked-by: Catalin Marinas Cc: Vincent Whitchurch Cc: Michael Ellerman Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/kmemleak.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/mm/kmemleak.c b/mm/kmemleak.c index 6c318f5ac234..2e435b8142e5 100644 --- a/mm/kmemleak.c +++ b/mm/kmemleak.c @@ -1401,6 +1401,7 @@ static void scan_block(void *_start, void *_end, /* * Scan a large memory block in MAX_SCAN_SIZE chunks to reduce the latency. */ +#ifdef CONFIG_SMP static void scan_large_block(void *start, void *end) { void *next; @@ -1412,6 +1413,7 @@ static void scan_large_block(void *start, void *end) cond_resched(); } } +#endif /* * Scan a memory block corresponding to a kmemleak_object. A condition is -- cgit v1.2.3 From 04f5866e41fb70690e28397487d8bd8eea7d712a Mon Sep 17 00:00:00 2001 From: Andrea Arcangeli Date: Thu, 18 Apr 2019 17:50:52 -0700 Subject: coredump: fix race condition between mmget_not_zero()/get_task_mm() and core dumping The core dumping code has always run without holding the mmap_sem for writing, despite that is the only way to ensure that the entire vma layout will not change from under it. Only using some signal serialization on the processes belonging to the mm is not nearly enough. This was pointed out earlier. For example in Hugh's post from Jul 2017: https://lkml.kernel.org/r/alpine.LSU.2.11.1707191716030.2055@eggly.anvils "Not strictly relevant here, but a related note: I was very surprised to discover, only quite recently, how handle_mm_fault() may be called without down_read(mmap_sem) - when core dumping. That seems a misguided optimization to me, which would also be nice to correct" In particular because the growsdown and growsup can move the vm_start/vm_end the various loops the core dump does around the vma will not be consistent if page faults can happen concurrently. Pretty much all users calling mmget_not_zero()/get_task_mm() and then taking the mmap_sem had the potential to introduce unexpected side effects in the core dumping code. Adding mmap_sem for writing around the ->core_dump invocation is a viable long term fix, but it requires removing all copy user and page faults and to replace them with get_dump_page() for all binary formats which is not suitable as a short term fix. For the time being this solution manually covers the places that can confuse the core dump either by altering the vma layout or the vma flags while it runs. Once ->core_dump runs under mmap_sem for writing the function mmget_still_valid() can be dropped. Allowing mmap_sem protected sections to run in parallel with the coredump provides some minor parallelism advantage to the swapoff code (which seems to be safe enough by never mangling any vma field and can keep doing swapins in parallel to the core dumping) and to some other corner case. In order to facilitate the backporting I added "Fixes: 86039bd3b4e6" however the side effect of this same race condition in /proc/pid/mem should be reproducible since before 2.6.12-rc2 so I couldn't add any other "Fixes:" because there's no hash beyond the git genesis commit. Because find_extend_vma() is the only location outside of the process context that could modify the "mm" structures under mmap_sem for reading, by adding the mmget_still_valid() check to it, all other cases that take the mmap_sem for reading don't need the new check after mmget_not_zero()/get_task_mm(). The expand_stack() in page fault context also doesn't need the new check, because all tasks under core dumping are frozen. Link: http://lkml.kernel.org/r/20190325224949.11068-1-aarcange@redhat.com Fixes: 86039bd3b4e6 ("userfaultfd: add new syscall to provide memory externalization") Signed-off-by: Andrea Arcangeli Reported-by: Jann Horn Suggested-by: Oleg Nesterov Acked-by: Peter Xu Reviewed-by: Mike Rapoport Reviewed-by: Oleg Nesterov Reviewed-by: Jann Horn Acked-by: Jason Gunthorpe Acked-by: Michal Hocko Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/infiniband/core/uverbs_main.c | 3 +++ fs/proc/task_mmu.c | 18 ++++++++++++++++++ fs/userfaultfd.c | 9 +++++++++ include/linux/sched/mm.h | 21 +++++++++++++++++++++ mm/mmap.c | 7 ++++++- 5 files changed, 57 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/core/uverbs_main.c b/drivers/infiniband/core/uverbs_main.c index 70b7d80431a9..f2e7ffe6fc54 100644 --- a/drivers/infiniband/core/uverbs_main.c +++ b/drivers/infiniband/core/uverbs_main.c @@ -993,6 +993,8 @@ void uverbs_user_mmap_disassociate(struct ib_uverbs_file *ufile) * will only be one mm, so no big deal. */ down_write(&mm->mmap_sem); + if (!mmget_still_valid(mm)) + goto skip_mm; mutex_lock(&ufile->umap_lock); list_for_each_entry_safe (priv, next_priv, &ufile->umaps, list) { @@ -1007,6 +1009,7 @@ void uverbs_user_mmap_disassociate(struct ib_uverbs_file *ufile) vma->vm_flags &= ~(VM_SHARED | VM_MAYSHARE); } mutex_unlock(&ufile->umap_lock); + skip_mm: up_write(&mm->mmap_sem); mmput(mm); } diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c index 92a91e7816d8..95ca1fe7283c 100644 --- a/fs/proc/task_mmu.c +++ b/fs/proc/task_mmu.c @@ -1143,6 +1143,24 @@ static ssize_t clear_refs_write(struct file *file, const char __user *buf, count = -EINTR; goto out_mm; } + /* + * Avoid to modify vma->vm_flags + * without locked ops while the + * coredump reads the vm_flags. + */ + if (!mmget_still_valid(mm)) { + /* + * Silently return "count" + * like if get_task_mm() + * failed. FIXME: should this + * function have returned + * -ESRCH if get_task_mm() + * failed like if + * get_proc_task() fails? + */ + up_write(&mm->mmap_sem); + goto out_mm; + } for (vma = mm->mmap; vma; vma = vma->vm_next) { vma->vm_flags &= ~VM_SOFTDIRTY; vma_set_page_prot(vma); diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c index 89800fc7dc9d..f5de1e726356 100644 --- a/fs/userfaultfd.c +++ b/fs/userfaultfd.c @@ -629,6 +629,8 @@ static void userfaultfd_event_wait_completion(struct userfaultfd_ctx *ctx, /* the various vma->vm_userfaultfd_ctx still points to it */ down_write(&mm->mmap_sem); + /* no task can run (and in turn coredump) yet */ + VM_WARN_ON(!mmget_still_valid(mm)); for (vma = mm->mmap; vma; vma = vma->vm_next) if (vma->vm_userfaultfd_ctx.ctx == release_new_ctx) { vma->vm_userfaultfd_ctx = NULL_VM_UFFD_CTX; @@ -883,6 +885,8 @@ static int userfaultfd_release(struct inode *inode, struct file *file) * taking the mmap_sem for writing. */ down_write(&mm->mmap_sem); + if (!mmget_still_valid(mm)) + goto skip_mm; prev = NULL; for (vma = mm->mmap; vma; vma = vma->vm_next) { cond_resched(); @@ -905,6 +909,7 @@ static int userfaultfd_release(struct inode *inode, struct file *file) vma->vm_flags = new_flags; vma->vm_userfaultfd_ctx = NULL_VM_UFFD_CTX; } +skip_mm: up_write(&mm->mmap_sem); mmput(mm); wakeup: @@ -1333,6 +1338,8 @@ static int userfaultfd_register(struct userfaultfd_ctx *ctx, goto out; down_write(&mm->mmap_sem); + if (!mmget_still_valid(mm)) + goto out_unlock; vma = find_vma_prev(mm, start, &prev); if (!vma) goto out_unlock; @@ -1520,6 +1527,8 @@ static int userfaultfd_unregister(struct userfaultfd_ctx *ctx, goto out; down_write(&mm->mmap_sem); + if (!mmget_still_valid(mm)) + goto out_unlock; vma = find_vma_prev(mm, start, &prev); if (!vma) goto out_unlock; diff --git a/include/linux/sched/mm.h b/include/linux/sched/mm.h index 0cd9f10423fb..a3fda9f024c3 100644 --- a/include/linux/sched/mm.h +++ b/include/linux/sched/mm.h @@ -49,6 +49,27 @@ static inline void mmdrop(struct mm_struct *mm) __mmdrop(mm); } +/* + * This has to be called after a get_task_mm()/mmget_not_zero() + * followed by taking the mmap_sem for writing before modifying the + * vmas or anything the coredump pretends not to change from under it. + * + * NOTE: find_extend_vma() called from GUP context is the only place + * that can modify the "mm" (notably the vm_start/end) under mmap_sem + * for reading and outside the context of the process, so it is also + * the only case that holds the mmap_sem for reading that must call + * this function. Generally if the mmap_sem is hold for reading + * there's no need of this check after get_task_mm()/mmget_not_zero(). + * + * This function can be obsoleted and the check can be removed, after + * the coredump code will hold the mmap_sem for writing before + * invoking the ->core_dump methods. + */ +static inline bool mmget_still_valid(struct mm_struct *mm) +{ + return likely(!mm->core_state); +} + /** * mmget() - Pin the address space associated with a &struct mm_struct. * @mm: The address space to pin. diff --git a/mm/mmap.c b/mm/mmap.c index 41eb48d9b527..bd7b9f293b39 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -45,6 +45,7 @@ #include #include #include +#include #include #include @@ -2525,7 +2526,8 @@ find_extend_vma(struct mm_struct *mm, unsigned long addr) vma = find_vma_prev(mm, addr, &prev); if (vma && (vma->vm_start <= addr)) return vma; - if (!prev || expand_stack(prev, addr)) + /* don't alter vm_end if the coredump is running */ + if (!prev || !mmget_still_valid(mm) || expand_stack(prev, addr)) return NULL; if (prev->vm_flags & VM_LOCKED) populate_vma_page_range(prev, addr, prev->vm_end, NULL); @@ -2551,6 +2553,9 @@ find_extend_vma(struct mm_struct *mm, unsigned long addr) return vma; if (!(vma->vm_flags & VM_GROWSDOWN)) return NULL; + /* don't alter vm_start if the coredump is running */ + if (!mmget_still_valid(mm)) + return NULL; start = vma->vm_start; if (expand_stack(vma, addr)) return NULL; -- cgit v1.2.3 From 2ee27796f298b710992a677a7e4d35c8c588b17e Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Sun, 30 Dec 2018 18:27:15 +0100 Subject: x86/cpu/intel: Lower the "ENERGY_PERF_BIAS: Set to normal" message's log priority The "ENERGY_PERF_BIAS: Set to 'normal', was 'performance'" message triggers on pretty much every Intel machine. The purpose of log messages with a warning level is to notify the user of something which potentially is a problem, or at least somewhat unexpected. This message clearly does not match those criteria, so lower its log priority from warning to info. Signed-off-by: Hans de Goede Cc: Borislav Petkov Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/20181230172715.17469-1-hdegoede@redhat.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/intel.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index fc3c07fe7df5..3142fd7a9b32 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c @@ -611,8 +611,8 @@ static void init_intel_energy_perf(struct cpuinfo_x86 *c) if ((epb & 0xF) != ENERGY_PERF_BIAS_PERFORMANCE) return; - pr_warn_once("ENERGY_PERF_BIAS: Set to 'normal', was 'performance'\n"); - pr_warn_once("ENERGY_PERF_BIAS: View and update with x86_energy_perf_policy(8)\n"); + pr_info_once("ENERGY_PERF_BIAS: Set to 'normal', was 'performance'\n"); + pr_info_once("ENERGY_PERF_BIAS: View and update with x86_energy_perf_policy(8)\n"); epb = (epb & ~0xF) | ENERGY_PERF_BIAS_NORMAL; wrmsrl(MSR_IA32_ENERGY_PERF_BIAS, epb); } -- cgit v1.2.3 From b40fabc05ea047f6af5933d26a5483873340b0d4 Mon Sep 17 00:00:00 2001 From: Hou Tao Date: Fri, 19 Apr 2019 10:31:27 +0800 Subject: block: kill all_q_node in request_queue all_q_node has not been used since commit 4b855ad37194 ("blk-mq: Create hctx for each present CPU"), so remove it. Reviewed-by: Chaitanya Kulkarni Reviewed-by: Ming Lei Signed-off-by: Hou Tao Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 1 - 1 file changed, 1 deletion(-) diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 5c58a3b2bf00..317ab30d2904 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -548,7 +548,6 @@ struct request_queue { struct rcu_head rcu_head; wait_queue_head_t mq_freeze_wq; struct percpu_ref q_usage_counter; - struct list_head all_q_node; struct blk_mq_tag_set *tag_set; struct list_head tag_set_list; -- cgit v1.2.3 From 6bedf00e55e5dd0a4ed1ad3f06131edd6fb56ec8 Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Wed, 17 Apr 2019 09:11:26 +0800 Subject: block: make sure that bvec length can't be overflow bvec->bv_offset may be bigger than PAGE_SIZE sometimes, such as, when one bio is splitted in the middle of one bvec via bio_split(), and bi_iter.bi_bvec_done is used to build offset of the 1st bvec of remained bio. And the remained bio's bvec may be re-submitted to fs layer via ITER_IBVEC, such as loop and nvme-loop. So we have to make sure that every bvec's offset is less than PAGE_SIZE from bio_for_each_segment_all() because some drivers(loop, nvme-loop) passes the splitted bvec to fs layer via ITER_BVEC. This patch fixes this issue reported by Zhang Yi When running nvme/011. Cc: Christoph Hellwig Cc: Yi Zhang Reported-by: Yi Zhang Reviewed-by: Christoph Hellwig Fixes: 6dc4f100c175 ("block: allow bio_for_each_segment_all() to iterate over multi-page bvec") Signed-off-by: Ming Lei Signed-off-by: Jens Axboe --- include/linux/bvec.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/include/linux/bvec.h b/include/linux/bvec.h index 3bc91879e1e2..ff13cbc1887d 100644 --- a/include/linux/bvec.h +++ b/include/linux/bvec.h @@ -160,8 +160,9 @@ static inline void bvec_advance(const struct bio_vec *bvec, bv->bv_page = nth_page(bv->bv_page, 1); bv->bv_offset = 0; } else { - bv->bv_page = bvec->bv_page; - bv->bv_offset = bvec->bv_offset; + bv->bv_page = bvec_nth_page(bvec->bv_page, bvec->bv_offset / + PAGE_SIZE); + bv->bv_offset = bvec->bv_offset & ~PAGE_MASK; } bv->bv_len = min_t(unsigned int, PAGE_SIZE - bv->bv_offset, bvec->bv_len - iter_all->done); -- cgit v1.2.3 From 36ad7022536e0c65f8baeeaa5efde11dec44808a Mon Sep 17 00:00:00 2001 From: Petr Štetiar Date: Wed, 17 Apr 2019 22:09:12 +0200 Subject: of_net: Fix residues after of_get_nvmem_mac_address removal MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit I've discovered following discrepancy in the bindings/net/ethernet.txt documentation, where it states following: - nvmem-cells: phandle, reference to an nvmem node for the MAC address; - nvmem-cell-names: string, should be "mac-address" if nvmem is to be.. which is actually misleading and confusing. There are only two ethernet drivers in the tree, cadence/macb and davinci which supports this properties. This nvmem-cell* properties were introduced in commit 9217e566bdee ("of_net: Implement of_get_nvmem_mac_address helper"), but commit afa64a72b862 ("of: net: kill of_get_nvmem_mac_address()") forget to properly clean up this parts. So this patch fixes the documentation by moving the nvmem-cell* properties at the appropriate places. While at it, I've removed unused include as well. Cc: Bartosz Golaszewski Fixes: afa64a72b862 ("of: net: kill of_get_nvmem_mac_address()") Signed-off-by: Petr Štetiar Signed-off-by: David S. Miller --- Documentation/devicetree/bindings/net/davinci_emac.txt | 2 ++ Documentation/devicetree/bindings/net/ethernet.txt | 2 -- Documentation/devicetree/bindings/net/macb.txt | 4 ++++ drivers/of/of_net.c | 1 - 4 files changed, 6 insertions(+), 3 deletions(-) diff --git a/Documentation/devicetree/bindings/net/davinci_emac.txt b/Documentation/devicetree/bindings/net/davinci_emac.txt index 24c5cdaba8d2..ca83dcc84fb8 100644 --- a/Documentation/devicetree/bindings/net/davinci_emac.txt +++ b/Documentation/devicetree/bindings/net/davinci_emac.txt @@ -20,6 +20,8 @@ Required properties: Optional properties: - phy-handle: See ethernet.txt file in the same directory. If absent, davinci_emac driver defaults to 100/FULL. +- nvmem-cells: phandle, reference to an nvmem node for the MAC address +- nvmem-cell-names: string, should be "mac-address" if nvmem is to be used - ti,davinci-rmii-en: 1 byte, 1 means use RMII - ti,davinci-no-bd-ram: boolean, does EMAC have BD RAM? diff --git a/Documentation/devicetree/bindings/net/ethernet.txt b/Documentation/devicetree/bindings/net/ethernet.txt index cfc376bc977a..2974e63ba311 100644 --- a/Documentation/devicetree/bindings/net/ethernet.txt +++ b/Documentation/devicetree/bindings/net/ethernet.txt @@ -10,8 +10,6 @@ Documentation/devicetree/bindings/phy/phy-bindings.txt. the boot program; should be used in cases where the MAC address assigned to the device by the boot program is different from the "local-mac-address" property; -- nvmem-cells: phandle, reference to an nvmem node for the MAC address; -- nvmem-cell-names: string, should be "mac-address" if nvmem is to be used; - max-speed: number, specifies maximum speed in Mbit/s supported by the device; - max-frame-size: number, maximum transfer unit (IEEE defined MTU), rather than the maximum frame size (there's contradiction in the Devicetree diff --git a/Documentation/devicetree/bindings/net/macb.txt b/Documentation/devicetree/bindings/net/macb.txt index 174f292d8a3e..8b80515729d7 100644 --- a/Documentation/devicetree/bindings/net/macb.txt +++ b/Documentation/devicetree/bindings/net/macb.txt @@ -26,6 +26,10 @@ Required properties: Optional elements: 'tsu_clk' - clocks: Phandles to input clocks. +Optional properties: +- nvmem-cells: phandle, reference to an nvmem node for the MAC address +- nvmem-cell-names: string, should be "mac-address" if nvmem is to be used + Optional properties for PHY child node: - reset-gpios : Should specify the gpio for phy reset - magic-packet : If present, indicates that the hardware supports waking diff --git a/drivers/of/of_net.c b/drivers/of/of_net.c index 810ab0fbcccb..d820f3edd431 100644 --- a/drivers/of/of_net.c +++ b/drivers/of/of_net.c @@ -7,7 +7,6 @@ */ #include #include -#include #include #include #include -- cgit v1.2.3 From c2b71462d294cf517a0bc6e4fd6424d7cee5596f Mon Sep 17 00:00:00 2001 From: Alan Stern Date: Fri, 19 Apr 2019 13:52:38 -0400 Subject: USB: core: Fix bug caused by duplicate interface PM usage counter The syzkaller fuzzer reported a bug in the USB hub driver which turned out to be caused by a negative runtime-PM usage counter. This allowed a hub to be runtime suspended at a time when the driver did not expect it. The symptom is a WARNING issued because the hub's status URB is submitted while it is already active: URB 0000000031fb463e submitted while active WARNING: CPU: 0 PID: 2917 at drivers/usb/core/urb.c:363 The negative runtime-PM usage count was caused by an unfortunate design decision made when runtime PM was first implemented for USB. At that time, USB class drivers were allowed to unbind from their interfaces without balancing the usage counter (i.e., leaving it with a positive count). The core code would take care of setting the counter back to 0 before allowing another driver to bind to the interface. Later on when runtime PM was implemented for the entire kernel, the opposite decision was made: Drivers were required to balance their runtime-PM get and put calls. In order to maintain backward compatibility, however, the USB subsystem adapted to the new implementation by keeping an independent usage counter for each interface and using it to automatically adjust the normal usage counter back to 0 whenever a driver was unbound. This approach involves duplicating information, but what is worse, it doesn't work properly in cases where a USB class driver delays decrementing the usage counter until after the driver's disconnect() routine has returned and the counter has been adjusted back to 0. Doing so would cause the usage counter to become negative. There's even a warning about this in the USB power management documentation! As it happens, this is exactly what the hub driver does. The kick_hub_wq() routine increments the runtime-PM usage counter, and the corresponding decrement is carried out by hub_event() in the context of the hub_wq work-queue thread. This work routine may sometimes run after the driver has been unbound from its interface, and when it does it causes the usage counter to go negative. It is not possible for hub_disconnect() to wait for a pending hub_event() call to finish, because hub_disconnect() is called with the device lock held and hub_event() acquires that lock. The only feasible fix is to reverse the original design decision: remove the duplicate interface-specific usage counter and require USB drivers to balance their runtime PM gets and puts. As far as I know, all existing drivers currently do this. Signed-off-by: Alan Stern Reported-and-tested-by: syzbot+7634edaea4d0b341c625@syzkaller.appspotmail.com CC: Signed-off-by: Greg Kroah-Hartman --- Documentation/driver-api/usb/power-management.rst | 14 +++++++++----- drivers/usb/core/driver.c | 13 ------------- drivers/usb/storage/realtek_cr.c | 13 +++++-------- include/linux/usb.h | 2 -- 4 files changed, 14 insertions(+), 28 deletions(-) diff --git a/Documentation/driver-api/usb/power-management.rst b/Documentation/driver-api/usb/power-management.rst index 79beb807996b..4a74cf6f2797 100644 --- a/Documentation/driver-api/usb/power-management.rst +++ b/Documentation/driver-api/usb/power-management.rst @@ -370,11 +370,15 @@ autosuspend the interface's device. When the usage counter is = 0 then the interface is considered to be idle, and the kernel may autosuspend the device. -Drivers need not be concerned about balancing changes to the usage -counter; the USB core will undo any remaining "get"s when a driver -is unbound from its interface. As a corollary, drivers must not call -any of the ``usb_autopm_*`` functions after their ``disconnect`` -routine has returned. +Drivers must be careful to balance their overall changes to the usage +counter. Unbalanced "get"s will remain in effect when a driver is +unbound from its interface, preventing the device from going into +runtime suspend should the interface be bound to a driver again. On +the other hand, drivers are allowed to achieve this balance by calling +the ``usb_autopm_*`` functions even after their ``disconnect`` routine +has returned -- say from within a work-queue routine -- provided they +retain an active reference to the interface (via ``usb_get_intf`` and +``usb_put_intf``). Drivers using the async routines are responsible for their own synchronization and mutual exclusion. diff --git a/drivers/usb/core/driver.c b/drivers/usb/core/driver.c index 8987cec9549d..ebcadaad89d1 100644 --- a/drivers/usb/core/driver.c +++ b/drivers/usb/core/driver.c @@ -473,11 +473,6 @@ static int usb_unbind_interface(struct device *dev) pm_runtime_disable(dev); pm_runtime_set_suspended(dev); - /* Undo any residual pm_autopm_get_interface_* calls */ - for (r = atomic_read(&intf->pm_usage_cnt); r > 0; --r) - usb_autopm_put_interface_no_suspend(intf); - atomic_set(&intf->pm_usage_cnt, 0); - if (!error) usb_autosuspend_device(udev); @@ -1633,7 +1628,6 @@ void usb_autopm_put_interface(struct usb_interface *intf) int status; usb_mark_last_busy(udev); - atomic_dec(&intf->pm_usage_cnt); status = pm_runtime_put_sync(&intf->dev); dev_vdbg(&intf->dev, "%s: cnt %d -> %d\n", __func__, atomic_read(&intf->dev.power.usage_count), @@ -1662,7 +1656,6 @@ void usb_autopm_put_interface_async(struct usb_interface *intf) int status; usb_mark_last_busy(udev); - atomic_dec(&intf->pm_usage_cnt); status = pm_runtime_put(&intf->dev); dev_vdbg(&intf->dev, "%s: cnt %d -> %d\n", __func__, atomic_read(&intf->dev.power.usage_count), @@ -1684,7 +1677,6 @@ void usb_autopm_put_interface_no_suspend(struct usb_interface *intf) struct usb_device *udev = interface_to_usbdev(intf); usb_mark_last_busy(udev); - atomic_dec(&intf->pm_usage_cnt); pm_runtime_put_noidle(&intf->dev); } EXPORT_SYMBOL_GPL(usb_autopm_put_interface_no_suspend); @@ -1715,8 +1707,6 @@ int usb_autopm_get_interface(struct usb_interface *intf) status = pm_runtime_get_sync(&intf->dev); if (status < 0) pm_runtime_put_sync(&intf->dev); - else - atomic_inc(&intf->pm_usage_cnt); dev_vdbg(&intf->dev, "%s: cnt %d -> %d\n", __func__, atomic_read(&intf->dev.power.usage_count), status); @@ -1750,8 +1740,6 @@ int usb_autopm_get_interface_async(struct usb_interface *intf) status = pm_runtime_get(&intf->dev); if (status < 0 && status != -EINPROGRESS) pm_runtime_put_noidle(&intf->dev); - else - atomic_inc(&intf->pm_usage_cnt); dev_vdbg(&intf->dev, "%s: cnt %d -> %d\n", __func__, atomic_read(&intf->dev.power.usage_count), status); @@ -1775,7 +1763,6 @@ void usb_autopm_get_interface_no_resume(struct usb_interface *intf) struct usb_device *udev = interface_to_usbdev(intf); usb_mark_last_busy(udev); - atomic_inc(&intf->pm_usage_cnt); pm_runtime_get_noresume(&intf->dev); } EXPORT_SYMBOL_GPL(usb_autopm_get_interface_no_resume); diff --git a/drivers/usb/storage/realtek_cr.c b/drivers/usb/storage/realtek_cr.c index 31b024441938..cc794e25a0b6 100644 --- a/drivers/usb/storage/realtek_cr.c +++ b/drivers/usb/storage/realtek_cr.c @@ -763,18 +763,16 @@ static void rts51x_suspend_timer_fn(struct timer_list *t) break; case RTS51X_STAT_IDLE: case RTS51X_STAT_SS: - usb_stor_dbg(us, "RTS51X_STAT_SS, intf->pm_usage_cnt:%d, power.usage:%d\n", - atomic_read(&us->pusb_intf->pm_usage_cnt), + usb_stor_dbg(us, "RTS51X_STAT_SS, power.usage:%d\n", atomic_read(&us->pusb_intf->dev.power.usage_count)); - if (atomic_read(&us->pusb_intf->pm_usage_cnt) > 0) { + if (atomic_read(&us->pusb_intf->dev.power.usage_count) > 0) { usb_stor_dbg(us, "Ready to enter SS state\n"); rts51x_set_stat(chip, RTS51X_STAT_SS); /* ignore mass storage interface's children */ pm_suspend_ignore_children(&us->pusb_intf->dev, true); usb_autopm_put_interface_async(us->pusb_intf); - usb_stor_dbg(us, "RTS51X_STAT_SS 01, intf->pm_usage_cnt:%d, power.usage:%d\n", - atomic_read(&us->pusb_intf->pm_usage_cnt), + usb_stor_dbg(us, "RTS51X_STAT_SS 01, power.usage:%d\n", atomic_read(&us->pusb_intf->dev.power.usage_count)); } break; @@ -807,11 +805,10 @@ static void rts51x_invoke_transport(struct scsi_cmnd *srb, struct us_data *us) int ret; if (working_scsi(srb)) { - usb_stor_dbg(us, "working scsi, intf->pm_usage_cnt:%d, power.usage:%d\n", - atomic_read(&us->pusb_intf->pm_usage_cnt), + usb_stor_dbg(us, "working scsi, power.usage:%d\n", atomic_read(&us->pusb_intf->dev.power.usage_count)); - if (atomic_read(&us->pusb_intf->pm_usage_cnt) <= 0) { + if (atomic_read(&us->pusb_intf->dev.power.usage_count) <= 0) { ret = usb_autopm_get_interface(us->pusb_intf); usb_stor_dbg(us, "working scsi, ret=%d\n", ret); } diff --git a/include/linux/usb.h b/include/linux/usb.h index 5e49e82c4368..ff010d1fd1c7 100644 --- a/include/linux/usb.h +++ b/include/linux/usb.h @@ -200,7 +200,6 @@ usb_find_last_int_out_endpoint(struct usb_host_interface *alt, * @dev: driver model's view of this device * @usb_dev: if an interface is bound to the USB major, this will point * to the sysfs representation for that device. - * @pm_usage_cnt: PM usage counter for this interface * @reset_ws: Used for scheduling resets from atomic context. * @resetting_device: USB core reset the device, so use alt setting 0 as * current; needs bandwidth alloc after reset. @@ -257,7 +256,6 @@ struct usb_interface { struct device dev; /* interface specific device info */ struct device *usb_dev; - atomic_t pm_usage_cnt; /* usage counter for autosuspend */ struct work_struct reset_ws; /* for resets in atomic context */ }; #define to_usb_interface(d) container_of(d, struct usb_interface, dev) -- cgit v1.2.3 From 12fc512f5741443a03adde2ead20724da8ad550a Mon Sep 17 00:00:00 2001 From: Maxim Mikityanskiy Date: Fri, 15 Mar 2019 16:41:43 +0200 Subject: net/mlx5e: Fix use-after-free after xdp_return_frame xdp_return_frame releases the frame. It leads to releasing the page, so it's not allowed to access xdpi.xdpf->len after that, because xdpi.xdpf is at xdp->data_hard_start after convert_to_xdp_frame. This patch moves the memory access to precede the return of the frame. Fixes: 58b99ee3e3ebe ("net/mlx5e: Add support for XDP_REDIRECT in device-out side") Signed-off-by: Maxim Mikityanskiy Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c index 03b2a9f9c589..10a99cd3e598 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c @@ -304,9 +304,9 @@ bool mlx5e_poll_xdpsq_cq(struct mlx5e_cq *cq, struct mlx5e_rq *rq) mlx5e_xdpi_fifo_pop(xdpi_fifo); if (is_redirect) { - xdp_return_frame(xdpi.xdpf); dma_unmap_single(sq->pdev, xdpi.dma_addr, xdpi.xdpf->len, DMA_TO_DEVICE); + xdp_return_frame(xdpi.xdpf); } else { /* Recycle RX page */ mlx5e_page_release(rq, &xdpi.di, true); @@ -345,9 +345,9 @@ void mlx5e_free_xdpsq_descs(struct mlx5e_xdpsq *sq, struct mlx5e_rq *rq) mlx5e_xdpi_fifo_pop(xdpi_fifo); if (is_redirect) { - xdp_return_frame(xdpi.xdpf); dma_unmap_single(sq->pdev, xdpi.dma_addr, xdpi.xdpf->len, DMA_TO_DEVICE); + xdp_return_frame(xdpi.xdpf); } else { /* Recycle RX page */ mlx5e_page_release(rq, &xdpi.di, false); -- cgit v1.2.3 From d460c2718906252a2a69bc6f89b537071f792e6e Mon Sep 17 00:00:00 2001 From: Maxim Mikityanskiy Date: Mon, 8 Apr 2019 15:12:45 +0300 Subject: net/mlx5e: Fix the max MTU check in case of XDP MLX5E_XDP_MAX_MTU was calculated incorrectly. It didn't account for NET_IP_ALIGN and MLX5E_HW2SW_MTU, and it also misused MLX5_SKB_FRAG_SZ. This commit fixes the calculations and adds a brief explanation for the formula used. Fixes: a26a5bdf3ee2d ("net/mlx5e: Restrict the combination of large MTU and XDP") Signed-off-by: Maxim Mikityanskiy Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c | 20 ++++++++++++++++++++ drivers/net/ethernet/mellanox/mlx5/core/en/xdp.h | 3 +-- drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 5 +++-- 3 files changed, 24 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c index 10a99cd3e598..cad34d6f5f45 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c @@ -33,6 +33,26 @@ #include #include "en/xdp.h" +int mlx5e_xdp_max_mtu(struct mlx5e_params *params) +{ + int hr = NET_IP_ALIGN + XDP_PACKET_HEADROOM; + + /* Let S := SKB_DATA_ALIGN(sizeof(struct skb_shared_info)). + * The condition checked in mlx5e_rx_is_linear_skb is: + * SKB_DATA_ALIGN(sw_mtu + hard_mtu + hr) + S <= PAGE_SIZE (1) + * (Note that hw_mtu == sw_mtu + hard_mtu.) + * What is returned from this function is: + * max_mtu = PAGE_SIZE - S - hr - hard_mtu (2) + * After assigning sw_mtu := max_mtu, the left side of (1) turns to + * SKB_DATA_ALIGN(PAGE_SIZE - S) + S, which is equal to PAGE_SIZE, + * because both PAGE_SIZE and S are already aligned. Any number greater + * than max_mtu would make the left side of (1) greater than PAGE_SIZE, + * so max_mtu is the maximum MTU allowed. + */ + + return MLX5E_HW2SW_MTU(params, SKB_MAX_HEAD(hr)); +} + static inline bool mlx5e_xmit_xdp_buff(struct mlx5e_xdpsq *sq, struct mlx5e_dma_info *di, struct xdp_buff *xdp) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.h b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.h index ee27a7c8cd87..553956cadc8a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.h @@ -34,13 +34,12 @@ #include "en.h" -#define MLX5E_XDP_MAX_MTU ((int)(PAGE_SIZE - \ - MLX5_SKB_FRAG_SZ(XDP_PACKET_HEADROOM))) #define MLX5E_XDP_MIN_INLINE (ETH_HLEN + VLAN_HLEN) #define MLX5E_XDP_TX_EMPTY_DS_COUNT \ (sizeof(struct mlx5e_tx_wqe) / MLX5_SEND_WQE_DS) #define MLX5E_XDP_TX_DS_COUNT (MLX5E_XDP_TX_EMPTY_DS_COUNT + 1 /* SG DS */) +int mlx5e_xdp_max_mtu(struct mlx5e_params *params); bool mlx5e_xdp_handle(struct mlx5e_rq *rq, struct mlx5e_dma_info *di, void *va, u16 *rx_headroom, u32 *len); bool mlx5e_poll_xdpsq_cq(struct mlx5e_cq *cq, struct mlx5e_rq *rq); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index f7eb521db580..46157e2a1e5a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -3777,7 +3777,7 @@ int mlx5e_change_mtu(struct net_device *netdev, int new_mtu, if (params->xdp_prog && !mlx5e_rx_is_linear_skb(priv->mdev, &new_channels.params)) { netdev_err(netdev, "MTU(%d) > %d is not allowed while XDP enabled\n", - new_mtu, MLX5E_XDP_MAX_MTU); + new_mtu, mlx5e_xdp_max_mtu(params)); err = -EINVAL; goto out; } @@ -4212,7 +4212,8 @@ static int mlx5e_xdp_allowed(struct mlx5e_priv *priv, struct bpf_prog *prog) if (!mlx5e_rx_is_linear_skb(priv->mdev, &new_channels.params)) { netdev_warn(netdev, "XDP is not allowed with MTU(%d) > %d\n", - new_channels.params.sw_mtu, MLX5E_XDP_MAX_MTU); + new_channels.params.sw_mtu, + mlx5e_xdp_max_mtu(&new_channels.params)); return -EINVAL; } -- cgit v1.2.3 From ace329f4ab3ba434be2adf618073c752d083b524 Mon Sep 17 00:00:00 2001 From: Erez Alfasi Date: Thu, 11 Apr 2019 10:41:03 +0300 Subject: net/mlx5e: ethtool, Remove unsupported SFP EEPROM high pages query Querying EEPROM high pages data for SFP module is currently not supported by our driver and yet queried, resulting in invalid FW queries. Set the EEPROM ethtool data length to 256 for SFP module will limit the reading for page 0 only and prevent invalid FW queries. Fixes: bb64143eee8c ("net/mlx5e: Add ethtool support for dump module EEPROM") Signed-off-by: Erez Alfasi Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c | 2 +- drivers/net/ethernet/mellanox/mlx5/core/port.c | 4 ---- 2 files changed, 1 insertion(+), 5 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c index 76a3d01a489e..78dc8fe2a83c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c @@ -1586,7 +1586,7 @@ static int mlx5e_get_module_info(struct net_device *netdev, break; case MLX5_MODULE_ID_SFP: modinfo->type = ETH_MODULE_SFF_8472; - modinfo->eeprom_len = ETH_MODULE_SFF_8472_LEN; + modinfo->eeprom_len = MLX5_EEPROM_PAGE_LENGTH; break; default: netdev_err(priv->netdev, "%s: cable type not recognized:0x%x\n", diff --git a/drivers/net/ethernet/mellanox/mlx5/core/port.c b/drivers/net/ethernet/mellanox/mlx5/core/port.c index 21b7f05b16a5..361468e0435d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/port.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/port.c @@ -317,10 +317,6 @@ int mlx5_query_module_eeprom(struct mlx5_core_dev *dev, size -= offset + size - MLX5_EEPROM_PAGE_LENGTH; i2c_addr = MLX5_I2C_ADDR_LOW; - if (offset >= MLX5_EEPROM_PAGE_LENGTH) { - i2c_addr = MLX5_I2C_ADDR_HIGH; - offset -= MLX5_EEPROM_PAGE_LENGTH; - } MLX5_SET(mcia_reg, in, l, 0); MLX5_SET(mcia_reg, in, module, module_num); -- cgit v1.2.3 From 30c04d796b693e22405c38e9b78e9a364e4c77e6 Mon Sep 17 00:00:00 2001 From: Po-Hsu Lin Date: Thu, 18 Apr 2019 19:57:25 +0800 Subject: selftests/net: correct the return value for run_netsocktests The run_netsocktests will be marked as passed regardless the actual test result from the ./socket: selftests: net: run_netsocktests ======================================== -------------------- running socket test -------------------- [FAIL] ok 1..6 selftests: net: run_netsocktests [PASS] This is because the test script itself has been successfully executed. Fix this by exit 1 when the test failed. Signed-off-by: Po-Hsu Lin Signed-off-by: David S. Miller --- tools/testing/selftests/net/run_netsocktests | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/net/run_netsocktests b/tools/testing/selftests/net/run_netsocktests index b093f39c298c..14e41faf2c57 100755 --- a/tools/testing/selftests/net/run_netsocktests +++ b/tools/testing/selftests/net/run_netsocktests @@ -7,7 +7,7 @@ echo "--------------------" ./socket if [ $? -ne 0 ]; then echo "[FAIL]" + exit 1 else echo "[PASS]" fi - -- cgit v1.2.3 From 925b0c841e066b488cc3a60272472b2c56300704 Mon Sep 17 00:00:00 2001 From: Hangbin Liu Date: Fri, 19 Apr 2019 14:31:00 +0800 Subject: team: fix possible recursive locking when add slaves If we add a bond device which is already the master of the team interface, we will hold the team->lock in team_add_slave() first and then request the lock in team_set_mac_address() again. The functions are called like: - team_add_slave() - team_port_add() - team_port_enter() - team_modeop_port_enter() - __set_port_dev_addr() - dev_set_mac_address() - bond_set_mac_address() - dev_set_mac_address() - team_set_mac_address Although team_upper_dev_link() would check the upper devices but it is called too late. Fix it by adding a checking before processing the slave. v2: Do not split the string in netdev_err() Fixes: 3d249d4ca7d0 ("net: introduce ethernet teaming device") Acked-by: Jiri Pirko Signed-off-by: Hangbin Liu Signed-off-by: David S. Miller --- drivers/net/team/team.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/net/team/team.c b/drivers/net/team/team.c index 9ce61b019aad..16963f7a88f7 100644 --- a/drivers/net/team/team.c +++ b/drivers/net/team/team.c @@ -1156,6 +1156,13 @@ static int team_port_add(struct team *team, struct net_device *port_dev, return -EINVAL; } + if (netdev_has_upper_dev(dev, port_dev)) { + NL_SET_ERR_MSG(extack, "Device is already an upper device of the team interface"); + netdev_err(dev, "Device %s is already an upper device of the team interface\n", + portname); + return -EBUSY; + } + if (port_dev->features & NETIF_F_VLAN_CHALLENGED && vlan_uses_dev(dev)) { NL_SET_ERR_MSG(extack, "Device is VLAN challenged and team device has VLAN set up"); -- cgit v1.2.3 From 8c03557c3f25271e62e39154af66ebdd1b59c9ca Mon Sep 17 00:00:00 2001 From: Po-Hsu Lin Date: Fri, 19 Apr 2019 19:01:13 +0800 Subject: selftests/net: correct the return value for run_afpackettests The run_afpackettests will be marked as passed regardless the return value of those sub-tests in the script: -------------------- running psock_tpacket test -------------------- [FAIL] selftests: run_afpackettests [PASS] Fix this by changing the return value for each tests. Signed-off-by: Po-Hsu Lin Signed-off-by: David S. Miller --- tools/testing/selftests/net/run_afpackettests | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/tools/testing/selftests/net/run_afpackettests b/tools/testing/selftests/net/run_afpackettests index 2dc95fda7ef7..ea5938ec009a 100755 --- a/tools/testing/selftests/net/run_afpackettests +++ b/tools/testing/selftests/net/run_afpackettests @@ -6,12 +6,14 @@ if [ $(id -u) != 0 ]; then exit 0 fi +ret=0 echo "--------------------" echo "running psock_fanout test" echo "--------------------" ./in_netns.sh ./psock_fanout if [ $? -ne 0 ]; then echo "[FAIL]" + ret=1 else echo "[PASS]" fi @@ -22,6 +24,7 @@ echo "--------------------" ./in_netns.sh ./psock_tpacket if [ $? -ne 0 ]; then echo "[FAIL]" + ret=1 else echo "[PASS]" fi @@ -32,6 +35,8 @@ echo "--------------------" ./in_netns.sh ./txring_overwrite if [ $? -ne 0 ]; then echo "[FAIL]" + ret=1 else echo "[PASS]" fi +exit $ret -- cgit v1.2.3 From 62ef81d5632634d5e310ed25b9b940b2b6612b46 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Fri, 19 Apr 2019 16:51:38 -0700 Subject: net/tls: avoid potential deadlock in tls_set_device_offload_rx() If device supports offload, but offload fails tls_set_device_offload_rx() will call tls_sw_free_resources_rx() which (unhelpfully) releases and reacquires the socket lock. For a small fix release and reacquire the device_offload_lock. Fixes: 4799ac81e52a ("tls: Add rx inline crypto offload") Signed-off-by: Jakub Kicinski Reviewed-by: Dirk van der Merwe Signed-off-by: David S. Miller --- net/tls/tls_device.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/tls/tls_device.c b/net/tls/tls_device.c index 9f3bdbc1e593..1b5f55cac613 100644 --- a/net/tls/tls_device.c +++ b/net/tls/tls_device.c @@ -904,7 +904,9 @@ int tls_set_device_offload_rx(struct sock *sk, struct tls_context *ctx) goto release_netdev; free_sw_resources: + up_read(&device_offload_lock); tls_sw_free_resources_rx(sk); + down_read(&device_offload_lock); release_ctx: ctx->priv_ctx_rx = NULL; release_netdev: -- cgit v1.2.3 From 12c7686111326148b4b5db189130522a4ad1be4a Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Fri, 19 Apr 2019 16:52:19 -0700 Subject: net/tls: don't leak IV and record seq when offload fails When device refuses the offload in tls_set_device_offload_rx() it calls tls_sw_free_resources_rx() to clean up software context state. Unfortunately, tls_sw_free_resources_rx() does not free all the state tls_set_sw_offload() allocated - it leaks IV and sequence number buffers. All other code paths which lead to tls_sw_release_resources_rx() (which tls_sw_free_resources_rx() calls) free those right before the call. Avoid the leak by moving freeing of iv and rec_seq into tls_sw_release_resources_rx(). Fixes: 4799ac81e52a ("tls: Add rx inline crypto offload") Signed-off-by: Jakub Kicinski Reviewed-by: Dirk van der Merwe Signed-off-by: David S. Miller --- net/tls/tls_device.c | 2 -- net/tls/tls_main.c | 5 +---- net/tls/tls_sw.c | 3 +++ 3 files changed, 4 insertions(+), 6 deletions(-) diff --git a/net/tls/tls_device.c b/net/tls/tls_device.c index 1b5f55cac613..cc0256939eb6 100644 --- a/net/tls/tls_device.c +++ b/net/tls/tls_device.c @@ -941,8 +941,6 @@ void tls_device_offload_cleanup_rx(struct sock *sk) } out: up_read(&device_offload_lock); - kfree(tls_ctx->rx.rec_seq); - kfree(tls_ctx->rx.iv); tls_sw_release_resources_rx(sk); } diff --git a/net/tls/tls_main.c b/net/tls/tls_main.c index 9547cea0ce3b..478603f43964 100644 --- a/net/tls/tls_main.c +++ b/net/tls/tls_main.c @@ -293,11 +293,8 @@ static void tls_sk_proto_close(struct sock *sk, long timeout) #endif } - if (ctx->rx_conf == TLS_SW) { - kfree(ctx->rx.rec_seq); - kfree(ctx->rx.iv); + if (ctx->rx_conf == TLS_SW) tls_sw_free_resources_rx(sk); - } #ifdef CONFIG_TLS_DEVICE if (ctx->rx_conf == TLS_HW) diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c index b50ced862f6f..29d6af43dd24 100644 --- a/net/tls/tls_sw.c +++ b/net/tls/tls_sw.c @@ -2078,6 +2078,9 @@ void tls_sw_release_resources_rx(struct sock *sk) struct tls_context *tls_ctx = tls_get_ctx(sk); struct tls_sw_context_rx *ctx = tls_sw_ctx_rx(tls_ctx); + kfree(tls_ctx->rx.rec_seq); + kfree(tls_ctx->rx.iv); + if (ctx->aead_recv) { kfree_skb(ctx->recv_pkt); ctx->recv_pkt = NULL; -- cgit v1.2.3 From 085b7755808aa11f78ab9377257e1dad2e6fa4bb Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 21 Apr 2019 10:45:57 -0700 Subject: Linux 5.1-rc6 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 71fd5c2ce067..abe13538a8c0 100644 --- a/Makefile +++ b/Makefile @@ -2,7 +2,7 @@ VERSION = 5 PATCHLEVEL = 1 SUBLEVEL = 0 -EXTRAVERSION = -rc5 +EXTRAVERSION = -rc6 NAME = Shy Crocodile # *DOCUMENTATION* -- cgit v1.2.3 From 39420fe04f093c15e1674ef56dbae0df109738ec Mon Sep 17 00:00:00 2001 From: Corentin Labbe Date: Sat, 20 Apr 2019 18:14:33 +0000 Subject: dt-bindings: add an explanation for internal phy-mode When working on the Allwinner internal PHY, the first work was to use the "internal" mode, but some answer was made my mail on what are really internal mean for PHY. This patch write that in the doc. Signed-off-by: Corentin Labbe Reviewed-by: Andrew Lunn Signed-off-by: David S. Miller --- Documentation/devicetree/bindings/net/ethernet.txt | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/Documentation/devicetree/bindings/net/ethernet.txt b/Documentation/devicetree/bindings/net/ethernet.txt index 2974e63ba311..a68621580584 100644 --- a/Documentation/devicetree/bindings/net/ethernet.txt +++ b/Documentation/devicetree/bindings/net/ethernet.txt @@ -16,7 +16,8 @@ Documentation/devicetree/bindings/phy/phy-bindings.txt. Specification). - phy-mode: string, operation mode of the PHY interface. This is now a de-facto standard property; supported values are: - * "internal" + * "internal" (Internal means there is not a standard bus between the MAC and + the PHY, something proprietary is being used to embed the PHY in the MAC.) * "mii" * "gmii" * "sgmii" -- cgit v1.2.3 From 26d1b8586b4fe14814ff4fd471cfc56014359e59 Mon Sep 17 00:00:00 2001 From: Corentin Labbe Date: Sat, 20 Apr 2019 16:43:01 +0000 Subject: Documentation: decnet: remove reference to CONFIG_DECNET_ROUTE_FWMARK CONFIG_DECNET_ROUTE_FWMARK was removed in commit 47dcf0cb1005 ("[NET]: Rethink mark field in struct flowi") Since nothing replace it (and nothindg need to replace it, simply remove it from documentation. Signed-off-by: Corentin Labbe Signed-off-by: David S. Miller --- Documentation/networking/decnet.txt | 2 -- 1 file changed, 2 deletions(-) diff --git a/Documentation/networking/decnet.txt b/Documentation/networking/decnet.txt index e12a4900cf72..d192f8b9948b 100644 --- a/Documentation/networking/decnet.txt +++ b/Documentation/networking/decnet.txt @@ -22,8 +22,6 @@ you'll need the following options as well... CONFIG_DECNET_ROUTER (to be able to add/delete routes) CONFIG_NETFILTER (will be required for the DECnet routing daemon) - CONFIG_DECNET_ROUTE_FWMARK is optional - Don't turn on SIOCGIFCONF support for DECnet unless you are really sure that you need it, in general you won't and it can cause ifconfig to malfunction. -- cgit v1.2.3 From 7caa56f006e9d712b44f27b32520c66420d5cbc6 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Mon, 15 Apr 2019 00:43:00 +0200 Subject: netfilter: ebtables: CONFIG_COMPAT: drop a bogus WARN_ON It means userspace gave us a ruleset where there is some other data after the ebtables target but before the beginning of the next rule. Fixes: 81e675c227ec ("netfilter: ebtables: add CONFIG_COMPAT support") Reported-by: syzbot+659574e7bcc7f7eb4df7@syzkaller.appspotmail.com Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/bridge/netfilter/ebtables.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c index eb15891f8b9f..3cad01ac64e4 100644 --- a/net/bridge/netfilter/ebtables.c +++ b/net/bridge/netfilter/ebtables.c @@ -2032,7 +2032,8 @@ static int ebt_size_mwt(struct compat_ebt_entry_mwt *match32, if (match_kern) match_kern->match_size = ret; - if (WARN_ON(type == EBT_COMPAT_TARGET && size_left)) + /* rule should have no remaining data after target */ + if (type == EBT_COMPAT_TARGET && size_left) return -EINVAL; match32 = (struct compat_ebt_entry_mwt *) buf; -- cgit v1.2.3 From 916f6efae62305796e012e7c3a7884a267cbacbf Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Wed, 17 Apr 2019 02:17:23 +0200 Subject: netfilter: never get/set skb->tstamp setting net.netfilter.nf_conntrack_timestamp=1 breaks xmit with fq scheduler. skb->tstamp might be "refreshed" using ktime_get_real(), but fq expects CLOCK_MONOTONIC. This patch removes all places in netfilter that check/set skb->tstamp: 1. To fix the bogus "start" time seen with conntrack timestamping for outgoing packets, never use skb->tstamp and always use current time. 2. In nfqueue and nflog, only use skb->tstamp for incoming packets, as determined by current hook (prerouting, input, forward). 3. xt_time has to use system clock as well rather than skb->tstamp. We could still use skb->tstamp for prerouting/input/foward, but I see no advantage to make this conditional. Fixes: fb420d5d91c1 ("tcp/fq: move back to CLOCK_MONOTONIC") Cc: Eric Dumazet Reported-by: Michal Soltys Signed-off-by: Florian Westphal Acked-by: Eric Dumazet Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_conntrack_core.c | 7 ++----- net/netfilter/nfnetlink_log.c | 2 +- net/netfilter/nfnetlink_queue.c | 2 +- net/netfilter/xt_time.c | 23 ++++++++++++++--------- 4 files changed, 18 insertions(+), 16 deletions(-) diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index 3c48d44d6fff..2a714527cde1 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -1017,12 +1017,9 @@ __nf_conntrack_confirm(struct sk_buff *skb) /* set conntrack timestamp, if enabled. */ tstamp = nf_conn_tstamp_find(ct); - if (tstamp) { - if (skb->tstamp == 0) - __net_timestamp(skb); + if (tstamp) + tstamp->start = ktime_get_real_ns(); - tstamp->start = ktime_to_ns(skb->tstamp); - } /* Since the lookup is lockless, hash insertion must be done after * starting the timer and setting the CONFIRMED bit. The RCU barriers * guarantee that no other CPU can find the conntrack before the above diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c index b1f9c5303f02..0b3347570265 100644 --- a/net/netfilter/nfnetlink_log.c +++ b/net/netfilter/nfnetlink_log.c @@ -540,7 +540,7 @@ __build_packet_message(struct nfnl_log_net *log, goto nla_put_failure; } - if (skb->tstamp) { + if (hooknum <= NF_INET_FORWARD && skb->tstamp) { struct nfulnl_msg_packet_timestamp ts; struct timespec64 kts = ktime_to_timespec64(skb->tstamp); ts.sec = cpu_to_be64(kts.tv_sec); diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c index 0dcc3592d053..e057b2961d31 100644 --- a/net/netfilter/nfnetlink_queue.c +++ b/net/netfilter/nfnetlink_queue.c @@ -582,7 +582,7 @@ nfqnl_build_packet_message(struct net *net, struct nfqnl_instance *queue, if (nfqnl_put_bridge(entry, skb) < 0) goto nla_put_failure; - if (entskb->tstamp) { + if (entry->state.hook <= NF_INET_FORWARD && entskb->tstamp) { struct nfqnl_msg_packet_timestamp ts; struct timespec64 kts = ktime_to_timespec64(entskb->tstamp); diff --git a/net/netfilter/xt_time.c b/net/netfilter/xt_time.c index c13bcd0ab491..8dbb4d48f2ed 100644 --- a/net/netfilter/xt_time.c +++ b/net/netfilter/xt_time.c @@ -163,19 +163,24 @@ time_mt(const struct sk_buff *skb, struct xt_action_param *par) s64 stamp; /* - * We cannot use get_seconds() instead of __net_timestamp() here. + * We need real time here, but we can neither use skb->tstamp + * nor __net_timestamp(). + * + * skb->tstamp and skb->skb_mstamp_ns overlap, however, they + * use different clock types (real vs monotonic). + * * Suppose you have two rules: - * 1. match before 13:00 - * 2. match after 13:00 + * 1. match before 13:00 + * 2. match after 13:00 + * * If you match against processing time (get_seconds) it * may happen that the same packet matches both rules if - * it arrived at the right moment before 13:00. + * it arrived at the right moment before 13:00, so it would be + * better to check skb->tstamp and set it via __net_timestamp() + * if needed. This however breaks outgoing packets tx timestamp, + * and causes them to get delayed forever by fq packet scheduler. */ - if (skb->tstamp == 0) - __net_timestamp((struct sk_buff *)skb); - - stamp = ktime_to_ns(skb->tstamp); - stamp = div_s64(stamp, NSEC_PER_SEC); + stamp = get_seconds(); if (info->flags & XT_TIME_LOCAL_TZ) /* Adjust for local timezone */ -- cgit v1.2.3 From d48668052b2603b6262459625c86108c493588dd Mon Sep 17 00:00:00 2001 From: Andrei Vagin Date: Wed, 17 Apr 2019 09:49:44 -0700 Subject: netfilter: fix nf_l4proto_log_invalid to log invalid packets It doesn't log a packet if sysctl_log_invalid isn't equal to protonum OR sysctl_log_invalid isn't equal to IPPROTO_RAW. This sentence is always true. I believe we need to replace OR to AND. Cc: Florian Westphal Fixes: c4f3db1595827 ("netfilter: conntrack: add and use nf_l4proto_log_invalid") Signed-off-by: Andrei Vagin Acked-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_conntrack_proto.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/netfilter/nf_conntrack_proto.c b/net/netfilter/nf_conntrack_proto.c index b9403a266a2e..37bb530d848f 100644 --- a/net/netfilter/nf_conntrack_proto.c +++ b/net/netfilter/nf_conntrack_proto.c @@ -55,7 +55,7 @@ void nf_l4proto_log_invalid(const struct sk_buff *skb, struct va_format vaf; va_list args; - if (net->ct.sysctl_log_invalid != protonum || + if (net->ct.sysctl_log_invalid != protonum && net->ct.sysctl_log_invalid != IPPROTO_RAW) return; -- cgit v1.2.3 From 36f0c423552dacaca152324b8e9bda42a6d88865 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Fri, 19 Apr 2019 15:40:14 +0200 Subject: x86/boot: Disable RSDP parsing temporarily The original intention to move RDSP parsing very early, before KASLR does its ranges selection, was to accommodate movable memory regions machines (CONFIG_MEMORY_HOTREMOVE) to still be able to do memory hotplug. However, that broke kexec'ing a kernel on EFI machines because depending on where the EFI systab was mapped, on at least one machine it isn't present in the kexec mapping of the second kernel, leading to a triple fault in the early code. Fixing this properly requires significantly involved surgery and we cannot allow ourselves to do that, that close to the merge window. So disable the RSDP parsing code temporarily until it is fixed properly in the next release cycle. Signed-off-by: Borislav Petkov Cc: Ard Biesheuvel Cc: Baoquan He Cc: Chao Fan Cc: Dave Hansen Cc: "H. Peter Anvin" Cc: indou.takao@jp.fujitsu.com Cc: Ingo Molnar Cc: Juergen Gross Cc: kasong@redhat.com Cc: Kees Cook Cc: "Kirill A. Shutemov" Cc: msys.mizuma@gmail.com Cc: Thomas Gleixner Cc: Tom Lendacky Cc: x86-ml Link: https://lkml.kernel.org/r/20190419141952.GE10324@zn.tnic --- arch/x86/boot/compressed/misc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/boot/compressed/misc.c b/arch/x86/boot/compressed/misc.c index c0d6c560df69..5a237e8dbf8d 100644 --- a/arch/x86/boot/compressed/misc.c +++ b/arch/x86/boot/compressed/misc.c @@ -352,7 +352,7 @@ asmlinkage __visible void *extract_kernel(void *rmode, memptr heap, boot_params->hdr.loadflags &= ~KASLR_FLAG; /* Save RSDP address for later use. */ - boot_params->acpi_rsdp_addr = get_rsdp_addr(); + /* boot_params->acpi_rsdp_addr = get_rsdp_addr(); */ sanitize_boot_params(boot_params); -- cgit v1.2.3 From 35fa71a030caa50458a043560d4814ea9bcd639f Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Mon, 22 Apr 2019 10:23:23 -0600 Subject: io_uring: fail io_uring_register(2) on a dying io_uring instance If we have multiple threads doing io_uring_register(2) on an io_uring fd, then we can potentially try and kill the percpu reference while someone else has already killed it. Prevent this race by failing io_uring_register(2) if the ref is marked dying. This is safe since we're inside the io_uring mutex. Fixes: b19062a56726 ("io_uring: fix possible deadlock between io_uring_{enter,register}") Reported-by: syzbot Signed-off-by: Jens Axboe --- fs/io_uring.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/fs/io_uring.c b/fs/io_uring.c index f65f85d89217..a2f39faed6a7 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -2934,6 +2934,14 @@ static int __io_uring_register(struct io_ring_ctx *ctx, unsigned opcode, { int ret; + /* + * We're inside the ring mutex, if the ref is already dying, then + * someone else killed the ctx or is already going through + * io_uring_register(). + */ + if (percpu_ref_is_dying(&ctx->refs)) + return -ENXIO; + percpu_ref_kill(&ctx->refs); /* -- cgit v1.2.3 From e523a29c4f2703bdb98f68ce1bb256e259fd8d5f Mon Sep 17 00:00:00 2001 From: Stefan Bühler Date: Fri, 19 Apr 2019 11:57:44 +0200 Subject: io_uring: fix race condition reading SQ entries MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit A read memory barrier is required between reading SQ tail and reading the actual data belonging to the SQ entry. Userspace needs a matching write barrier between writing SQ entries and updating SQ tail (using smp_store_release to update tail will do). Signed-off-by: Stefan Bühler Signed-off-by: Jens Axboe --- fs/io_uring.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index a2f39faed6a7..41e3a6f6a096 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -1739,7 +1739,8 @@ static bool io_get_sqring(struct io_ring_ctx *ctx, struct sqe_submit *s) head = ctx->cached_sq_head; /* See comment at the top of this file */ smp_rmb(); - if (head == READ_ONCE(ring->r.tail)) + /* make sure SQ entry isn't read before tail */ + if (head == smp_load_acquire(&ring->r.tail)) return false; head = READ_ONCE(ring->array[head & ctx->sq_mask]); -- cgit v1.2.3 From 0d7bae69c574c5f25802f8a71252e7d66933a3ab Mon Sep 17 00:00:00 2001 From: Stefan Bühler Date: Fri, 19 Apr 2019 11:57:45 +0200 Subject: io_uring: fix race condition when sq threads goes sleeping MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reading the SQ tail needs to come after setting IORING_SQ_NEED_WAKEUP in flags; there is no cheap barrier for ordering a store before a load, a full memory barrier is required. Userspace needs a full memory barrier between updating SQ tail and checking for the IORING_SQ_NEED_WAKEUP too. Signed-off-by: Stefan Bühler Signed-off-by: Jens Axboe --- fs/io_uring.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index 41e3a6f6a096..69910fd9ccca 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -1865,7 +1865,8 @@ static int io_sq_thread(void *data) /* Tell userspace we may need a wakeup call */ ctx->sq_ring->flags |= IORING_SQ_NEED_WAKEUP; - smp_wmb(); + /* make sure to read SQ tail after writing flags */ + smp_mb(); if (!io_get_sqring(ctx, &sqes[0])) { if (kthread_should_stop()) { -- cgit v1.2.3 From fb775faa9e46ff481e4ced11116c9bd45359cb43 Mon Sep 17 00:00:00 2001 From: Stefan Bühler Date: Fri, 19 Apr 2019 11:57:46 +0200 Subject: io_uring: fix poll full SQ detection MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit io_uring_poll shouldn't signal EPOLLOUT | EPOLLWRNORM if the queue is full; the old check would always signal EPOLLOUT | EPOLLWRNORM (unless there were U32_MAX - 1 entries in the SQ queue). Signed-off-by: Stefan Bühler Signed-off-by: Jens Axboe --- fs/io_uring.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index 69910fd9ccca..b998e98acd01 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -2576,7 +2576,8 @@ static __poll_t io_uring_poll(struct file *file, poll_table *wait) poll_wait(file, &ctx->cq_wait, wait); /* See comment at the top of this file */ smp_rmb(); - if (READ_ONCE(ctx->sq_ring->r.tail) + 1 != ctx->cached_sq_head) + if (READ_ONCE(ctx->sq_ring->r.tail) - ctx->cached_sq_head != + ctx->sq_ring->ring_entries) mask |= EPOLLOUT | EPOLLWRNORM; if (READ_ONCE(ctx->cq_ring->r.head) != ctx->cached_cq_tail) mask |= EPOLLIN | EPOLLRDNORM; -- cgit v1.2.3 From 6aaafc43a4ecc5bc8a3f6a2811d5eddc996a97f3 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Mon, 22 Apr 2019 12:34:23 -0400 Subject: nfsd: wake waiters blocked on file_lock before deleting it After a blocked nfsd file_lock request is deleted, knfsd will send a callback to the client and then free the request. Commit 16306a61d3b7 ("fs/locks: always delete_block after waiting.") changed it such that locks_delete_block is always called on a request after it is awoken, but that patch missed fixing up blocked nfsd request handling. Call locks_delete_block on the block to wake up any locks still blocked on the nfsd lock request before freeing it. Some of its callers already do this however, so just remove those calls. URL: https://bugzilla.kernel.org/show_bug.cgi?id=203363 Fixes: 16306a61d3b7 ("fs/locks: always delete_block after waiting.") Reported-by: Slawomir Pryczek Cc: Neil Brown Cc: stable@vger.kernel.org Signed-off-by: Jeff Layton Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs4state.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 6a45fb00c5fc..e87e15df2044 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -265,6 +265,7 @@ find_or_allocate_block(struct nfs4_lockowner *lo, struct knfsd_fh *fh, static void free_blocked_lock(struct nfsd4_blocked_lock *nbl) { + locks_delete_block(&nbl->nbl_lock); locks_release_private(&nbl->nbl_lock); kfree(nbl); } @@ -293,7 +294,6 @@ remove_blocked_locks(struct nfs4_lockowner *lo) nbl = list_first_entry(&reaplist, struct nfsd4_blocked_lock, nbl_lru); list_del_init(&nbl->nbl_lru); - locks_delete_block(&nbl->nbl_lock); free_blocked_lock(nbl); } } @@ -4863,7 +4863,6 @@ nfs4_laundromat(struct nfsd_net *nn) nbl = list_first_entry(&reaplist, struct nfsd4_blocked_lock, nbl_lru); list_del_init(&nbl->nbl_lru); - locks_delete_block(&nbl->nbl_lock); free_blocked_lock(nbl); } out: -- cgit v1.2.3 From f456458e4d25a8962d0946891617c76cc3ff5fb9 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Mon, 22 Apr 2019 12:34:24 -0400 Subject: nfsd: wake blocked file lock waiters before sending callback When a blocked NFS lock is "awoken" we send a callback to the server and then wake any hosts waiting on it. If a client attempts to get a lock and then drops off the net, we could end up waiting for a long time until we end up waking locks blocked on that request. So, wake any other waiting lock requests before sending the callback. Do this by calling locks_delete_block in a new "prepare" phase for CB_NOTIFY_LOCK callbacks. URL: https://bugzilla.kernel.org/show_bug.cgi?id=203363 Fixes: 16306a61d3b7 ("fs/locks: always delete_block after waiting.") Reported-by: Slawomir Pryczek Cc: Neil Brown Cc: stable@vger.kernel.org Signed-off-by: Jeff Layton Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs4state.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index e87e15df2044..f056b1d3fecd 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -298,6 +298,14 @@ remove_blocked_locks(struct nfs4_lockowner *lo) } } +static void +nfsd4_cb_notify_lock_prepare(struct nfsd4_callback *cb) +{ + struct nfsd4_blocked_lock *nbl = container_of(cb, + struct nfsd4_blocked_lock, nbl_cb); + locks_delete_block(&nbl->nbl_lock); +} + static int nfsd4_cb_notify_lock_done(struct nfsd4_callback *cb, struct rpc_task *task) { @@ -325,6 +333,7 @@ nfsd4_cb_notify_lock_release(struct nfsd4_callback *cb) } static const struct nfsd4_callback_ops nfsd4_cb_notify_lock_ops = { + .prepare = nfsd4_cb_notify_lock_prepare, .done = nfsd4_cb_notify_lock_done, .release = nfsd4_cb_notify_lock_release, }; -- cgit v1.2.3 From b561af36b1841088552464cdc3f6371d92f17710 Mon Sep 17 00:00:00 2001 From: Vinod Koul Date: Mon, 22 Apr 2019 15:15:32 +0530 Subject: net: stmmac: move stmmac_check_ether_addr() to driver probe stmmac_check_ether_addr() checks the MAC address and assigns one in driver open(). In many cases when we create slave netdevice, the dev addr is inherited from master but the master dev addr maybe NULL at that time, so move this call to driver probe so that address is always valid. Signed-off-by: Xiaofei Shen Tested-by: Xiaofei Shen Signed-off-by: Sneh Shah Signed-off-by: Vinod Koul Reviewed-by: Andrew Lunn Signed-off-by: David S. Miller --- drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index a26e36dbb5df..48712437d0da 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -2616,8 +2616,6 @@ static int stmmac_open(struct net_device *dev) u32 chan; int ret; - stmmac_check_ether_addr(priv); - if (priv->hw->pcs != STMMAC_PCS_RGMII && priv->hw->pcs != STMMAC_PCS_TBI && priv->hw->pcs != STMMAC_PCS_RTBI) { @@ -4303,6 +4301,8 @@ int stmmac_dvr_probe(struct device *device, if (ret) goto error_hw_init; + stmmac_check_ether_addr(priv); + /* Configure real RX and TX queues */ netif_set_real_num_rx_queues(ndev, priv->plat->rx_queues_to_use); netif_set_real_num_tx_queues(ndev, priv->plat->tx_queues_to_use); -- cgit v1.2.3 From f147384774a7b24dda4783a3dcd61af272757ea8 Mon Sep 17 00:00:00 2001 From: Stefan Wahren Date: Mon, 1 Apr 2019 20:38:19 +0200 Subject: dmaengine: bcm2835: Avoid GFP_KERNEL in device_prep_slave_sg The commit af19b7ce76ba ("mmc: bcm2835: Avoid possible races on data requests") introduces a possible circular locking dependency, which is triggered by swapping to the sdhost interface. So instead of reintroduce the race condition again, we could also avoid this situation by using GFP_NOWAIT for the allocation of the DMA buffer descriptors. Reported-by: Aaro Koskinen Signed-off-by: Stefan Wahren Fixes: af19b7ce76ba ("mmc: bcm2835: Avoid possible races on data requests") Link: http://lists.infradead.org/pipermail/linux-rpi-kernel/2019-March/008615.html Signed-off-by: Vinod Koul --- drivers/dma/bcm2835-dma.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/dma/bcm2835-dma.c b/drivers/dma/bcm2835-dma.c index ec8a291d62ba..54093ffd0aef 100644 --- a/drivers/dma/bcm2835-dma.c +++ b/drivers/dma/bcm2835-dma.c @@ -671,7 +671,7 @@ static struct dma_async_tx_descriptor *bcm2835_dma_prep_slave_sg( d = bcm2835_dma_create_cb_chain(chan, direction, false, info, extra, frames, src, dst, 0, 0, - GFP_KERNEL); + GFP_NOWAIT); if (!d) return NULL; -- cgit v1.2.3 From 907bd68a2edc491849e2fdcfe52c4596627bca94 Mon Sep 17 00:00:00 2001 From: Dirk Behme Date: Fri, 12 Apr 2019 07:29:13 +0200 Subject: dmaengine: sh: rcar-dmac: With cyclic DMA residue 0 is valid Having a cyclic DMA, a residue 0 is not an indication of a completed DMA. In case of cyclic DMA make sure that dma_set_residue() is called and with this a residue of 0 is forwarded correctly to the caller. Fixes: 3544d2878817 ("dmaengine: rcar-dmac: use result of updated get_residue in tx_status") Signed-off-by: Dirk Behme Signed-off-by: Achim Dahlhoff Signed-off-by: Hiroyuki Yokoyama Signed-off-by: Yao Lihua Reviewed-by: Yoshihiro Shimoda Reviewed-by: Laurent Pinchart Cc: # v4.8+ Signed-off-by: Vinod Koul --- drivers/dma/sh/rcar-dmac.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/dma/sh/rcar-dmac.c b/drivers/dma/sh/rcar-dmac.c index 2b4f25698169..54810ffd95e2 100644 --- a/drivers/dma/sh/rcar-dmac.c +++ b/drivers/dma/sh/rcar-dmac.c @@ -1368,6 +1368,7 @@ static enum dma_status rcar_dmac_tx_status(struct dma_chan *chan, enum dma_status status; unsigned long flags; unsigned int residue; + bool cyclic; status = dma_cookie_status(chan, cookie, txstate); if (status == DMA_COMPLETE || !txstate) @@ -1375,10 +1376,11 @@ static enum dma_status rcar_dmac_tx_status(struct dma_chan *chan, spin_lock_irqsave(&rchan->lock, flags); residue = rcar_dmac_chan_get_residue(rchan, cookie); + cyclic = rchan->desc.running ? rchan->desc.running->cyclic : false; spin_unlock_irqrestore(&rchan->lock, flags); /* if there's no residue, the cookie is complete */ - if (!residue) + if (!residue && !cyclic) return DMA_COMPLETE; dma_set_residue(txstate, residue); -- cgit v1.2.3 From 6e7da74775348d96e2d7efaf3f91410e18c481ef Mon Sep 17 00:00:00 2001 From: Achim Dahlhoff Date: Fri, 12 Apr 2019 07:29:14 +0200 Subject: dmaengine: sh: rcar-dmac: Fix glitch in dmaengine_tx_status The tx_status poll in the rcar_dmac driver reads the status register which indicates which chunk is busy (DMACHCRB). Afterwards the point inside the chunk is read from DMATCRB. It is possible that the chunk has changed between the two reads. The result is a non-monotonous increase of the residue. Fix this by introducing a 'safe read' logic. Fixes: 73a47bd0da66 ("dmaengine: rcar-dmac: use TCRB instead of TCR for residue") Signed-off-by: Achim Dahlhoff Signed-off-by: Dirk Behme Reviewed-by: Yoshihiro Shimoda Cc: # v4.16+ Signed-off-by: Vinod Koul --- drivers/dma/sh/rcar-dmac.c | 26 +++++++++++++++++++++++--- 1 file changed, 23 insertions(+), 3 deletions(-) diff --git a/drivers/dma/sh/rcar-dmac.c b/drivers/dma/sh/rcar-dmac.c index 54810ffd95e2..e2a5398f89b5 100644 --- a/drivers/dma/sh/rcar-dmac.c +++ b/drivers/dma/sh/rcar-dmac.c @@ -1282,6 +1282,9 @@ static unsigned int rcar_dmac_chan_get_residue(struct rcar_dmac_chan *chan, enum dma_status status; unsigned int residue = 0; unsigned int dptr = 0; + unsigned int chcrb; + unsigned int tcrb; + unsigned int i; if (!desc) return 0; @@ -1329,6 +1332,24 @@ static unsigned int rcar_dmac_chan_get_residue(struct rcar_dmac_chan *chan, return 0; } + /* + * We need to read two registers. + * Make sure the control register does not skip to next chunk + * while reading the counter. + * Trying it 3 times should be enough: Initial read, retry, retry + * for the paranoid. + */ + for (i = 0; i < 3; i++) { + chcrb = rcar_dmac_chan_read(chan, RCAR_DMACHCRB) & + RCAR_DMACHCRB_DPTR_MASK; + tcrb = rcar_dmac_chan_read(chan, RCAR_DMATCRB); + /* Still the same? */ + if (chcrb == (rcar_dmac_chan_read(chan, RCAR_DMACHCRB) & + RCAR_DMACHCRB_DPTR_MASK)) + break; + } + WARN_ONCE(i >= 3, "residue might be not continuous!"); + /* * In descriptor mode the descriptor running pointer is not maintained * by the interrupt handler, find the running descriptor from the @@ -1336,8 +1357,7 @@ static unsigned int rcar_dmac_chan_get_residue(struct rcar_dmac_chan *chan, * mode just use the running descriptor pointer. */ if (desc->hwdescs.use) { - dptr = (rcar_dmac_chan_read(chan, RCAR_DMACHCRB) & - RCAR_DMACHCRB_DPTR_MASK) >> RCAR_DMACHCRB_DPTR_SHIFT; + dptr = chcrb >> RCAR_DMACHCRB_DPTR_SHIFT; if (dptr == 0) dptr = desc->nchunks; dptr--; @@ -1355,7 +1375,7 @@ static unsigned int rcar_dmac_chan_get_residue(struct rcar_dmac_chan *chan, } /* Add the residue for the current chunk. */ - residue += rcar_dmac_chan_read(chan, RCAR_DMATCRB) << desc->xfer_shift; + residue += tcrb << desc->xfer_shift; return residue; } -- cgit v1.2.3 From d4d18e3ec6091843f607e8929a56723e28f393a6 Mon Sep 17 00:00:00 2001 From: Bjorn Andersson Date: Wed, 17 Apr 2019 21:29:29 -0700 Subject: arm64: mm: Ensure tail of unaligned initrd is reserved In the event that the start address of the initrd is not aligned, but has an aligned size, the base + size will not cover the entire initrd image and there is a chance that the kernel will corrupt the tail of the image. By aligning the end of the initrd to a page boundary and then subtracting the adjusted start address the memblock reservation will cover all pages that contains the initrd. Fixes: c756c592e442 ("arm64: Utilize phys_initrd_start/phys_initrd_size") Cc: stable@vger.kernel.org Acked-by: Will Deacon Signed-off-by: Bjorn Andersson Signed-off-by: Catalin Marinas --- arch/arm64/mm/init.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c index 6bc135042f5e..7cae155e81a5 100644 --- a/arch/arm64/mm/init.c +++ b/arch/arm64/mm/init.c @@ -363,7 +363,7 @@ void __init arm64_memblock_init(void) * Otherwise, this is a no-op */ u64 base = phys_initrd_start & PAGE_MASK; - u64 size = PAGE_ALIGN(phys_initrd_size); + u64 size = PAGE_ALIGN(phys_initrd_start + phys_initrd_size) - base; /* * We can only add back the initrd memory if we don't end up -- cgit v1.2.3 From f1267cf3c01b12e0f843fb6a7450a7f0b2efab8a Mon Sep 17 00:00:00 2001 From: Bhagavathi Perumal S Date: Tue, 16 Apr 2019 12:54:40 +0530 Subject: mac80211: Fix kernel panic due to use of txq after free MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The txq of vif is added to active_txqs list for ATF TXQ scheduling in the function ieee80211_queue_skb(), but it was not properly removed before freeing the txq object. It was causing use after free of the txq objects from the active_txqs list, result was kernel panic due to invalid memory access. Fix kernel invalid memory access by properly removing txq object from active_txqs list before free the object. Signed-off-by: Bhagavathi Perumal S Acked-by: Toke Høiland-Jørgensen Signed-off-by: Johannes Berg --- net/mac80211/iface.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c index 4a6ff1482a9f..02d2e6f11e93 100644 --- a/net/mac80211/iface.c +++ b/net/mac80211/iface.c @@ -1908,6 +1908,9 @@ void ieee80211_if_remove(struct ieee80211_sub_if_data *sdata) list_del_rcu(&sdata->list); mutex_unlock(&sdata->local->iflist_mtx); + if (sdata->vif.txq) + ieee80211_txq_purge(sdata->local, to_txq_info(sdata->vif.txq)); + synchronize_rcu(); if (sdata->dev) { -- cgit v1.2.3 From 8772eed9a95abd82cf188c93edb9645543ca4418 Mon Sep 17 00:00:00 2001 From: Sriram R Date: Tue, 16 Apr 2019 11:16:33 +0530 Subject: cfg80211: Notify previous user request during self managed wiphy registration Commit c82c06ce43d3("cfg80211: Notify all User Hints To self managed wiphys") notified all new user hints to self managed wiphy's after device registration. But it didn't do this for anything other than cell base hints done before registration. This needs to be done during wiphy registration of a self managed device also, so that the previous user settings are retained. Fixes: c82c06ce43d3 ("cfg80211: Notify all User Hints To self managed wiphys") Signed-off-by: Sriram R Signed-off-by: Johannes Berg --- net/wireless/reg.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/net/wireless/reg.c b/net/wireless/reg.c index 0ba778f371cb..a6fd5ce199da 100644 --- a/net/wireless/reg.c +++ b/net/wireless/reg.c @@ -3778,10 +3778,9 @@ void wiphy_regulatory_register(struct wiphy *wiphy) /* * The last request may have been received before this * registration call. Call the driver notifier if - * initiator is USER and user type is CELL_BASE. + * initiator is USER. */ - if (lr->initiator == NL80211_REGDOM_SET_BY_USER && - lr->user_reg_hint_type == NL80211_USER_REG_HINT_CELL_BASE) + if (lr->initiator == NL80211_REGDOM_SET_BY_USER) reg_call_notifier(wiphy, lr); } -- cgit v1.2.3 From 517879147493a5e1df6b89a50f708f1133fcaddb Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 15 Apr 2019 11:39:33 +0200 Subject: mac80211: don't attempt to rename ERR_PTR() debugfs dirs We need to dereference the directory to get its parent to be able to rename it, so it's clearly not safe to try to do this with ERR_PTR() pointers. Skip in this case. It seems that this is most likely what was causing the report by syzbot, but I'm not entirely sure as it didn't come with a reproducer this time. Cc: stable@vger.kernel.org Reported-by: syzbot+4ece1a28b8f4730547c9@syzkaller.appspotmail.com Signed-off-by: Johannes Berg --- net/mac80211/debugfs_netdev.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/mac80211/debugfs_netdev.c b/net/mac80211/debugfs_netdev.c index cff0fb3578c9..deb3faf08337 100644 --- a/net/mac80211/debugfs_netdev.c +++ b/net/mac80211/debugfs_netdev.c @@ -841,7 +841,7 @@ void ieee80211_debugfs_rename_netdev(struct ieee80211_sub_if_data *sdata) dir = sdata->vif.debugfs_dir; - if (!dir) + if (IS_ERR_OR_NULL(dir)) return; sprintf(buf, "netdev:%s", sdata->name); -- cgit v1.2.3 From 4e69ecf4da1ee0b2ac735e1f1bb13935acd5a38d Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Fri, 12 Apr 2019 23:59:25 -0700 Subject: arm64/module: ftrace: deal with place relative nature of PLTs Another bodge for the ftrace PLT code: plt_entries_equal() now takes the place relative nature of the ADRP/ADD based PLT entries into account, which means that a struct trampoline instance on the stack is no longer equal to the same set of opcodes in the module struct, given that they don't point to the same place in memory anymore. Work around this by using memcmp() in the ftrace PLT handling code. Acked-by: Will Deacon Tested-by: dann frazier Signed-off-by: Ard Biesheuvel Signed-off-by: Catalin Marinas --- arch/arm64/kernel/ftrace.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/arch/arm64/kernel/ftrace.c b/arch/arm64/kernel/ftrace.c index 07b298120182..65a51331088e 100644 --- a/arch/arm64/kernel/ftrace.c +++ b/arch/arm64/kernel/ftrace.c @@ -103,10 +103,15 @@ int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr) * to be revisited if support for multiple ftrace entry points * is added in the future, but for now, the pr_err() below * deals with a theoretical issue only. + * + * Note that PLTs are place relative, and plt_entries_equal() + * checks whether they point to the same target. Here, we need + * to check if the actual opcodes are in fact identical, + * regardless of the offset in memory so use memcmp() instead. */ trampoline = get_plt_entry(addr, mod->arch.ftrace_trampoline); - if (!plt_entries_equal(mod->arch.ftrace_trampoline, - &trampoline)) { + if (memcmp(mod->arch.ftrace_trampoline, &trampoline, + sizeof(trampoline))) { if (plt_entry_is_initialized(mod->arch.ftrace_trampoline)) { pr_err("ftrace: far branches to multiple entry points unsupported inside a single module\n"); return -EINVAL; -- cgit v1.2.3 From d4fad0a426c6e26f48c9a7cdd21a7fe9c198d645 Mon Sep 17 00:00:00 2001 From: Lucas Stach Date: Fri, 12 Apr 2019 17:59:40 +0200 Subject: gpu: ipu-v3: dp: fix CSC handling Initialize the flow input colorspaces to unknown and reset to that value when the channel gets disabled. This avoids the state getting mixed up with a previous mode. Also keep the CSC settings for the background flow intact when disabling the foreground flow. Root-caused-by: Jonathan Marek Signed-off-by: Lucas Stach Signed-off-by: Philipp Zabel --- drivers/gpu/ipu-v3/ipu-dp.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/ipu-v3/ipu-dp.c b/drivers/gpu/ipu-v3/ipu-dp.c index 9b2b3fa479c4..5e44ff1f2085 100644 --- a/drivers/gpu/ipu-v3/ipu-dp.c +++ b/drivers/gpu/ipu-v3/ipu-dp.c @@ -195,7 +195,8 @@ int ipu_dp_setup_channel(struct ipu_dp *dp, ipu_dp_csc_init(flow, flow->foreground.in_cs, flow->out_cs, DP_COM_CONF_CSC_DEF_BOTH); } else { - if (flow->foreground.in_cs == flow->out_cs) + if (flow->foreground.in_cs == IPUV3_COLORSPACE_UNKNOWN || + flow->foreground.in_cs == flow->out_cs) /* * foreground identical to output, apply color * conversion on background @@ -261,6 +262,8 @@ void ipu_dp_disable_channel(struct ipu_dp *dp, bool sync) struct ipu_dp_priv *priv = flow->priv; u32 reg, csc; + dp->in_cs = IPUV3_COLORSPACE_UNKNOWN; + if (!dp->foreground) return; @@ -268,8 +271,9 @@ void ipu_dp_disable_channel(struct ipu_dp *dp, bool sync) reg = readl(flow->base + DP_COM_CONF); csc = reg & DP_COM_CONF_CSC_DEF_MASK; - if (csc == DP_COM_CONF_CSC_DEF_FG) - reg &= ~DP_COM_CONF_CSC_DEF_MASK; + reg &= ~DP_COM_CONF_CSC_DEF_MASK; + if (csc == DP_COM_CONF_CSC_DEF_BOTH || csc == DP_COM_CONF_CSC_DEF_BG) + reg |= DP_COM_CONF_CSC_DEF_BG; reg &= ~DP_COM_CONF_FG_EN; writel(reg, flow->base + DP_COM_CONF); @@ -347,6 +351,8 @@ int ipu_dp_init(struct ipu_soc *ipu, struct device *dev, unsigned long base) mutex_init(&priv->mutex); for (i = 0; i < IPUV3_NUM_FLOWS; i++) { + priv->flow[i].background.in_cs = IPUV3_COLORSPACE_UNKNOWN; + priv->flow[i].foreground.in_cs = IPUV3_COLORSPACE_UNKNOWN; priv->flow[i].foreground.foreground = true; priv->flow[i].base = priv->base + ipu_dp_flow_base[i]; priv->flow[i].priv = priv; -- cgit v1.2.3 From 7bcde275eb1d0ac8793c77c7e666a886eb16633d Mon Sep 17 00:00:00 2001 From: Lucas Stach Date: Fri, 12 Apr 2019 17:59:41 +0200 Subject: drm/imx: don't skip DP channel disable for background plane In order to make sure that the plane color space gets reset correctly. Signed-off-by: Lucas Stach Signed-off-by: Philipp Zabel --- drivers/gpu/drm/imx/ipuv3-crtc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/imx/ipuv3-crtc.c b/drivers/gpu/drm/imx/ipuv3-crtc.c index ec3602ebbc1c..54011df8c2e8 100644 --- a/drivers/gpu/drm/imx/ipuv3-crtc.c +++ b/drivers/gpu/drm/imx/ipuv3-crtc.c @@ -71,7 +71,7 @@ static void ipu_crtc_disable_planes(struct ipu_crtc *ipu_crtc, if (disable_partial) ipu_plane_disable(ipu_crtc->plane[1], true); if (disable_full) - ipu_plane_disable(ipu_crtc->plane[0], false); + ipu_plane_disable(ipu_crtc->plane[0], true); } static void ipu_crtc_atomic_disable(struct drm_crtc *crtc, -- cgit v1.2.3 From 8358e3a8264a228cf2dfb6f3a05c0328f4118f12 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Tue, 23 Apr 2019 08:17:58 -0600 Subject: io_uring: remove 'state' argument from io_{read,write} path Since commit 09bb839434b we don't use the state argument for any sort of on-stack caching in the io read and write path. Remove the stale and unused argument from them, and bubble it up to __io_submit_sqe() and down to io_prep_rw(). Signed-off-by: Jens Axboe --- fs/io_uring.c | 25 ++++++++++++------------- 1 file changed, 12 insertions(+), 13 deletions(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index b998e98acd01..0e9fb2cb1984 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -740,7 +740,7 @@ static bool io_file_supports_async(struct file *file) } static int io_prep_rw(struct io_kiocb *req, const struct sqe_submit *s, - bool force_nonblock, struct io_submit_state *state) + bool force_nonblock) { const struct io_uring_sqe *sqe = s->sqe; struct io_ring_ctx *ctx = req->ctx; @@ -938,7 +938,7 @@ static void io_async_list_note(int rw, struct io_kiocb *req, size_t len) } static int io_read(struct io_kiocb *req, const struct sqe_submit *s, - bool force_nonblock, struct io_submit_state *state) + bool force_nonblock) { struct iovec inline_vecs[UIO_FASTIOV], *iovec = inline_vecs; struct kiocb *kiocb = &req->rw; @@ -947,7 +947,7 @@ static int io_read(struct io_kiocb *req, const struct sqe_submit *s, size_t iov_count; int ret; - ret = io_prep_rw(req, s, force_nonblock, state); + ret = io_prep_rw(req, s, force_nonblock); if (ret) return ret; file = kiocb->ki_filp; @@ -985,7 +985,7 @@ static int io_read(struct io_kiocb *req, const struct sqe_submit *s, } static int io_write(struct io_kiocb *req, const struct sqe_submit *s, - bool force_nonblock, struct io_submit_state *state) + bool force_nonblock) { struct iovec inline_vecs[UIO_FASTIOV], *iovec = inline_vecs; struct kiocb *kiocb = &req->rw; @@ -994,7 +994,7 @@ static int io_write(struct io_kiocb *req, const struct sqe_submit *s, size_t iov_count; int ret; - ret = io_prep_rw(req, s, force_nonblock, state); + ret = io_prep_rw(req, s, force_nonblock); if (ret) return ret; @@ -1336,8 +1336,7 @@ static int io_poll_add(struct io_kiocb *req, const struct io_uring_sqe *sqe) } static int __io_submit_sqe(struct io_ring_ctx *ctx, struct io_kiocb *req, - const struct sqe_submit *s, bool force_nonblock, - struct io_submit_state *state) + const struct sqe_submit *s, bool force_nonblock) { int ret, opcode; @@ -1353,18 +1352,18 @@ static int __io_submit_sqe(struct io_ring_ctx *ctx, struct io_kiocb *req, case IORING_OP_READV: if (unlikely(s->sqe->buf_index)) return -EINVAL; - ret = io_read(req, s, force_nonblock, state); + ret = io_read(req, s, force_nonblock); break; case IORING_OP_WRITEV: if (unlikely(s->sqe->buf_index)) return -EINVAL; - ret = io_write(req, s, force_nonblock, state); + ret = io_write(req, s, force_nonblock); break; case IORING_OP_READ_FIXED: - ret = io_read(req, s, force_nonblock, state); + ret = io_read(req, s, force_nonblock); break; case IORING_OP_WRITE_FIXED: - ret = io_write(req, s, force_nonblock, state); + ret = io_write(req, s, force_nonblock); break; case IORING_OP_FSYNC: ret = io_fsync(req, s->sqe, force_nonblock); @@ -1457,7 +1456,7 @@ restart: s->has_user = cur_mm != NULL; s->needs_lock = true; do { - ret = __io_submit_sqe(ctx, req, s, false, NULL); + ret = __io_submit_sqe(ctx, req, s, false); /* * We can get EAGAIN for polled IO even though * we're forcing a sync submission from here, @@ -1623,7 +1622,7 @@ static int io_submit_sqe(struct io_ring_ctx *ctx, struct sqe_submit *s, if (unlikely(ret)) goto out; - ret = __io_submit_sqe(ctx, req, s, true, state); + ret = __io_submit_sqe(ctx, req, s, true); if (ret == -EAGAIN) { struct io_uring_sqe *sqe_copy; -- cgit v1.2.3 From f5d356328d676deca698d01324000e0d98fba643 Mon Sep 17 00:00:00 2001 From: Jonathan Neuschäfer Date: Sat, 20 Apr 2019 14:50:50 +0200 Subject: drm/sched: Fix description of drm_sched_stop MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Since commit 222b5f044159 ("drm/sched: Refactor ring mirror list handling."), drm_sched_hw_job_reset is no longer there, so let's adjust the doc comment accordingly. Reviewed-by: Andrey Grodzovsky Signed-off-by: Jonathan Neuschäfer Signed-off-by: Alex Deucher --- drivers/gpu/drm/scheduler/sched_main.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/gpu/drm/scheduler/sched_main.c b/drivers/gpu/drm/scheduler/sched_main.c index 19fc601c9eeb..a1bec2779e76 100644 --- a/drivers/gpu/drm/scheduler/sched_main.c +++ b/drivers/gpu/drm/scheduler/sched_main.c @@ -366,10 +366,9 @@ void drm_sched_increase_karma(struct drm_sched_job *bad) EXPORT_SYMBOL(drm_sched_increase_karma); /** - * drm_sched_hw_job_reset - stop the scheduler if it contains the bad job + * drm_sched_stop - stop the scheduler * * @sched: scheduler instance - * @bad: bad scheduler job * */ void drm_sched_stop(struct drm_gpu_scheduler *sched) -- cgit v1.2.3 From 503621628b32782a07b2318e4112bd4372aa3401 Mon Sep 17 00:00:00 2001 From: Russell King Date: Tue, 23 Apr 2019 17:09:38 +0100 Subject: ARM: fix function graph tracer and unwinder dependencies Naresh Kamboju recently reported that the function-graph tracer crashes on ARM. The function-graph tracer assumes that the kernel is built with frame pointers. We explicitly disabled the function-graph tracer when building Thumb2, since the Thumb2 ABI doesn't have frame pointers. We recently changed the way the unwinder method was selected, which seems to have made it more likely that we can end up with the function- graph tracer enabled but without the kernel built with frame pointers. Fix up the function graph tracer dependencies so the option is not available when we have no possibility of having frame pointers, and adjust the dependencies on the unwinder option to hide the non-frame pointer unwinder options if the function-graph tracer is enabled. Reviewed-by: Masami Hiramatsu Tested-by: Masami Hiramatsu Signed-off-by: Russell King --- arch/arm/Kconfig | 2 +- arch/arm/Kconfig.debug | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index 054ead960f98..5bd8d781b9ff 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -73,7 +73,7 @@ config ARM select HAVE_EFFICIENT_UNALIGNED_ACCESS if (CPU_V6 || CPU_V6K || CPU_V7) && MMU select HAVE_EXIT_THREAD select HAVE_FTRACE_MCOUNT_RECORD if !XIP_KERNEL - select HAVE_FUNCTION_GRAPH_TRACER if !THUMB2_KERNEL + select HAVE_FUNCTION_GRAPH_TRACER if !THUMB2_KERNEL && !CC_IS_CLANG select HAVE_FUNCTION_TRACER if !XIP_KERNEL select HAVE_GCC_PLUGINS select HAVE_HW_BREAKPOINT if PERF_EVENTS && (CPU_V6 || CPU_V6K || CPU_V7) diff --git a/arch/arm/Kconfig.debug b/arch/arm/Kconfig.debug index 6d6e0330930b..e388af4594a6 100644 --- a/arch/arm/Kconfig.debug +++ b/arch/arm/Kconfig.debug @@ -47,8 +47,8 @@ config DEBUG_WX choice prompt "Choose kernel unwinder" - default UNWINDER_ARM if AEABI && !FUNCTION_GRAPH_TRACER - default UNWINDER_FRAME_POINTER if !AEABI || FUNCTION_GRAPH_TRACER + default UNWINDER_ARM if AEABI + default UNWINDER_FRAME_POINTER if !AEABI help This determines which method will be used for unwinding kernel stack traces for panics, oopses, bugs, warnings, perf, /proc//stack, @@ -65,7 +65,7 @@ config UNWINDER_FRAME_POINTER config UNWINDER_ARM bool "ARM EABI stack unwinder" - depends on AEABI + depends on AEABI && !FUNCTION_GRAPH_TRACER select ARM_UNWIND help This option enables stack unwinding support in the kernel -- cgit v1.2.3 From c3143967807adb1357c36b68a7563fc0c4e1f615 Mon Sep 17 00:00:00 2001 From: Tigran Tadevosyan Date: Fri, 5 Apr 2019 14:16:13 +0100 Subject: ARM: 8856/1: NOMMU: Fix CCR register faulty initialization when MPU is disabled When CONFIG_ARM_MPU is not defined, the base address of v7M SCB register is not initialized with correct value. This prevents enabling I/D caches when the L1 cache poilcy is applied in kernel. Fixes: 3c24121039c9da14692eb48f6e39565b28c0f3cf ("ARM: 8756/1: NOMMU: Postpone MPU activation till __after_proc_init") Signed-off-by: Tigran Tadevosyan Signed-off-by: Vladimir Murzin Signed-off-by: Russell King --- arch/arm/kernel/head-nommu.S | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/kernel/head-nommu.S b/arch/arm/kernel/head-nommu.S index c08d2d890f7b..b38bbd011b35 100644 --- a/arch/arm/kernel/head-nommu.S +++ b/arch/arm/kernel/head-nommu.S @@ -133,9 +133,9 @@ __secondary_data: */ .text __after_proc_init: -#ifdef CONFIG_ARM_MPU M_CLASS(movw r12, #:lower16:BASEADDR_V7M_SCB) M_CLASS(movt r12, #:upper16:BASEADDR_V7M_SCB) +#ifdef CONFIG_ARM_MPU M_CLASS(ldr r3, [r12, 0x50]) AR_CLASS(mrc p15, 0, r3, c0, c1, 4) @ Read ID_MMFR0 and r3, r3, #(MMFR0_PMSA) @ PMSA field -- cgit v1.2.3 From e17b1af96b2afc38e684aa2f1033387e2ed10029 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Fri, 12 Apr 2019 22:34:18 +0100 Subject: ARM: 8857/1: efi: enable CP15 DMB instructions before cleaning the cache The EFI stub is entered with the caches and MMU enabled by the firmware, and once the stub is ready to hand over to the decompressor, we clean and disable the caches. The cache clean routines use CP15 barrier instructions, which can be disabled via SCTLR. Normally, when using the provided cache handling routines to enable the caches and MMU, this bit is enabled as well. However, but since we entered the stub with the caches already enabled, this routine is not executed before we call the cache clean routines, resulting in undefined instruction exceptions if the firmware never enabled this bit. So set the bit explicitly in the EFI entry code, but do so in a way that guarantees that the resulting code can still run on v6 cores as well (which are guaranteed to have CP15 barriers enabled) Cc: # v4.9+ Acked-by: Marc Zyngier Signed-off-by: Ard Biesheuvel Signed-off-by: Russell King --- arch/arm/boot/compressed/head.S | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/arch/arm/boot/compressed/head.S b/arch/arm/boot/compressed/head.S index 6c7ccb428c07..7135820f76d4 100644 --- a/arch/arm/boot/compressed/head.S +++ b/arch/arm/boot/compressed/head.S @@ -1438,7 +1438,21 @@ ENTRY(efi_stub_entry) @ Preserve return value of efi_entry() in r4 mov r4, r0 - bl cache_clean_flush + + @ our cache maintenance code relies on CP15 barrier instructions + @ but since we arrived here with the MMU and caches configured + @ by UEFI, we must check that the CP15BEN bit is set in SCTLR. + @ Note that this bit is RAO/WI on v6 and earlier, so the ISB in + @ the enable path will be executed on v7+ only. + mrc p15, 0, r1, c1, c0, 0 @ read SCTLR + tst r1, #(1 << 5) @ CP15BEN bit set? + bne 0f + orr r1, r1, #(1 << 5) @ CP15 barrier instructions + mcr p15, 0, r1, c1, c0, 0 @ write SCTLR + ARM( .inst 0xf57ff06f @ v7+ isb ) + THUMB( isb ) + +0: bl cache_clean_flush bl cache_off @ Set parameters for booting zImage according to boot protocol -- cgit v1.2.3 From 2f23a2a768bee7ad2ff1e9527c3f7e279e794a46 Mon Sep 17 00:00:00 2001 From: Daniel Gomez Date: Mon, 22 Apr 2019 21:08:03 +0200 Subject: spi: Micrel eth switch: declare missing of table Add missing table for SPI driver relying on SPI device match since compatible is in a DT binding or in a DTS. Before this patch: modinfo drivers/net/phy/spi_ks8995.ko | grep alias alias: spi:ksz8795 alias: spi:ksz8864 alias: spi:ks8995 After this patch: modinfo drivers/net/phy/spi_ks8995.ko | grep alias alias: spi:ksz8795 alias: spi:ksz8864 alias: spi:ks8995 alias: of:N*T*Cmicrel,ksz8795C* alias: of:N*T*Cmicrel,ksz8795 alias: of:N*T*Cmicrel,ksz8864C* alias: of:N*T*Cmicrel,ksz8864 alias: of:N*T*Cmicrel,ks8995C* alias: of:N*T*Cmicrel,ks8995 Reported-by: Javier Martinez Canillas Signed-off-by: Daniel Gomez Signed-off-by: David S. Miller --- drivers/net/phy/spi_ks8995.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/drivers/net/phy/spi_ks8995.c b/drivers/net/phy/spi_ks8995.c index 92b64e254b44..7475cef17cf7 100644 --- a/drivers/net/phy/spi_ks8995.c +++ b/drivers/net/phy/spi_ks8995.c @@ -159,6 +159,14 @@ static const struct spi_device_id ks8995_id[] = { }; MODULE_DEVICE_TABLE(spi, ks8995_id); +static const struct of_device_id ks8895_spi_of_match[] = { + { .compatible = "micrel,ks8995" }, + { .compatible = "micrel,ksz8864" }, + { .compatible = "micrel,ksz8795" }, + { }, + }; +MODULE_DEVICE_TABLE(of, ks8895_spi_of_match); + static inline u8 get_chip_id(u8 val) { return (val >> ID1_CHIPID_S) & ID1_CHIPID_M; @@ -526,6 +534,7 @@ static int ks8995_remove(struct spi_device *spi) static struct spi_driver ks8995_driver = { .driver = { .name = "spi-ks8995", + .of_match_table = of_match_ptr(ks8895_spi_of_match), }, .probe = ks8995_probe, .remove = ks8995_remove, -- cgit v1.2.3 From d04830531d0c4a99c897a44038e5da3d23331d2f Mon Sep 17 00:00:00 2001 From: Daniel Gomez Date: Mon, 22 Apr 2019 21:08:04 +0200 Subject: spi: ST ST95HF NFC: declare missing of table Add missing table for SPI driver relying on SPI device match since compatible is in a DT binding or in a DTS. Before this patch: modinfo drivers/nfc/st95hf/st95hf.ko | grep alias alias: spi:st95hf After this patch: modinfo drivers/nfc/st95hf/st95hf.ko | grep alias alias: spi:st95hf alias: of:N*T*Cst,st95hfC* alias: of:N*T*Cst,st95hf Reported-by: Javier Martinez Canillas Signed-off-by: Daniel Gomez Signed-off-by: David S. Miller --- drivers/nfc/st95hf/core.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/nfc/st95hf/core.c b/drivers/nfc/st95hf/core.c index 2b26f762fbc3..01acb6e53365 100644 --- a/drivers/nfc/st95hf/core.c +++ b/drivers/nfc/st95hf/core.c @@ -1074,6 +1074,12 @@ static const struct spi_device_id st95hf_id[] = { }; MODULE_DEVICE_TABLE(spi, st95hf_id); +static const struct of_device_id st95hf_spi_of_match[] = { + { .compatible = "st,st95hf" }, + { }, +}; +MODULE_DEVICE_TABLE(of, st95hf_spi_of_match); + static int st95hf_probe(struct spi_device *nfc_spi_dev) { int ret; @@ -1260,6 +1266,7 @@ static struct spi_driver st95hf_driver = { .driver = { .name = "st95hf", .owner = THIS_MODULE, + .of_match_table = of_match_ptr(st95hf_spi_of_match), }, .id_table = st95hf_id, .probe = st95hf_probe, -- cgit v1.2.3 From 1bcb344086f3ecf8d6705f6d708441baa823beb3 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Mon, 15 Apr 2019 12:00:42 -0400 Subject: ceph: only use d_name directly when parent is locked Ben reported tripping the BUG_ON in create_request_message during some performance testing. Analysis of the vmcore showed that the length of the r_dentry->d_name string changed after we allocated the buffer, but before we encoded it. build_dentry_path returns pointers to d_name in the common case of non-snapped dentries, but this optimization isn't safe unless the parent directory is locked. When it isn't, have the code make a copy of the d_name while holding the d_lock. Cc: stable@vger.kernel.org Reported-by: Ben England Signed-off-by: Jeff Layton Reviewed-by: "Yan, Zheng" Signed-off-by: Ilya Dryomov --- fs/ceph/mds_client.c | 61 ++++++++++++++++++++++++++++++++++++++++++---------- 1 file changed, 50 insertions(+), 11 deletions(-) diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index 21c33ed048ed..bc5d70d6bfe6 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c @@ -2161,10 +2161,39 @@ retry: return path; } +/* Duplicate the dentry->d_name.name safely */ +static int clone_dentry_name(struct dentry *dentry, const char **ppath, + int *ppathlen) +{ + u32 len; + char *name; + +retry: + len = READ_ONCE(dentry->d_name.len); + name = kmalloc(len + 1, GFP_NOFS); + if (!name) + return -ENOMEM; + + spin_lock(&dentry->d_lock); + if (dentry->d_name.len != len) { + spin_unlock(&dentry->d_lock); + kfree(name); + goto retry; + } + memcpy(name, dentry->d_name.name, len); + spin_unlock(&dentry->d_lock); + + name[len] = '\0'; + *ppath = name; + *ppathlen = len; + return 0; +} + static int build_dentry_path(struct dentry *dentry, struct inode *dir, const char **ppath, int *ppathlen, u64 *pino, - int *pfreepath) + bool *pfreepath, bool parent_locked) { + int ret; char *path; rcu_read_lock(); @@ -2173,8 +2202,15 @@ static int build_dentry_path(struct dentry *dentry, struct inode *dir, if (dir && ceph_snap(dir) == CEPH_NOSNAP) { *pino = ceph_ino(dir); rcu_read_unlock(); - *ppath = dentry->d_name.name; - *ppathlen = dentry->d_name.len; + if (parent_locked) { + *ppath = dentry->d_name.name; + *ppathlen = dentry->d_name.len; + } else { + ret = clone_dentry_name(dentry, ppath, ppathlen); + if (ret) + return ret; + *pfreepath = true; + } return 0; } rcu_read_unlock(); @@ -2182,13 +2218,13 @@ static int build_dentry_path(struct dentry *dentry, struct inode *dir, if (IS_ERR(path)) return PTR_ERR(path); *ppath = path; - *pfreepath = 1; + *pfreepath = true; return 0; } static int build_inode_path(struct inode *inode, const char **ppath, int *ppathlen, u64 *pino, - int *pfreepath) + bool *pfreepath) { struct dentry *dentry; char *path; @@ -2204,7 +2240,7 @@ static int build_inode_path(struct inode *inode, if (IS_ERR(path)) return PTR_ERR(path); *ppath = path; - *pfreepath = 1; + *pfreepath = true; return 0; } @@ -2215,7 +2251,7 @@ static int build_inode_path(struct inode *inode, static int set_request_path_attr(struct inode *rinode, struct dentry *rdentry, struct inode *rdiri, const char *rpath, u64 rino, const char **ppath, int *pathlen, - u64 *ino, int *freepath) + u64 *ino, bool *freepath, bool parent_locked) { int r = 0; @@ -2225,7 +2261,7 @@ static int set_request_path_attr(struct inode *rinode, struct dentry *rdentry, ceph_snap(rinode)); } else if (rdentry) { r = build_dentry_path(rdentry, rdiri, ppath, pathlen, ino, - freepath); + freepath, parent_locked); dout(" dentry %p %llx/%.*s\n", rdentry, *ino, *pathlen, *ppath); } else if (rpath || rino) { @@ -2251,7 +2287,7 @@ static struct ceph_msg *create_request_message(struct ceph_mds_client *mdsc, const char *path2 = NULL; u64 ino1 = 0, ino2 = 0; int pathlen1 = 0, pathlen2 = 0; - int freepath1 = 0, freepath2 = 0; + bool freepath1 = false, freepath2 = false; int len; u16 releases; void *p, *end; @@ -2259,16 +2295,19 @@ static struct ceph_msg *create_request_message(struct ceph_mds_client *mdsc, ret = set_request_path_attr(req->r_inode, req->r_dentry, req->r_parent, req->r_path1, req->r_ino1.ino, - &path1, &pathlen1, &ino1, &freepath1); + &path1, &pathlen1, &ino1, &freepath1, + test_bit(CEPH_MDS_R_PARENT_LOCKED, + &req->r_req_flags)); if (ret < 0) { msg = ERR_PTR(ret); goto out; } + /* If r_old_dentry is set, then assume that its parent is locked */ ret = set_request_path_attr(NULL, req->r_old_dentry, req->r_old_dentry_dir, req->r_path2, req->r_ino2.ino, - &path2, &pathlen2, &ino2, &freepath2); + &path2, &pathlen2, &ino2, &freepath2, true); if (ret < 0) { msg = ERR_PTR(ret); goto out_free1; -- cgit v1.2.3 From 76a495d666e5043ffc315695f8241f5e94a98849 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Wed, 17 Apr 2019 12:58:28 -0400 Subject: ceph: ensure d_name stability in ceph_dentry_hash() Take the d_lock here to ensure that d_name doesn't change. Cc: stable@vger.kernel.org Signed-off-by: Jeff Layton Reviewed-by: "Yan, Zheng" Signed-off-by: Ilya Dryomov --- fs/ceph/dir.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c index a8f429882249..0637149fb9f9 100644 --- a/fs/ceph/dir.c +++ b/fs/ceph/dir.c @@ -1766,6 +1766,7 @@ static ssize_t ceph_read_dir(struct file *file, char __user *buf, size_t size, unsigned ceph_dentry_hash(struct inode *dir, struct dentry *dn) { struct ceph_inode_info *dci = ceph_inode(dir); + unsigned hash; switch (dci->i_dir_layout.dl_dir_hash) { case 0: /* for backward compat */ @@ -1773,8 +1774,11 @@ unsigned ceph_dentry_hash(struct inode *dir, struct dentry *dn) return dn->d_name.hash; default: - return ceph_str_hash(dci->i_dir_layout.dl_dir_hash, + spin_lock(&dn->d_lock); + hash = ceph_str_hash(dci->i_dir_layout.dl_dir_hash, dn->d_name.name, dn->d_name.len); + spin_unlock(&dn->d_lock); + return hash; } } -- cgit v1.2.3 From 4b8222870032715f9d995f3eb7c7acd8379a275d Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Mon, 15 Apr 2019 12:00:42 -0400 Subject: ceph: handle the case where a dentry has been renamed on outstanding req It's possible for us to issue a lookup to revalidate a dentry concurrently with a rename. If done in the right order, then we could end up processing dentry info in the reply that no longer reflects the state of the dentry. If req->r_dentry->d_name differs from the one in the trace, then just ignore the trace in the reply. We only need to do this however if the parent's i_rwsem is not held. Signed-off-by: Jeff Layton Reviewed-by: "Yan, Zheng" Signed-off-by: Ilya Dryomov --- fs/ceph/inode.c | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c index 2d61ddda9bf5..c2feb310ac1e 100644 --- a/fs/ceph/inode.c +++ b/fs/ceph/inode.c @@ -1163,6 +1163,19 @@ static int splice_dentry(struct dentry **pdn, struct inode *in) return 0; } +static int d_name_cmp(struct dentry *dentry, const char *name, size_t len) +{ + int ret; + + /* take d_lock to ensure dentry->d_name stability */ + spin_lock(&dentry->d_lock); + ret = dentry->d_name.len - len; + if (!ret) + ret = memcmp(dentry->d_name.name, name, len); + spin_unlock(&dentry->d_lock); + return ret; +} + /* * Incorporate results into the local cache. This is either just * one inode, or a directory, dentry, and possibly linked-to inode (e.g., @@ -1412,7 +1425,8 @@ retry_lookup: err = splice_dentry(&req->r_dentry, in); if (err < 0) goto done; - } else if (rinfo->head->is_dentry) { + } else if (rinfo->head->is_dentry && + !d_name_cmp(req->r_dentry, rinfo->dname, rinfo->dname_len)) { struct ceph_vino *ptvino = NULL; if ((le32_to_cpu(rinfo->diri.in->cap.caps) & CEPH_CAP_FILE_SHARED) || -- cgit v1.2.3 From 37659182bff1eeaaeadcfc8f853c6d2b6dbc3f47 Mon Sep 17 00:00:00 2001 From: "Yan, Zheng" Date: Thu, 18 Apr 2019 11:24:57 +0800 Subject: ceph: fix ci->i_head_snapc leak We missed two places that i_wrbuffer_ref_head, i_wr_ref, i_dirty_caps and i_flushing_caps may change. When they are all zeros, we should free i_head_snapc. Cc: stable@vger.kernel.org Link: https://tracker.ceph.com/issues/38224 Reported-and-tested-by: Luis Henriques Signed-off-by: "Yan, Zheng" Signed-off-by: Ilya Dryomov --- fs/ceph/mds_client.c | 9 +++++++++ fs/ceph/snap.c | 7 ++++++- 2 files changed, 15 insertions(+), 1 deletion(-) diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index bc5d70d6bfe6..9049c2a3e972 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c @@ -1414,6 +1414,15 @@ static int remove_session_caps_cb(struct inode *inode, struct ceph_cap *cap, list_add(&ci->i_prealloc_cap_flush->i_list, &to_remove); ci->i_prealloc_cap_flush = NULL; } + + if (drop && + ci->i_wrbuffer_ref_head == 0 && + ci->i_wr_ref == 0 && + ci->i_dirty_caps == 0 && + ci->i_flushing_caps == 0) { + ceph_put_snap_context(ci->i_head_snapc); + ci->i_head_snapc = NULL; + } } spin_unlock(&ci->i_ceph_lock); while (!list_empty(&to_remove)) { diff --git a/fs/ceph/snap.c b/fs/ceph/snap.c index 89aa37fa0f84..b26e12cd8ec3 100644 --- a/fs/ceph/snap.c +++ b/fs/ceph/snap.c @@ -572,7 +572,12 @@ void ceph_queue_cap_snap(struct ceph_inode_info *ci) old_snapc = NULL; update_snapc: - if (ci->i_head_snapc) { + if (ci->i_wrbuffer_ref_head == 0 && + ci->i_wr_ref == 0 && + ci->i_dirty_caps == 0 && + ci->i_flushing_caps == 0) { + ci->i_head_snapc = NULL; + } else { ci->i_head_snapc = ceph_get_snap_context(new_snapc); dout(" new snapc is %p\n", new_snapc); } -- cgit v1.2.3 From 9fa246256e09dc30820524401cdbeeaadee94025 Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Wed, 24 Apr 2019 10:47:56 +1000 Subject: Revert "drm/i915/fbdev: Actually configure untiled displays" This reverts commit d179b88deb3bf6fed4991a31fd6f0f2cad21fab5. This commit is documented to break userspace X.org modesetting driver in certain configurations. The X.org modesetting userspace driver is broken. No fixes are available yet. In order for this patch to be applied it either needs a config option or a workaround developed. This has been reported a few times, saying it's a userspace problem is clearly against the regression rules. Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=109806 Signed-off-by: Dave Airlie Cc: # v3.19+ --- drivers/gpu/drm/i915/intel_fbdev.c | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_fbdev.c b/drivers/gpu/drm/i915/intel_fbdev.c index e8f694b57b8a..376ffe842e26 100644 --- a/drivers/gpu/drm/i915/intel_fbdev.c +++ b/drivers/gpu/drm/i915/intel_fbdev.c @@ -338,8 +338,8 @@ static bool intel_fb_initial_config(struct drm_fb_helper *fb_helper, bool *enabled, int width, int height) { struct drm_i915_private *dev_priv = to_i915(fb_helper->dev); + unsigned long conn_configured, conn_seq, mask; unsigned int count = min(fb_helper->connector_count, BITS_PER_LONG); - unsigned long conn_configured, conn_seq; int i, j; bool *save_enabled; bool fallback = true, ret = true; @@ -357,9 +357,10 @@ static bool intel_fb_initial_config(struct drm_fb_helper *fb_helper, drm_modeset_backoff(&ctx); memcpy(save_enabled, enabled, count); - conn_seq = GENMASK(count - 1, 0); + mask = GENMASK(count - 1, 0); conn_configured = 0; retry: + conn_seq = conn_configured; for (i = 0; i < count; i++) { struct drm_fb_helper_connector *fb_conn; struct drm_connector *connector; @@ -372,8 +373,7 @@ retry: if (conn_configured & BIT(i)) continue; - /* First pass, only consider tiled connectors */ - if (conn_seq == GENMASK(count - 1, 0) && !connector->has_tile) + if (conn_seq == 0 && !connector->has_tile) continue; if (connector->status == connector_status_connected) @@ -477,10 +477,8 @@ retry: conn_configured |= BIT(i); } - if (conn_configured != conn_seq) { /* repeat until no more are found */ - conn_seq = conn_configured; + if ((conn_configured & mask) != mask && conn_configured != conn_seq) goto retry; - } /* * If the BIOS didn't enable everything it could, fall back to have the -- cgit v1.2.3 From a0cecc23cfcbf2626497a8c8770856dd56b67917 Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Wed, 24 Apr 2019 10:52:20 +1000 Subject: Revert "drm/virtio: drop prime import/export callbacks" MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This patch does more harm than good, as it breaks both Xwayland and gnome-shell with X11. Xwayland requires DRI3 & DRI3 requires PRIME. X11 crash for obscure double-free reason which are hard to debug (starting X11 by hand doesn't trigger the crash). I don't see an apparent problem implementing those stub prime functions, they may return an error at run-time, and it seems to be handled fine by GNOME at least. This reverts commit b318e3ff7ca065d6b107e424c85a63d7a6798a69. [airlied: This broke userspace for virtio-gpus, and regressed things from DRI3 to DRI2. This brings back the original problem, but it's better than regressions.] Fixes: b318e3ff7ca065d6b107e424c85a63d7a6798a ("drm/virtio: drop prime import/export callbacks") Signed-off-by: Marc-André Lureau Signed-off-by: Dave Airlie --- drivers/gpu/drm/virtio/virtgpu_drv.c | 4 ++++ drivers/gpu/drm/virtio/virtgpu_drv.h | 4 ++++ drivers/gpu/drm/virtio/virtgpu_prime.c | 12 ++++++++++++ 3 files changed, 20 insertions(+) diff --git a/drivers/gpu/drm/virtio/virtgpu_drv.c b/drivers/gpu/drm/virtio/virtgpu_drv.c index b996ac1d4fcc..af92964b6889 100644 --- a/drivers/gpu/drm/virtio/virtgpu_drv.c +++ b/drivers/gpu/drm/virtio/virtgpu_drv.c @@ -205,10 +205,14 @@ static struct drm_driver driver = { #if defined(CONFIG_DEBUG_FS) .debugfs_init = virtio_gpu_debugfs_init, #endif + .prime_handle_to_fd = drm_gem_prime_handle_to_fd, + .prime_fd_to_handle = drm_gem_prime_fd_to_handle, .gem_prime_export = drm_gem_prime_export, .gem_prime_import = drm_gem_prime_import, .gem_prime_pin = virtgpu_gem_prime_pin, .gem_prime_unpin = virtgpu_gem_prime_unpin, + .gem_prime_get_sg_table = virtgpu_gem_prime_get_sg_table, + .gem_prime_import_sg_table = virtgpu_gem_prime_import_sg_table, .gem_prime_vmap = virtgpu_gem_prime_vmap, .gem_prime_vunmap = virtgpu_gem_prime_vunmap, .gem_prime_mmap = virtgpu_gem_prime_mmap, diff --git a/drivers/gpu/drm/virtio/virtgpu_drv.h b/drivers/gpu/drm/virtio/virtgpu_drv.h index 3238fdf58eb4..d577cb76f5ad 100644 --- a/drivers/gpu/drm/virtio/virtgpu_drv.h +++ b/drivers/gpu/drm/virtio/virtgpu_drv.h @@ -354,6 +354,10 @@ int virtio_gpu_object_wait(struct virtio_gpu_object *bo, bool no_wait); /* virtgpu_prime.c */ int virtgpu_gem_prime_pin(struct drm_gem_object *obj); void virtgpu_gem_prime_unpin(struct drm_gem_object *obj); +struct sg_table *virtgpu_gem_prime_get_sg_table(struct drm_gem_object *obj); +struct drm_gem_object *virtgpu_gem_prime_import_sg_table( + struct drm_device *dev, struct dma_buf_attachment *attach, + struct sg_table *sgt); void *virtgpu_gem_prime_vmap(struct drm_gem_object *obj); void virtgpu_gem_prime_vunmap(struct drm_gem_object *obj, void *vaddr); int virtgpu_gem_prime_mmap(struct drm_gem_object *obj, diff --git a/drivers/gpu/drm/virtio/virtgpu_prime.c b/drivers/gpu/drm/virtio/virtgpu_prime.c index c59ec34c80a5..eb51a78e1199 100644 --- a/drivers/gpu/drm/virtio/virtgpu_prime.c +++ b/drivers/gpu/drm/virtio/virtgpu_prime.c @@ -39,6 +39,18 @@ void virtgpu_gem_prime_unpin(struct drm_gem_object *obj) WARN_ONCE(1, "not implemented"); } +struct sg_table *virtgpu_gem_prime_get_sg_table(struct drm_gem_object *obj) +{ + return ERR_PTR(-ENODEV); +} + +struct drm_gem_object *virtgpu_gem_prime_import_sg_table( + struct drm_device *dev, struct dma_buf_attachment *attach, + struct sg_table *table) +{ + return ERR_PTR(-ENODEV); +} + void *virtgpu_gem_prime_vmap(struct drm_gem_object *obj) { struct virtio_gpu_object *bo = gem_to_virtio_gpu_obj(obj); -- cgit v1.2.3 From 66c031716bcd9647cd2304e4875163663b086405 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Tue, 23 Apr 2019 15:30:07 +0100 Subject: net: atheros: fix spelling mistake "underun" -> "underrun" There are spelling mistakes in structure elements, fix these. Signed-off-by: Colin Ian King Signed-off-by: David S. Miller --- drivers/net/ethernet/atheros/atlx/atl1.c | 4 ++-- drivers/net/ethernet/atheros/atlx/atl1.h | 2 +- drivers/net/ethernet/atheros/atlx/atl2.c | 2 +- drivers/net/ethernet/atheros/atlx/atl2.h | 2 +- 4 files changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/atheros/atlx/atl1.c b/drivers/net/ethernet/atheros/atlx/atl1.c index 9e07b469066a..156fbc5601ca 100644 --- a/drivers/net/ethernet/atheros/atlx/atl1.c +++ b/drivers/net/ethernet/atheros/atlx/atl1.c @@ -1721,7 +1721,7 @@ static void atl1_inc_smb(struct atl1_adapter *adapter) adapter->soft_stats.scc += smb->tx_1_col; adapter->soft_stats.mcc += smb->tx_2_col; adapter->soft_stats.latecol += smb->tx_late_col; - adapter->soft_stats.tx_underun += smb->tx_underrun; + adapter->soft_stats.tx_underrun += smb->tx_underrun; adapter->soft_stats.tx_trunc += smb->tx_trunc; adapter->soft_stats.tx_pause += smb->tx_pause; @@ -3179,7 +3179,7 @@ static struct atl1_stats atl1_gstrings_stats[] = { {"tx_deferred_ok", ATL1_STAT(soft_stats.deffer)}, {"tx_single_coll_ok", ATL1_STAT(soft_stats.scc)}, {"tx_multi_coll_ok", ATL1_STAT(soft_stats.mcc)}, - {"tx_underun", ATL1_STAT(soft_stats.tx_underun)}, + {"tx_underrun", ATL1_STAT(soft_stats.tx_underrun)}, {"tx_trunc", ATL1_STAT(soft_stats.tx_trunc)}, {"tx_pause", ATL1_STAT(soft_stats.tx_pause)}, {"rx_pause", ATL1_STAT(soft_stats.rx_pause)}, diff --git a/drivers/net/ethernet/atheros/atlx/atl1.h b/drivers/net/ethernet/atheros/atlx/atl1.h index 34a58cd846a0..eacff19ea05b 100644 --- a/drivers/net/ethernet/atheros/atlx/atl1.h +++ b/drivers/net/ethernet/atheros/atlx/atl1.h @@ -681,7 +681,7 @@ struct atl1_sft_stats { u64 scc; /* packets TX after a single collision */ u64 mcc; /* packets TX after multiple collisions */ u64 latecol; /* TX packets w/ late collisions */ - u64 tx_underun; /* TX packets aborted due to TX FIFO underrun + u64 tx_underrun; /* TX packets aborted due to TX FIFO underrun * or TRD FIFO underrun */ u64 tx_trunc; /* TX packets truncated due to size > MTU */ u64 rx_pause; /* num Pause packets received. */ diff --git a/drivers/net/ethernet/atheros/atlx/atl2.c b/drivers/net/ethernet/atheros/atlx/atl2.c index d99317b3d891..98da0fa27192 100644 --- a/drivers/net/ethernet/atheros/atlx/atl2.c +++ b/drivers/net/ethernet/atheros/atlx/atl2.c @@ -553,7 +553,7 @@ static void atl2_intr_tx(struct atl2_adapter *adapter) netdev->stats.tx_aborted_errors++; if (txs->late_col) netdev->stats.tx_window_errors++; - if (txs->underun) + if (txs->underrun) netdev->stats.tx_fifo_errors++; } while (1); diff --git a/drivers/net/ethernet/atheros/atlx/atl2.h b/drivers/net/ethernet/atheros/atlx/atl2.h index c64a6bdfa7ae..25ec84cb4853 100644 --- a/drivers/net/ethernet/atheros/atlx/atl2.h +++ b/drivers/net/ethernet/atheros/atlx/atl2.h @@ -260,7 +260,7 @@ struct tx_pkt_status { unsigned multi_col:1; unsigned late_col:1; unsigned abort_col:1; - unsigned underun:1; /* current packet is aborted + unsigned underrun:1; /* current packet is aborted * due to txram underrun */ unsigned:3; /* reserved */ unsigned update:1; /* always 1'b1 in tx_status_buf */ -- cgit v1.2.3 From ffbf9870dcf1342592a1a26f4cf70bda39046134 Mon Sep 17 00:00:00 2001 From: Ilias Apalodimas Date: Tue, 23 Apr 2019 09:01:41 +0300 Subject: net: socionext: replace napi_alloc_frag with the netdev variant on init The netdev variant is usable on any context since it disables interrupts. The napi variant of the call should only be used within softirq context. Replace napi_alloc_frag on driver init with the correct netdev_alloc_frag call Changes since v1: - Adjusted commit message Acked-by: Ard Biesheuvel Acked-by: Jassi Brar Fixes: 4acb20b46214 ("net: socionext: different approach on DMA") Signed-off-by: Ilias Apalodimas Signed-off-by: David S. Miller --- drivers/net/ethernet/socionext/netsec.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/socionext/netsec.c b/drivers/net/ethernet/socionext/netsec.c index a18149720aa2..cba5881b2746 100644 --- a/drivers/net/ethernet/socionext/netsec.c +++ b/drivers/net/ethernet/socionext/netsec.c @@ -673,7 +673,8 @@ static void netsec_process_tx(struct netsec_priv *priv) } static void *netsec_alloc_rx_data(struct netsec_priv *priv, - dma_addr_t *dma_handle, u16 *desc_len) + dma_addr_t *dma_handle, u16 *desc_len, + bool napi) { size_t total_len = SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); size_t payload_len = NETSEC_RX_BUF_SZ; @@ -682,7 +683,7 @@ static void *netsec_alloc_rx_data(struct netsec_priv *priv, total_len += SKB_DATA_ALIGN(payload_len + NETSEC_SKB_PAD); - buf = napi_alloc_frag(total_len); + buf = napi ? napi_alloc_frag(total_len) : netdev_alloc_frag(total_len); if (!buf) return NULL; @@ -765,7 +766,8 @@ static int netsec_process_rx(struct netsec_priv *priv, int budget) /* allocate a fresh buffer and map it to the hardware. * This will eventually replace the old buffer in the hardware */ - buf_addr = netsec_alloc_rx_data(priv, &dma_handle, &desc_len); + buf_addr = netsec_alloc_rx_data(priv, &dma_handle, &desc_len, + true); if (unlikely(!buf_addr)) break; @@ -1069,7 +1071,8 @@ static int netsec_setup_rx_dring(struct netsec_priv *priv) void *buf; u16 len; - buf = netsec_alloc_rx_data(priv, &dma_handle, &len); + buf = netsec_alloc_rx_data(priv, &dma_handle, &len, + false); if (!buf) { netsec_uninit_pkt_dring(priv, NETSEC_RING_RX); goto err_out; -- cgit v1.2.3 From 1c5c12ee308aacf635c8819cd4baa3bd58f8a8b7 Mon Sep 17 00:00:00 2001 From: Tao Ren Date: Wed, 24 Apr 2019 01:43:32 +0000 Subject: net/ncsi: handle overflow when incrementing mac address Previously BMC's MAC address is calculated by simply adding 1 to the last byte of network controller's MAC address, and it produces incorrect result when network controller's MAC address ends with 0xFF. The problem can be fixed by calling eth_addr_inc() function to increment MAC address; besides, the MAC address is also validated before assigning to BMC. Fixes: cb10c7c0dfd9 ("net/ncsi: Add NCSI Broadcom OEM command") Signed-off-by: Tao Ren Acked-by: Jakub Kicinski Acked-by: Samuel Mendoza-Jonas Signed-off-by: David S. Miller --- include/linux/etherdevice.h | 12 ++++++++++++ net/ncsi/ncsi-rsp.c | 6 +++++- 2 files changed, 17 insertions(+), 1 deletion(-) diff --git a/include/linux/etherdevice.h b/include/linux/etherdevice.h index e2f3b21cd72a..aa8bfd6f738c 100644 --- a/include/linux/etherdevice.h +++ b/include/linux/etherdevice.h @@ -448,6 +448,18 @@ static inline void eth_addr_dec(u8 *addr) u64_to_ether_addr(u, addr); } +/** + * eth_addr_inc() - Increment the given MAC address. + * @addr: Pointer to a six-byte array containing Ethernet address to increment. + */ +static inline void eth_addr_inc(u8 *addr) +{ + u64 u = ether_addr_to_u64(addr); + + u++; + u64_to_ether_addr(u, addr); +} + /** * is_etherdev_addr - Tell if given Ethernet address belongs to the device. * @dev: Pointer to a device structure diff --git a/net/ncsi/ncsi-rsp.c b/net/ncsi/ncsi-rsp.c index dc07fcc7938e..802db01e3075 100644 --- a/net/ncsi/ncsi-rsp.c +++ b/net/ncsi/ncsi-rsp.c @@ -11,6 +11,7 @@ #include #include #include +#include #include #include @@ -667,7 +668,10 @@ static int ncsi_rsp_handler_oem_bcm_gma(struct ncsi_request *nr) ndev->priv_flags |= IFF_LIVE_ADDR_CHANGE; memcpy(saddr.sa_data, &rsp->data[BCM_MAC_ADDR_OFFSET], ETH_ALEN); /* Increase mac address by 1 for BMC's address */ - saddr.sa_data[ETH_ALEN - 1]++; + eth_addr_inc((u8 *)saddr.sa_data); + if (!is_valid_ether_addr((const u8 *)saddr.sa_data)) + return -ENXIO; + ret = ops->ndo_set_mac_address(ndev, &saddr); if (ret < 0) netdev_warn(ndev, "NCSI: 'Writing mac address to device failed\n"); -- cgit v1.2.3 From d08106796a78a4273e39e1bbdf538dc4334b2635 Mon Sep 17 00:00:00 2001 From: Maarten Lankhorst Date: Fri, 1 Mar 2019 13:56:11 +0100 Subject: drm/vc4: Fix memory leak during gpu reset. __drm_atomic_helper_crtc_destroy_state does not free memory, it only cleans it up. Fix this by calling the functions own destroy function. Fixes: 6d6e50039187 ("drm/vc4: Allocate the right amount of space for boot-time CRTC state.") Cc: Eric Anholt Cc: # v4.6+ Reviewed-by: Eric Anholt Signed-off-by: Maarten Lankhorst Link: https://patchwork.freedesktop.org/patch/msgid/20190301125627.7285-2-maarten.lankhorst@linux.intel.com --- drivers/gpu/drm/vc4/vc4_crtc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/vc4/vc4_crtc.c b/drivers/gpu/drm/vc4/vc4_crtc.c index 730008d3da76..e7c04a9eb219 100644 --- a/drivers/gpu/drm/vc4/vc4_crtc.c +++ b/drivers/gpu/drm/vc4/vc4_crtc.c @@ -1042,7 +1042,7 @@ static void vc4_crtc_reset(struct drm_crtc *crtc) { if (crtc->state) - __drm_atomic_helper_crtc_destroy_state(crtc->state); + vc4_crtc_destroy_state(crtc->state); crtc->state = kzalloc(sizeof(struct vc4_crtc_state), GFP_KERNEL); if (crtc->state) -- cgit v1.2.3 From 0d02113b31b2017dd349ec9df2314e798a90fa6e Mon Sep 17 00:00:00 2001 From: Qian Cai Date: Tue, 23 Apr 2019 12:58:11 -0400 Subject: x86/mm: Fix a crash with kmemleak_scan() The first kmemleak_scan() call after boot would trigger the crash below because this callpath: kernel_init free_initmem mem_encrypt_free_decrypted_mem free_init_pages unmaps memory inside the .bss when DEBUG_PAGEALLOC=y. kmemleak_init() will register the .data/.bss sections and then kmemleak_scan() will scan those addresses and dereference them looking for pointer references. If free_init_pages() frees and unmaps pages in those sections, kmemleak_scan() will crash if referencing one of those addresses: BUG: unable to handle kernel paging request at ffffffffbd402000 CPU: 12 PID: 325 Comm: kmemleak Not tainted 5.1.0-rc4+ #4 RIP: 0010:scan_block Call Trace: scan_gray_list kmemleak_scan kmemleak_scan_thread kthread ret_from_fork Since kmemleak_free_part() is tolerant to unknown objects (not tracked by kmemleak), it is fine to call it from free_init_pages() even if not all address ranges passed to this function are known to kmemleak. [ bp: Massage. ] Fixes: b3f0907c71e0 ("x86/mm: Add .bss..decrypted section to hold shared variables") Signed-off-by: Qian Cai Signed-off-by: Borislav Petkov Reviewed-by: Catalin Marinas Cc: Andy Lutomirski Cc: Brijesh Singh Cc: Dave Hansen Cc: "H. Peter Anvin" Cc: Ingo Molnar Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: x86-ml Link: https://lkml.kernel.org/r/20190423165811.36699-1-cai@lca.pw --- arch/x86/mm/init.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c index f905a2371080..8dacdb96899e 100644 --- a/arch/x86/mm/init.c +++ b/arch/x86/mm/init.c @@ -5,6 +5,7 @@ #include #include #include +#include #include #include @@ -766,6 +767,11 @@ void free_init_pages(const char *what, unsigned long begin, unsigned long end) if (debug_pagealloc_enabled()) { pr_info("debug: unmapping init [mem %#010lx-%#010lx]\n", begin, end - 1); + /* + * Inform kmemleak about the hole in the memory since the + * corresponding pages will be unmapped. + */ + kmemleak_free_part((void *)begin, end - begin); set_memory_np(begin, (end - begin) >> PAGE_SHIFT); } else { /* -- cgit v1.2.3 From e02bc29b2cfa7806830d6da8b2322cddd67e8dfe Mon Sep 17 00:00:00 2001 From: Paul Kocialkowski Date: Wed, 24 Apr 2019 11:04:13 +0200 Subject: drm/sun4i: Unbind components before releasing DRM and memory Our components may still be using the DRM device driver (if only to access our driver's private data), so make sure to unbind them before the final drm_dev_put. Also release our reserved memory after component unbind instead of before to match reverse creation order. Fixes: f5a9ed867c83 ("drm/sun4i: Fix component unbinding and component master deletion") Signed-off-by: Paul Kocialkowski Reviewed-by: Chen-Yu Tsai Link: https://patchwork.freedesktop.org/patch/msgid/20190424090413.6918-1-paul.kocialkowski@bootlin.com --- drivers/gpu/drm/sun4i/sun4i_drv.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/sun4i/sun4i_drv.c b/drivers/gpu/drm/sun4i/sun4i_drv.c index 843b86661833..29258b404e54 100644 --- a/drivers/gpu/drm/sun4i/sun4i_drv.c +++ b/drivers/gpu/drm/sun4i/sun4i_drv.c @@ -149,10 +149,11 @@ static void sun4i_drv_unbind(struct device *dev) drm_kms_helper_poll_fini(drm); drm_atomic_helper_shutdown(drm); drm_mode_config_cleanup(drm); - of_reserved_mem_device_release(dev); - drm_dev_put(drm); component_unbind_all(dev, NULL); + of_reserved_mem_device_release(dev); + + drm_dev_put(drm); } static const struct component_master_ops sun4i_drv_master_ops = { -- cgit v1.2.3 From 462ce5d963f18b71c63f6b7730a35a2ee5273540 Mon Sep 17 00:00:00 2001 From: Maarten Lankhorst Date: Wed, 24 Apr 2019 17:06:29 +0200 Subject: drm/vc4: Fix compilation error reported by kbuild test bot A pointer to crtc was missing, resulting in the following build error: drivers/gpu/drm/vc4/vc4_crtc.c:1045:44: sparse: sparse: incorrect type in argument 1 (different base types) drivers/gpu/drm/vc4/vc4_crtc.c:1045:44: sparse: expected struct drm_crtc *crtc drivers/gpu/drm/vc4/vc4_crtc.c:1045:44: sparse: got struct drm_crtc_state *state drivers/gpu/drm/vc4/vc4_crtc.c:1045:39: sparse: sparse: not enough arguments for function vc4_crtc_destroy_state Signed-off-by: Maarten Lankhorst Reported-by: kbuild test robot Cc: Eric Anholt Link: https://patchwork.freedesktop.org/patch/msgid/2b6ed5e6-81b0-4276-8860-870b54ca3262@linux.intel.com Fixes: d08106796a78 ("drm/vc4: Fix memory leak during gpu reset.") Cc: # v4.6+ Acked-by: Daniel Vetter --- drivers/gpu/drm/vc4/vc4_crtc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/vc4/vc4_crtc.c b/drivers/gpu/drm/vc4/vc4_crtc.c index e7c04a9eb219..1baa10e94484 100644 --- a/drivers/gpu/drm/vc4/vc4_crtc.c +++ b/drivers/gpu/drm/vc4/vc4_crtc.c @@ -1042,7 +1042,7 @@ static void vc4_crtc_reset(struct drm_crtc *crtc) { if (crtc->state) - vc4_crtc_destroy_state(crtc->state); + vc4_crtc_destroy_state(crtc, crtc->state); crtc->state = kzalloc(sizeof(struct vc4_crtc_state), GFP_KERNEL); if (crtc->state) -- cgit v1.2.3 From c660133c339f9ab684fdf568c0d51b9ae5e86002 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Tue, 16 Apr 2019 14:07:25 +0300 Subject: RDMA/mlx5: Do not allow the user to write to the clock page The intent of this VMA was to be read-only from user space, but the VM_MAYWRITE masking was missed, so mprotect could make it writable. Cc: stable@vger.kernel.org Fixes: 5c99eaecb1fc ("IB/mlx5: Mmap the HCA's clock info to user-space") Signed-off-by: Jason Gunthorpe Reviewed-by: Haggai Eran Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/mlx5/main.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index 241d9ead79eb..12a75dfc992b 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -2068,6 +2068,7 @@ static int mlx5_ib_mmap_clock_info_page(struct mlx5_ib_dev *dev, if (vma->vm_flags & VM_WRITE) return -EPERM; + vma->vm_flags &= ~VM_MAYWRITE; if (!dev->mdev->clock_info_page) return -EOPNOTSUPP; @@ -2233,6 +2234,7 @@ static int mlx5_ib_mmap(struct ib_ucontext *ibcontext, struct vm_area_struct *vm if (vma->vm_flags & VM_WRITE) return -EPERM; + vma->vm_flags &= ~VM_MAYWRITE; /* Don't expose to user-space information it shouldn't have */ if (PAGE_SIZE > 4096) -- cgit v1.2.3 From d5e560d3f72382ac4e3bfe4e0f0420e6a220b039 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Tue, 16 Apr 2019 14:07:26 +0300 Subject: RDMA/mlx5: Use rdma_user_map_io for mapping BAR pages Since mlx5 supports device disassociate it must use this API for all BAR page mmaps, otherwise the pages can remain mapped after the device is unplugged causing a system crash. Cc: stable@vger.kernel.org Fixes: 5f9794dc94f5 ("RDMA/ucontext: Add a core API for mmaping driver IO memory") Signed-off-by: Jason Gunthorpe Reviewed-by: Haggai Eran Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/mlx5/main.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index 12a75dfc992b..d3dd290ae1b1 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -2240,14 +2240,12 @@ static int mlx5_ib_mmap(struct ib_ucontext *ibcontext, struct vm_area_struct *vm if (PAGE_SIZE > 4096) return -EOPNOTSUPP; - vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); pfn = (dev->mdev->iseg_base + offsetof(struct mlx5_init_seg, internal_timer_h)) >> PAGE_SHIFT; - if (io_remap_pfn_range(vma, vma->vm_start, pfn, - PAGE_SIZE, vma->vm_page_prot)) - return -EAGAIN; - break; + return rdma_user_mmap_io(&context->ibucontext, vma, pfn, + PAGE_SIZE, + pgprot_noncached(vma->vm_page_prot)); case MLX5_IB_MMAP_CLOCK_INFO: return mlx5_ib_mmap_clock_info_page(dev, vma, context); -- cgit v1.2.3 From f06eba72274788db6a43012a05a99915c0283aef Mon Sep 17 00:00:00 2001 From: Jacky Bai Date: Fri, 5 Apr 2019 10:31:09 -0700 Subject: Input: snvs_pwrkey - make it depend on ARCH_MXC The SNVS power key is not only used on i.MX6SX and i.MX7D, it is also used by i.MX6UL and NXP's latest ARMv8 based i.MX8M series SOC. So update the config dependency to use ARCH_MXC, and add the COMPILE_TEST too. Signed-off-by: Jacky Bai Reviewed-by: Dong Aisheng Signed-off-by: Dmitry Torokhov --- drivers/input/keyboard/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/input/keyboard/Kconfig b/drivers/input/keyboard/Kconfig index a878351f1643..52d7f55fca32 100644 --- a/drivers/input/keyboard/Kconfig +++ b/drivers/input/keyboard/Kconfig @@ -420,7 +420,7 @@ config KEYBOARD_MPR121 config KEYBOARD_SNVS_PWRKEY tristate "IMX SNVS Power Key Driver" - depends on SOC_IMX6SX || SOC_IMX7D + depends on ARCH_MXC || COMPILE_TEST depends on OF help This is the snvs powerkey driver for the Freescale i.MX application -- cgit v1.2.3 From 67f269b37f9b4d52c5e7f97acea26c0852e9b8a1 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Tue, 16 Apr 2019 14:07:28 +0300 Subject: RDMA/ucontext: Fix regression with disassociate When this code was consolidated the intention was that the VMA would become backed by anonymous zero pages after the zap_vma_pte - however this very subtly relied on setting the vm_ops = NULL and clearing the VM_SHARED bits to transform the VMA into an anonymous VMA. Since the vm_ops was removed this broke. Now userspace gets a SIGBUS if it touches the vma after disassociation. Instead of converting the VMA to anonymous provide a fault handler that puts a zero'd page into the VMA when user-space touches it after disassociation. Cc: stable@vger.kernel.org Suggested-by: Andrea Arcangeli Fixes: 5f9794dc94f5 ("RDMA/ucontext: Add a core API for mmaping driver IO memory") Signed-off-by: Jason Gunthorpe Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/uverbs.h | 1 + drivers/infiniband/core/uverbs_main.c | 52 +++++++++++++++++++++++++++++++++-- 2 files changed, 50 insertions(+), 3 deletions(-) diff --git a/drivers/infiniband/core/uverbs.h b/drivers/infiniband/core/uverbs.h index ea0bc6885517..32cc8fe7902f 100644 --- a/drivers/infiniband/core/uverbs.h +++ b/drivers/infiniband/core/uverbs.h @@ -160,6 +160,7 @@ struct ib_uverbs_file { struct mutex umap_lock; struct list_head umaps; + struct page *disassociate_page; struct idr idr; /* spinlock protects write access to idr */ diff --git a/drivers/infiniband/core/uverbs_main.c b/drivers/infiniband/core/uverbs_main.c index 70b7d80431a9..db20b6e0f253 100644 --- a/drivers/infiniband/core/uverbs_main.c +++ b/drivers/infiniband/core/uverbs_main.c @@ -208,6 +208,9 @@ void ib_uverbs_release_file(struct kref *ref) kref_put(&file->async_file->ref, ib_uverbs_release_async_event_file); put_device(&file->device->dev); + + if (file->disassociate_page) + __free_pages(file->disassociate_page, 0); kfree(file); } @@ -877,9 +880,50 @@ static void rdma_umap_close(struct vm_area_struct *vma) kfree(priv); } +/* + * Once the zap_vma_ptes has been called touches to the VMA will come here and + * we return a dummy writable zero page for all the pfns. + */ +static vm_fault_t rdma_umap_fault(struct vm_fault *vmf) +{ + struct ib_uverbs_file *ufile = vmf->vma->vm_file->private_data; + struct rdma_umap_priv *priv = vmf->vma->vm_private_data; + vm_fault_t ret = 0; + + if (!priv) + return VM_FAULT_SIGBUS; + + /* Read only pages can just use the system zero page. */ + if (!(vmf->vma->vm_flags & (VM_WRITE | VM_MAYWRITE))) { + vmf->page = ZERO_PAGE(vmf->vm_start); + get_page(vmf->page); + return 0; + } + + mutex_lock(&ufile->umap_lock); + if (!ufile->disassociate_page) + ufile->disassociate_page = + alloc_pages(vmf->gfp_mask | __GFP_ZERO, 0); + + if (ufile->disassociate_page) { + /* + * This VMA is forced to always be shared so this doesn't have + * to worry about COW. + */ + vmf->page = ufile->disassociate_page; + get_page(vmf->page); + } else { + ret = VM_FAULT_SIGBUS; + } + mutex_unlock(&ufile->umap_lock); + + return ret; +} + static const struct vm_operations_struct rdma_umap_ops = { .open = rdma_umap_open, .close = rdma_umap_close, + .fault = rdma_umap_fault, }; static struct rdma_umap_priv *rdma_user_mmap_pre(struct ib_ucontext *ucontext, @@ -889,6 +933,9 @@ static struct rdma_umap_priv *rdma_user_mmap_pre(struct ib_ucontext *ucontext, struct ib_uverbs_file *ufile = ucontext->ufile; struct rdma_umap_priv *priv; + if (!(vma->vm_flags & VM_SHARED)) + return ERR_PTR(-EINVAL); + if (vma->vm_end - vma->vm_start != size) return ERR_PTR(-EINVAL); @@ -992,7 +1039,7 @@ void uverbs_user_mmap_disassociate(struct ib_uverbs_file *ufile) * at a time to get the lock ordering right. Typically there * will only be one mm, so no big deal. */ - down_write(&mm->mmap_sem); + down_read(&mm->mmap_sem); mutex_lock(&ufile->umap_lock); list_for_each_entry_safe (priv, next_priv, &ufile->umaps, list) { @@ -1004,10 +1051,9 @@ void uverbs_user_mmap_disassociate(struct ib_uverbs_file *ufile) zap_vma_ptes(vma, vma->vm_start, vma->vm_end - vma->vm_start); - vma->vm_flags &= ~(VM_SHARED | VM_MAYSHARE); } mutex_unlock(&ufile->umap_lock); - up_write(&mm->mmap_sem); + up_read(&mm->mmap_sem); mmput(mm); } } -- cgit v1.2.3 From bce1a78423961fce676ac65540a31b6ffd179e6d Mon Sep 17 00:00:00 2001 From: Pan Bian Date: Fri, 19 Apr 2019 07:39:00 +0000 Subject: Input: synaptics-rmi4 - fix possible double free The RMI4 function structure has been released in rmi_register_function if error occurs. However, it will be released again in the function rmi_create_function, which may result in a double-free bug. Signed-off-by: Pan Bian Signed-off-by: Dmitry Torokhov --- drivers/input/rmi4/rmi_driver.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/drivers/input/rmi4/rmi_driver.c b/drivers/input/rmi4/rmi_driver.c index fc3ab93b7aea..7fb358f96195 100644 --- a/drivers/input/rmi4/rmi_driver.c +++ b/drivers/input/rmi4/rmi_driver.c @@ -860,7 +860,7 @@ static int rmi_create_function(struct rmi_device *rmi_dev, error = rmi_register_function(fn); if (error) - goto err_put_fn; + return error; if (pdt->function_number == 0x01) data->f01_container = fn; @@ -870,10 +870,6 @@ static int rmi_create_function(struct rmi_device *rmi_dev, list_add_tail(&fn->node, &data->function_list); return RMI_SCAN_CONTINUE; - -err_put_fn: - put_device(&fn->dev); - return error; } void rmi_enable_irq(struct rmi_device *rmi_dev, bool clear_wake) -- cgit v1.2.3 From 05fd5c2c61732152a6bddc318aae62d7e436629b Mon Sep 17 00:00:00 2001 From: Ronnie Sahlberg Date: Tue, 23 Apr 2019 16:39:45 +1000 Subject: cifs: fix memory leak in SMB2_read Commit 088aaf17aa79300cab14dbee2569c58cfafd7d6e introduced a leak where if SMB2_read() returned an error we would return without freeing the request buffer. Cc: Stable Signed-off-by: Ronnie Sahlberg Reviewed-by: Pavel Shilovsky Signed-off-by: Steve French --- fs/cifs/smb2pdu.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c index b8f7262ac354..a37774a55f3a 100644 --- a/fs/cifs/smb2pdu.c +++ b/fs/cifs/smb2pdu.c @@ -3466,6 +3466,7 @@ SMB2_read(const unsigned int xid, struct cifs_io_parms *io_parms, io_parms->tcon->tid, ses->Suid, io_parms->offset, 0); free_rsp_buf(resp_buftype, rsp_iov.iov_base); + cifs_small_buf_release(req); return rc == -ENODATA ? 0 : rc; } else trace_smb3_read_done(xid, req->PersistentFileId, -- cgit v1.2.3 From 652727bbe1b17993636346716ae5867627793647 Mon Sep 17 00:00:00 2001 From: Frank Sorenson Date: Tue, 16 Apr 2019 08:37:27 -0500 Subject: cifs: do not attempt cifs operation on smb2+ rename error A path-based rename returning EBUSY will incorrectly try opening the file with a cifs (NT Create AndX) operation on an smb2+ mount, which causes the server to force a session close. If the mount is smb2+, skip the fallback. Signed-off-by: Frank Sorenson Signed-off-by: Steve French CC: Stable Reviewed-by: Ronnie Sahlberg --- fs/cifs/inode.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c index 53fdb5df0d2e..538fd7d807e4 100644 --- a/fs/cifs/inode.c +++ b/fs/cifs/inode.c @@ -1735,6 +1735,10 @@ cifs_do_rename(const unsigned int xid, struct dentry *from_dentry, if (rc == 0 || rc != -EBUSY) goto do_rename_exit; + /* Don't fall back to using SMB on SMB 2+ mount */ + if (server->vals->protocol_id != 0) + goto do_rename_exit; + /* open-file renames don't work across directories */ if (to_dentry->d_parent != from_dentry->d_parent) goto do_rename_exit; -- cgit v1.2.3 From 13f5938d8264b5501368523c4513ff26608a33e8 Mon Sep 17 00:00:00 2001 From: Jérôme Glisse Date: Wed, 10 Apr 2019 15:37:47 -0400 Subject: cifs: fix page reference leak with readv/writev MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit CIFS can leak pages reference gotten through GUP (get_user_pages*() through iov_iter_get_pages()). This happen if cifs_send_async_read() or cifs_write_from_iter() calls fail from within __cifs_readv() and __cifs_writev() respectively. This patch move page unreference to cifs_aio_ctx_release() which will happens on all code paths this is all simpler to follow for correctness. Signed-off-by: Jérôme Glisse Cc: Steve French Cc: linux-cifs@vger.kernel.org Cc: samba-technical@lists.samba.org Cc: Alexander Viro Cc: linux-fsdevel@vger.kernel.org Cc: Linus Torvalds Cc: Stable Signed-off-by: Steve French Reviewed-by: Pavel Shilovsky --- fs/cifs/file.c | 15 +-------------- fs/cifs/misc.c | 23 ++++++++++++++++++++++- 2 files changed, 23 insertions(+), 15 deletions(-) diff --git a/fs/cifs/file.c b/fs/cifs/file.c index 9c0ccc06d172..7037a137fa53 100644 --- a/fs/cifs/file.c +++ b/fs/cifs/file.c @@ -2877,7 +2877,6 @@ static void collect_uncached_write_data(struct cifs_aio_ctx *ctx) struct cifs_tcon *tcon; struct cifs_sb_info *cifs_sb; struct dentry *dentry = ctx->cfile->dentry; - unsigned int i; int rc; tcon = tlink_tcon(ctx->cfile->tlink); @@ -2941,10 +2940,6 @@ restart_loop: kref_put(&wdata->refcount, cifs_uncached_writedata_release); } - if (!ctx->direct_io) - for (i = 0; i < ctx->npages; i++) - put_page(ctx->bv[i].bv_page); - cifs_stats_bytes_written(tcon, ctx->total_len); set_bit(CIFS_INO_INVALID_MAPPING, &CIFS_I(dentry->d_inode)->flags); @@ -3582,7 +3577,6 @@ collect_uncached_read_data(struct cifs_aio_ctx *ctx) struct iov_iter *to = &ctx->iter; struct cifs_sb_info *cifs_sb; struct cifs_tcon *tcon; - unsigned int i; int rc; tcon = tlink_tcon(ctx->cfile->tlink); @@ -3666,15 +3660,8 @@ again: kref_put(&rdata->refcount, cifs_uncached_readdata_release); } - if (!ctx->direct_io) { - for (i = 0; i < ctx->npages; i++) { - if (ctx->should_dirty) - set_page_dirty(ctx->bv[i].bv_page); - put_page(ctx->bv[i].bv_page); - } - + if (!ctx->direct_io) ctx->total_len = ctx->len - iov_iter_count(to); - } /* mask nodata case */ if (rc == -ENODATA) diff --git a/fs/cifs/misc.c b/fs/cifs/misc.c index 1e1626a2cfc3..0dc6f08020ac 100644 --- a/fs/cifs/misc.c +++ b/fs/cifs/misc.c @@ -789,6 +789,11 @@ cifs_aio_ctx_alloc(void) { struct cifs_aio_ctx *ctx; + /* + * Must use kzalloc to initialize ctx->bv to NULL and ctx->direct_io + * to false so that we know when we have to unreference pages within + * cifs_aio_ctx_release() + */ ctx = kzalloc(sizeof(struct cifs_aio_ctx), GFP_KERNEL); if (!ctx) return NULL; @@ -807,7 +812,23 @@ cifs_aio_ctx_release(struct kref *refcount) struct cifs_aio_ctx, refcount); cifsFileInfo_put(ctx->cfile); - kvfree(ctx->bv); + + /* + * ctx->bv is only set if setup_aio_ctx_iter() was call successfuly + * which means that iov_iter_get_pages() was a success and thus that + * we have taken reference on pages. + */ + if (ctx->bv) { + unsigned i; + + for (i = 0; i < ctx->npages; i++) { + if (ctx->should_dirty) + set_page_dirty(ctx->bv[i].bv_page); + put_page(ctx->bv[i].bv_page); + } + kvfree(ctx->bv); + } + kfree(ctx); } -- cgit v1.2.3 From 22e8860cf8f777fbf6a83f2fb7127f682a8e9de4 Mon Sep 17 00:00:00 2001 From: Kangjie Lu Date: Sun, 24 Mar 2019 18:18:56 -0500 Subject: net: ieee802154: fix missing checks for regmap_update_bits regmap_update_bits could fail and deserves a check. The patch adds the checks and if it fails, returns its error code upstream. Signed-off-by: Kangjie Lu Reviewed-by: Mukesh Ojha Signed-off-by: Stefan Schmidt --- drivers/net/ieee802154/mcr20a.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/net/ieee802154/mcr20a.c b/drivers/net/ieee802154/mcr20a.c index c589f5ae75bb..8bb53ec8d9cf 100644 --- a/drivers/net/ieee802154/mcr20a.c +++ b/drivers/net/ieee802154/mcr20a.c @@ -533,6 +533,8 @@ mcr20a_start(struct ieee802154_hw *hw) dev_dbg(printdev(lp), "no slotted operation\n"); ret = regmap_update_bits(lp->regmap_dar, DAR_PHY_CTRL1, DAR_PHY_CTRL1_SLOTTED, 0x0); + if (ret < 0) + return ret; /* enable irq */ enable_irq(lp->spi->irq); @@ -540,11 +542,15 @@ mcr20a_start(struct ieee802154_hw *hw) /* Unmask SEQ interrupt */ ret = regmap_update_bits(lp->regmap_dar, DAR_PHY_CTRL2, DAR_PHY_CTRL2_SEQMSK, 0x0); + if (ret < 0) + return ret; /* Start the RX sequence */ dev_dbg(printdev(lp), "start the RX sequence\n"); ret = regmap_update_bits(lp->regmap_dar, DAR_PHY_CTRL1, DAR_PHY_CTRL1_XCVSEQ_MASK, MCR20A_XCVSEQ_RX); + if (ret < 0) + return ret; return 0; } -- cgit v1.2.3 From 6819e3f6d83a24777813b0d031ebe0861694db5a Mon Sep 17 00:00:00 2001 From: Miaohe Lin Date: Sat, 20 Apr 2019 12:09:39 +0800 Subject: net: vrf: Fix operation not supported when set vrf mac Vrf device is not able to change mac address now because lack of ndo_set_mac_address. Complete this in case some apps need to do this. Reported-by: Hui Wang Signed-off-by: Miaohe Lin Signed-off-by: David S. Miller --- drivers/net/vrf.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/vrf.c b/drivers/net/vrf.c index cd15c32b2e43..9ee4d7402ca2 100644 --- a/drivers/net/vrf.c +++ b/drivers/net/vrf.c @@ -875,6 +875,7 @@ static const struct net_device_ops vrf_netdev_ops = { .ndo_init = vrf_dev_init, .ndo_uninit = vrf_dev_uninit, .ndo_start_xmit = vrf_xmit, + .ndo_set_mac_address = eth_mac_addr, .ndo_get_stats64 = vrf_get_stats64, .ndo_add_slave = vrf_add_slave, .ndo_del_slave = vrf_del_slave, @@ -1274,6 +1275,7 @@ static void vrf_setup(struct net_device *dev) /* default to no qdisc; user can add if desired */ dev->priv_flags |= IFF_NO_QUEUE; dev->priv_flags |= IFF_NO_RX_HANDLER; + dev->priv_flags |= IFF_LIVE_ADDR_CHANGE; /* VRF devices do not care about MTU, but if the MTU is set * too low then the ipv4 and ipv6 protocols are disabled -- cgit v1.2.3 From 4b9fc7146249a6e0e3175d0acc033fdcd2bfcb17 Mon Sep 17 00:00:00 2001 From: Zhu Yanjun Date: Wed, 24 Apr 2019 02:56:42 -0400 Subject: net: rds: exchange of 8K and 1M pool Before the commit 490ea5967b0d ("RDS: IB: move FMR code to its own file"), when the dirty_count is greater than 9/10 of max_items of 8K pool, 1M pool is used, Vice versa. After the commit 490ea5967b0d ("RDS: IB: move FMR code to its own file"), the above is removed. When we make the following tests. Server: rds-stress -r 1.1.1.16 -D 1M Client: rds-stress -r 1.1.1.14 -s 1.1.1.16 -D 1M The following will appear. " connecting to 1.1.1.16:4000 negotiated options, tasks will start in 2 seconds Starting up..header from 1.1.1.166:4001 to id 4001 bogus .. tsks tx/s rx/s tx+rx K/s mbi K/s mbo K/s tx us/c rtt us cpu % 1 0 0 0.00 0.00 0.00 0.00 0.00 -1.00 1 0 0 0.00 0.00 0.00 0.00 0.00 -1.00 1 0 0 0.00 0.00 0.00 0.00 0.00 -1.00 1 0 0 0.00 0.00 0.00 0.00 0.00 -1.00 1 0 0 0.00 0.00 0.00 0.00 0.00 -1.00 ... " So this exchange between 8K and 1M pool is added back. Fixes: commit 490ea5967b0d ("RDS: IB: move FMR code to its own file") Signed-off-by: Zhu Yanjun Acked-by: Santosh Shilimkar Signed-off-by: David S. Miller --- net/rds/ib_fmr.c | 11 +++++++++++ net/rds/ib_rdma.c | 3 --- 2 files changed, 11 insertions(+), 3 deletions(-) diff --git a/net/rds/ib_fmr.c b/net/rds/ib_fmr.c index 31cf37da4510..93c0437e6a5f 100644 --- a/net/rds/ib_fmr.c +++ b/net/rds/ib_fmr.c @@ -44,6 +44,17 @@ struct rds_ib_mr *rds_ib_alloc_fmr(struct rds_ib_device *rds_ibdev, int npages) else pool = rds_ibdev->mr_1m_pool; + if (atomic_read(&pool->dirty_count) >= pool->max_items / 10) + queue_delayed_work(rds_ib_mr_wq, &pool->flush_worker, 10); + + /* Switch pools if one of the pool is reaching upper limit */ + if (atomic_read(&pool->dirty_count) >= pool->max_items * 9 / 10) { + if (pool->pool_type == RDS_IB_MR_8K_POOL) + pool = rds_ibdev->mr_1m_pool; + else + pool = rds_ibdev->mr_8k_pool; + } + ibmr = rds_ib_try_reuse_ibmr(pool); if (ibmr) return ibmr; diff --git a/net/rds/ib_rdma.c b/net/rds/ib_rdma.c index 63c8d107adcf..d664e9ade74d 100644 --- a/net/rds/ib_rdma.c +++ b/net/rds/ib_rdma.c @@ -454,9 +454,6 @@ struct rds_ib_mr *rds_ib_try_reuse_ibmr(struct rds_ib_mr_pool *pool) struct rds_ib_mr *ibmr = NULL; int iter = 0; - if (atomic_read(&pool->dirty_count) >= pool->max_items_soft / 10) - queue_delayed_work(rds_ib_mr_wq, &pool->flush_worker, 10); - while (1) { ibmr = rds_ib_reuse_mr(pool); if (ibmr) -- cgit v1.2.3 From 52fb56f7b5a9cc23a07b2c237bad91180263a492 Mon Sep 17 00:00:00 2001 From: Dan Murphy Date: Tue, 23 Apr 2019 15:00:24 -0500 Subject: MAINTAINERS: LEDs: Add designated reviewer for LED subsystem Add a designated reviewer for the LED subsystem as there are already two maintainers assigned. Signed-off-by: Dan Murphy Acked-by: Pavel Machek Signed-off-by: Jacek Anaszewski --- MAINTAINERS | 1 + 1 file changed, 1 insertion(+) diff --git a/MAINTAINERS b/MAINTAINERS index e17ebf70b548..d11bc7a44294 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -8698,6 +8698,7 @@ F: scripts/leaking_addresses.pl LED SUBSYSTEM M: Jacek Anaszewski M: Pavel Machek +R: Dan Murphy L: linux-leds@vger.kernel.org T: git git://git.kernel.org/pub/scm/linux/kernel/git/j.anaszewski/linux-leds.git S: Maintained -- cgit v1.2.3 From 032be5f19a94de51093851757089133dcc1e92aa Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 24 Apr 2019 09:44:11 -0700 Subject: rxrpc: fix race condition in rxrpc_input_packet() After commit 5271953cad31 ("rxrpc: Use the UDP encap_rcv hook"), rxrpc_input_packet() is directly called from lockless UDP receive path, under rcu_read_lock() protection. It must therefore use RCU rules : - udp_sk->sk_user_data can be cleared at any point in this function. rcu_dereference_sk_user_data() is what we need here. - Also, since sk_user_data might have been set in rxrpc_open_socket() we must observe a proper RCU grace period before kfree(local) in rxrpc_lookup_local() v4: @local can be NULL in xrpc_lookup_local() as reported by kbuild test robot and Julia Lawall , thanks ! v3,v2 : addressed David Howells feedback, thanks ! syzbot reported : kasan: CONFIG_KASAN_INLINE enabled kasan: GPF could be caused by NULL-ptr deref or user memory access general protection fault: 0000 [#1] PREEMPT SMP KASAN CPU: 0 PID: 19236 Comm: syz-executor703 Not tainted 5.1.0-rc6 #79 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 RIP: 0010:__lock_acquire+0xbef/0x3fb0 kernel/locking/lockdep.c:3573 Code: 00 0f 85 a5 1f 00 00 48 81 c4 10 01 00 00 5b 41 5c 41 5d 41 5e 41 5f 5d c3 48 b8 00 00 00 00 00 fc ff df 4c 89 ea 48 c1 ea 03 <80> 3c 02 00 0f 85 4a 21 00 00 49 81 7d 00 20 54 9c 89 0f 84 cf f4 RSP: 0018:ffff88809d7aef58 EFLAGS: 00010002 RAX: dffffc0000000000 RBX: 0000000000000000 RCX: 0000000000000000 RDX: 0000000000000026 RSI: 0000000000000000 RDI: 0000000000000001 RBP: ffff88809d7af090 R08: 0000000000000001 R09: 0000000000000001 R10: ffffed1015d05bc7 R11: ffff888089428600 R12: 0000000000000000 R13: 0000000000000130 R14: 0000000000000001 R15: 0000000000000001 FS: 00007f059044d700(0000) GS:ffff8880ae800000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00000000004b6040 CR3: 00000000955ca000 CR4: 00000000001406f0 Call Trace: lock_acquire+0x16f/0x3f0 kernel/locking/lockdep.c:4211 __raw_spin_lock_irqsave include/linux/spinlock_api_smp.h:110 [inline] _raw_spin_lock_irqsave+0x95/0xcd kernel/locking/spinlock.c:152 skb_queue_tail+0x26/0x150 net/core/skbuff.c:2972 rxrpc_reject_packet net/rxrpc/input.c:1126 [inline] rxrpc_input_packet+0x4a0/0x5536 net/rxrpc/input.c:1414 udp_queue_rcv_one_skb+0xaf2/0x1780 net/ipv4/udp.c:2011 udp_queue_rcv_skb+0x128/0x730 net/ipv4/udp.c:2085 udp_unicast_rcv_skb.isra.0+0xb9/0x360 net/ipv4/udp.c:2245 __udp4_lib_rcv+0x701/0x2ca0 net/ipv4/udp.c:2301 udp_rcv+0x22/0x30 net/ipv4/udp.c:2482 ip_protocol_deliver_rcu+0x60/0x8f0 net/ipv4/ip_input.c:208 ip_local_deliver_finish+0x23b/0x390 net/ipv4/ip_input.c:234 NF_HOOK include/linux/netfilter.h:289 [inline] NF_HOOK include/linux/netfilter.h:283 [inline] ip_local_deliver+0x1e9/0x520 net/ipv4/ip_input.c:255 dst_input include/net/dst.h:450 [inline] ip_rcv_finish+0x1e1/0x300 net/ipv4/ip_input.c:413 NF_HOOK include/linux/netfilter.h:289 [inline] NF_HOOK include/linux/netfilter.h:283 [inline] ip_rcv+0xe8/0x3f0 net/ipv4/ip_input.c:523 __netif_receive_skb_one_core+0x115/0x1a0 net/core/dev.c:4987 __netif_receive_skb+0x2c/0x1c0 net/core/dev.c:5099 netif_receive_skb_internal+0x117/0x660 net/core/dev.c:5202 napi_frags_finish net/core/dev.c:5769 [inline] napi_gro_frags+0xade/0xd10 net/core/dev.c:5843 tun_get_user+0x2f24/0x3fb0 drivers/net/tun.c:1981 tun_chr_write_iter+0xbd/0x156 drivers/net/tun.c:2027 call_write_iter include/linux/fs.h:1866 [inline] do_iter_readv_writev+0x5e1/0x8e0 fs/read_write.c:681 do_iter_write fs/read_write.c:957 [inline] do_iter_write+0x184/0x610 fs/read_write.c:938 vfs_writev+0x1b3/0x2f0 fs/read_write.c:1002 do_writev+0x15e/0x370 fs/read_write.c:1037 __do_sys_writev fs/read_write.c:1110 [inline] __se_sys_writev fs/read_write.c:1107 [inline] __x64_sys_writev+0x75/0xb0 fs/read_write.c:1107 do_syscall_64+0x103/0x610 arch/x86/entry/common.c:290 entry_SYSCALL_64_after_hwframe+0x49/0xbe Fixes: 5271953cad31 ("rxrpc: Use the UDP encap_rcv hook") Signed-off-by: Eric Dumazet Reported-by: syzbot Acked-by: David Howells Signed-off-by: David S. Miller --- net/rxrpc/input.c | 12 ++++++++---- net/rxrpc/local_object.c | 3 ++- 2 files changed, 10 insertions(+), 5 deletions(-) diff --git a/net/rxrpc/input.c b/net/rxrpc/input.c index 4c6f9d0a00e7..c2c35cf4e308 100644 --- a/net/rxrpc/input.c +++ b/net/rxrpc/input.c @@ -1161,19 +1161,19 @@ int rxrpc_extract_header(struct rxrpc_skb_priv *sp, struct sk_buff *skb) * handle data received on the local endpoint * - may be called in interrupt context * - * The socket is locked by the caller and this prevents the socket from being - * shut down and the local endpoint from going away, thus sk_user_data will not - * be cleared until this function returns. + * [!] Note that as this is called from the encap_rcv hook, the socket is not + * held locked by the caller and nothing prevents sk_user_data on the UDP from + * being cleared in the middle of processing this function. * * Called with the RCU read lock held from the IP layer via UDP. */ int rxrpc_input_packet(struct sock *udp_sk, struct sk_buff *skb) { + struct rxrpc_local *local = rcu_dereference_sk_user_data(udp_sk); struct rxrpc_connection *conn; struct rxrpc_channel *chan; struct rxrpc_call *call = NULL; struct rxrpc_skb_priv *sp; - struct rxrpc_local *local = udp_sk->sk_user_data; struct rxrpc_peer *peer = NULL; struct rxrpc_sock *rx = NULL; unsigned int channel; @@ -1181,6 +1181,10 @@ int rxrpc_input_packet(struct sock *udp_sk, struct sk_buff *skb) _enter("%p", udp_sk); + if (unlikely(!local)) { + kfree_skb(skb); + return 0; + } if (skb->tstamp == 0) skb->tstamp = ktime_get_real(); diff --git a/net/rxrpc/local_object.c b/net/rxrpc/local_object.c index 15cf42d5b53a..01959db51445 100644 --- a/net/rxrpc/local_object.c +++ b/net/rxrpc/local_object.c @@ -304,7 +304,8 @@ nomem: ret = -ENOMEM; sock_error: mutex_unlock(&rxnet->local_mutex); - kfree(local); + if (local) + call_rcu(&local->rcu, rxrpc_local_rcu); _leave(" = %d", ret); return ERR_PTR(ret); -- cgit v1.2.3 From 0453c682459583910d611a96de928f4442205493 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 24 Apr 2019 05:35:00 -0700 Subject: net/rose: fix unbound loop in rose_loopback_timer() This patch adds a limit on the number of skbs that fuzzers can queue into loopback_queue. 1000 packets for rose loopback seems more than enough. Then, since we now have multiple cpus in most linux hosts, we also need to limit the number of skbs rose_loopback_timer() can dequeue at each round. rose_loopback_queue() can be drop-monitor friendly, calling consume_skb() or kfree_skb() appropriately. Finally, use mod_timer() instead of del_timer() + add_timer() syzbot report was : rcu: INFO: rcu_preempt self-detected stall on CPU rcu: 0-...!: (10499 ticks this GP) idle=536/1/0x4000000000000002 softirq=103291/103291 fqs=34 rcu: (t=10500 jiffies g=140321 q=323) rcu: rcu_preempt kthread starved for 10426 jiffies! g140321 f0x0 RCU_GP_WAIT_FQS(5) ->state=0x402 ->cpu=1 rcu: RCU grace-period kthread stack dump: rcu_preempt I29168 10 2 0x80000000 Call Trace: context_switch kernel/sched/core.c:2877 [inline] __schedule+0x813/0x1cc0 kernel/sched/core.c:3518 schedule+0x92/0x180 kernel/sched/core.c:3562 schedule_timeout+0x4db/0xfd0 kernel/time/timer.c:1803 rcu_gp_fqs_loop kernel/rcu/tree.c:1971 [inline] rcu_gp_kthread+0x962/0x17b0 kernel/rcu/tree.c:2128 kthread+0x357/0x430 kernel/kthread.c:253 ret_from_fork+0x3a/0x50 arch/x86/entry/entry_64.S:352 NMI backtrace for cpu 0 CPU: 0 PID: 7632 Comm: kworker/0:4 Not tainted 5.1.0-rc5+ #172 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Workqueue: events iterate_cleanup_work Call Trace: __dump_stack lib/dump_stack.c:77 [inline] dump_stack+0x172/0x1f0 lib/dump_stack.c:113 nmi_cpu_backtrace.cold+0x63/0xa4 lib/nmi_backtrace.c:101 nmi_trigger_cpumask_backtrace+0x1be/0x236 lib/nmi_backtrace.c:62 arch_trigger_cpumask_backtrace+0x14/0x20 arch/x86/kernel/apic/hw_nmi.c:38 trigger_single_cpu_backtrace include/linux/nmi.h:164 [inline] rcu_dump_cpu_stacks+0x183/0x1cf kernel/rcu/tree.c:1223 print_cpu_stall kernel/rcu/tree.c:1360 [inline] check_cpu_stall kernel/rcu/tree.c:1434 [inline] rcu_pending kernel/rcu/tree.c:3103 [inline] rcu_sched_clock_irq.cold+0x500/0xa4a kernel/rcu/tree.c:2544 update_process_times+0x32/0x80 kernel/time/timer.c:1635 tick_sched_handle+0xa2/0x190 kernel/time/tick-sched.c:161 tick_sched_timer+0x47/0x130 kernel/time/tick-sched.c:1271 __run_hrtimer kernel/time/hrtimer.c:1389 [inline] __hrtimer_run_queues+0x33e/0xde0 kernel/time/hrtimer.c:1451 hrtimer_interrupt+0x314/0x770 kernel/time/hrtimer.c:1509 local_apic_timer_interrupt arch/x86/kernel/apic/apic.c:1035 [inline] smp_apic_timer_interrupt+0x120/0x570 arch/x86/kernel/apic/apic.c:1060 apic_timer_interrupt+0xf/0x20 arch/x86/entry/entry_64.S:807 RIP: 0010:__sanitizer_cov_trace_pc+0x0/0x50 kernel/kcov.c:95 Code: 89 25 b4 6e ec 08 41 bc f4 ff ff ff e8 cd 5d ea ff 48 c7 05 9e 6e ec 08 00 00 00 00 e9 a4 e9 ff ff 90 90 90 90 90 90 90 90 90 <55> 48 89 e5 48 8b 75 08 65 48 8b 04 25 00 ee 01 00 65 8b 15 c8 60 RSP: 0018:ffff8880ae807ce0 EFLAGS: 00000286 ORIG_RAX: ffffffffffffff13 RAX: ffff88806fd40640 RBX: dffffc0000000000 RCX: ffffffff863fbc56 RDX: 0000000000000100 RSI: ffffffff863fbc1d RDI: ffff88808cf94228 RBP: ffff8880ae807d10 R08: ffff88806fd40640 R09: ffffed1015d00f8b R10: ffffed1015d00f8a R11: 0000000000000003 R12: ffff88808cf941c0 R13: 00000000fffff034 R14: ffff8882166cd840 R15: 0000000000000000 rose_loopback_timer+0x30d/0x3f0 net/rose/rose_loopback.c:91 call_timer_fn+0x190/0x720 kernel/time/timer.c:1325 expire_timers kernel/time/timer.c:1362 [inline] __run_timers kernel/time/timer.c:1681 [inline] __run_timers kernel/time/timer.c:1649 [inline] run_timer_softirq+0x652/0x1700 kernel/time/timer.c:1694 __do_softirq+0x266/0x95a kernel/softirq.c:293 do_softirq_own_stack+0x2a/0x40 arch/x86/entry/entry_64.S:1027 Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Eric Dumazet Reported-by: syzbot Signed-off-by: David S. Miller --- net/rose/rose_loopback.c | 27 ++++++++++++++++----------- 1 file changed, 16 insertions(+), 11 deletions(-) diff --git a/net/rose/rose_loopback.c b/net/rose/rose_loopback.c index 7af4f99c4a93..094a6621f8e8 100644 --- a/net/rose/rose_loopback.c +++ b/net/rose/rose_loopback.c @@ -16,6 +16,7 @@ #include static struct sk_buff_head loopback_queue; +#define ROSE_LOOPBACK_LIMIT 1000 static struct timer_list loopback_timer; static void rose_set_loopback_timer(void); @@ -35,29 +36,27 @@ static int rose_loopback_running(void) int rose_loopback_queue(struct sk_buff *skb, struct rose_neigh *neigh) { - struct sk_buff *skbn; + struct sk_buff *skbn = NULL; - skbn = skb_clone(skb, GFP_ATOMIC); + if (skb_queue_len(&loopback_queue) < ROSE_LOOPBACK_LIMIT) + skbn = skb_clone(skb, GFP_ATOMIC); - kfree_skb(skb); - - if (skbn != NULL) { + if (skbn) { + consume_skb(skb); skb_queue_tail(&loopback_queue, skbn); if (!rose_loopback_running()) rose_set_loopback_timer(); + } else { + kfree_skb(skb); } return 1; } - static void rose_set_loopback_timer(void) { - del_timer(&loopback_timer); - - loopback_timer.expires = jiffies + 10; - add_timer(&loopback_timer); + mod_timer(&loopback_timer, jiffies + 10); } static void rose_loopback_timer(struct timer_list *unused) @@ -68,8 +67,12 @@ static void rose_loopback_timer(struct timer_list *unused) struct sock *sk; unsigned short frametype; unsigned int lci_i, lci_o; + int count; - while ((skb = skb_dequeue(&loopback_queue)) != NULL) { + for (count = 0; count < ROSE_LOOPBACK_LIMIT; count++) { + skb = skb_dequeue(&loopback_queue); + if (!skb) + return; if (skb->len < ROSE_MIN_LEN) { kfree_skb(skb); continue; @@ -106,6 +109,8 @@ static void rose_loopback_timer(struct timer_list *unused) kfree_skb(skb); } } + if (!skb_queue_empty(&loopback_queue)) + mod_timer(&loopback_timer, jiffies + 1); } void __exit rose_loopback_clear(void) -- cgit v1.2.3 From 20ff83f10f113c88d0bb74589389b05250994c16 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 24 Apr 2019 08:04:05 -0700 Subject: ipv4: add sanity checks in ipv4_link_failure() Before calling __ip_options_compile(), we need to ensure the network header is a an IPv4 one, and that it is already pulled in skb->head. RAW sockets going through a tunnel can end up calling ipv4_link_failure() with total garbage in the skb, or arbitrary lengthes. syzbot report : BUG: KASAN: stack-out-of-bounds in memcpy include/linux/string.h:355 [inline] BUG: KASAN: stack-out-of-bounds in __ip_options_echo+0x294/0x1120 net/ipv4/ip_options.c:123 Write of size 69 at addr ffff888096abf068 by task syz-executor.4/9204 CPU: 0 PID: 9204 Comm: syz-executor.4 Not tainted 5.1.0-rc5+ #77 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Call Trace: __dump_stack lib/dump_stack.c:77 [inline] dump_stack+0x172/0x1f0 lib/dump_stack.c:113 print_address_description.cold+0x7c/0x20d mm/kasan/report.c:187 kasan_report.cold+0x1b/0x40 mm/kasan/report.c:317 check_memory_region_inline mm/kasan/generic.c:185 [inline] check_memory_region+0x123/0x190 mm/kasan/generic.c:191 memcpy+0x38/0x50 mm/kasan/common.c:133 memcpy include/linux/string.h:355 [inline] __ip_options_echo+0x294/0x1120 net/ipv4/ip_options.c:123 __icmp_send+0x725/0x1400 net/ipv4/icmp.c:695 ipv4_link_failure+0x29f/0x550 net/ipv4/route.c:1204 dst_link_failure include/net/dst.h:427 [inline] vti6_xmit net/ipv6/ip6_vti.c:514 [inline] vti6_tnl_xmit+0x10d4/0x1c0c net/ipv6/ip6_vti.c:553 __netdev_start_xmit include/linux/netdevice.h:4414 [inline] netdev_start_xmit include/linux/netdevice.h:4423 [inline] xmit_one net/core/dev.c:3292 [inline] dev_hard_start_xmit+0x1b2/0x980 net/core/dev.c:3308 __dev_queue_xmit+0x271d/0x3060 net/core/dev.c:3878 dev_queue_xmit+0x18/0x20 net/core/dev.c:3911 neigh_direct_output+0x16/0x20 net/core/neighbour.c:1527 neigh_output include/net/neighbour.h:508 [inline] ip_finish_output2+0x949/0x1740 net/ipv4/ip_output.c:229 ip_finish_output+0x73c/0xd50 net/ipv4/ip_output.c:317 NF_HOOK_COND include/linux/netfilter.h:278 [inline] ip_output+0x21f/0x670 net/ipv4/ip_output.c:405 dst_output include/net/dst.h:444 [inline] NF_HOOK include/linux/netfilter.h:289 [inline] raw_send_hdrinc net/ipv4/raw.c:432 [inline] raw_sendmsg+0x1d2b/0x2f20 net/ipv4/raw.c:663 inet_sendmsg+0x147/0x5d0 net/ipv4/af_inet.c:798 sock_sendmsg_nosec net/socket.c:651 [inline] sock_sendmsg+0xdd/0x130 net/socket.c:661 sock_write_iter+0x27c/0x3e0 net/socket.c:988 call_write_iter include/linux/fs.h:1866 [inline] new_sync_write+0x4c7/0x760 fs/read_write.c:474 __vfs_write+0xe4/0x110 fs/read_write.c:487 vfs_write+0x20c/0x580 fs/read_write.c:549 ksys_write+0x14f/0x2d0 fs/read_write.c:599 __do_sys_write fs/read_write.c:611 [inline] __se_sys_write fs/read_write.c:608 [inline] __x64_sys_write+0x73/0xb0 fs/read_write.c:608 do_syscall_64+0x103/0x610 arch/x86/entry/common.c:290 entry_SYSCALL_64_after_hwframe+0x49/0xbe RIP: 0033:0x458c29 Code: ad b8 fb ff c3 66 2e 0f 1f 84 00 00 00 00 00 66 90 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 0f 83 7b b8 fb ff c3 66 2e 0f 1f 84 00 00 00 00 RSP: 002b:00007f293b44bc78 EFLAGS: 00000246 ORIG_RAX: 0000000000000001 RAX: ffffffffffffffda RBX: 0000000000000003 RCX: 0000000000458c29 RDX: 0000000000000014 RSI: 00000000200002c0 RDI: 0000000000000003 RBP: 000000000073bf00 R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000246 R12: 00007f293b44c6d4 R13: 00000000004c8623 R14: 00000000004ded68 R15: 00000000ffffffff The buggy address belongs to the page: page:ffffea00025aafc0 count:0 mapcount:0 mapping:0000000000000000 index:0x0 flags: 0x1fffc0000000000() raw: 01fffc0000000000 0000000000000000 ffffffff025a0101 0000000000000000 raw: 0000000000000000 0000000000000000 00000000ffffffff 0000000000000000 page dumped because: kasan: bad access detected Memory state around the buggy address: ffff888096abef80: 00 00 00 f2 f2 f2 f2 f2 00 00 00 00 00 00 00 f2 ffff888096abf000: f2 f2 f2 f2 00 00 00 00 00 00 00 00 00 00 00 00 >ffff888096abf080: 00 00 f3 f3 f3 f3 00 00 00 00 00 00 00 00 00 00 ^ ffff888096abf100: 00 00 00 00 f1 f1 f1 f1 00 00 f3 f3 00 00 00 00 ffff888096abf180: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 Fixes: ed0de45a1008 ("ipv4: recompile ip options in ipv4_link_failure") Signed-off-by: Eric Dumazet Cc: Stephen Suryaputra Acked-by: Willem de Bruijn Signed-off-by: David S. Miller --- net/ipv4/route.c | 32 +++++++++++++++++++++++--------- 1 file changed, 23 insertions(+), 9 deletions(-) diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 88ce038dd495..6fdf1c195d8e 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -1183,25 +1183,39 @@ static struct dst_entry *ipv4_dst_check(struct dst_entry *dst, u32 cookie) return dst; } -static void ipv4_link_failure(struct sk_buff *skb) +static void ipv4_send_dest_unreach(struct sk_buff *skb) { struct ip_options opt; - struct rtable *rt; int res; /* Recompile ip options since IPCB may not be valid anymore. + * Also check we have a reasonable ipv4 header. */ - memset(&opt, 0, sizeof(opt)); - opt.optlen = ip_hdr(skb)->ihl*4 - sizeof(struct iphdr); + if (!pskb_network_may_pull(skb, sizeof(struct iphdr)) || + ip_hdr(skb)->version != 4 || ip_hdr(skb)->ihl < 5) + return; - rcu_read_lock(); - res = __ip_options_compile(dev_net(skb->dev), &opt, skb, NULL); - rcu_read_unlock(); + memset(&opt, 0, sizeof(opt)); + if (ip_hdr(skb)->ihl > 5) { + if (!pskb_network_may_pull(skb, ip_hdr(skb)->ihl * 4)) + return; + opt.optlen = ip_hdr(skb)->ihl * 4 - sizeof(struct iphdr); - if (res) - return; + rcu_read_lock(); + res = __ip_options_compile(dev_net(skb->dev), &opt, skb, NULL); + rcu_read_unlock(); + if (res) + return; + } __icmp_send(skb, ICMP_DEST_UNREACH, ICMP_HOST_UNREACH, 0, &opt); +} + +static void ipv4_link_failure(struct sk_buff *skb) +{ + struct rtable *rt; + + ipv4_send_dest_unreach(skb); rt = skb_rtable(skb); if (rt) -- cgit v1.2.3 From 82c99f7a81f28f8c1be5f701c8377d14c4075b10 Mon Sep 17 00:00:00 2001 From: Harry Pan Date: Wed, 24 Apr 2019 22:50:33 +0800 Subject: perf/x86/intel: Update KBL Package C-state events to also include PC8/PC9/PC10 counters Kaby Lake (and Coffee Lake) has PC8/PC9/PC10 residency counters. This patch updates the list of Kaby/Coffee Lake PMU event counters from the snb_cstates[] list of events to the hswult_cstates[] list of events, which keeps all previously supported events and also adds the PKG_C8, PKG_C9 and PKG_C10 residency counters. This allows user space tools to profile them through the perf interface. Signed-off-by: Harry Pan Cc: Cc: Alexander Shishkin Cc: Arnaldo Carvalho de Melo Cc: Borislav Petkov Cc: Jiri Olsa Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Thomas Gleixner Cc: Vince Weaver Cc: gs0622@gmail.com Link: http://lkml.kernel.org/r/20190424145033.1924-1-harry.pan@intel.com Signed-off-by: Ingo Molnar --- arch/x86/events/intel/cstate.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/arch/x86/events/intel/cstate.c b/arch/x86/events/intel/cstate.c index 94a4b7fc75d0..d41de9af7a39 100644 --- a/arch/x86/events/intel/cstate.c +++ b/arch/x86/events/intel/cstate.c @@ -76,15 +76,15 @@ * Scope: Package (physical package) * MSR_PKG_C8_RESIDENCY: Package C8 Residency Counter. * perf code: 0x04 - * Available model: HSW ULT,CNL + * Available model: HSW ULT,KBL,CNL * Scope: Package (physical package) * MSR_PKG_C9_RESIDENCY: Package C9 Residency Counter. * perf code: 0x05 - * Available model: HSW ULT,CNL + * Available model: HSW ULT,KBL,CNL * Scope: Package (physical package) * MSR_PKG_C10_RESIDENCY: Package C10 Residency Counter. * perf code: 0x06 - * Available model: HSW ULT,GLM,CNL + * Available model: HSW ULT,KBL,GLM,CNL * Scope: Package (physical package) * */ @@ -566,8 +566,8 @@ static const struct x86_cpu_id intel_cstates_match[] __initconst = { X86_CSTATES_MODEL(INTEL_FAM6_SKYLAKE_DESKTOP, snb_cstates), X86_CSTATES_MODEL(INTEL_FAM6_SKYLAKE_X, snb_cstates), - X86_CSTATES_MODEL(INTEL_FAM6_KABYLAKE_MOBILE, snb_cstates), - X86_CSTATES_MODEL(INTEL_FAM6_KABYLAKE_DESKTOP, snb_cstates), + X86_CSTATES_MODEL(INTEL_FAM6_KABYLAKE_MOBILE, hswult_cstates), + X86_CSTATES_MODEL(INTEL_FAM6_KABYLAKE_DESKTOP, hswult_cstates), X86_CSTATES_MODEL(INTEL_FAM6_CANNONLAKE_MOBILE, cnl_cstates), -- cgit v1.2.3 From 81103355b1e23345dbcdeccad59962a424da4a34 Mon Sep 17 00:00:00 2001 From: Thomas Hellstrom Date: Tue, 23 Apr 2019 14:02:57 +0200 Subject: drm/vmwgfx: Fix dma API layer violation Remove the check for IOMMU presence since it was considered a layer violation. This means we have no reliable way to destinguish between coherent hardware IOMMU DMA address translations and incoherent SWIOTLB DMA address translations, which we can't handle. So always presume the former. This means that if anybody forces SWIOTLB without also setting the vmw_force_coherent=1 vmwgfx option, driver operation will fail, like it will on most other graphics drivers. Signed-off-by: Thomas Hellstrom Reviewed-by: Christoph Hellwig --- drivers/gpu/drm/vmwgfx/vmwgfx_drv.c | 33 +++++---------------------------- 1 file changed, 5 insertions(+), 28 deletions(-) diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c index 7ef5dcb06104..2d066383d7fd 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c @@ -545,30 +545,14 @@ static void vmw_get_initial_size(struct vmw_private *dev_priv) dev_priv->initial_height = height; } -/** - * vmw_assume_iommu - Figure out whether coherent dma-remapping might be - * taking place. - * @dev: Pointer to the struct drm_device. - * - * Return: true if iommu present, false otherwise. - */ -static bool vmw_assume_iommu(struct drm_device *dev) -{ - const struct dma_map_ops *ops = get_dma_ops(dev->dev); - - return !dma_is_direct(ops) && ops && - ops->map_page != dma_direct_map_page; -} - /** * vmw_dma_select_mode - Determine how DMA mappings should be set up for this * system. * * @dev_priv: Pointer to a struct vmw_private * - * This functions tries to determine the IOMMU setup and what actions - * need to be taken by the driver to make system pages visible to the - * device. + * This functions tries to determine what actions need to be taken by the + * driver to make system pages visible to the device. * If this function decides that DMA is not possible, it returns -EINVAL. * The driver may then try to disable features of the device that require * DMA. @@ -578,23 +562,16 @@ static int vmw_dma_select_mode(struct vmw_private *dev_priv) static const char *names[vmw_dma_map_max] = { [vmw_dma_phys] = "Using physical TTM page addresses.", [vmw_dma_alloc_coherent] = "Using coherent TTM pages.", - [vmw_dma_map_populate] = "Keeping DMA mappings.", + [vmw_dma_map_populate] = "Caching DMA mappings.", [vmw_dma_map_bind] = "Giving up DMA mappings early."}; if (vmw_force_coherent) dev_priv->map_mode = vmw_dma_alloc_coherent; - else if (vmw_assume_iommu(dev_priv->dev)) - dev_priv->map_mode = vmw_dma_map_populate; - else if (!vmw_force_iommu) - dev_priv->map_mode = vmw_dma_phys; - else if (IS_ENABLED(CONFIG_SWIOTLB) && swiotlb_nr_tbl()) - dev_priv->map_mode = vmw_dma_alloc_coherent; + else if (vmw_restrict_iommu) + dev_priv->map_mode = vmw_dma_map_bind; else dev_priv->map_mode = vmw_dma_map_populate; - if (dev_priv->map_mode == vmw_dma_map_populate && vmw_restrict_iommu) - dev_priv->map_mode = vmw_dma_map_bind; - /* No TTM coherent page pool? FIXME: Ask TTM instead! */ if (!(IS_ENABLED(CONFIG_SWIOTLB) || IS_ENABLED(CONFIG_INTEL_IOMMU)) && (dev_priv->map_mode == vmw_dma_alloc_coherent)) -- cgit v1.2.3 From 357798909164bf423eac6a78ff7da7e98d2d7f7f Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Wed, 24 Apr 2019 15:59:33 +0200 Subject: gpio: Fix gpiochip_add_data_with_key() error path The err_remove_chip block is too coarse, and may perform cleanup that must not be done. E.g. if of_gpiochip_add() fails, of_gpiochip_remove() is still called, causing: OF: ERROR: Bad of_node_put() on /soc/gpio@e6050000 CPU: 1 PID: 20 Comm: kworker/1:1 Not tainted 5.1.0-rc2-koelsch+ #407 Hardware name: Generic R-Car Gen2 (Flattened Device Tree) Workqueue: events deferred_probe_work_func [] (unwind_backtrace) from [] (show_stack+0x10/0x14) [] (show_stack) from [] (dump_stack+0x7c/0x9c) [] (dump_stack) from [] (kobject_put+0x94/0xbc) [] (kobject_put) from [] (gpiochip_add_data_with_key+0x8d8/0xa3c) [] (gpiochip_add_data_with_key) from [] (gpio_rcar_probe+0x1d4/0x314) [] (gpio_rcar_probe) from [] (platform_drv_probe+0x48/0x94) and later, if a GPIO consumer tries to use a GPIO from a failed controller: WARNING: CPU: 0 PID: 1 at lib/refcount.c:156 kobject_get+0x38/0x4c refcount_t: increment on 0; use-after-free. Modules linked in: CPU: 0 PID: 1 Comm: swapper/0 Not tainted 5.1.0-rc2-koelsch+ #407 Hardware name: Generic R-Car Gen2 (Flattened Device Tree) [] (unwind_backtrace) from [] (show_stack+0x10/0x14) [] (show_stack) from [] (dump_stack+0x7c/0x9c) [] (dump_stack) from [] (__warn+0xd0/0xec) [] (__warn) from [] (warn_slowpath_fmt+0x44/0x6c) [] (warn_slowpath_fmt) from [] (kobject_get+0x38/0x4c) [] (kobject_get) from [] (of_node_get+0x14/0x1c) [] (of_node_get) from [] (of_find_node_by_phandle+0xc0/0xf0) [] (of_find_node_by_phandle) from [] (of_phandle_iterator_next+0x68/0x154) [] (of_phandle_iterator_next) from [] (__of_parse_phandle_with_args+0x40/0xd0) [] (__of_parse_phandle_with_args) from [] (of_parse_phandle_with_args_map+0x100/0x3ac) [] (of_parse_phandle_with_args_map) from [] (of_get_named_gpiod_flags+0x38/0x380) [] (of_get_named_gpiod_flags) from [] (gpiod_get_from_of_node+0x24/0xd8) [] (gpiod_get_from_of_node) from [] (devm_fwnode_get_index_gpiod_from_child+0xa0/0x144) [] (devm_fwnode_get_index_gpiod_from_child) from [] (gpio_keys_probe+0x418/0x7bc) [] (gpio_keys_probe) from [] (platform_drv_probe+0x48/0x94) Fix this by splitting the cleanup block, and adding a missing call to gpiochip_irqchip_remove(). Fixes: 28355f81969962cf ("gpio: defer probe if pinctrl cannot be found") Signed-off-by: Geert Uytterhoeven Reviewed-by: Mukesh Ojha Signed-off-by: Linus Walleij --- drivers/gpio/gpiolib.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c index 0495bf1d480a..bca3e7740ef6 100644 --- a/drivers/gpio/gpiolib.c +++ b/drivers/gpio/gpiolib.c @@ -1379,7 +1379,7 @@ int gpiochip_add_data_with_key(struct gpio_chip *chip, void *data, status = gpiochip_add_irqchip(chip, lock_key, request_key); if (status) - goto err_remove_chip; + goto err_free_gpiochip_mask; status = of_gpiochip_add(chip); if (status) @@ -1387,7 +1387,7 @@ int gpiochip_add_data_with_key(struct gpio_chip *chip, void *data, status = gpiochip_init_valid_mask(chip); if (status) - goto err_remove_chip; + goto err_remove_of_chip; for (i = 0; i < chip->ngpio; i++) { struct gpio_desc *desc = &gdev->descs[i]; @@ -1415,14 +1415,18 @@ int gpiochip_add_data_with_key(struct gpio_chip *chip, void *data, if (gpiolib_initialized) { status = gpiochip_setup_dev(gdev); if (status) - goto err_remove_chip; + goto err_remove_acpi_chip; } return 0; -err_remove_chip: +err_remove_acpi_chip: acpi_gpiochip_remove(chip); +err_remove_of_chip: gpiochip_free_hogs(chip); of_gpiochip_remove(chip); +err_remove_chip: + gpiochip_irqchip_remove(chip); +err_free_gpiochip_mask: gpiochip_free_valid_mask(chip); err_remove_irqchip_mask: gpiochip_irqchip_free_valid_mask(chip); -- cgit v1.2.3 From c4cba44eeecab9d5ccd3dd2d5520a7d1e5be544f Mon Sep 17 00:00:00 2001 From: Jonas Karlman Date: Sun, 21 Apr 2019 08:25:50 +0000 Subject: drm/bridge: dw-hdmi: fix SCDC configuration for ddc-i2c-bus When ddc-i2c-bus property is used, a NULL pointer dereference is reported: [ 31.041669] Unable to handle kernel NULL pointer dereference at virtual address 00000008 [ 31.041671] pgd = 4d3c16f6 [ 31.041673] [00000008] *pgd=00000000 [ 31.041678] Internal error: Oops: 5 [#1] SMP ARM [ 31.041711] Hardware name: Rockchip (Device Tree) [ 31.041718] PC is at i2c_transfer+0x8/0xe4 [ 31.041721] LR is at drm_scdc_read+0x54/0x84 [ 31.041723] pc : [] lr : [] psr: 280f0013 [ 31.041725] sp : edffdad0 ip : 5ccb5511 fp : 00000058 [ 31.041727] r10: 00000780 r9 : edf91608 r8 : c11b0f48 [ 31.041728] r7 : 00000438 r6 : 00000000 r5 : 00000000 r4 : 00000000 [ 31.041730] r3 : edffdae7 r2 : 00000002 r1 : edffdaec r0 : 00000000 [ 31.041908] [] (i2c_transfer) from [] (drm_scdc_read+0x54/0x84) [ 31.041913] [] (drm_scdc_read) from [] (drm_scdc_set_scrambling+0x30/0xbc) [ 31.041919] [] (drm_scdc_set_scrambling) from [] (dw_hdmi_update_power+0x1440/0x1610) [ 31.041926] [] (dw_hdmi_update_power) from [] (dw_hdmi_bridge_enable+0x2c/0x70) [ 31.041932] [] (dw_hdmi_bridge_enable) from [] (drm_bridge_enable+0x24/0x34) [ 31.041938] [] (drm_bridge_enable) from [] (drm_atomic_helper_commit_modeset_enables+0x114/0x220) [ 31.041943] [] (drm_atomic_helper_commit_modeset_enables) from [] (rockchip_atomic_helper_commit_tail_rpm+0x28/0x64) hdmi->i2c may not be set when ddc-i2c-bus property is used in device tree. Fix this by using hdmi->ddc as the i2c adapter when calling drm_scdc_*(). Also report that SCDC is not supported when there is no DDC bus. Fixes: 264fce6cc2c1 ("drm/bridge: dw-hdmi: Add SCDC and TMDS Scrambling support") Signed-off-by: Jonas Karlman Reviewed-by: Heiko Stuebner Reviewed-by: Neil Armstrong Reviewed-by: Laurent Pinchart Signed-off-by: Andrzej Hajda Link: https://patchwork.freedesktop.org/patch/msgid/VE1PR03MB59031814B5BCAB2152923BDAAC210@VE1PR03MB5903.eurprd03.prod.outlook.com --- drivers/gpu/drm/bridge/synopsys/dw-hdmi.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/bridge/synopsys/dw-hdmi.c b/drivers/gpu/drm/bridge/synopsys/dw-hdmi.c index b74ccb6a24c2..ab7968c8f6a2 100644 --- a/drivers/gpu/drm/bridge/synopsys/dw-hdmi.c +++ b/drivers/gpu/drm/bridge/synopsys/dw-hdmi.c @@ -1046,6 +1046,10 @@ static bool dw_hdmi_support_scdc(struct dw_hdmi *hdmi) if (hdmi->version < 0x200a) return false; + /* Disable if no DDC bus */ + if (!hdmi->ddc) + return false; + /* Disable if SCDC is not supported, or if an HF-VSDB block is absent */ if (!display->hdmi.scdc.supported || !display->hdmi.scdc.scrambling.supported) @@ -1684,13 +1688,13 @@ static void hdmi_av_composer(struct dw_hdmi *hdmi, * Source Devices compliant shall set the * Source Version = 1. */ - drm_scdc_readb(&hdmi->i2c->adap, SCDC_SINK_VERSION, + drm_scdc_readb(hdmi->ddc, SCDC_SINK_VERSION, &bytes); - drm_scdc_writeb(&hdmi->i2c->adap, SCDC_SOURCE_VERSION, + drm_scdc_writeb(hdmi->ddc, SCDC_SOURCE_VERSION, min_t(u8, bytes, SCDC_MIN_SOURCE_VERSION)); /* Enabled Scrambling in the Sink */ - drm_scdc_set_scrambling(&hdmi->i2c->adap, 1); + drm_scdc_set_scrambling(hdmi->ddc, 1); /* * To activate the scrambler feature, you must ensure @@ -1706,7 +1710,7 @@ static void hdmi_av_composer(struct dw_hdmi *hdmi, hdmi_writeb(hdmi, 0, HDMI_FC_SCRAMBLER_CTRL); hdmi_writeb(hdmi, (u8)~HDMI_MC_SWRSTZ_TMDSSWRST_REQ, HDMI_MC_SWRSTZ); - drm_scdc_set_scrambling(&hdmi->i2c->adap, 0); + drm_scdc_set_scrambling(hdmi->ddc, 0); } } -- cgit v1.2.3 From c409ca3be3c6ff3a1eeb303b191184e80d412862 Mon Sep 17 00:00:00 2001 From: Malte Leip Date: Sun, 14 Apr 2019 12:00:12 +0200 Subject: usb: usbip: fix isoc packet num validation in get_pipe Change the validation of number_of_packets in get_pipe to compare the number of packets to a fixed maximum number of packets allowed, set to be 1024. This number was chosen due to it being used by other drivers as well, for example drivers/usb/host/uhci-q.c Background/reason: The get_pipe function in stub_rx.c validates the number of packets in isochronous mode and aborts with an error if that number is too large, in order to prevent malicious input from possibly triggering large memory allocations. This was previously done by checking whether pdu->u.cmd_submit.number_of_packets is bigger than the number of packets that would be needed for pdu->u.cmd_submit.transfer_buffer_length bytes if all except possibly the last packet had maximum length, given by usb_endpoint_maxp(epd) * usb_endpoint_maxp_mult(epd). This leads to an error if URBs with packets shorter than the maximum possible length are submitted, which is allowed according to Documentation/driver-api/usb/URB.rst and occurs for example with the snd-usb-audio driver. Fixes: c6688ef9f297 ("usbip: fix stub_rx: harden CMD_SUBMIT path to handle malicious input") Signed-off-by: Malte Leip Cc: stable Acked-by: Shuah Khan Signed-off-by: Greg Kroah-Hartman --- drivers/usb/usbip/stub_rx.c | 12 +++--------- drivers/usb/usbip/usbip_common.h | 7 +++++++ 2 files changed, 10 insertions(+), 9 deletions(-) diff --git a/drivers/usb/usbip/stub_rx.c b/drivers/usb/usbip/stub_rx.c index 97b09a42a10c..dbfb2f24d71e 100644 --- a/drivers/usb/usbip/stub_rx.c +++ b/drivers/usb/usbip/stub_rx.c @@ -361,16 +361,10 @@ static int get_pipe(struct stub_device *sdev, struct usbip_header *pdu) } if (usb_endpoint_xfer_isoc(epd)) { - /* validate packet size and number of packets */ - unsigned int maxp, packets, bytes; - - maxp = usb_endpoint_maxp(epd); - maxp *= usb_endpoint_maxp_mult(epd); - bytes = pdu->u.cmd_submit.transfer_buffer_length; - packets = DIV_ROUND_UP(bytes, maxp); - + /* validate number of packets */ if (pdu->u.cmd_submit.number_of_packets < 0 || - pdu->u.cmd_submit.number_of_packets > packets) { + pdu->u.cmd_submit.number_of_packets > + USBIP_MAX_ISO_PACKETS) { dev_err(&sdev->udev->dev, "CMD_SUBMIT: isoc invalid num packets %d\n", pdu->u.cmd_submit.number_of_packets); diff --git a/drivers/usb/usbip/usbip_common.h b/drivers/usb/usbip/usbip_common.h index bf8afe9b5883..8be857a4fa13 100644 --- a/drivers/usb/usbip/usbip_common.h +++ b/drivers/usb/usbip/usbip_common.h @@ -121,6 +121,13 @@ extern struct device_attribute dev_attr_usbip_debug; #define USBIP_DIR_OUT 0x00 #define USBIP_DIR_IN 0x01 +/* + * Arbitrary limit for the maximum number of isochronous packets in an URB, + * compare for example the uhci_submit_isochronous function in + * drivers/usb/host/uhci-q.c + */ +#define USBIP_MAX_ISO_PACKETS 1024 + /** * struct usbip_header_basic - data pertinent to every request * @command: the usbip request type -- cgit v1.2.3 From ef61eb43ada6c1d6b94668f0f514e4c268093ff3 Mon Sep 17 00:00:00 2001 From: Alan Stern Date: Tue, 23 Apr 2019 14:48:29 -0400 Subject: USB: yurex: Fix protection fault after device removal The syzkaller USB fuzzer found a general-protection-fault bug in the yurex driver. The fault occurs when a device has been unplugged; the driver's interrupt-URB handler logs an error message referring to the device by name, after the device has been unregistered and its name deallocated. This problem is caused by the fact that the interrupt URB isn't cancelled until the driver's private data structure is released, which can happen long after the device is gone. The cure is to make sure that the interrupt URB is killed before yurex_disconnect() returns; this is exactly the sort of thing that usb_poison_urb() was meant for. Signed-off-by: Alan Stern Reported-and-tested-by: syzbot+2eb9121678bdb36e6d57@syzkaller.appspotmail.com CC: Signed-off-by: Greg Kroah-Hartman --- drivers/usb/misc/yurex.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/usb/misc/yurex.c b/drivers/usb/misc/yurex.c index 6d9fd5f64903..7b306aa22d25 100644 --- a/drivers/usb/misc/yurex.c +++ b/drivers/usb/misc/yurex.c @@ -314,6 +314,7 @@ static void yurex_disconnect(struct usb_interface *interface) usb_deregister_dev(interface, &yurex_class); /* prevent more I/O from starting */ + usb_poison_urb(dev->urb); mutex_lock(&dev->io_mutex); dev->interface = NULL; mutex_unlock(&dev->io_mutex); -- cgit v1.2.3 From c114944d7d67f24e71562fcfc18d550ab787e4d4 Mon Sep 17 00:00:00 2001 From: Alan Stern Date: Mon, 22 Apr 2019 11:16:04 -0400 Subject: USB: w1 ds2490: Fix bug caused by improper use of altsetting array The syzkaller USB fuzzer spotted a slab-out-of-bounds bug in the ds2490 driver. This bug is caused by improper use of the altsetting array in the usb_interface structure (the array's entries are not always stored in numerical order), combined with a naive assumption that all interfaces probed by the driver will have the expected number of altsettings. The bug can be fixed by replacing references to the possibly non-existent intf->altsetting[alt] entry with the guaranteed-to-exist intf->cur_altsetting entry. Signed-off-by: Alan Stern Reported-and-tested-by: syzbot+d65f673b847a1a96cdba@syzkaller.appspotmail.com CC: Signed-off-by: Greg Kroah-Hartman --- drivers/w1/masters/ds2490.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/w1/masters/ds2490.c b/drivers/w1/masters/ds2490.c index 0f4ecfcdb549..a9fb77585272 100644 --- a/drivers/w1/masters/ds2490.c +++ b/drivers/w1/masters/ds2490.c @@ -1016,15 +1016,15 @@ static int ds_probe(struct usb_interface *intf, /* alternative 3, 1ms interrupt (greatly speeds search), 64 byte bulk */ alt = 3; err = usb_set_interface(dev->udev, - intf->altsetting[alt].desc.bInterfaceNumber, alt); + intf->cur_altsetting->desc.bInterfaceNumber, alt); if (err) { dev_err(&dev->udev->dev, "Failed to set alternative setting %d " "for %d interface: err=%d.\n", alt, - intf->altsetting[alt].desc.bInterfaceNumber, err); + intf->cur_altsetting->desc.bInterfaceNumber, err); goto err_out_clear; } - iface_desc = &intf->altsetting[alt]; + iface_desc = intf->cur_altsetting; if (iface_desc->desc.bNumEndpoints != NUM_EP-1) { pr_info("Num endpoints=%d. It is not DS9490R.\n", iface_desc->desc.bNumEndpoints); -- cgit v1.2.3 From b82d6c1f8f8288f744a9dcc16cd3085d535decca Mon Sep 17 00:00:00 2001 From: Douglas Anderson Date: Wed, 3 Apr 2019 21:01:06 -0700 Subject: mwifiex: Make resume actually do something useful again on SDIO cards The commit fc3a2fcaa1ba ("mwifiex: use atomic bitops to represent adapter status variables") had a fairly straightforward bug in it. It contained this bit of diff: - if (!adapter->is_suspended) { + if (test_bit(MWIFIEX_IS_SUSPENDED, &adapter->work_flags)) { As you can see the patch missed the "!" when converting to the atomic bitops. This meant that the resume hasn't done anything at all since that commit landed and suspend/resume for mwifiex SDIO cards has been totally broken. After fixing this mwifiex suspend/resume appears to work again, at least with the simple testing I've done. Fixes: fc3a2fcaa1ba ("mwifiex: use atomic bitops to represent adapter status variables") Cc: Signed-off-by: Douglas Anderson Reviewed-by: Brian Norris Signed-off-by: Kalle Valo --- drivers/net/wireless/marvell/mwifiex/sdio.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/marvell/mwifiex/sdio.c b/drivers/net/wireless/marvell/mwifiex/sdio.c index a85648342d15..d5a70340a945 100644 --- a/drivers/net/wireless/marvell/mwifiex/sdio.c +++ b/drivers/net/wireless/marvell/mwifiex/sdio.c @@ -181,7 +181,7 @@ static int mwifiex_sdio_resume(struct device *dev) adapter = card->adapter; - if (test_bit(MWIFIEX_IS_SUSPENDED, &adapter->work_flags)) { + if (!test_bit(MWIFIEX_IS_SUSPENDED, &adapter->work_flags)) { mwifiex_dbg(adapter, WARN, "device already resumed\n"); return 0; -- cgit v1.2.3 From a3d46aea46f99d134b4e0726e4826b824c3e5980 Mon Sep 17 00:00:00 2001 From: Nikolay Borisov Date: Mon, 1 Apr 2019 11:29:58 +0300 Subject: btrfs: Switch memory allocations in async csum calculation path to kvmalloc Recent multi-page biovec rework allowed creation of bios that can span large regions - up to 128 megabytes in the case of btrfs. OTOH btrfs' submission path currently allocates a contiguous array to store the checksums for every bio submitted. This means we can request up to (128mb / BTRFS_SECTOR_SIZE) * 4 bytes + 32bytes of memory from kmalloc. On busy systems with possibly fragmented memory said kmalloc can fail which will trigger BUG_ON due to improper error handling IO submission context in btrfs. Until error handling is improved or bios in btrfs limited to a more manageable size (e.g. 1m) let's use kvmalloc to fallback to vmalloc for such large allocations. There is no hard requirement that the memory allocated for checksums during IO submission has to be contiguous, but this is a simple fix that does not require several non-contiguous allocations. For small writes this is unlikely to have any visible effect since kmalloc will still satisfy allocation requests as usual. For larger requests the code will just fallback to vmalloc. We've performed evaluation on several workload types and there was no significant difference kmalloc vs kvmalloc. Signed-off-by: Nikolay Borisov Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/file-item.c | 15 +++++++++++---- fs/btrfs/ordered-data.c | 3 ++- 2 files changed, 13 insertions(+), 5 deletions(-) diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c index 920bf3b4b0ef..cccc75d15970 100644 --- a/fs/btrfs/file-item.c +++ b/fs/btrfs/file-item.c @@ -7,6 +7,7 @@ #include #include #include +#include #include "ctree.h" #include "disk-io.h" #include "transaction.h" @@ -427,9 +428,13 @@ blk_status_t btrfs_csum_one_bio(struct inode *inode, struct bio *bio, unsigned long this_sum_bytes = 0; int i; u64 offset; + unsigned nofs_flag; + + nofs_flag = memalloc_nofs_save(); + sums = kvzalloc(btrfs_ordered_sum_size(fs_info, bio->bi_iter.bi_size), + GFP_KERNEL); + memalloc_nofs_restore(nofs_flag); - sums = kzalloc(btrfs_ordered_sum_size(fs_info, bio->bi_iter.bi_size), - GFP_NOFS); if (!sums) return BLK_STS_RESOURCE; @@ -472,8 +477,10 @@ blk_status_t btrfs_csum_one_bio(struct inode *inode, struct bio *bio, bytes_left = bio->bi_iter.bi_size - total_bytes; - sums = kzalloc(btrfs_ordered_sum_size(fs_info, bytes_left), - GFP_NOFS); + nofs_flag = memalloc_nofs_save(); + sums = kvzalloc(btrfs_ordered_sum_size(fs_info, + bytes_left), GFP_KERNEL); + memalloc_nofs_restore(nofs_flag); BUG_ON(!sums); /* -ENOMEM */ sums->len = bytes_left; ordered = btrfs_lookup_ordered_extent(inode, diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c index 6fde2b2741ef..45e3cfd1198b 100644 --- a/fs/btrfs/ordered-data.c +++ b/fs/btrfs/ordered-data.c @@ -6,6 +6,7 @@ #include #include #include +#include #include "ctree.h" #include "transaction.h" #include "btrfs_inode.h" @@ -442,7 +443,7 @@ void btrfs_put_ordered_extent(struct btrfs_ordered_extent *entry) cur = entry->list.next; sum = list_entry(cur, struct btrfs_ordered_sum, list); list_del(&sum->list); - kfree(sum); + kvfree(sum); } kmem_cache_free(btrfs_ordered_extent_cache, entry); } -- cgit v1.2.3 From 2557fabd6e29f349bfa0ac13f38ac98aa5eafc74 Mon Sep 17 00:00:00 2001 From: Lijun Ou Date: Tue, 23 Apr 2019 17:30:26 +0800 Subject: RDMA/hns: Bugfix for mapping user db When the maximum send wr delivered by the user is zero, the qp does not have a sq. When allocating the sq db buffer to store the user sq pi pointer and map it to the kernel mode, max_send_wr is used as the trigger condition, while the kernel does not consider the max_send_wr trigger condition when mapmping db. It will cause sq record doorbell map fail and create qp fail. The failed print information as follows: hns3 0000:7d:00.1: Send cmd: tail - 418, opcode - 0x8504, flag - 0x0011, retval - 0x0000 hns3 0000:7d:00.1: Send cmd: 0xe59dc000 0x00000000 0x00000000 0x00000000 0x00000116 0x0000ffff hns3 0000:7d:00.1: sq record doorbell map failed! hns3 0000:7d:00.1: Create RC QP failed Fixes: 0425e3e6e0c7 ("RDMA/hns: Support flush cqe for hip08 in kernel space") Signed-off-by: Lijun Ou Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hns/hns_roce_qp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/hns/hns_roce_qp.c b/drivers/infiniband/hw/hns/hns_roce_qp.c index 66cdf625534f..60cf9f03e941 100644 --- a/drivers/infiniband/hw/hns/hns_roce_qp.c +++ b/drivers/infiniband/hw/hns/hns_roce_qp.c @@ -533,7 +533,7 @@ static int hns_roce_set_kernel_sq_size(struct hns_roce_dev *hr_dev, static int hns_roce_qp_has_sq(struct ib_qp_init_attr *attr) { - if (attr->qp_type == IB_QPT_XRC_TGT) + if (attr->qp_type == IB_QPT_XRC_TGT || !attr->cap.max_send_wr) return 0; return 1; -- cgit v1.2.3 From a860fa7b96e1a1c974556327aa1aee852d434c21 Mon Sep 17 00:00:00 2001 From: Xie XiuQi Date: Sat, 20 Apr 2019 16:34:16 +0800 Subject: sched/numa: Fix a possible divide-by-zero sched_clock_cpu() may not be consistent between CPUs. If a task migrates to another CPU, then se.exec_start is set to that CPU's rq_clock_task() by update_stats_curr_start(). Specifically, the new value might be before the old value due to clock skew. So then if in numa_get_avg_runtime() the expression: 'now - p->last_task_numa_placement' ends up as -1, then the divider '*period + 1' in task_numa_placement() is 0 and things go bang. Similar to update_curr(), check if time goes backwards to avoid this. [ peterz: Wrote new changelog. ] [ mingo: Tweaked the code comment. ] Signed-off-by: Xie XiuQi Signed-off-by: Peter Zijlstra (Intel) Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: cj.chengjian@huawei.com Cc: Link: http://lkml.kernel.org/r/20190425080016.GX11158@hirez.programming.kicks-ass.net Signed-off-by: Ingo Molnar --- kernel/sched/fair.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index a4d9e14bf138..35f3ea375084 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -2007,6 +2007,10 @@ static u64 numa_get_avg_runtime(struct task_struct *p, u64 *period) if (p->last_task_numa_placement) { delta = runtime - p->last_sum_exec_runtime; *period = now - p->last_task_numa_placement; + + /* Avoid time going backwards, prevent potential divide error: */ + if (unlikely((s64)*period < 0)) + *period = 0; } else { delta = p->se.avg.load_sum; *period = LOAD_AVG_MAX; -- cgit v1.2.3 From 9a8f612ca0d6a436e6471c9bed516d34a2cc626f Mon Sep 17 00:00:00 2001 From: Miquel Raynal Date: Mon, 8 Apr 2019 10:31:45 +0200 Subject: mtd: rawnand: marvell: Clean the controller state before each operation Since the migration of the driver to stop using the legacy ->select_chip() hook, there is nothing deselecting the target anymore, thus the selection is not forced at the next access. Ensure the ND_RUN bit and the interrupts are always in a clean state. Cc: Daniel Mack Cc: stable@vger.kernel.org Fixes: b25251414f6e00 ("mtd: rawnand: marvell: Stop implementing ->select_chip()") Suggested-by: Boris Brezillon Signed-off-by: Miquel Raynal Tested-by: Daniel Mack Reviewed-by: Boris Brezillon Signed-off-by: Richard Weinberger --- drivers/mtd/nand/raw/marvell_nand.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/mtd/nand/raw/marvell_nand.c b/drivers/mtd/nand/raw/marvell_nand.c index f38e5c1b87e4..d984538980e2 100644 --- a/drivers/mtd/nand/raw/marvell_nand.c +++ b/drivers/mtd/nand/raw/marvell_nand.c @@ -722,12 +722,6 @@ static void marvell_nfc_select_target(struct nand_chip *chip, struct marvell_nfc *nfc = to_marvell_nfc(chip->controller); u32 ndcr_generic; - if (chip == nfc->selected_chip && die_nr == marvell_nand->selected_die) - return; - - writel_relaxed(marvell_nand->ndtr0, nfc->regs + NDTR0); - writel_relaxed(marvell_nand->ndtr1, nfc->regs + NDTR1); - /* * Reset the NDCR register to a clean state for this particular chip, * also clear ND_RUN bit. @@ -739,6 +733,12 @@ static void marvell_nfc_select_target(struct nand_chip *chip, /* Also reset the interrupt status register */ marvell_nfc_clear_int(nfc, NDCR_ALL_INT); + if (chip == nfc->selected_chip && die_nr == marvell_nand->selected_die) + return; + + writel_relaxed(marvell_nand->ndtr0, nfc->regs + NDTR0); + writel_relaxed(marvell_nand->ndtr1, nfc->regs + NDTR1); + nfc->selected_chip = chip; marvell_nand->selected_die = die_nr; } -- cgit v1.2.3 From 349ced9984ff540ce74ca8a0b2e9b03dc434b9dd Mon Sep 17 00:00:00 2001 From: Andrey Smirnov Date: Wed, 24 Apr 2019 00:16:10 -0700 Subject: power: supply: sysfs: prevent endless uevent loop with CONFIG_POWER_SUPPLY_DEBUG Fix a similar endless event loop as was done in commit 8dcf32175b4e ("i2c: prevent endless uevent loop with CONFIG_I2C_DEBUG_CORE"): The culprit is the dev_dbg printk in the i2c uevent handler. If this is activated (for instance by CONFIG_I2C_DEBUG_CORE) it results in an endless loop with systemd-journald. This happens if user-space scans the system log and reads the uevent file to get information about a newly created device, which seems fair use to me. Unfortunately reading the "uevent" file uses the same function that runs for creating the uevent for a new device, generating the next syslog entry Both CONFIG_I2C_DEBUG_CORE and CONFIG_POWER_SUPPLY_DEBUG were reported in https://bugs.freedesktop.org/show_bug.cgi?id=76886 but only former seems to have been fixed. Drop debug prints as it was done in I2C subsystem to resolve the issue. Signed-off-by: Andrey Smirnov Cc: Chris Healy Cc: linux-pm@vger.kernel.org Signed-off-by: Sebastian Reichel --- drivers/power/supply/power_supply_sysfs.c | 6 ------ 1 file changed, 6 deletions(-) diff --git a/drivers/power/supply/power_supply_sysfs.c b/drivers/power/supply/power_supply_sysfs.c index dce24f596160..5358a80d854f 100644 --- a/drivers/power/supply/power_supply_sysfs.c +++ b/drivers/power/supply/power_supply_sysfs.c @@ -383,15 +383,11 @@ int power_supply_uevent(struct device *dev, struct kobj_uevent_env *env) char *prop_buf; char *attrname; - dev_dbg(dev, "uevent\n"); - if (!psy || !psy->desc) { dev_dbg(dev, "No power supply yet\n"); return ret; } - dev_dbg(dev, "POWER_SUPPLY_NAME=%s\n", psy->desc->name); - ret = add_uevent_var(env, "POWER_SUPPLY_NAME=%s", psy->desc->name); if (ret) return ret; @@ -427,8 +423,6 @@ int power_supply_uevent(struct device *dev, struct kobj_uevent_env *env) goto out; } - dev_dbg(dev, "prop %s=%s\n", attrname, prop_buf); - ret = add_uevent_var(env, "POWER_SUPPLY_%s=%s", attrname, prop_buf); kfree(attrname); if (ret) -- cgit v1.2.3 From 4ee0776760af03f181e6b80baf5fb1cc1a980f50 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Wed, 24 Apr 2019 09:32:55 -0700 Subject: selftests/seccomp: Prepare for exclusive seccomp flags Some seccomp flags will become exclusive, so the selftest needs to be adjusted to mask those out and test them individually for the "all flags" tests. Cc: stable@vger.kernel.org # v5.0+ Signed-off-by: Kees Cook Reviewed-by: Tycho Andersen Acked-by: James Morris --- tools/testing/selftests/seccomp/seccomp_bpf.c | 34 ++++++++++++++++++++------- 1 file changed, 25 insertions(+), 9 deletions(-) diff --git a/tools/testing/selftests/seccomp/seccomp_bpf.c b/tools/testing/selftests/seccomp/seccomp_bpf.c index f69d2ee29742..5019cdae5d0b 100644 --- a/tools/testing/selftests/seccomp/seccomp_bpf.c +++ b/tools/testing/selftests/seccomp/seccomp_bpf.c @@ -2166,11 +2166,14 @@ TEST(detect_seccomp_filter_flags) SECCOMP_FILTER_FLAG_LOG, SECCOMP_FILTER_FLAG_SPEC_ALLOW, SECCOMP_FILTER_FLAG_NEW_LISTENER }; - unsigned int flag, all_flags; + unsigned int exclusive[] = { + SECCOMP_FILTER_FLAG_TSYNC, + SECCOMP_FILTER_FLAG_NEW_LISTENER }; + unsigned int flag, all_flags, exclusive_mask; int i; long ret; - /* Test detection of known-good filter flags */ + /* Test detection of individual known-good filter flags */ for (i = 0, all_flags = 0; i < ARRAY_SIZE(flags); i++) { int bits = 0; @@ -2197,16 +2200,29 @@ TEST(detect_seccomp_filter_flags) all_flags |= flag; } - /* Test detection of all known-good filter flags */ - ret = seccomp(SECCOMP_SET_MODE_FILTER, all_flags, NULL); - EXPECT_EQ(-1, ret); - EXPECT_EQ(EFAULT, errno) { - TH_LOG("Failed to detect that all known-good filter flags (0x%X) are supported!", - all_flags); + /* + * Test detection of all known-good filter flags combined. But + * for the exclusive flags we need to mask them out and try them + * individually for the "all flags" testing. + */ + exclusive_mask = 0; + for (i = 0; i < ARRAY_SIZE(exclusive); i++) + exclusive_mask |= exclusive[i]; + for (i = 0; i < ARRAY_SIZE(exclusive); i++) { + flag = all_flags & ~exclusive_mask; + flag |= exclusive[i]; + + ret = seccomp(SECCOMP_SET_MODE_FILTER, flag, NULL); + EXPECT_EQ(-1, ret); + EXPECT_EQ(EFAULT, errno) { + TH_LOG("Failed to detect that all known-good filter flags (0x%X) are supported!", + flag); + } } - /* Test detection of an unknown filter flag */ + /* Test detection of an unknown filter flags, without exclusives. */ flag = -1; + flag &= ~exclusive_mask; ret = seccomp(SECCOMP_SET_MODE_FILTER, flag, NULL); EXPECT_EQ(-1, ret); EXPECT_EQ(EINVAL, errno) { -- cgit v1.2.3 From 7a0df7fbc14505e2e2be19ed08654a09e1ed5bf6 Mon Sep 17 00:00:00 2001 From: Tycho Andersen Date: Wed, 6 Mar 2019 13:14:13 -0700 Subject: seccomp: Make NEW_LISTENER and TSYNC flags exclusive As the comment notes, the return codes for TSYNC and NEW_LISTENER conflict, because they both return positive values, one in the case of success and one in the case of error. So, let's disallow both of these flags together. While this is technically a userspace break, all the users I know of are still waiting on me to land this feature in libseccomp, so I think it'll be safe. Also, at present my use case doesn't require TSYNC at all, so this isn't a big deal to disallow. If someone wanted to support this, a path forward would be to add a new flag like TSYNC_AND_LISTENER_YES_I_UNDERSTAND_THAT_TSYNC_WILL_JUST_RETURN_EAGAIN, but the use cases are so different I don't see it really happening. Finally, it's worth noting that this does actually fix a UAF issue: at the end of seccomp_set_mode_filter(), we have: if (flags & SECCOMP_FILTER_FLAG_NEW_LISTENER) { if (ret < 0) { listener_f->private_data = NULL; fput(listener_f); put_unused_fd(listener); } else { fd_install(listener, listener_f); ret = listener; } } out_free: seccomp_filter_free(prepared); But if ret > 0 because TSYNC raced, we'll install the listener fd and then free the filter out from underneath it, causing a UAF when the task closes it or dies. This patch also switches the condition to be simply if (ret), so that if someone does add the flag mentioned above, they won't have to remember to fix this too. Reported-by: syzbot+b562969adb2e04af3442@syzkaller.appspotmail.com Fixes: 6a21cc50f0c7 ("seccomp: add a return code to trap to userspace") CC: stable@vger.kernel.org # v5.0+ Signed-off-by: Tycho Andersen Signed-off-by: Kees Cook Acked-by: James Morris --- kernel/seccomp.c | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/kernel/seccomp.c b/kernel/seccomp.c index 54a0347ca812..4b51d9cbcf06 100644 --- a/kernel/seccomp.c +++ b/kernel/seccomp.c @@ -502,7 +502,10 @@ out: * * Caller must be holding current->sighand->siglock lock. * - * Returns 0 on success, -ve on error. + * Returns 0 on success, -ve on error, or + * - in TSYNC mode: the pid of a thread which was either not in the correct + * seccomp mode or did not have an ancestral seccomp filter + * - in NEW_LISTENER mode: the fd of the new listener */ static long seccomp_attach_filter(unsigned int flags, struct seccomp_filter *filter) @@ -1258,6 +1261,16 @@ static long seccomp_set_mode_filter(unsigned int flags, if (flags & ~SECCOMP_FILTER_FLAG_MASK) return -EINVAL; + /* + * In the successful case, NEW_LISTENER returns the new listener fd. + * But in the failure case, TSYNC returns the thread that died. If you + * combine these two flags, there's no way to tell whether something + * succeeded or failed. So, let's disallow this combination. + */ + if ((flags & SECCOMP_FILTER_FLAG_TSYNC) && + (flags & SECCOMP_FILTER_FLAG_NEW_LISTENER)) + return -EINVAL; + /* Prepare the new filter before holding any locks. */ prepared = seccomp_prepare_user_filter(filter); if (IS_ERR(prepared)) @@ -1304,7 +1317,7 @@ out: mutex_unlock(¤t->signal->cred_guard_mutex); out_put_fd: if (flags & SECCOMP_FILTER_FLAG_NEW_LISTENER) { - if (ret < 0) { + if (ret) { listener_f->private_data = NULL; fput(listener_f); put_unused_fd(listener); -- cgit v1.2.3 From ecfc3fcabbb5291d1e61600a3dac6cdbfdb04cb1 Mon Sep 17 00:00:00 2001 From: YueHaibing Date: Wed, 10 Apr 2019 21:49:23 +0800 Subject: MIPS: eBPF: Make ebpf_to_mips_reg() static Fix sparse warning: arch/mips/net/ebpf_jit.c:196:5: warning: symbol 'ebpf_to_mips_reg' was not declared. Should it be static? Reported-by: Hulk Robot Signed-off-by: YueHaibing Acked-by: Yonghong Song Signed-off-by: Daniel Borkmann Signed-off-by: Alexei Starovoitov --- arch/mips/net/ebpf_jit.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/arch/mips/net/ebpf_jit.c b/arch/mips/net/ebpf_jit.c index 0effd3cba9a7..98bf0c222b5f 100644 --- a/arch/mips/net/ebpf_jit.c +++ b/arch/mips/net/ebpf_jit.c @@ -186,8 +186,9 @@ enum which_ebpf_reg { * separate frame pointer, so BPF_REG_10 relative accesses are * adjusted to be $sp relative. */ -int ebpf_to_mips_reg(struct jit_ctx *ctx, const struct bpf_insn *insn, - enum which_ebpf_reg w) +static int ebpf_to_mips_reg(struct jit_ctx *ctx, + const struct bpf_insn *insn, + enum which_ebpf_reg w) { int ebpf_reg = (w == src_reg || w == src_reg_no_fp) ? insn->src_reg : insn->dst_reg; -- cgit v1.2.3 From 8694d8c1f82cccec9380e0d3720b84eee315dfb7 Mon Sep 17 00:00:00 2001 From: Alban Crequy Date: Fri, 12 Apr 2019 14:40:50 +0200 Subject: tools: bpftool: fix infinite loop in map create "bpftool map create" has an infinite loop on "while (argc)". The error case is missing. Symptoms: when forgetting to type the keyword 'type' in front of 'hash': $ sudo bpftool map create /sys/fs/bpf/dir/foobar hash key 8 value 8 entries 128 (infinite loop, taking all the CPU) ^C After the patch: $ sudo bpftool map create /sys/fs/bpf/dir/foobar hash key 8 value 8 entries 128 Error: unknown arg hash Fixes: 0b592b5a01be ("tools: bpftool: add map create command") Signed-off-by: Alban Crequy Reviewed-by: Quentin Monnet Acked-by: Song Liu Reviewed-by: Jakub Kicinski Signed-off-by: Daniel Borkmann Signed-off-by: Alexei Starovoitov --- tools/bpf/bpftool/map.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tools/bpf/bpftool/map.c b/tools/bpf/bpftool/map.c index e0c650d91784..994a7e0d16fb 100644 --- a/tools/bpf/bpftool/map.c +++ b/tools/bpf/bpftool/map.c @@ -1151,6 +1151,9 @@ static int do_create(int argc, char **argv) return -1; } NEXT_ARG(); + } else { + p_err("unknown arg %s", *argv); + return -1; } } -- cgit v1.2.3 From 39391377f8ecf2fa4569e2fede624dc091bcd859 Mon Sep 17 00:00:00 2001 From: Matteo Croce Date: Sat, 13 Apr 2019 03:37:32 +0200 Subject: libbpf: add binary to gitignore Some binaries are generated when building libbpf from tools/lib/bpf/, namely libbpf.so.0.0.2 and libbpf.so.0. Add them to the local .gitignore. Signed-off-by: Matteo Croce Reviewed-by: Jakub Kicinski Acked-by: Song Liu Signed-off-by: Daniel Borkmann Signed-off-by: Alexei Starovoitov --- tools/lib/bpf/.gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/lib/bpf/.gitignore b/tools/lib/bpf/.gitignore index 4db74758c674..fecb78afea3f 100644 --- a/tools/lib/bpf/.gitignore +++ b/tools/lib/bpf/.gitignore @@ -1,3 +1,4 @@ libbpf_version.h FEATURE-DUMP.libbpf test_libbpf +libbpf.so.* -- cgit v1.2.3 From c6a9efa1d8353d8960d152e7d469d952b01495c0 Mon Sep 17 00:00:00 2001 From: Paul Chaignon Date: Wed, 24 Apr 2019 21:50:42 +0200 Subject: bpf: mark registers in all frames after pkt/null checks In case of a null check on a pointer inside a subprog, we should mark all registers with this pointer as either safe or unknown, in both the current and previous frames. Currently, only spilled registers and registers in the current frame are marked. Packet bound checks in subprogs have the same issue. This patch fixes it to mark registers in previous frames as well. A good reproducer for null checks looks as follow: 1: ptr = bpf_map_lookup_elem(map, &key); 2: ret = subprog(ptr) { 3: return ptr != NULL; 4: } 5: if (ret) 6: value = *ptr; With the above, the verifier will complain on line 6 because it sees ptr as map_value_or_null despite the null check in subprog 1. Note that this patch fixes another resulting bug when using bpf_sk_release(): 1: sk = bpf_sk_lookup_tcp(...); 2: subprog(sk) { 3: if (sk) 4: bpf_sk_release(sk); 5: } 6: if (!sk) 7: return 0; 8: return 1; In the above, mark_ptr_or_null_regs will warn on line 6 because it will try to free the reference state, even though it was already freed on line 3. Fixes: f4d7e40a5b71 ("bpf: introduce function calls (verification)") Signed-off-by: Paul Chaignon Signed-off-by: Alexei Starovoitov --- kernel/bpf/verifier.c | 76 +++++++++++++++++++++++++++++++-------------------- 1 file changed, 46 insertions(+), 30 deletions(-) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 6c5a41f7f338..09d5d972c9ff 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -4138,15 +4138,35 @@ static int check_alu_op(struct bpf_verifier_env *env, struct bpf_insn *insn) return 0; } +static void __find_good_pkt_pointers(struct bpf_func_state *state, + struct bpf_reg_state *dst_reg, + enum bpf_reg_type type, u16 new_range) +{ + struct bpf_reg_state *reg; + int i; + + for (i = 0; i < MAX_BPF_REG; i++) { + reg = &state->regs[i]; + if (reg->type == type && reg->id == dst_reg->id) + /* keep the maximum range already checked */ + reg->range = max(reg->range, new_range); + } + + bpf_for_each_spilled_reg(i, state, reg) { + if (!reg) + continue; + if (reg->type == type && reg->id == dst_reg->id) + reg->range = max(reg->range, new_range); + } +} + static void find_good_pkt_pointers(struct bpf_verifier_state *vstate, struct bpf_reg_state *dst_reg, enum bpf_reg_type type, bool range_right_open) { - struct bpf_func_state *state = vstate->frame[vstate->curframe]; - struct bpf_reg_state *regs = state->regs, *reg; u16 new_range; - int i, j; + int i; if (dst_reg->off < 0 || (dst_reg->off == 0 && range_right_open)) @@ -4211,20 +4231,9 @@ static void find_good_pkt_pointers(struct bpf_verifier_state *vstate, * the range won't allow anything. * dst_reg->off is known < MAX_PACKET_OFF, therefore it fits in a u16. */ - for (i = 0; i < MAX_BPF_REG; i++) - if (regs[i].type == type && regs[i].id == dst_reg->id) - /* keep the maximum range already checked */ - regs[i].range = max(regs[i].range, new_range); - - for (j = 0; j <= vstate->curframe; j++) { - state = vstate->frame[j]; - bpf_for_each_spilled_reg(i, state, reg) { - if (!reg) - continue; - if (reg->type == type && reg->id == dst_reg->id) - reg->range = max(reg->range, new_range); - } - } + for (i = 0; i <= vstate->curframe; i++) + __find_good_pkt_pointers(vstate->frame[i], dst_reg, type, + new_range); } /* compute branch direction of the expression "if (reg opcode val) goto target;" @@ -4698,6 +4707,22 @@ static void mark_ptr_or_null_reg(struct bpf_func_state *state, } } +static void __mark_ptr_or_null_regs(struct bpf_func_state *state, u32 id, + bool is_null) +{ + struct bpf_reg_state *reg; + int i; + + for (i = 0; i < MAX_BPF_REG; i++) + mark_ptr_or_null_reg(state, &state->regs[i], id, is_null); + + bpf_for_each_spilled_reg(i, state, reg) { + if (!reg) + continue; + mark_ptr_or_null_reg(state, reg, id, is_null); + } +} + /* The logic is similar to find_good_pkt_pointers(), both could eventually * be folded together at some point. */ @@ -4705,10 +4730,10 @@ static void mark_ptr_or_null_regs(struct bpf_verifier_state *vstate, u32 regno, bool is_null) { struct bpf_func_state *state = vstate->frame[vstate->curframe]; - struct bpf_reg_state *reg, *regs = state->regs; + struct bpf_reg_state *regs = state->regs; u32 ref_obj_id = regs[regno].ref_obj_id; u32 id = regs[regno].id; - int i, j; + int i; if (ref_obj_id && ref_obj_id == id && is_null) /* regs[regno] is in the " == NULL" branch. @@ -4717,17 +4742,8 @@ static void mark_ptr_or_null_regs(struct bpf_verifier_state *vstate, u32 regno, */ WARN_ON_ONCE(release_reference_state(state, id)); - for (i = 0; i < MAX_BPF_REG; i++) - mark_ptr_or_null_reg(state, ®s[i], id, is_null); - - for (j = 0; j <= vstate->curframe; j++) { - state = vstate->frame[j]; - bpf_for_each_spilled_reg(i, state, reg) { - if (!reg) - continue; - mark_ptr_or_null_reg(state, reg, id, is_null); - } - } + for (i = 0; i <= vstate->curframe; i++) + __mark_ptr_or_null_regs(vstate->frame[i], id, is_null); } static bool try_match_pkt_pointers(const struct bpf_insn *insn, -- cgit v1.2.3 From 6dd7f14080473b655c247863e61b7c34424f0c83 Mon Sep 17 00:00:00 2001 From: Paul Chaignon Date: Wed, 24 Apr 2019 21:51:26 +0200 Subject: selftests/bpf: test cases for pkt/null checks in subprogs The first test case, for pointer null checks, is equivalent to the following pseudo-code. It checks that the verifier does not complain on line 6 and recognizes that ptr isn't null. 1: ptr = bpf_map_lookup_elem(map, &key); 2: ret = subprog(ptr) { 3: return ptr != NULL; 4: } 5: if (ret) 6: value = *ptr; The second test case, for packet bound checks, is equivalent to the following pseudo-code. It checks that the verifier does not complain on line 7 and recognizes that the packet is at least 1 byte long. 1: pkt_end = ctx.pkt_end; 2: ptr = ctx.pkt + 8; 3: ret = subprog(ptr, pkt_end) { 4: return ptr <= pkt_end; 5: } 6: if (ret) 7: value = *(u8 *)ctx.pkt; Signed-off-by: Paul Chaignon Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/verifier/calls.c | 25 ++++++++++++++++++++++ .../selftests/bpf/verifier/direct_packet_access.c | 22 +++++++++++++++++++ 2 files changed, 47 insertions(+) diff --git a/tools/testing/selftests/bpf/verifier/calls.c b/tools/testing/selftests/bpf/verifier/calls.c index fb11240b758b..9093a8f64dc6 100644 --- a/tools/testing/selftests/bpf/verifier/calls.c +++ b/tools/testing/selftests/bpf/verifier/calls.c @@ -374,6 +374,31 @@ .prog_type = BPF_PROG_TYPE_XDP, .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, +{ + "calls: ptr null check in subprog", + .insns = { + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), + BPF_MOV64_REG(BPF_REG_6, BPF_REG_0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 3), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1), + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_6, 0), + BPF_EXIT_INSN(), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, 1), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + }, + .errstr_unpriv = "function calls to other bpf functions are allowed for root only", + .fixup_map_hash_48b = { 3 }, + .result_unpriv = REJECT, + .result = ACCEPT, + .retval = 0, +}, { "calls: two calls with args", .insns = { diff --git a/tools/testing/selftests/bpf/verifier/direct_packet_access.c b/tools/testing/selftests/bpf/verifier/direct_packet_access.c index e3fc22e672c2..d5c596fdc4b9 100644 --- a/tools/testing/selftests/bpf/verifier/direct_packet_access.c +++ b/tools/testing/selftests/bpf/verifier/direct_packet_access.c @@ -631,3 +631,25 @@ .errstr = "invalid access to packet", .prog_type = BPF_PROG_TYPE_SCHED_CLS, }, +{ + "direct packet access: test29 (reg > pkt_end in subprog)", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1, + offsetof(struct __sk_buff, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct __sk_buff, data_end)), + BPF_MOV64_REG(BPF_REG_3, BPF_REG_6), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_3, 8), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 4), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1), + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_6, 0), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_JMP_REG(BPF_JGT, BPF_REG_3, BPF_REG_2, 1), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, +}, -- cgit v1.2.3 From 0edd6b64d1939e9e9168ff27947995bb7751db5d Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 23 Apr 2019 21:55:59 +0200 Subject: bpf: Fix preempt_enable_no_resched() abuse Unless the very next line is schedule(), or implies it, one must not use preempt_enable_no_resched(). It can cause a preemption to go missing and thereby cause arbitrary delays, breaking the PREEMPT=y invariant. Cc: Roman Gushchin Cc: Alexei Starovoitov Cc: Daniel Borkmann Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Alexei Starovoitov --- include/linux/bpf.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/bpf.h b/include/linux/bpf.h index f02367faa58d..944ccc310201 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -510,7 +510,7 @@ int bpf_prog_array_copy(struct bpf_prog_array __rcu *old_array, } \ _out: \ rcu_read_unlock(); \ - preempt_enable_no_resched(); \ + preempt_enable(); \ _ret; \ }) -- cgit v1.2.3 From 5bb5c3a3ac102158b799bf5eda871223aa5e9c25 Mon Sep 17 00:00:00 2001 From: Shun-Chih Yu Date: Thu, 25 Apr 2019 11:53:50 +0800 Subject: dmaengine: mediatek-cqdma: fix wrong register usage in mtk_cqdma_start This patch fixes wrong register usage in the mtk_cqdma_start. The destination register should be MTK_CQDMA_DST2 instead. Fixes: b1f01e48df5a ("dmaengine: mediatek: Add MediaTek Command-Queue DMA controller for MT6765 SoC") Signed-off-by: Shun-Chih Yu Cc: stable@vger.kernel.org Signed-off-by: Vinod Koul --- drivers/dma/mediatek/mtk-cqdma.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/dma/mediatek/mtk-cqdma.c b/drivers/dma/mediatek/mtk-cqdma.c index 131f3974740d..814853842e29 100644 --- a/drivers/dma/mediatek/mtk-cqdma.c +++ b/drivers/dma/mediatek/mtk-cqdma.c @@ -253,7 +253,7 @@ static void mtk_cqdma_start(struct mtk_cqdma_pchan *pc, #ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT mtk_dma_set(pc, MTK_CQDMA_DST2, cvd->dest >> MTK_CQDMA_ADDR2_SHFIT); #else - mtk_dma_set(pc, MTK_CQDMA_SRC2, 0); + mtk_dma_set(pc, MTK_CQDMA_DST2, 0); #endif /* setup the length */ -- cgit v1.2.3 From c1c477217882c610a2ba0268f5faf36c9c092528 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 23 Apr 2019 09:43:26 -0700 Subject: l2tp: use rcu_dereference_sk_user_data() in l2tp_udp_encap_recv() Canonical way to fetch sk_user_data from an encap_rcv() handler called from UDP stack in rcu protected section is to use rcu_dereference_sk_user_data(), otherwise compiler might read it multiple times. Fixes: d00fa9adc528 ("il2tp: fix races with tunnel socket close") Signed-off-by: Eric Dumazet Cc: James Chapman Signed-off-by: David S. Miller --- net/l2tp/l2tp_core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c index fed6becc5daf..aee33d132018 100644 --- a/net/l2tp/l2tp_core.c +++ b/net/l2tp/l2tp_core.c @@ -909,7 +909,7 @@ int l2tp_udp_encap_recv(struct sock *sk, struct sk_buff *skb) { struct l2tp_tunnel *tunnel; - tunnel = l2tp_tunnel(sk); + tunnel = rcu_dereference_sk_user_data(sk); if (tunnel == NULL) goto pass_up; -- cgit v1.2.3 From 56c5bc1849de1311eda5bc506bddad504bfd14fc Mon Sep 17 00:00:00 2001 From: Fabien Dessenne Date: Wed, 24 Apr 2019 11:35:49 +0200 Subject: net: ethernet: stmmac: manage the get_irq probe defer case Manage the -EPROBE_DEFER error case for "stm32_pwr_wakeup" IRQ. Signed-off-by: Fabien Dessenne Acked-by: Alexandre TORGUE Signed-off-by: David S. Miller --- drivers/net/ethernet/stmicro/stmmac/dwmac-stm32.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-stm32.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-stm32.c index 062a600fa5a7..21428537e231 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-stm32.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-stm32.c @@ -333,6 +333,9 @@ static int stm32mp1_parse_data(struct stm32_dwmac *dwmac, */ dwmac->irq_pwr_wakeup = platform_get_irq_byname(pdev, "stm32_pwr_wakeup"); + if (dwmac->irq_pwr_wakeup == -EPROBE_DEFER) + return -EPROBE_DEFER; + if (!dwmac->clk_eth_ck && dwmac->irq_pwr_wakeup >= 0) { err = device_init_wakeup(&pdev->dev, true); if (err) { -- cgit v1.2.3 From 88ef66a28391ea7b624bfb7508a5b015c13b28f3 Mon Sep 17 00:00:00 2001 From: Bjørn Mork Date: Wed, 24 Apr 2019 19:12:46 +0200 Subject: qmi_wwan: new Wistron, ZTE and D-Link devices MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adding device entries found in vendor modified versions of this driver. Function maps for some of the devices follow: WNC D16Q1, D16Q5, D18Q1 LTE CAT3 module (1435:0918) MI_00 Qualcomm HS-USB Diagnostics MI_01 Android Debug interface MI_02 Qualcomm HS-USB Modem MI_03 Qualcomm Wireless HS-USB Ethernet Adapter MI_04 Qualcomm Wireless HS-USB Ethernet Adapter MI_05 Qualcomm Wireless HS-USB Ethernet Adapter MI_06 USB Mass Storage Device T: Bus=02 Lev=01 Prnt=01 Port=00 Cnt=01 Dev#= 2 Spd=480 MxCh= 0 D: Ver= 2.00 Cls=00(>ifc ) Sub=00 Prot=00 MxPS=64 #Cfgs= 1 P: Vendor=1435 ProdID=0918 Rev= 2.32 S: Manufacturer=Android S: Product=Android S: SerialNumber=0123456789ABCDEF C:* #Ifs= 7 Cfg#= 1 Atr=80 MxPwr=500mA I:* If#= 0 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=ff Prot=ff Driver=option E: Ad=81(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=01(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms I:* If#= 1 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=42 Prot=01 Driver=(none) E: Ad=82(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=02(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms I:* If#= 2 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=00 Prot=00 Driver=option E: Ad=84(I) Atr=03(Int.) MxPS= 64 Ivl=32ms E: Ad=83(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=03(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms I:* If#= 3 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=ff Prot=ff Driver=qmi_wwan E: Ad=86(I) Atr=03(Int.) MxPS= 64 Ivl=32ms E: Ad=85(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=04(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms I:* If#= 4 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=ff Prot=ff Driver=qmi_wwan E: Ad=88(I) Atr=03(Int.) MxPS= 64 Ivl=32ms E: Ad=87(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms I:* If#= 5 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=ff Prot=ff Driver=qmi_wwan E: Ad=8a(I) Atr=03(Int.) MxPS= 64 Ivl=32ms E: Ad=89(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=06(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms WNC D18 LTE CAT3 module (1435:d182) MI_00 Qualcomm HS-USB Diagnostics MI_01 Androd Debug interface MI_02 Qualcomm HS-USB Modem MI_03 Qualcomm HS-USB NMEA MI_04 Qualcomm Wireless HS-USB Ethernet Adapter MI_05 Qualcomm Wireless HS-USB Ethernet Adapter MI_06 USB Mass Storage Device ZM8510/ZM8620/ME3960 (19d2:0396) MI_00 ZTE Mobile Broadband Diagnostics Port MI_01 ZTE Mobile Broadband AT Port MI_02 ZTE Mobile Broadband Modem MI_03 ZTE Mobile Broadband NDIS Port (qmi_wwan) MI_04 ZTE Mobile Broadband ADB Port ME3620_X (19d2:1432) MI_00 ZTE Diagnostics Device MI_01 ZTE UI AT Interface MI_02 ZTE Modem Device MI_03 ZTE Mobile Broadband Network Adapter MI_04 ZTE Composite ADB Interface Reported-by: Lars Melin Signed-off-by: Bjørn Mork Signed-off-by: David S. Miller --- drivers/net/usb/qmi_wwan.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/drivers/net/usb/qmi_wwan.c b/drivers/net/usb/qmi_wwan.c index 9195f3476b1d..679e404a5224 100644 --- a/drivers/net/usb/qmi_wwan.c +++ b/drivers/net/usb/qmi_wwan.c @@ -1122,9 +1122,16 @@ static const struct usb_device_id products[] = { {QMI_FIXED_INTF(0x0846, 0x68d3, 8)}, /* Netgear Aircard 779S */ {QMI_FIXED_INTF(0x12d1, 0x140c, 1)}, /* Huawei E173 */ {QMI_FIXED_INTF(0x12d1, 0x14ac, 1)}, /* Huawei E1820 */ + {QMI_FIXED_INTF(0x1435, 0x0918, 3)}, /* Wistron NeWeb D16Q1 */ + {QMI_FIXED_INTF(0x1435, 0x0918, 4)}, /* Wistron NeWeb D16Q1 */ + {QMI_FIXED_INTF(0x1435, 0x0918, 5)}, /* Wistron NeWeb D16Q1 */ + {QMI_FIXED_INTF(0x1435, 0x3185, 4)}, /* Wistron NeWeb M18Q5 */ + {QMI_FIXED_INTF(0x1435, 0xd111, 4)}, /* M9615A DM11-1 D51QC */ {QMI_FIXED_INTF(0x1435, 0xd181, 3)}, /* Wistron NeWeb D18Q1 */ {QMI_FIXED_INTF(0x1435, 0xd181, 4)}, /* Wistron NeWeb D18Q1 */ {QMI_FIXED_INTF(0x1435, 0xd181, 5)}, /* Wistron NeWeb D18Q1 */ + {QMI_FIXED_INTF(0x1435, 0xd182, 4)}, /* Wistron NeWeb D18 */ + {QMI_FIXED_INTF(0x1435, 0xd182, 5)}, /* Wistron NeWeb D18 */ {QMI_FIXED_INTF(0x1435, 0xd191, 4)}, /* Wistron NeWeb D19Q1 */ {QMI_QUIRK_SET_DTR(0x1508, 0x1001, 4)}, /* Fibocom NL668 series */ {QMI_FIXED_INTF(0x16d8, 0x6003, 0)}, /* CMOTech 6003 */ @@ -1180,6 +1187,7 @@ static const struct usb_device_id products[] = { {QMI_FIXED_INTF(0x19d2, 0x0265, 4)}, /* ONDA MT8205 4G LTE */ {QMI_FIXED_INTF(0x19d2, 0x0284, 4)}, /* ZTE MF880 */ {QMI_FIXED_INTF(0x19d2, 0x0326, 4)}, /* ZTE MF821D */ + {QMI_FIXED_INTF(0x19d2, 0x0396, 3)}, /* ZTE ZM8620 */ {QMI_FIXED_INTF(0x19d2, 0x0412, 4)}, /* Telewell TW-LTE 4G */ {QMI_FIXED_INTF(0x19d2, 0x1008, 4)}, /* ZTE (Vodafone) K3570-Z */ {QMI_FIXED_INTF(0x19d2, 0x1010, 4)}, /* ZTE (Vodafone) K3571-Z */ @@ -1200,7 +1208,9 @@ static const struct usb_device_id products[] = { {QMI_FIXED_INTF(0x19d2, 0x1425, 2)}, {QMI_FIXED_INTF(0x19d2, 0x1426, 2)}, /* ZTE MF91 */ {QMI_FIXED_INTF(0x19d2, 0x1428, 2)}, /* Telewell TW-LTE 4G v2 */ + {QMI_FIXED_INTF(0x19d2, 0x1432, 3)}, /* ZTE ME3620 */ {QMI_FIXED_INTF(0x19d2, 0x2002, 4)}, /* ZTE (Vodafone) K3765-Z */ + {QMI_FIXED_INTF(0x2001, 0x7e16, 3)}, /* D-Link DWM-221 */ {QMI_FIXED_INTF(0x2001, 0x7e19, 4)}, /* D-Link DWM-221 B1 */ {QMI_FIXED_INTF(0x2001, 0x7e35, 4)}, /* D-Link DWM-222 */ {QMI_FIXED_INTF(0x2020, 0x2031, 4)}, /* Olicard 600 */ -- cgit v1.2.3 From b987222654f84f7b4ca95b3a55eca784cb30235b Mon Sep 17 00:00:00 2001 From: Jann Horn Date: Thu, 4 Apr 2019 23:59:25 +0200 Subject: tracing: Fix buffer_ref pipe ops This fixes multiple issues in buffer_pipe_buf_ops: - The ->steal() handler must not return zero unless the pipe buffer has the only reference to the page. But generic_pipe_buf_steal() assumes that every reference to the pipe is tracked by the page's refcount, which isn't true for these buffers - buffer_pipe_buf_get(), which duplicates a buffer, doesn't touch the page's refcount. Fix it by using generic_pipe_buf_nosteal(), which refuses every attempted theft. It should be easy to actually support ->steal, but the only current users of pipe_buf_steal() are the virtio console and FUSE, and they also only use it as an optimization. So it's probably not worth the effort. - The ->get() and ->release() handlers can be invoked concurrently on pipe buffers backed by the same struct buffer_ref. Make them safe against concurrency by using refcount_t. - The pointers stored in ->private were only zeroed out when the last reference to the buffer_ref was dropped. As far as I know, this shouldn't be necessary anyway, but if we do it, let's always do it. Link: http://lkml.kernel.org/r/20190404215925.253531-1-jannh@google.com Cc: Ingo Molnar Cc: Masami Hiramatsu Cc: Al Viro Cc: stable@vger.kernel.org Fixes: 73a757e63114d ("ring-buffer: Return reader page back into existing ring buffer") Signed-off-by: Jann Horn Signed-off-by: Steven Rostedt (VMware) --- fs/splice.c | 4 ++-- include/linux/pipe_fs_i.h | 1 + kernel/trace/trace.c | 28 ++++++++++++++-------------- 3 files changed, 17 insertions(+), 16 deletions(-) diff --git a/fs/splice.c b/fs/splice.c index 3ee7e82df48f..e75807380caa 100644 --- a/fs/splice.c +++ b/fs/splice.c @@ -330,8 +330,8 @@ const struct pipe_buf_operations default_pipe_buf_ops = { .get = generic_pipe_buf_get, }; -static int generic_pipe_buf_nosteal(struct pipe_inode_info *pipe, - struct pipe_buffer *buf) +int generic_pipe_buf_nosteal(struct pipe_inode_info *pipe, + struct pipe_buffer *buf) { return 1; } diff --git a/include/linux/pipe_fs_i.h b/include/linux/pipe_fs_i.h index 787d224ff43e..a830e9a00eb9 100644 --- a/include/linux/pipe_fs_i.h +++ b/include/linux/pipe_fs_i.h @@ -174,6 +174,7 @@ void free_pipe_info(struct pipe_inode_info *); void generic_pipe_buf_get(struct pipe_inode_info *, struct pipe_buffer *); int generic_pipe_buf_confirm(struct pipe_inode_info *, struct pipe_buffer *); int generic_pipe_buf_steal(struct pipe_inode_info *, struct pipe_buffer *); +int generic_pipe_buf_nosteal(struct pipe_inode_info *, struct pipe_buffer *); void generic_pipe_buf_release(struct pipe_inode_info *, struct pipe_buffer *); void pipe_buf_mark_unmergeable(struct pipe_buffer *buf); diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 21153e64bf1c..0cfa13a60086 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -7025,19 +7025,23 @@ struct buffer_ref { struct ring_buffer *buffer; void *page; int cpu; - int ref; + refcount_t refcount; }; +static void buffer_ref_release(struct buffer_ref *ref) +{ + if (!refcount_dec_and_test(&ref->refcount)) + return; + ring_buffer_free_read_page(ref->buffer, ref->cpu, ref->page); + kfree(ref); +} + static void buffer_pipe_buf_release(struct pipe_inode_info *pipe, struct pipe_buffer *buf) { struct buffer_ref *ref = (struct buffer_ref *)buf->private; - if (--ref->ref) - return; - - ring_buffer_free_read_page(ref->buffer, ref->cpu, ref->page); - kfree(ref); + buffer_ref_release(ref); buf->private = 0; } @@ -7046,14 +7050,14 @@ static void buffer_pipe_buf_get(struct pipe_inode_info *pipe, { struct buffer_ref *ref = (struct buffer_ref *)buf->private; - ref->ref++; + refcount_inc(&ref->refcount); } /* Pipe buffer operations for a buffer. */ static const struct pipe_buf_operations buffer_pipe_buf_ops = { .confirm = generic_pipe_buf_confirm, .release = buffer_pipe_buf_release, - .steal = generic_pipe_buf_steal, + .steal = generic_pipe_buf_nosteal, .get = buffer_pipe_buf_get, }; @@ -7066,11 +7070,7 @@ static void buffer_spd_release(struct splice_pipe_desc *spd, unsigned int i) struct buffer_ref *ref = (struct buffer_ref *)spd->partial[i].private; - if (--ref->ref) - return; - - ring_buffer_free_read_page(ref->buffer, ref->cpu, ref->page); - kfree(ref); + buffer_ref_release(ref); spd->partial[i].private = 0; } @@ -7125,7 +7125,7 @@ tracing_buffers_splice_read(struct file *file, loff_t *ppos, break; } - ref->ref = 1; + refcount_set(&ref->refcount, 1); ref->buffer = iter->trace_buffer->buffer; ref->page = ring_buffer_alloc_read_page(ref->buffer, iter->cpu_file); if (IS_ERR(ref->page)) { -- cgit v1.2.3 From 91862cc7867bba4ee5c8fcf0ca2f1d30427b6129 Mon Sep 17 00:00:00 2001 From: Wenwen Wang Date: Fri, 19 Apr 2019 21:22:59 -0500 Subject: tracing: Fix a memory leak by early error exit in trace_pid_write() In trace_pid_write(), the buffer for trace parser is allocated through kmalloc() in trace_parser_get_init(). Later on, after the buffer is used, it is then freed through kfree() in trace_parser_put(). However, it is possible that trace_pid_write() is terminated due to unexpected errors, e.g., ENOMEM. In that case, the allocated buffer will not be freed, which is a memory leak bug. To fix this issue, free the allocated buffer when an error is encountered. Link: http://lkml.kernel.org/r/1555726979-15633-1-git-send-email-wang6495@umn.edu Fixes: f4d34a87e9c10 ("tracing: Use pid bitmap instead of a pid array for set_event_pid") Cc: stable@vger.kernel.org Signed-off-by: Wenwen Wang Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/trace.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 0cfa13a60086..46f68fad6373 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -496,8 +496,10 @@ int trace_pid_write(struct trace_pid_list *filtered_pids, * not modified. */ pid_list = kmalloc(sizeof(*pid_list), GFP_KERNEL); - if (!pid_list) + if (!pid_list) { + trace_parser_put(&parser); return -ENOMEM; + } pid_list->pid_max = READ_ONCE(pid_max); @@ -507,6 +509,7 @@ int trace_pid_write(struct trace_pid_list *filtered_pids, pid_list->pids = vzalloc((pid_list->pid_max + 7) >> 3); if (!pid_list->pids) { + trace_parser_put(&parser); kfree(pid_list); return -ENOMEM; } -- cgit v1.2.3 From d6097c9e4454adf1f8f2c9547c2fa6060d55d952 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 23 Apr 2019 22:03:18 +0200 Subject: trace: Fix preempt_enable_no_resched() abuse Unless the very next line is schedule(), or implies it, one must not use preempt_enable_no_resched(). It can cause a preemption to go missing and thereby cause arbitrary delays, breaking the PREEMPT=y invariant. Link: http://lkml.kernel.org/r/20190423200318.GY14281@hirez.programming.kicks-ass.net Cc: Waiman Long Cc: Linus Torvalds Cc: Ingo Molnar Cc: Will Deacon Cc: Thomas Gleixner Cc: the arch/x86 maintainers Cc: Davidlohr Bueso Cc: Tim Chen Cc: huang ying Cc: Roman Gushchin Cc: Alexei Starovoitov Cc: Daniel Borkmann Cc: stable@vger.kernel.org Fixes: 2c2d7329d8af ("tracing/ftrace: use preempt_enable_no_resched_notrace in ring_buffer_time_stamp()") Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/ring_buffer.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index 41b6f96e5366..4ee8d8aa3d0f 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -762,7 +762,7 @@ u64 ring_buffer_time_stamp(struct ring_buffer *buffer, int cpu) preempt_disable_notrace(); time = rb_time_stamp(buffer); - preempt_enable_no_resched_notrace(); + preempt_enable_notrace(); return time; } -- cgit v1.2.3 From 4e43df38a2e6c876d3c8ecc4196ed67a895c425d Mon Sep 17 00:00:00 2001 From: Marcel Holtmann Date: Wed, 24 Apr 2019 22:18:53 +0200 Subject: genetlink: use idr_alloc_cyclic for family->id assignment When allocating the next family->id it makes more sense to use idr_alloc_cyclic to avoid re-using a previously used family->id as much as possible. Signed-off-by: Marcel Holtmann Signed-off-by: David S. Miller --- net/netlink/genetlink.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/netlink/genetlink.c b/net/netlink/genetlink.c index f0ec068e1d02..cb69d35c8e6a 100644 --- a/net/netlink/genetlink.c +++ b/net/netlink/genetlink.c @@ -362,8 +362,8 @@ int genl_register_family(struct genl_family *family) } else family->attrbuf = NULL; - family->id = idr_alloc(&genl_fam_idr, family, - start, end + 1, GFP_KERNEL); + family->id = idr_alloc_cyclic(&genl_fam_idr, family, + start, end + 1, GFP_KERNEL); if (family->id < 0) { err = family->id; goto errout_free; -- cgit v1.2.3 From fdfdf86720a34527f777cbe0d8599bf0528fa146 Mon Sep 17 00:00:00 2001 From: Andrew Lunn Date: Thu, 25 Apr 2019 00:33:00 +0200 Subject: net: phy: marvell: Fix buffer overrun with stats counters marvell_get_sset_count() returns how many statistics counters there are. If the PHY supports fibre, there are 3, otherwise two. marvell_get_strings() does not make this distinction, and always returns 3 strings. This then often results in writing past the end of the buffer for the strings. Fixes: 2170fef78a40 ("Marvell phy: add field to get errors from fiber link.") Signed-off-by: Andrew Lunn Reviewed-by: Florian Fainelli Signed-off-by: David S. Miller --- drivers/net/phy/marvell.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/net/phy/marvell.c b/drivers/net/phy/marvell.c index 3ccba37bd6dd..f76c4048b978 100644 --- a/drivers/net/phy/marvell.c +++ b/drivers/net/phy/marvell.c @@ -1489,9 +1489,10 @@ static int marvell_get_sset_count(struct phy_device *phydev) static void marvell_get_strings(struct phy_device *phydev, u8 *data) { + int count = marvell_get_sset_count(phydev); int i; - for (i = 0; i < ARRAY_SIZE(marvell_hw_stats); i++) { + for (i = 0; i < count; i++) { strlcpy(data + i * ETH_GSTRING_LEN, marvell_hw_stats[i].string, ETH_GSTRING_LEN); } @@ -1519,9 +1520,10 @@ static u64 marvell_get_stat(struct phy_device *phydev, int i) static void marvell_get_stats(struct phy_device *phydev, struct ethtool_stats *stats, u64 *data) { + int count = marvell_get_sset_count(phydev); int i; - for (i = 0; i < ARRAY_SIZE(marvell_hw_stats); i++) + for (i = 0; i < count; i++) data[i] = marvell_get_stat(phydev, i); } -- cgit v1.2.3 From 89c02e69fc5245f8a2f34b58b42d43a737af1a5e Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Thu, 25 Apr 2019 22:23:37 -0700 Subject: mm/memory_hotplug.c: drop memory device reference after find_memory_block() Right now we are using find_memory_block() to get the node id for the pfn range to online. We are missing to drop a reference to the memory block device. While the device still gets unregistered via device_unregister(), resulting in no user visible problem, the device is never released via device_release(), resulting in a memory leak. Fix that by properly using a put_device(). Link: http://lkml.kernel.org/r/20190411110955.1430-1-david@redhat.com Fixes: d0dc12e86b31 ("mm/memory_hotplug: optimize memory hotplug") Signed-off-by: David Hildenbrand Reviewed-by: Oscar Salvador Reviewed-by: Wei Yang Acked-by: Michal Hocko Acked-by: Pankaj Gupta Cc: David Hildenbrand Cc: Pavel Tatashin Cc: Qian Cai Cc: Arun KS Cc: Mathieu Malaterre Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/memory_hotplug.c | 1 + 1 file changed, 1 insertion(+) diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c index 0082d699be94..b236069ff0d8 100644 --- a/mm/memory_hotplug.c +++ b/mm/memory_hotplug.c @@ -874,6 +874,7 @@ int __ref online_pages(unsigned long pfn, unsigned long nr_pages, int online_typ */ mem = find_memory_block(__pfn_to_section(pfn)); nid = mem->nid; + put_device(&mem->dev); /* associate pfn range with the zone */ zone = move_pfn_range(online_type, nid, pfn, nr_pages); -- cgit v1.2.3 From e153abc0739ff77bd89c9ba1688cdb963464af97 Mon Sep 17 00:00:00 2001 From: Jérôme Glisse Date: Thu, 25 Apr 2019 22:23:41 -0700 Subject: zram: pass down the bvec we need to read into in the work struct MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When scheduling work item to read page we need to pass down the proper bvec struct which points to the page to read into. Before this patch it uses a randomly initialized bvec (only if PAGE_SIZE != 4096) which is wrong. Note that without this patch on arch/kernel where PAGE_SIZE != 4096 userspace could read random memory through a zram block device (thought userspace probably would have no control on the address being read). Link: http://lkml.kernel.org/r/20190408183219.26377-1-jglisse@redhat.com Signed-off-by: Jérôme Glisse Reviewed-by: Andrew Morton Reviewed-by: Sergey Senozhatsky Acked-by: Minchan Kim Cc: Nitin Gupta Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/block/zram/zram_drv.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c index 399cad7daae7..d58a359a6622 100644 --- a/drivers/block/zram/zram_drv.c +++ b/drivers/block/zram/zram_drv.c @@ -774,18 +774,18 @@ struct zram_work { struct zram *zram; unsigned long entry; struct bio *bio; + struct bio_vec bvec; }; #if PAGE_SIZE != 4096 static void zram_sync_read(struct work_struct *work) { - struct bio_vec bvec; struct zram_work *zw = container_of(work, struct zram_work, work); struct zram *zram = zw->zram; unsigned long entry = zw->entry; struct bio *bio = zw->bio; - read_from_bdev_async(zram, &bvec, entry, bio); + read_from_bdev_async(zram, &zw->bvec, entry, bio); } /* @@ -798,6 +798,7 @@ static int read_from_bdev_sync(struct zram *zram, struct bio_vec *bvec, { struct zram_work work; + work.bvec = *bvec; work.zram = zram; work.entry = entry; work.bio = bio; -- cgit v1.2.3 From ae3d6a323347940f0548bbb4b17f0bb2e9164169 Mon Sep 17 00:00:00 2001 From: YueHaibing Date: Thu, 25 Apr 2019 22:23:44 -0700 Subject: lib/Kconfig.debug: fix build error without CONFIG_BLOCK If CONFIG_TEST_KMOD is set to M, while CONFIG_BLOCK is not set, XFS and BTRFS can not be compiled successly. Link: http://lkml.kernel.org/r/20190410075434.35220-1-yuehaibing@huawei.com Fixes: d9c6a72d6fa2 ("kmod: add test driver to stress test the module loader") Signed-off-by: YueHaibing Reported-by: Hulk Robot Reviewed-by: Kees Cook Cc: Masahiro Yamada Cc: Petr Mladek Cc: Andy Shevchenko Cc: Matthew Wilcox Cc: Joe Lawrence Cc: Robin Murphy Cc: Luis Chamberlain Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/Kconfig.debug | 1 + 1 file changed, 1 insertion(+) diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index 00dbcdbc9a0d..d5a4a4036d2f 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -1929,6 +1929,7 @@ config TEST_KMOD depends on m depends on BLOCK && (64BIT || LBDAF) # for XFS, BTRFS depends on NETDEVICES && NET_CORE && INET # for TUN + depends on BLOCK select TEST_LKM select XFS_FS select TUN -- cgit v1.2.3 From e789803507b2e154ed452865ee981b038654e98d Mon Sep 17 00:00:00 2001 From: "Uladzislau Rezki (Sony)" Date: Thu, 25 Apr 2019 22:23:47 -0700 Subject: lib/test_vmalloc.c: do not create cpumask_t variable on stack On my "Intel(R) Xeon(R) W-2135 CPU @ 3.70GHz" system(12 CPUs) i get the warning from the compiler about frame size: warning: the frame size of 1096 bytes is larger than 1024 bytes [-Wframe-larger-than=] the size of cpumask_t depends on number of CPUs, therefore just make use of cpumask_of() in set_cpus_allowed_ptr() as a second argument. Link: http://lkml.kernel.org/r/20190418193925.9361-1-urezki@gmail.com Signed-off-by: Uladzislau Rezki (Sony) Reviewed-by: Andrew Morton Reviewed-by: Roman Gushchin Cc: Uladzislau Rezki Cc: Michal Hocko Cc: Matthew Wilcox Cc: Thomas Garnier Cc: Oleksiy Avramchenko Cc: Steven Rostedt Cc: Joel Fernandes Cc: Thomas Gleixner Cc: Ingo Molnar Cc: Tejun Heo Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/test_vmalloc.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/lib/test_vmalloc.c b/lib/test_vmalloc.c index 83cdcaa82bf6..f832b095afba 100644 --- a/lib/test_vmalloc.c +++ b/lib/test_vmalloc.c @@ -383,14 +383,14 @@ static void shuffle_array(int *arr, int n) static int test_func(void *private) { struct test_driver *t = private; - cpumask_t newmask = CPU_MASK_NONE; int random_array[ARRAY_SIZE(test_case_array)]; int index, i, j, ret; ktime_t kt; u64 delta; - cpumask_set_cpu(t->cpu, &newmask); - set_cpus_allowed_ptr(current, &newmask); + ret = set_cpus_allowed_ptr(current, cpumask_of(t->cpu)); + if (ret < 0) + pr_err("Failed to set affinity to %d CPU\n", t->cpu); for (i = 0; i < ARRAY_SIZE(test_case_array); i++) random_array[i] = i; -- cgit v1.2.3 From 24512228b7a3f412b5a51f189df302616b021c33 Mon Sep 17 00:00:00 2001 From: Mel Gorman Date: Thu, 25 Apr 2019 22:23:51 -0700 Subject: mm: do not boost watermarks to avoid fragmentation for the DISCONTIG memory model Mikulas Patocka reported that commit 1c30844d2dfe ("mm: reclaim small amounts of memory when an external fragmentation event occurs") "broke" memory management on parisc. The machine is not NUMA but the DISCONTIG model creates three pgdats even though it's a UMA machine for the following ranges 0) Start 0x0000000000000000 End 0x000000003fffffff Size 1024 MB 1) Start 0x0000000100000000 End 0x00000001bfdfffff Size 3070 MB 2) Start 0x0000004040000000 End 0x00000040ffffffff Size 3072 MB Mikulas reported: With the patch 1c30844d2, the kernel will incorrectly reclaim the first zone when it fills up, ignoring the fact that there are two completely free zones. Basiscally, it limits cache size to 1GiB. For example, if I run: # dd if=/dev/sda of=/dev/null bs=1M count=2048 - with the proper kernel, there should be "Buffers - 2GiB" when this command finishes. With the patch 1c30844d2, buffers will consume just 1GiB or slightly more, because the kernel was incorrectly reclaiming them. The page allocator and reclaim makes assumptions that pgdats really represent NUMA nodes and zones represent ranges and makes decisions on that basis. Watermark boosting for small pgdats leads to unexpected results even though this would have behaved reasonably on SPARSEMEM. DISCONTIG is essentially deprecated and even parisc plans to move to SPARSEMEM so there is no need to be fancy, this patch simply disables watermark boosting by default on DISCONTIGMEM. Link: http://lkml.kernel.org/r/20190419094335.GJ18914@techsingularity.net Fixes: 1c30844d2dfe ("mm: reclaim small amounts of memory when an external fragmentation event occurs") Signed-off-by: Mel Gorman Reported-by: Mikulas Patocka Tested-by: Mikulas Patocka Acked-by: Vlastimil Babka Cc: James Bottomley Cc: Matthew Wilcox Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/sysctl/vm.txt | 16 ++++++++-------- mm/page_alloc.c | 13 +++++++++++++ 2 files changed, 21 insertions(+), 8 deletions(-) diff --git a/Documentation/sysctl/vm.txt b/Documentation/sysctl/vm.txt index 6af24cdb25cc..3f13d8599337 100644 --- a/Documentation/sysctl/vm.txt +++ b/Documentation/sysctl/vm.txt @@ -866,14 +866,14 @@ The intent is that compaction has less work to do in the future and to increase the success rate of future high-order allocations such as SLUB allocations, THP and hugetlbfs pages. -To make it sensible with respect to the watermark_scale_factor parameter, -the unit is in fractions of 10,000. The default value of 15,000 means -that up to 150% of the high watermark will be reclaimed in the event of -a pageblock being mixed due to fragmentation. The level of reclaim is -determined by the number of fragmentation events that occurred in the -recent past. If this value is smaller than a pageblock then a pageblocks -worth of pages will be reclaimed (e.g. 2MB on 64-bit x86). A boost factor -of 0 will disable the feature. +To make it sensible with respect to the watermark_scale_factor +parameter, the unit is in fractions of 10,000. The default value of +15,000 on !DISCONTIGMEM configurations means that up to 150% of the high +watermark will be reclaimed in the event of a pageblock being mixed due +to fragmentation. The level of reclaim is determined by the number of +fragmentation events that occurred in the recent past. If this value is +smaller than a pageblock then a pageblocks worth of pages will be reclaimed +(e.g. 2MB on 64-bit x86). A boost factor of 0 will disable the feature. ============================================================= diff --git a/mm/page_alloc.c b/mm/page_alloc.c index c6ce20aaf80b..6c6d9f1c404e 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -266,7 +266,20 @@ compound_page_dtor * const compound_page_dtors[] = { int min_free_kbytes = 1024; int user_min_free_kbytes = -1; +#ifdef CONFIG_DISCONTIGMEM +/* + * DiscontigMem defines memory ranges as separate pg_data_t even if the ranges + * are not on separate NUMA nodes. Functionally this works but with + * watermark_boost_factor, it can reclaim prematurely as the ranges can be + * quite small. By default, do not boost watermarks on discontigmem as in + * many cases very high-order allocations like THP are likely to be + * unsupported and the premature reclaim offsets the advantage of long-term + * fragmentation avoidance. + */ +int watermark_boost_factor __read_mostly; +#else int watermark_boost_factor __read_mostly = 15000; +#endif int watermark_scale_factor = 10; static unsigned long nr_kernel_pages __initdata; -- cgit v1.2.3 From ee8ab0eeb49bd3982090c8f14dc9cc65bcd13c5c Mon Sep 17 00:00:00 2001 From: Mel Gorman Date: Thu, 25 Apr 2019 22:23:54 -0700 Subject: mm, page_alloc: always use a captured page regardless of compaction result During the development of commit 5e1f0f098b46 ("mm, compaction: capture a page under direct compaction"), a paranoid check was added to ensure that if a captured page was available after compaction that it was consistent with the final state of compaction. The intent was to catch serious programming bugs such as using a stale page pointer and causing corruption problems. However, it is possible to get a captured page even if compaction was unsuccessful if an interrupt triggered and happened to free pages in interrupt context that got merged into a suitable high-order page. It's highly unlikely but Li Wang did report the following warning on s390 occuring when testing OOM handling. Note that the warning is slightly edited for clarity. WARNING: CPU: 0 PID: 9783 at mm/page_alloc.c:3777 __alloc_pages_direct_compact+0x182/0x190 Modules linked in: rpcsec_gss_krb5 auth_rpcgss nfsv4 dns_resolver nfs lockd grace fscache sunrpc pkey ghash_s390 prng xts aes_s390 des_s390 des_generic sha512_s390 zcrypt_cex4 zcrypt vmur binfmt_misc ip_tables xfs libcrc32c dasd_fba_mod qeth_l2 dasd_eckd_mod dasd_mod qeth qdio lcs ctcm ccwgroup fsm dm_mirror dm_region_hash dm_log dm_mod CPU: 0 PID: 9783 Comm: copy.sh Kdump: loaded Not tainted 5.1.0-rc 5 #1 This patch simply removes the check entirely instead of trying to be clever about pages freed from interrupt context. If a serious programming error was introduced, it is highly likely to be caught by prep_new_page() instead. Link: http://lkml.kernel.org/r/20190419085133.GH18914@techsingularity.net Fixes: 5e1f0f098b46 ("mm, compaction: capture a page under direct compaction") Signed-off-by: Mel Gorman Reported-by: Li Wang Acked-by: Vlastimil Babka Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/page_alloc.c | 5 ----- 1 file changed, 5 deletions(-) diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 6c6d9f1c404e..d167c48d913c 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -3786,11 +3786,6 @@ __alloc_pages_direct_compact(gfp_t gfp_mask, unsigned int order, memalloc_noreclaim_restore(noreclaim_flag); psi_memstall_leave(&pflags); - if (*compact_result <= COMPACT_INACTIVE) { - WARN_ON_ONCE(page); - return NULL; - } - /* * At least in one zone compaction wasn't deferred or skipped, so let's * count a compaction stall -- cgit v1.2.3 From 8139ad043d632c0e9e12d760068a7a8e91659aa1 Mon Sep 17 00:00:00 2001 From: Andrey Ryabinin Date: Thu, 25 Apr 2019 22:23:58 -0700 Subject: mm/page_alloc.c: avoid potential NULL pointer dereference ac.preferred_zoneref->zone passed to alloc_flags_nofragment() can be NULL. 'zone' pointer unconditionally derefernced in alloc_flags_nofragment(). Bail out on NULL zone to avoid potential crash. Currently we don't see any crashes only because alloc_flags_nofragment() has another bug which allows compiler to optimize away all accesses to 'zone'. Link: http://lkml.kernel.org/r/20190423120806.3503-1-aryabinin@virtuozzo.com Fixes: 6bb154504f8b ("mm, page_alloc: spread allocations across zones before introducing fragmentation") Signed-off-by: Andrey Ryabinin Acked-by: Mel Gorman Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/page_alloc.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/mm/page_alloc.c b/mm/page_alloc.c index d167c48d913c..9992ca7f29f1 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -3432,6 +3432,9 @@ alloc_flags_nofragment(struct zone *zone, gfp_t gfp_mask) alloc_flags |= ALLOC_KSWAPD; #ifdef CONFIG_ZONE_DMA32 + if (!zone) + return alloc_flags; + if (zone_idx(zone) != ZONE_NORMAL) goto out; -- cgit v1.2.3 From 8118b82eb756e271929697e8ada5f637dc443af1 Mon Sep 17 00:00:00 2001 From: Andrey Ryabinin Date: Thu, 25 Apr 2019 22:24:01 -0700 Subject: mm/page_alloc.c: fix never set ALLOC_NOFRAGMENT flag Commit 0a79cdad5eb2 ("mm: use alloc_flags to record if kswapd can wake") removed setting of the ALLOC_NOFRAGMENT flag. Bring it back. The runtime effect is that ALLOC_NOFRAGMENT behaviour is restored so that allocations are spread across local zones to avoid fragmentation due to mixing pageblocks as long as possible. Link: http://lkml.kernel.org/r/20190423120806.3503-2-aryabinin@virtuozzo.com Fixes: 0a79cdad5eb2 ("mm: use alloc_flags to record if kswapd can wake") Signed-off-by: Andrey Ryabinin Acked-by: Mel Gorman Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/page_alloc.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 9992ca7f29f1..c02cff1ed56e 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -3436,7 +3436,7 @@ alloc_flags_nofragment(struct zone *zone, gfp_t gfp_mask) return alloc_flags; if (zone_idx(zone) != ZONE_NORMAL) - goto out; + return alloc_flags; /* * If ZONE_DMA32 exists, assume it is the one after ZONE_NORMAL and @@ -3445,9 +3445,9 @@ alloc_flags_nofragment(struct zone *zone, gfp_t gfp_mask) */ BUILD_BUG_ON(ZONE_NORMAL - ZONE_DMA32 != 1); if (nr_online_nodes > 1 && !populated_zone(--zone)) - goto out; + return alloc_flags; -out: + alloc_flags |= ALLOC_NOFRAGMENT; #endif /* CONFIG_ZONE_DMA32 */ return alloc_flags; } -- cgit v1.2.3 From 89189557b47b35683a27c80ee78aef18248eefb4 Mon Sep 17 00:00:00 2001 From: YueHaibing Date: Thu, 25 Apr 2019 22:24:05 -0700 Subject: fs/proc/proc_sysctl.c: Fix a NULL pointer dereference Syzkaller report this: sysctl could not get directory: /net//bridge -12 kasan: CONFIG_KASAN_INLINE enabled kasan: GPF could be caused by NULL-ptr deref or user memory access general protection fault: 0000 [#1] SMP KASAN PTI CPU: 1 PID: 7027 Comm: syz-executor.0 Tainted: G C 5.1.0-rc3+ #8 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.10.2-1ubuntu1 04/01/2014 RIP: 0010:__write_once_size include/linux/compiler.h:220 [inline] RIP: 0010:__rb_change_child include/linux/rbtree_augmented.h:144 [inline] RIP: 0010:__rb_erase_augmented include/linux/rbtree_augmented.h:186 [inline] RIP: 0010:rb_erase+0x5f4/0x19f0 lib/rbtree.c:459 Code: 00 0f 85 60 13 00 00 48 89 1a 48 83 c4 18 5b 5d 41 5c 41 5d 41 5e 41 5f c3 48 89 f2 48 b8 00 00 00 00 00 fc ff df 48 c1 ea 03 <80> 3c 02 00 0f 85 75 0c 00 00 4d 85 ed 4c 89 2e 74 ce 4c 89 ea 48 RSP: 0018:ffff8881bb507778 EFLAGS: 00010206 RAX: dffffc0000000000 RBX: ffff8881f224b5b8 RCX: ffffffff818f3f6a RDX: 000000000000000a RSI: 0000000000000050 RDI: ffff8881f224b568 RBP: 0000000000000000 R08: ffffed10376a0ef4 R09: ffffed10376a0ef4 R10: 0000000000000001 R11: ffffed10376a0ef4 R12: ffff8881f224b558 R13: 0000000000000000 R14: 0000000000000000 R15: 0000000000000000 FS: 00007f3e7ce13700(0000) GS:ffff8881f7300000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007fd60fbe9398 CR3: 00000001cb55c001 CR4: 00000000007606e0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 PKRU: 55555554 Call Trace: erase_entry fs/proc/proc_sysctl.c:178 [inline] erase_header+0xe3/0x160 fs/proc/proc_sysctl.c:207 start_unregistering fs/proc/proc_sysctl.c:331 [inline] drop_sysctl_table+0x558/0x880 fs/proc/proc_sysctl.c:1631 get_subdir fs/proc/proc_sysctl.c:1022 [inline] __register_sysctl_table+0xd65/0x1090 fs/proc/proc_sysctl.c:1335 br_netfilter_init+0x68/0x1000 [br_netfilter] do_one_initcall+0xbc/0x47d init/main.c:901 do_init_module+0x1b5/0x547 kernel/module.c:3456 load_module+0x6405/0x8c10 kernel/module.c:3804 __do_sys_finit_module+0x162/0x190 kernel/module.c:3898 do_syscall_64+0x9f/0x450 arch/x86/entry/common.c:290 entry_SYSCALL_64_after_hwframe+0x49/0xbe Modules linked in: br_netfilter(+) backlight comedi(C) hid_sensor_hub max3100 ti_ads8688 udc_core fddi snd_mona leds_gpio rc_streamzap mtd pata_netcell nf_log_common rc_winfast udp_tunnel snd_usbmidi_lib snd_usb_toneport snd_usb_line6 snd_rawmidi snd_seq_device snd_hwdep videobuf2_v4l2 videobuf2_common videodev media videobuf2_vmalloc videobuf2_memops rc_gadmei_rm008z 8250_of smm665 hid_tmff hid_saitek hwmon_vid rc_ati_tv_wonder_hd_600 rc_core pata_pdc202xx_old dn_rtmsg as3722 ad714x_i2c ad714x snd_soc_cs4265 hid_kensington panel_ilitek_ili9322 drm drm_panel_orientation_quirks ipack cdc_phonet usbcore phonet hid_jabra hid extcon_arizona can_dev industrialio_triggered_buffer kfifo_buf industrialio adm1031 i2c_mux_ltc4306 i2c_mux ipmi_msghandler mlxsw_core snd_soc_cs35l34 snd_soc_core snd_pcm_dmaengine snd_pcm snd_timer ac97_bus snd_compress snd soundcore gpio_da9055 uio ecdh_generic mdio_thunder of_mdio fixed_phy libphy mdio_cavium iptable_security iptable_raw iptable_mangle iptable_nat nf_nat nf_conntrack nf_defrag_ipv6 nf_defrag_ipv4 iptable_filter bpfilter ip6_vti ip_vti ip_gre ipip sit tunnel4 ip_tunnel hsr veth netdevsim vxcan batman_adv cfg80211 rfkill chnl_net caif nlmon dummy team bonding vcan bridge stp llc ip6_gre gre ip6_tunnel tunnel6 tun joydev mousedev ppdev tpm kvm_intel kvm irqbypass crct10dif_pclmul crc32_pclmul crc32c_intel ghash_clmulni_intel aesni_intel ide_pci_generic piix aes_x86_64 crypto_simd cryptd ide_core glue_helper input_leds psmouse intel_agp intel_gtt serio_raw ata_generic i2c_piix4 agpgart pata_acpi parport_pc parport floppy rtc_cmos sch_fq_codel ip_tables x_tables sha1_ssse3 sha1_generic ipv6 [last unloaded: br_netfilter] Dumping ftrace buffer: (ftrace buffer empty) ---[ end trace 68741688d5fbfe85 ]--- commit 23da9588037e ("fs/proc/proc_sysctl.c: fix NULL pointer dereference in put_links") forgot to handle start_unregistering() case, while header->parent is NULL, it calls erase_header() and as seen in the above syzkaller call trace, accessing &header->parent->root will trigger a NULL pointer dereference. As that commit explained, there is also no need to call start_unregistering() if header->parent is NULL. Link: http://lkml.kernel.org/r/20190409153622.28112-1-yuehaibing@huawei.com Fixes: 23da9588037e ("fs/proc/proc_sysctl.c: fix NULL pointer dereference in put_links") Fixes: 0e47c99d7fe25 ("sysctl: Replace root_list with links between sysctl_table_sets") Signed-off-by: YueHaibing Reported-by: Hulk Robot Reviewed-by: Kees Cook Cc: Luis Chamberlain Cc: Alexey Dobriyan Cc: Al Viro Cc: "Eric W. Biederman" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/proc/proc_sysctl.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c index d65390727541..7325baa8f9d4 100644 --- a/fs/proc/proc_sysctl.c +++ b/fs/proc/proc_sysctl.c @@ -1626,9 +1626,11 @@ static void drop_sysctl_table(struct ctl_table_header *header) if (--header->nreg) return; - if (parent) + if (parent) { put_links(header); - start_unregistering(header); + start_unregistering(header); + } + if (!--header->count) kfree_rcu(header, rcu); -- cgit v1.2.3 From 3a349763cf11e63534b8f2d302f2d0c790566497 Mon Sep 17 00:00:00 2001 From: Lucas Stach Date: Fri, 26 Apr 2019 17:22:01 -0700 Subject: Input: synaptics-rmi4 - write config register values to the right offset Currently any changed config register values don't take effect, as the function to write them back is called with the wrong register offset. Fixes: ff8f83708b3e (Input: synaptics-rmi4 - add support for 2D sensors and F11) Signed-off-by: Lucas Stach Reviewed-by: Philipp Zabel Cc: stable@vger.kernel.org Signed-off-by: Dmitry Torokhov --- drivers/input/rmi4/rmi_f11.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/input/rmi4/rmi_f11.c b/drivers/input/rmi4/rmi_f11.c index df64d6aed4f7..93901ebd122a 100644 --- a/drivers/input/rmi4/rmi_f11.c +++ b/drivers/input/rmi4/rmi_f11.c @@ -1230,7 +1230,7 @@ static int rmi_f11_initialize(struct rmi_function *fn) } rc = f11_write_control_regs(fn, &f11->sens_query, - &f11->dev_controls, fn->fd.query_base_addr); + &f11->dev_controls, fn->fd.control_base_addr); if (rc) dev_warn(&fn->dev, "Failed to write control registers\n"); -- cgit v1.2.3 From baf76f0c58aec435a3a864075b8f6d8ee5d1f17e Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Thu, 25 Apr 2019 16:13:58 -0700 Subject: slip: make slhc_free() silently accept an error pointer This way, slhc_free() accepts what slhc_init() returns, whether that is an error or not. In particular, the pattern in sl_alloc_bufs() is slcomp = slhc_init(16, 16); ... slhc_free(slcomp); for the error handling path, and rather than complicate that code, just make it ok to always free what was returned by the init function. That's what the code used to do before commit 4ab42d78e37a ("ppp, slip: Validate VJ compression slot parameters completely") when slhc_init() just returned NULL for the error case, with no actual indication of the details of the error. Reported-by: syzbot+45474c076a4927533d2e@syzkaller.appspotmail.com Fixes: 4ab42d78e37a ("ppp, slip: Validate VJ compression slot parameters completely") Acked-by: Ben Hutchings Cc: David Miller Signed-off-by: Linus Torvalds --- drivers/net/slip/slhc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/slip/slhc.c b/drivers/net/slip/slhc.c index f4e93f5fc204..ea90db3c7705 100644 --- a/drivers/net/slip/slhc.c +++ b/drivers/net/slip/slhc.c @@ -153,7 +153,7 @@ out_fail: void slhc_free(struct slcompress *comp) { - if ( comp == NULLSLCOMPR ) + if ( IS_ERR_OR_NULL(comp) ) return; if ( comp->tstate != NULLSLSTATE ) -- cgit v1.2.3 From b4e30e8e7ea1d1e35ffd64ca46f7d9a7f227b4bf Mon Sep 17 00:00:00 2001 From: Michael Chan Date: Thu, 25 Apr 2019 22:31:50 -0400 Subject: bnxt_en: Improve multicast address setup logic. The driver builds a list of multicast addresses and sends it to the firmware when the driver's ndo_set_rx_mode() is called. In rare cases, the firmware can fail this call if internal resources to add multicast addresses are exhausted. In that case, we should try the call again by setting the ALL_MCAST flag which is more guaranteed to succeed. Fixes: c0c050c58d84 ("bnxt_en: New Broadcom ethernet driver.") Signed-off-by: Michael Chan Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index 4c586ba4364b..42fd273d457e 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -8961,8 +8961,15 @@ static int bnxt_cfg_rx_mode(struct bnxt *bp) skip_uc: rc = bnxt_hwrm_cfa_l2_set_rx_mask(bp, 0); + if (rc && vnic->mc_list_count) { + netdev_info(bp->dev, "Failed setting MC filters rc: %d, turning on ALL_MCAST mode\n", + rc); + vnic->rx_mask |= CFA_L2_SET_RX_MASK_REQ_MASK_ALL_MCAST; + vnic->mc_list_count = 0; + rc = bnxt_hwrm_cfa_l2_set_rx_mask(bp, 0); + } if (rc) - netdev_err(bp->dev, "HWRM cfa l2 rx mask failure rc: %x\n", + netdev_err(bp->dev, "HWRM cfa l2 rx mask failure rc: %d\n", rc); return rc; -- cgit v1.2.3 From f9099d611449836a51a65f40ea7dc9cb5f2f665e Mon Sep 17 00:00:00 2001 From: Vasundhara Volam Date: Thu, 25 Apr 2019 22:31:51 -0400 Subject: bnxt_en: Free short FW command HWRM memory in error path in bnxt_init_one() In the bnxt_init_one() error path, short FW command request memory is not freed. This patch fixes it. Fixes: e605db801bde ("bnxt_en: Support for Short Firmware Message") Signed-off-by: Vasundhara Volam Signed-off-by: Michael Chan Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index 42fd273d457e..5d02f5999df7 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -10692,6 +10692,7 @@ init_err_cleanup_tc: bnxt_clear_int_mode(bp); init_err_pci_clean: + bnxt_free_hwrm_short_cmd_req(bp); bnxt_free_hwrm_resources(bp); bnxt_free_ctx_mem(bp); kfree(bp->ctx); -- cgit v1.2.3 From 1f83391bd6fc48f92f627b0ec0bce686d100c6a5 Mon Sep 17 00:00:00 2001 From: Michael Chan Date: Thu, 25 Apr 2019 22:31:52 -0400 Subject: bnxt_en: Fix possible crash in bnxt_hwrm_ring_free() under error conditions. If we encounter errors during open and proceed to clean up, bnxt_hwrm_ring_free() may crash if the rings we try to free have never been allocated. bnxt_cp_ring_for_rx() or bnxt_cp_ring_for_tx() may reference pointers that have not been allocated. Fix it by checking for valid fw_ring_id first before calling bnxt_cp_ring_for_rx() or bnxt_cp_ring_for_tx(). Fixes: 2c61d2117ecb ("bnxt_en: Add helper functions to get firmware CP ring ID.") Signed-off-by: Michael Chan Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index 5d02f5999df7..b03669f67a0c 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -5135,10 +5135,10 @@ static void bnxt_hwrm_ring_free(struct bnxt *bp, bool close_path) for (i = 0; i < bp->tx_nr_rings; i++) { struct bnxt_tx_ring_info *txr = &bp->tx_ring[i]; struct bnxt_ring_struct *ring = &txr->tx_ring_struct; - u32 cmpl_ring_id; - cmpl_ring_id = bnxt_cp_ring_for_tx(bp, txr); if (ring->fw_ring_id != INVALID_HW_RING_ID) { + u32 cmpl_ring_id = bnxt_cp_ring_for_tx(bp, txr); + hwrm_ring_free_send_msg(bp, ring, RING_FREE_REQ_RING_TYPE_TX, close_path ? cmpl_ring_id : @@ -5151,10 +5151,10 @@ static void bnxt_hwrm_ring_free(struct bnxt *bp, bool close_path) struct bnxt_rx_ring_info *rxr = &bp->rx_ring[i]; struct bnxt_ring_struct *ring = &rxr->rx_ring_struct; u32 grp_idx = rxr->bnapi->index; - u32 cmpl_ring_id; - cmpl_ring_id = bnxt_cp_ring_for_rx(bp, rxr); if (ring->fw_ring_id != INVALID_HW_RING_ID) { + u32 cmpl_ring_id = bnxt_cp_ring_for_rx(bp, rxr); + hwrm_ring_free_send_msg(bp, ring, RING_FREE_REQ_RING_TYPE_RX, close_path ? cmpl_ring_id : @@ -5173,10 +5173,10 @@ static void bnxt_hwrm_ring_free(struct bnxt *bp, bool close_path) struct bnxt_rx_ring_info *rxr = &bp->rx_ring[i]; struct bnxt_ring_struct *ring = &rxr->rx_agg_ring_struct; u32 grp_idx = rxr->bnapi->index; - u32 cmpl_ring_id; - cmpl_ring_id = bnxt_cp_ring_for_rx(bp, rxr); if (ring->fw_ring_id != INVALID_HW_RING_ID) { + u32 cmpl_ring_id = bnxt_cp_ring_for_rx(bp, rxr); + hwrm_ring_free_send_msg(bp, ring, type, close_path ? cmpl_ring_id : INVALID_HW_RING_ID); -- cgit v1.2.3 From ad361adf0d08f1135f3845c6b3a36be7cc0bfda5 Mon Sep 17 00:00:00 2001 From: Michael Chan Date: Thu, 25 Apr 2019 22:31:53 -0400 Subject: bnxt_en: Pass correct extended TX port statistics size to firmware. If driver determines that extended TX port statistics are not supported or allocation of the data structure fails, make sure to pass 0 TX stats size to firmware to disable it. The firmware returned TX stats size should also be set to 0 for consistency. This will prevent bnxt_get_ethtool_stats() from accessing the NULL TX stats pointer in case there is mismatch between firmware and driver. Fixes: 36e53349b60b ("bnxt_en: Add additional extended port statistics.") Signed-off-by: Michael Chan Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index b03669f67a0c..a9172b2feb1d 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -6753,6 +6753,7 @@ static int bnxt_hwrm_port_qstats_ext(struct bnxt *bp) struct hwrm_queue_pri2cos_qcfg_input req2 = {0}; struct hwrm_port_qstats_ext_input req = {0}; struct bnxt_pf_info *pf = &bp->pf; + u32 tx_stat_size; int rc; if (!(bp->flags & BNXT_FLAG_PORT_STATS_EXT)) @@ -6762,13 +6763,16 @@ static int bnxt_hwrm_port_qstats_ext(struct bnxt *bp) req.port_id = cpu_to_le16(pf->port_id); req.rx_stat_size = cpu_to_le16(sizeof(struct rx_port_stats_ext)); req.rx_stat_host_addr = cpu_to_le64(bp->hw_rx_port_stats_ext_map); - req.tx_stat_size = cpu_to_le16(sizeof(struct tx_port_stats_ext)); + tx_stat_size = bp->hw_tx_port_stats_ext ? + sizeof(*bp->hw_tx_port_stats_ext) : 0; + req.tx_stat_size = cpu_to_le16(tx_stat_size); req.tx_stat_host_addr = cpu_to_le64(bp->hw_tx_port_stats_ext_map); mutex_lock(&bp->hwrm_cmd_lock); rc = _hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT); if (!rc) { bp->fw_rx_stats_ext_size = le16_to_cpu(resp->rx_stat_size) / 8; - bp->fw_tx_stats_ext_size = le16_to_cpu(resp->tx_stat_size) / 8; + bp->fw_tx_stats_ext_size = tx_stat_size ? + le16_to_cpu(resp->tx_stat_size) / 8 : 0; } else { bp->fw_rx_stats_ext_size = 0; bp->fw_tx_stats_ext_size = 0; -- cgit v1.2.3 From 3f93cd3f098e284c851acb89265ebe35b994a5c8 Mon Sep 17 00:00:00 2001 From: Michael Chan Date: Thu, 25 Apr 2019 22:31:54 -0400 Subject: bnxt_en: Fix statistics context reservation logic. In an earlier commit that fixes the number of stats contexts to reserve for the RDMA driver, we added a function parameter to pass in the number of stats contexts to all the relevant functions. The passed in parameter should have been used to set the enables field of the firmware message. Fixes: 780baad44f0f ("bnxt_en: Reserve 1 stat_ctx for RDMA driver.") Signed-off-by: Michael Chan Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index a9172b2feb1d..b6cb7b8ceab9 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -5315,17 +5315,16 @@ __bnxt_hwrm_reserve_pf_rings(struct bnxt *bp, struct hwrm_func_cfg_input *req, req->num_tx_rings = cpu_to_le16(tx_rings); if (BNXT_NEW_RM(bp)) { enables |= rx_rings ? FUNC_CFG_REQ_ENABLES_NUM_RX_RINGS : 0; + enables |= stats ? FUNC_CFG_REQ_ENABLES_NUM_STAT_CTXS : 0; if (bp->flags & BNXT_FLAG_CHIP_P5) { enables |= cp_rings ? FUNC_CFG_REQ_ENABLES_NUM_MSIX : 0; enables |= tx_rings + ring_grps ? - FUNC_CFG_REQ_ENABLES_NUM_CMPL_RINGS | - FUNC_CFG_REQ_ENABLES_NUM_STAT_CTXS : 0; + FUNC_CFG_REQ_ENABLES_NUM_CMPL_RINGS : 0; enables |= rx_rings ? FUNC_CFG_REQ_ENABLES_NUM_RSSCOS_CTXS : 0; } else { enables |= cp_rings ? - FUNC_CFG_REQ_ENABLES_NUM_CMPL_RINGS | - FUNC_CFG_REQ_ENABLES_NUM_STAT_CTXS : 0; + FUNC_CFG_REQ_ENABLES_NUM_CMPL_RINGS : 0; enables |= ring_grps ? FUNC_CFG_REQ_ENABLES_NUM_HW_RING_GRPS | FUNC_CFG_REQ_ENABLES_NUM_RSSCOS_CTXS : 0; @@ -5365,14 +5364,13 @@ __bnxt_hwrm_reserve_vf_rings(struct bnxt *bp, enables |= tx_rings ? FUNC_VF_CFG_REQ_ENABLES_NUM_TX_RINGS : 0; enables |= rx_rings ? FUNC_VF_CFG_REQ_ENABLES_NUM_RX_RINGS | FUNC_VF_CFG_REQ_ENABLES_NUM_RSSCOS_CTXS : 0; + enables |= stats ? FUNC_VF_CFG_REQ_ENABLES_NUM_STAT_CTXS : 0; if (bp->flags & BNXT_FLAG_CHIP_P5) { enables |= tx_rings + ring_grps ? - FUNC_VF_CFG_REQ_ENABLES_NUM_CMPL_RINGS | - FUNC_VF_CFG_REQ_ENABLES_NUM_STAT_CTXS : 0; + FUNC_VF_CFG_REQ_ENABLES_NUM_CMPL_RINGS : 0; } else { enables |= cp_rings ? - FUNC_VF_CFG_REQ_ENABLES_NUM_CMPL_RINGS | - FUNC_VF_CFG_REQ_ENABLES_NUM_STAT_CTXS : 0; + FUNC_VF_CFG_REQ_ENABLES_NUM_CMPL_RINGS : 0; enables |= ring_grps ? FUNC_VF_CFG_REQ_ENABLES_NUM_HW_RING_GRPS : 0; } -- cgit v1.2.3 From 0b397b17a4120cb80f7bf89eb30587b3dd9b0d1d Mon Sep 17 00:00:00 2001 From: Michael Chan Date: Thu, 25 Apr 2019 22:31:55 -0400 Subject: bnxt_en: Fix uninitialized variable usage in bnxt_rx_pkt(). In bnxt_rx_pkt(), if the driver encounters BD errors, it will recycle the buffers and jump to the end where the uninitailized variable "len" is referenced. Fix it by adding a new jump label that will skip the length update. This is the most correct fix since the length may not be valid when we get this type of error. Fixes: 6a8788f25625 ("bnxt_en: add support for software dynamic interrupt moderation") Reported-by: Nathan Chancellor Cc: Nathan Chancellor Signed-off-by: Michael Chan Reviewed-by: Nathan Chancellor Tested-by: Nathan Chancellor Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index b6cb7b8ceab9..52ade133b57c 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -1625,7 +1625,7 @@ static int bnxt_rx_pkt(struct bnxt *bp, struct bnxt_cp_ring_info *cpr, netdev_warn(bp->dev, "RX buffer error %x\n", rx_err); bnxt_sched_reset(bp, rxr); } - goto next_rx; + goto next_rx_no_len; } len = le32_to_cpu(rxcmp->rx_cmp_len_flags_type) >> RX_CMP_LEN_SHIFT; @@ -1706,12 +1706,13 @@ static int bnxt_rx_pkt(struct bnxt *bp, struct bnxt_cp_ring_info *cpr, rc = 1; next_rx: - rxr->rx_prod = NEXT_RX(prod); - rxr->rx_next_cons = NEXT_RX(cons); - cpr->rx_packets += 1; cpr->rx_bytes += len; +next_rx_no_len: + rxr->rx_prod = NEXT_RX(prod); + rxr->rx_next_cons = NEXT_RX(cons); + next_rx_no_prod_no_len: *raw_cons = tmp_raw_cons; -- cgit v1.2.3 From 97e1caa517e22d62a283b876fb8aa5f4672c83dd Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Thu, 25 Apr 2019 17:35:09 -0700 Subject: net/tls: don't copy negative amounts of data in reencrypt There is no guarantee the record starts before the skb frags. If we don't check for this condition copy amount will get negative, leading to reads and writes to random memory locations. Familiar hilarity ensues. Fixes: 4799ac81e52a ("tls: Add rx inline crypto offload") Signed-off-by: Jakub Kicinski Reviewed-by: John Hurley Signed-off-by: David S. Miller --- net/tls/tls_device.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/net/tls/tls_device.c b/net/tls/tls_device.c index cc0256939eb6..96357060addc 100644 --- a/net/tls/tls_device.c +++ b/net/tls/tls_device.c @@ -628,14 +628,16 @@ static int tls_device_reencrypt(struct sock *sk, struct sk_buff *skb) else err = 0; - copy = min_t(int, skb_pagelen(skb) - offset, - rxm->full_len - TLS_CIPHER_AES_GCM_128_TAG_SIZE); + if (skb_pagelen(skb) > offset) { + copy = min_t(int, skb_pagelen(skb) - offset, + rxm->full_len - TLS_CIPHER_AES_GCM_128_TAG_SIZE); - if (skb->decrypted) - skb_store_bits(skb, offset, buf, copy); + if (skb->decrypted) + skb_store_bits(skb, offset, buf, copy); - offset += copy; - buf += copy; + offset += copy; + buf += copy; + } skb_walk_frags(skb, skb_iter) { copy = min_t(int, skb_iter->len, -- cgit v1.2.3 From eb3d38d5adb520435d4e4af32529ccb13ccc9935 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Thu, 25 Apr 2019 17:35:10 -0700 Subject: net/tls: fix copy to fragments in reencrypt Fragments may contain data from other records so we have to account for that when we calculate the destination and max length of copy we can perform. Note that 'offset' is the offset within the message, so it can't be passed as offset within the frag.. Here skb_store_bits() would have realised the call is wrong and simply not copy data. Fixes: 4799ac81e52a ("tls: Add rx inline crypto offload") Signed-off-by: Jakub Kicinski Reviewed-by: John Hurley Signed-off-by: David S. Miller --- net/tls/tls_device.c | 29 ++++++++++++++++++++++------- 1 file changed, 22 insertions(+), 7 deletions(-) diff --git a/net/tls/tls_device.c b/net/tls/tls_device.c index 96357060addc..14dedb24fa7b 100644 --- a/net/tls/tls_device.c +++ b/net/tls/tls_device.c @@ -597,7 +597,7 @@ void handle_device_resync(struct sock *sk, u32 seq, u64 rcd_sn) static int tls_device_reencrypt(struct sock *sk, struct sk_buff *skb) { struct strp_msg *rxm = strp_msg(skb); - int err = 0, offset = rxm->offset, copy, nsg; + int err = 0, offset = rxm->offset, copy, nsg, data_len, pos; struct sk_buff *skb_iter, *unused; struct scatterlist sg[1]; char *orig_buf, *buf; @@ -628,9 +628,10 @@ static int tls_device_reencrypt(struct sock *sk, struct sk_buff *skb) else err = 0; + data_len = rxm->full_len - TLS_CIPHER_AES_GCM_128_TAG_SIZE; + if (skb_pagelen(skb) > offset) { - copy = min_t(int, skb_pagelen(skb) - offset, - rxm->full_len - TLS_CIPHER_AES_GCM_128_TAG_SIZE); + copy = min_t(int, skb_pagelen(skb) - offset, data_len); if (skb->decrypted) skb_store_bits(skb, offset, buf, copy); @@ -639,16 +640,30 @@ static int tls_device_reencrypt(struct sock *sk, struct sk_buff *skb) buf += copy; } + pos = skb_pagelen(skb); skb_walk_frags(skb, skb_iter) { - copy = min_t(int, skb_iter->len, - rxm->full_len - offset + rxm->offset - - TLS_CIPHER_AES_GCM_128_TAG_SIZE); + int frag_pos; + + /* Practically all frags must belong to msg if reencrypt + * is needed with current strparser and coalescing logic, + * but strparser may "get optimized", so let's be safe. + */ + if (pos + skb_iter->len <= offset) + goto done_with_frag; + if (pos >= data_len + rxm->offset) + break; + + frag_pos = offset - pos; + copy = min_t(int, skb_iter->len - frag_pos, + data_len + rxm->offset - offset); if (skb_iter->decrypted) - skb_store_bits(skb_iter, offset, buf, copy); + skb_store_bits(skb_iter, frag_pos, buf, copy); offset += copy; buf += copy; +done_with_frag: + pos += skb_iter->len; } free_buf: -- cgit v1.2.3 From 21f1b8a6636c4dbde4aa1ec0343f42eaf653ffcc Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Fri, 26 Apr 2019 12:50:44 +0200 Subject: udp: fix GRO reception in case of length mismatch Currently, the UDP GRO code path does bad things on some edge conditions - Aggregation can happen even on packet with different lengths. Fix the above by rewriting the 'complete' condition for GRO packets. While at it, note explicitly that we allow merging the first packet per burst below gso_size. Reported-by: Sean Tong Fixes: e20cf8d3f1f7 ("udp: implement GRO for plain UDP sockets.") Signed-off-by: Paolo Abeni Signed-off-by: David S. Miller --- net/ipv4/udp_offload.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c index 64f9715173ac..d8776b2110c1 100644 --- a/net/ipv4/udp_offload.c +++ b/net/ipv4/udp_offload.c @@ -377,13 +377,14 @@ static struct sk_buff *udp_gro_receive_segment(struct list_head *head, /* Terminate the flow on len mismatch or if it grow "too much". * Under small packet flood GRO count could elsewhere grow a lot - * leading to execessive truesize values + * leading to execessive truesize values. + * On len mismatch merge the first packet shorter than gso_size, + * otherwise complete the GRO packet. */ - if (!skb_gro_receive(p, skb) && + if (uh->len > uh2->len || skb_gro_receive(p, skb) || + uh->len != uh2->len || NAPI_GRO_CB(p)->count >= UDP_GRO_CNT_MAX) pp = p; - else if (uh->len != uh2->len) - pp = p; return pp; } -- cgit v1.2.3 From de1887c064b9996ac03120d90d0a909a3f678f98 Mon Sep 17 00:00:00 2001 From: Luca Coelho Date: Tue, 16 Apr 2019 12:57:21 +0300 Subject: iwlwifi: mvm: check for length correctness in iwl_mvm_create_skb() We don't check for the validity of the lengths in the packet received from the firmware. If the MPDU length received in the rx descriptor is too short to contain the header length and the crypt length together, we may end up trying to copy a negative number of bytes (headlen - hdrlen < 0) which will underflow and cause us to try to copy a huge amount of data. This causes oopses such as this one: BUG: unable to handle kernel paging request at ffff896be2970000 PGD 5e201067 P4D 5e201067 PUD 5e205067 PMD 16110d063 PTE 8000000162970161 Oops: 0003 [#1] PREEMPT SMP NOPTI CPU: 2 PID: 1824 Comm: irq/134-iwlwifi Not tainted 4.19.33-04308-geea41cf4930f #1 Hardware name: [...] RIP: 0010:memcpy_erms+0x6/0x10 Code: 90 90 90 90 eb 1e 0f 1f 00 48 89 f8 48 89 d1 48 c1 e9 03 83 e2 07 f3 48 a5 89 d1 f3 a4 c3 66 0f 1f 44 00 00 48 89 f8 48 89 d1 a4 c3 0f 1f 80 00 00 00 00 48 89 f8 48 83 fa 20 72 7e 40 38 fe RSP: 0018:ffffa4630196fc60 EFLAGS: 00010287 RAX: ffff896be2924618 RBX: ffff896bc8ecc600 RCX: 00000000fffb4610 RDX: 00000000fffffff8 RSI: ffff896a835e2a38 RDI: ffff896be2970000 RBP: ffffa4630196fd30 R08: ffff896bc8ecc600 R09: ffff896a83597000 R10: ffff896bd6998400 R11: 000000000200407f R12: ffff896a83597050 R13: 00000000fffffff8 R14: 0000000000000010 R15: ffff896a83597038 FS: 0000000000000000(0000) GS:ffff896be8280000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: ffff896be2970000 CR3: 000000005dc12002 CR4: 00000000003606e0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: iwl_mvm_rx_mpdu_mq+0xb51/0x121b [iwlmvm] iwl_pcie_rx_handle+0x58c/0xa89 [iwlwifi] iwl_pcie_irq_rx_msix_handler+0xd9/0x12a [iwlwifi] irq_thread_fn+0x24/0x49 irq_thread+0xb0/0x122 kthread+0x138/0x140 ret_from_fork+0x1f/0x40 Fix that by checking the lengths for correctness and trigger a warning to show that we have received wrong data. Signed-off-by: Luca Coelho --- drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c | 28 +++++++++++++++++++++++---- 1 file changed, 24 insertions(+), 4 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c b/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c index 1e03acf30762..b516fd1867ec 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c @@ -169,9 +169,9 @@ static inline int iwl_mvm_check_pn(struct iwl_mvm *mvm, struct sk_buff *skb, } /* iwl_mvm_create_skb Adds the rxb to a new skb */ -static void iwl_mvm_create_skb(struct sk_buff *skb, struct ieee80211_hdr *hdr, - u16 len, u8 crypt_len, - struct iwl_rx_cmd_buffer *rxb) +static int iwl_mvm_create_skb(struct iwl_mvm *mvm, struct sk_buff *skb, + struct ieee80211_hdr *hdr, u16 len, u8 crypt_len, + struct iwl_rx_cmd_buffer *rxb) { struct iwl_rx_packet *pkt = rxb_addr(rxb); struct iwl_rx_mpdu_desc *desc = (void *)pkt->data; @@ -204,6 +204,20 @@ static void iwl_mvm_create_skb(struct sk_buff *skb, struct ieee80211_hdr *hdr, * present before copying packet data. */ hdrlen += crypt_len; + + if (WARN_ONCE(headlen < hdrlen, + "invalid packet lengths (hdrlen=%d, len=%d, crypt_len=%d)\n", + hdrlen, len, crypt_len)) { + /* + * We warn and trace because we want to be able to see + * it in trace-cmd as well. + */ + IWL_DEBUG_RX(mvm, + "invalid packet lengths (hdrlen=%d, len=%d, crypt_len=%d)\n", + hdrlen, len, crypt_len); + return -EINVAL; + } + skb_put_data(skb, hdr, hdrlen); skb_put_data(skb, (u8 *)hdr + hdrlen + pad_len, headlen - hdrlen); @@ -216,6 +230,8 @@ static void iwl_mvm_create_skb(struct sk_buff *skb, struct ieee80211_hdr *hdr, skb_add_rx_frag(skb, 0, rxb_steal_page(rxb), offset, fraglen, rxb->truesize); } + + return 0; } static void iwl_mvm_add_rtap_sniffer_config(struct iwl_mvm *mvm, @@ -1671,7 +1687,11 @@ void iwl_mvm_rx_mpdu_mq(struct iwl_mvm *mvm, struct napi_struct *napi, rx_status->boottime_ns = ktime_get_boot_ns(); } - iwl_mvm_create_skb(skb, hdr, len, crypt_len, rxb); + if (iwl_mvm_create_skb(mvm, skb, hdr, len, crypt_len, rxb)) { + kfree_skb(skb); + goto out; + } + if (!iwl_mvm_reorder(mvm, napi, queue, sta, skb, desc)) iwl_mvm_pass_packet_to_mac80211(mvm, napi, skb, queue, sta, csi); -- cgit v1.2.3 From 5c9adef9789148d382d7d1307c3d6bfaf51d143d Mon Sep 17 00:00:00 2001 From: Emmanuel Grumbach Date: Sun, 21 Apr 2019 17:58:11 +0300 Subject: iwlwifi: fix driver operation for 5350 We introduced a bug that prevented this old device from working. The driver would simply not be able to complete the INIT flow while spewing this warning: CSR addresses aren't configured WARNING: CPU: 0 PID: 819 at drivers/net/wireless/intel/iwlwifi/pcie/drv.c:917 iwl_pci_probe+0x160/0x1e0 [iwlwifi] Cc: stable@vger.kernel.org # v4.18+ Fixes: a8cbb46f831d ("iwlwifi: allow different csr flags for different device families") Signed-off-by: Emmanuel Grumbach Fixes: c8f1b51e506d ("iwlwifi: allow different csr flags for different device families") Signed-off-by: Luca Coelho --- drivers/net/wireless/intel/iwlwifi/cfg/5000.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/wireless/intel/iwlwifi/cfg/5000.c b/drivers/net/wireless/intel/iwlwifi/cfg/5000.c index 575a7022d045..3846064d51a5 100644 --- a/drivers/net/wireless/intel/iwlwifi/cfg/5000.c +++ b/drivers/net/wireless/intel/iwlwifi/cfg/5000.c @@ -1,7 +1,7 @@ /****************************************************************************** * * Copyright(c) 2007 - 2014 Intel Corporation. All rights reserved. - * Copyright(c) 2018 Intel Corporation + * Copyright(c) 2018 - 2019 Intel Corporation * * This program is free software; you can redistribute it and/or modify it * under the terms of version 2 of the GNU General Public License as @@ -136,6 +136,7 @@ const struct iwl_cfg iwl5350_agn_cfg = { .ht_params = &iwl5000_ht_params, .led_mode = IWL_LED_BLINK, .internal_wimax_coex = true, + .csr = &iwl_csr_v1, }; #define IWL_DEVICE_5150 \ -- cgit v1.2.3 From d156e67d3f58c5d3c7ebe1bec80657db534f32d4 Mon Sep 17 00:00:00 2001 From: Luca Coelho Date: Thu, 25 Apr 2019 10:03:34 +0300 Subject: iwlwifi: mvm: fix merge damage in iwl_mvm_vif_dbgfs_register() When I rebased Greg's patch, I accidentally left the old if block that was already there. Remove it. Fixes: 154d4899e411 ("iwlwifi: mvm: properly check debugfs dentry before using it") Signed-off-by: Luca Coelho --- drivers/net/wireless/intel/iwlwifi/mvm/debugfs-vif.c | 6 ------ 1 file changed, 6 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/debugfs-vif.c b/drivers/net/wireless/intel/iwlwifi/mvm/debugfs-vif.c index 738eddb2e7ac..6925527d8457 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/debugfs-vif.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/debugfs-vif.c @@ -780,12 +780,6 @@ void iwl_mvm_vif_dbgfs_register(struct iwl_mvm *mvm, struct ieee80211_vif *vif) return; } - if (!mvmvif->dbgfs_dir) { - IWL_ERR(mvm, "Failed to create debugfs directory under %pd\n", - dbgfs_dir); - return; - } - if (iwlmvm_mod_params.power_scheme != IWL_POWER_SCHEME_CAM && ((vif->type == NL80211_IFTYPE_STATION && !vif->p2p) || (vif->type == NL80211_IFTYPE_STATION && vif->p2p))) -- cgit v1.2.3 From b1da6a51871c6929dced1a7fad81990988b36ed6 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Wed, 24 Apr 2019 18:39:57 +0200 Subject: fsnotify: Fix NULL ptr deref in fanotify_get_fsid() fanotify_get_fsid() is reading mark->connector->fsid under srcu. It can happen that it sees mark not fully initialized or mark that is already detached from the object list. In these cases mark->connector can be NULL leading to NULL ptr dereference. Fix the problem by being careful when reading mark->connector and check it for being NULL. Also use WRITE_ONCE when writing the mark just to prevent compiler from doing something stupid. Reported-by: syzbot+15927486a4f1bfcbaf91@syzkaller.appspotmail.com Fixes: 77115225acc6 ("fanotify: cache fsid in fsnotify_mark_connector") Signed-off-by: Jan Kara --- fs/notify/fanotify/fanotify.c | 14 ++++++++++++-- fs/notify/mark.c | 12 ++++++------ 2 files changed, 18 insertions(+), 8 deletions(-) diff --git a/fs/notify/fanotify/fanotify.c b/fs/notify/fanotify/fanotify.c index 6b9c27548997..63c6bb1f8c4d 100644 --- a/fs/notify/fanotify/fanotify.c +++ b/fs/notify/fanotify/fanotify.c @@ -346,10 +346,16 @@ static __kernel_fsid_t fanotify_get_fsid(struct fsnotify_iter_info *iter_info) __kernel_fsid_t fsid = {}; fsnotify_foreach_obj_type(type) { + struct fsnotify_mark_connector *conn; + if (!fsnotify_iter_should_report_type(iter_info, type)) continue; - fsid = iter_info->marks[type]->connector->fsid; + conn = READ_ONCE(iter_info->marks[type]->connector); + /* Mark is just getting destroyed or created? */ + if (!conn) + continue; + fsid = conn->fsid; if (WARN_ON_ONCE(!fsid.val[0] && !fsid.val[1])) continue; return fsid; @@ -408,8 +414,12 @@ static int fanotify_handle_event(struct fsnotify_group *group, return 0; } - if (FAN_GROUP_FLAG(group, FAN_REPORT_FID)) + if (FAN_GROUP_FLAG(group, FAN_REPORT_FID)) { fsid = fanotify_get_fsid(iter_info); + /* Racing with mark destruction or creation? */ + if (!fsid.val[0] && !fsid.val[1]) + return 0; + } event = fanotify_alloc_event(group, inode, mask, data, data_type, &fsid); diff --git a/fs/notify/mark.c b/fs/notify/mark.c index d593d4269561..22acb0a79b53 100644 --- a/fs/notify/mark.c +++ b/fs/notify/mark.c @@ -239,13 +239,13 @@ static void fsnotify_drop_object(unsigned int type, void *objp) void fsnotify_put_mark(struct fsnotify_mark *mark) { - struct fsnotify_mark_connector *conn; + struct fsnotify_mark_connector *conn = READ_ONCE(mark->connector); void *objp = NULL; unsigned int type = FSNOTIFY_OBJ_TYPE_DETACHED; bool free_conn = false; /* Catch marks that were actually never attached to object */ - if (!mark->connector) { + if (!conn) { if (refcount_dec_and_test(&mark->refcnt)) fsnotify_final_mark_destroy(mark); return; @@ -255,10 +255,9 @@ void fsnotify_put_mark(struct fsnotify_mark *mark) * We have to be careful so that traversals of obj_list under lock can * safely grab mark reference. */ - if (!refcount_dec_and_lock(&mark->refcnt, &mark->connector->lock)) + if (!refcount_dec_and_lock(&mark->refcnt, &conn->lock)) return; - conn = mark->connector; hlist_del_init_rcu(&mark->obj_list); if (hlist_empty(&conn->list)) { objp = fsnotify_detach_connector_from_object(conn, &type); @@ -266,7 +265,7 @@ void fsnotify_put_mark(struct fsnotify_mark *mark) } else { __fsnotify_recalc_mask(conn); } - mark->connector = NULL; + WRITE_ONCE(mark->connector, NULL); spin_unlock(&conn->lock); fsnotify_drop_object(type, objp); @@ -620,7 +619,7 @@ restart: /* mark should be the last entry. last is the current last entry */ hlist_add_behind_rcu(&mark->obj_list, &last->obj_list); added: - mark->connector = conn; + WRITE_ONCE(mark->connector, conn); out_err: spin_unlock(&conn->lock); spin_unlock(&mark->lock); @@ -808,6 +807,7 @@ void fsnotify_init_mark(struct fsnotify_mark *mark, refcount_set(&mark->refcnt, 1); fsnotify_get_group(group); mark->group = group; + WRITE_ONCE(mark->connector, NULL); } /* -- cgit v1.2.3 From 37624b58542fb9f2d9a70e6ea006ef8a5f66c30b Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 28 Apr 2019 17:04:13 -0700 Subject: Linux 5.1-rc7 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index abe13538a8c0..2b99679148dc 100644 --- a/Makefile +++ b/Makefile @@ -2,7 +2,7 @@ VERSION = 5 PATCHLEVEL = 1 SUBLEVEL = 0 -EXTRAVERSION = -rc6 +EXTRAVERSION = -rc7 NAME = Shy Crocodile # *DOCUMENTATION* -- cgit v1.2.3 From 38faed150438be8d6e419137209d25439e6f4c33 Mon Sep 17 00:00:00 2001 From: Brian Norris Date: Tue, 26 Mar 2019 13:57:28 -0700 Subject: ath10k: perform crash dump collection in workqueue Commit 25733c4e67df ("ath10k: pci: use mutex for diagnostic window CE polling") introduced a regression where we try to sleep (grab a mutex) in an atomic context: [ 233.602619] BUG: sleeping function called from invalid context at kernel/locking/mutex.c:254 [ 233.602626] in_atomic(): 1, irqs_disabled(): 0, pid: 0, name: swapper/0 [ 233.602636] CPU: 0 PID: 0 Comm: swapper/0 Tainted: G W 5.1.0-rc2 #4 [ 233.602642] Hardware name: Google Scarlet (DT) [ 233.602647] Call trace: [ 233.602663] dump_backtrace+0x0/0x11c [ 233.602672] show_stack+0x20/0x28 [ 233.602681] dump_stack+0x98/0xbc [ 233.602690] ___might_sleep+0x154/0x16c [ 233.602696] __might_sleep+0x78/0x88 [ 233.602704] mutex_lock+0x2c/0x5c [ 233.602717] ath10k_pci_diag_read_mem+0x68/0x21c [ath10k_pci] [ 233.602725] ath10k_pci_diag_read32+0x48/0x74 [ath10k_pci] [ 233.602733] ath10k_pci_dump_registers+0x5c/0x16c [ath10k_pci] [ 233.602741] ath10k_pci_fw_crashed_dump+0xb8/0x548 [ath10k_pci] [ 233.602749] ath10k_pci_napi_poll+0x60/0x128 [ath10k_pci] [ 233.602757] net_rx_action+0x140/0x388 [ 233.602766] __do_softirq+0x1b0/0x35c [...] ath10k_pci_fw_crashed_dump() is called from NAPI contexts, and firmware memory dumps are retrieved using the diag memory interface. A simple reproduction case is to run this on QCA6174A / WLAN.RM.4.4.1-00132-QCARMSWP-1, which happens to be a way to b0rk the firmware: dd if=/sys/kernel/debug/ieee80211/phy0/ath10k/mem_value bs=4K count=1 of=/dev/null (NB: simulated firmware crashes, via debugfs, don't trigger firmware dumps.) The fix is to move the crash-dump into a workqueue context, and avoid relying on 'data_lock' for most mutual exclusion. We only keep using it here for protecting 'fw_crash_counter', while the rest of the coredump buffers are protected by a new 'dump_mutex'. I've tested the above with simulated firmware crashes (debugfs 'reset' file), real firmware crashes (the 'dd' command above), and a variety of reboot and suspend/resume configurations on QCA6174A. Reported here: http://lkml.kernel.org/linux-wireless/20190325202706.GA68720@google.com Fixes: 25733c4e67df ("ath10k: pci: use mutex for diagnostic window CE polling") Signed-off-by: Brian Norris Signed-off-by: Kalle Valo --- drivers/net/wireless/ath/ath10k/ce.c | 2 +- drivers/net/wireless/ath/ath10k/core.c | 1 + drivers/net/wireless/ath/ath10k/core.h | 3 +++ drivers/net/wireless/ath/ath10k/coredump.c | 6 +++--- drivers/net/wireless/ath/ath10k/pci.c | 24 +++++++++++++++++++----- drivers/net/wireless/ath/ath10k/pci.h | 2 ++ 6 files changed, 29 insertions(+), 9 deletions(-) diff --git a/drivers/net/wireless/ath/ath10k/ce.c b/drivers/net/wireless/ath/ath10k/ce.c index 24b983edb357..eca87f7c5b6c 100644 --- a/drivers/net/wireless/ath/ath10k/ce.c +++ b/drivers/net/wireless/ath/ath10k/ce.c @@ -1855,7 +1855,7 @@ void ath10k_ce_dump_registers(struct ath10k *ar, struct ath10k_ce_crash_data ce_data; u32 addr, id; - lockdep_assert_held(&ar->data_lock); + lockdep_assert_held(&ar->dump_mutex); ath10k_err(ar, "Copy Engine register dump:\n"); diff --git a/drivers/net/wireless/ath/ath10k/core.c b/drivers/net/wireless/ath/ath10k/core.c index 835b8de92d55..aff585658fc0 100644 --- a/drivers/net/wireless/ath/ath10k/core.c +++ b/drivers/net/wireless/ath/ath10k/core.c @@ -3119,6 +3119,7 @@ struct ath10k *ath10k_core_create(size_t priv_size, struct device *dev, goto err_free_wq; mutex_init(&ar->conf_mutex); + mutex_init(&ar->dump_mutex); spin_lock_init(&ar->data_lock); INIT_LIST_HEAD(&ar->peers); diff --git a/drivers/net/wireless/ath/ath10k/core.h b/drivers/net/wireless/ath/ath10k/core.h index e08a17b01e03..e35aae5146f1 100644 --- a/drivers/net/wireless/ath/ath10k/core.h +++ b/drivers/net/wireless/ath/ath10k/core.h @@ -1063,6 +1063,9 @@ struct ath10k { /* prevents concurrent FW reconfiguration */ struct mutex conf_mutex; + /* protects coredump data */ + struct mutex dump_mutex; + /* protects shared structure data */ spinlock_t data_lock; diff --git a/drivers/net/wireless/ath/ath10k/coredump.c b/drivers/net/wireless/ath/ath10k/coredump.c index 33838d9c1cb6..45a355fb62b9 100644 --- a/drivers/net/wireless/ath/ath10k/coredump.c +++ b/drivers/net/wireless/ath/ath10k/coredump.c @@ -1102,7 +1102,7 @@ struct ath10k_fw_crash_data *ath10k_coredump_new(struct ath10k *ar) { struct ath10k_fw_crash_data *crash_data = ar->coredump.fw_crash_data; - lockdep_assert_held(&ar->data_lock); + lockdep_assert_held(&ar->dump_mutex); if (ath10k_coredump_mask == 0) /* coredump disabled */ @@ -1146,7 +1146,7 @@ static struct ath10k_dump_file_data *ath10k_coredump_build(struct ath10k *ar) if (!buf) return NULL; - spin_lock_bh(&ar->data_lock); + mutex_lock(&ar->dump_mutex); dump_data = (struct ath10k_dump_file_data *)(buf); strlcpy(dump_data->df_magic, "ATH10K-FW-DUMP", @@ -1213,7 +1213,7 @@ static struct ath10k_dump_file_data *ath10k_coredump_build(struct ath10k *ar) sofar += sizeof(*dump_tlv) + crash_data->ramdump_buf_len; } - spin_unlock_bh(&ar->data_lock); + mutex_unlock(&ar->dump_mutex); return dump_data; } diff --git a/drivers/net/wireless/ath/ath10k/pci.c b/drivers/net/wireless/ath/ath10k/pci.c index 271f92c24d44..2c27f407a851 100644 --- a/drivers/net/wireless/ath/ath10k/pci.c +++ b/drivers/net/wireless/ath/ath10k/pci.c @@ -1441,7 +1441,7 @@ static void ath10k_pci_dump_registers(struct ath10k *ar, __le32 reg_dump_values[REG_DUMP_COUNT_QCA988X] = {}; int i, ret; - lockdep_assert_held(&ar->data_lock); + lockdep_assert_held(&ar->dump_mutex); ret = ath10k_pci_diag_read_hi(ar, ®_dump_values[0], hi_failure_state, @@ -1656,7 +1656,7 @@ static void ath10k_pci_dump_memory(struct ath10k *ar, int ret, i; u8 *buf; - lockdep_assert_held(&ar->data_lock); + lockdep_assert_held(&ar->dump_mutex); if (!crash_data) return; @@ -1734,14 +1734,19 @@ static void ath10k_pci_dump_memory(struct ath10k *ar, } } -static void ath10k_pci_fw_crashed_dump(struct ath10k *ar) +static void ath10k_pci_fw_dump_work(struct work_struct *work) { + struct ath10k_pci *ar_pci = container_of(work, struct ath10k_pci, + dump_work); struct ath10k_fw_crash_data *crash_data; + struct ath10k *ar = ar_pci->ar; char guid[UUID_STRING_LEN + 1]; - spin_lock_bh(&ar->data_lock); + mutex_lock(&ar->dump_mutex); + spin_lock_bh(&ar->data_lock); ar->stats.fw_crash_counter++; + spin_unlock_bh(&ar->data_lock); crash_data = ath10k_coredump_new(ar); @@ -1756,11 +1761,18 @@ static void ath10k_pci_fw_crashed_dump(struct ath10k *ar) ath10k_ce_dump_registers(ar, crash_data); ath10k_pci_dump_memory(ar, crash_data); - spin_unlock_bh(&ar->data_lock); + mutex_unlock(&ar->dump_mutex); queue_work(ar->workqueue, &ar->restart_work); } +static void ath10k_pci_fw_crashed_dump(struct ath10k *ar) +{ + struct ath10k_pci *ar_pci = ath10k_pci_priv(ar); + + queue_work(ar->workqueue, &ar_pci->dump_work); +} + void ath10k_pci_hif_send_complete_check(struct ath10k *ar, u8 pipe, int force) { @@ -3442,6 +3454,8 @@ int ath10k_pci_setup_resource(struct ath10k *ar) spin_lock_init(&ar_pci->ps_lock); mutex_init(&ar_pci->ce_diag_mutex); + INIT_WORK(&ar_pci->dump_work, ath10k_pci_fw_dump_work); + timer_setup(&ar_pci->rx_post_retry, ath10k_pci_rx_replenish_retry, 0); if (QCA_REV_6174(ar) || QCA_REV_9377(ar)) diff --git a/drivers/net/wireless/ath/ath10k/pci.h b/drivers/net/wireless/ath/ath10k/pci.h index 3773c79f322f..4455ed6c5275 100644 --- a/drivers/net/wireless/ath/ath10k/pci.h +++ b/drivers/net/wireless/ath/ath10k/pci.h @@ -121,6 +121,8 @@ struct ath10k_pci { /* For protecting ce_diag */ struct mutex ce_diag_mutex; + struct work_struct dump_work; + struct ath10k_ce ce; struct timer_list rx_post_retry; -- cgit v1.2.3 From 9e80ad37f6788ed52b89a3cfcd593e0aa69b216d Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Sun, 3 Mar 2019 18:24:33 +0100 Subject: ath10k: Drop WARN_ON()s that always trigger during system resume ath10k_mac_vif_chan() always returns an error for the given vif during system-wide resume which reliably triggers two WARN_ON()s in ath10k_bss_info_changed() and they are not particularly useful in that code path, so drop them. Tested: QCA6174 hw3.2 PCI with WLAN.RM.2.0-00180-QCARMSWPZ-1 Tested: QCA6174 hw3.2 SDIO with WLAN.RMH.4.4.1-00007-QCARMSWP-1 Fixes: cd93b83ad927 ("ath10k: support for multicast rate control") Fixes: f279294e9ee2 ("ath10k: add support for configuring management packet rate") Cc: stable@vger.kernel.org Reviewed-by: Brian Norris Tested-by: Brian Norris Tested-by: Claire Chang Signed-off-by: Rafael J. Wysocki Signed-off-by: Kalle Valo --- drivers/net/wireless/ath/ath10k/mac.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/wireless/ath/ath10k/mac.c b/drivers/net/wireless/ath/ath10k/mac.c index 41e89db244d2..9c703d287333 100644 --- a/drivers/net/wireless/ath/ath10k/mac.c +++ b/drivers/net/wireless/ath/ath10k/mac.c @@ -5774,7 +5774,7 @@ static void ath10k_bss_info_changed(struct ieee80211_hw *hw, } if (changed & BSS_CHANGED_MCAST_RATE && - !WARN_ON(ath10k_mac_vif_chan(arvif->vif, &def))) { + !ath10k_mac_vif_chan(arvif->vif, &def)) { band = def.chan->band; rateidx = vif->bss_conf.mcast_rate[band] - 1; @@ -5812,7 +5812,7 @@ static void ath10k_bss_info_changed(struct ieee80211_hw *hw, } if (changed & BSS_CHANGED_BASIC_RATES) { - if (WARN_ON(ath10k_mac_vif_chan(vif, &def))) { + if (ath10k_mac_vif_chan(vif, &def)) { mutex_unlock(&ar->conf_mutex); return; } -- cgit v1.2.3 From dfbd199a7cfe3e3cd8531e1353cdbd7175bfbc5e Mon Sep 17 00:00:00 2001 From: Paulo Alcantara Date: Sun, 24 Feb 2019 21:55:28 -0300 Subject: selinux: use kernel linux/socket.h for genheaders and mdp When compiling genheaders and mdp from a newer host kernel, the following error happens: In file included from scripts/selinux/genheaders/genheaders.c:18: ./security/selinux/include/classmap.h:238:2: error: #error New address family defined, please update secclass_map. #error New address family defined, please update secclass_map. ^~~~~ make[3]: *** [scripts/Makefile.host:107: scripts/selinux/genheaders/genheaders] Error 1 make[2]: *** [scripts/Makefile.build:599: scripts/selinux/genheaders] Error 2 make[1]: *** [scripts/Makefile.build:599: scripts/selinux] Error 2 make[1]: *** Waiting for unfinished jobs.... Instead of relying on the host definition, include linux/socket.h in classmap.h to have PF_MAX. Cc: stable@vger.kernel.org Signed-off-by: Paulo Alcantara Acked-by: Stephen Smalley [PM: manually merge in mdp.c, subject line tweaks] Signed-off-by: Paul Moore --- scripts/selinux/genheaders/genheaders.c | 1 - scripts/selinux/mdp/mdp.c | 1 - security/selinux/include/classmap.h | 1 + 3 files changed, 1 insertion(+), 2 deletions(-) diff --git a/scripts/selinux/genheaders/genheaders.c b/scripts/selinux/genheaders/genheaders.c index 1ceedea847dd..544ca126a8a8 100644 --- a/scripts/selinux/genheaders/genheaders.c +++ b/scripts/selinux/genheaders/genheaders.c @@ -9,7 +9,6 @@ #include #include #include -#include struct security_class_mapping { const char *name; diff --git a/scripts/selinux/mdp/mdp.c b/scripts/selinux/mdp/mdp.c index 073fe7537f6c..6d51b74bc679 100644 --- a/scripts/selinux/mdp/mdp.c +++ b/scripts/selinux/mdp/mdp.c @@ -32,7 +32,6 @@ #include #include #include -#include static void usage(char *name) { diff --git a/security/selinux/include/classmap.h b/security/selinux/include/classmap.h index bd5fe0d3204a..201f7e588a29 100644 --- a/security/selinux/include/classmap.h +++ b/security/selinux/include/classmap.h @@ -1,5 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0 */ #include +#include #define COMMON_FILE_SOCK_PERMS "ioctl", "read", "write", "create", \ "getattr", "setattr", "lock", "relabelfrom", "relabelto", "append", "map" -- cgit v1.2.3 From 6a5c5d26c4c6c3cc486fef0bf04ff9551132611b Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Mon, 29 Apr 2019 09:48:53 -0700 Subject: rdma: fix build errors on s390 and MIPS due to bad ZERO_PAGE use The parameter to ZERO_PAGE() was wrong, but since all architectures except for MIPS and s390 ignore it, it wasn't noticed until 0-day reported the build error. Fixes: 67f269b37f9b ("RDMA/ucontext: Fix regression with disassociate") Cc: stable@vger.kernel.org Cc: Andrea Arcangeli Cc: Leon Romanovsky Cc: Jason Gunthorpe Signed-off-by: Linus Torvalds --- drivers/infiniband/core/uverbs_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/core/uverbs_main.c b/drivers/infiniband/core/uverbs_main.c index 7843e89235c3..c489f545baae 100644 --- a/drivers/infiniband/core/uverbs_main.c +++ b/drivers/infiniband/core/uverbs_main.c @@ -895,7 +895,7 @@ static vm_fault_t rdma_umap_fault(struct vm_fault *vmf) /* Read only pages can just use the system zero page. */ if (!(vmf->vma->vm_flags & (VM_WRITE | VM_MAYWRITE))) { - vmf->page = ZERO_PAGE(vmf->vm_start); + vmf->page = ZERO_PAGE(vmf->address); get_page(vmf->page); return 0; } -- cgit v1.2.3 From 80871482fd5cb1cb396ea232237a7d9c540854f9 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Mon, 29 Apr 2019 09:51:29 -0700 Subject: x86: make ZERO_PAGE() at least parse its argument This doesn't really do anything, but at least we now parse teh ZERO_PAGE() address argument so that we'll catch the most obvious errors in usage next time they'll happen. See commit 6a5c5d26c4c6 ("rdma: fix build errors on s390 and MIPS due to bad ZERO_PAGE use") what happens when we don't have any use of the macro argument at all. Signed-off-by: Linus Torvalds --- arch/x86/include/asm/pgtable.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h index 2779ace16d23..50b3e2d963c9 100644 --- a/arch/x86/include/asm/pgtable.h +++ b/arch/x86/include/asm/pgtable.h @@ -46,7 +46,7 @@ void ptdump_walk_user_pgd_level_checkwx(void); */ extern unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)] __visible; -#define ZERO_PAGE(vaddr) (virt_to_page(empty_zero_page)) +#define ZERO_PAGE(vaddr) ((void)(vaddr),virt_to_page(empty_zero_page)) extern spinlock_t pgd_lock; extern struct list_head pgd_list; -- cgit v1.2.3 From 95c169251bf734aa555a1e8043e4d88ec97a04ec Mon Sep 17 00:00:00 2001 From: Willem de Bruijn Date: Thu, 25 Apr 2019 12:06:54 -0400 Subject: ipv6: invert flowlabel sharing check in process and user mode A request for a flowlabel fails in process or user exclusive mode must fail if the caller pid or uid does not match. Invert the test. Previously, the test was unsafe wrt PID recycling, but indeed tested for inequality: fl1->owner != fl->owner Fixes: 4f82f45730c68 ("net ip6 flowlabel: Make owner a union of struct pid* and kuid_t") Signed-off-by: Willem de Bruijn Signed-off-by: David S. Miller --- net/ipv6/ip6_flowlabel.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/ipv6/ip6_flowlabel.c b/net/ipv6/ip6_flowlabel.c index cb54a8a3c273..a05036bc808d 100644 --- a/net/ipv6/ip6_flowlabel.c +++ b/net/ipv6/ip6_flowlabel.c @@ -633,9 +633,9 @@ recheck: if (fl1->share == IPV6_FL_S_EXCL || fl1->share != fl->share || ((fl1->share == IPV6_FL_S_PROCESS) && - (fl1->owner.pid == fl->owner.pid)) || + (fl1->owner.pid != fl->owner.pid)) || ((fl1->share == IPV6_FL_S_USER) && - uid_eq(fl1->owner.uid, fl->owner.uid))) + !uid_eq(fl1->owner.uid, fl->owner.uid))) goto release; err = -ENOMEM; -- cgit v1.2.3 From ca2fe2956acef2f87f6c55549874fdd2e92d9824 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 26 Apr 2019 10:10:05 -0700 Subject: tcp: add sanity tests in tcp_add_backlog() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Richard and Bruno both reported that my commit added a bug, and Bruno was able to determine the problem came when a segment wih a FIN packet was coalesced to a prior one in tcp backlog queue. It turns out the header prediction in tcp_rcv_established() looks back to TCP headers in the packet, not in the metadata (aka TCP_SKB_CB(skb)->tcp_flags) The fast path in tcp_rcv_established() is not supposed to handle a FIN flag (it does not call tcp_fin()) Therefore we need to make sure to propagate the FIN flag, so that the coalesced packet does not go through the fast path, the same than a GRO packet carrying a FIN flag. While we are at it, make sure we do not coalesce packets with RST or SYN, or if they do not have ACK set. Many thanks to Richard and Bruno for pinpointing the bad commit, and to Richard for providing a first version of the fix. Fixes: 4f693b55c3d2 ("tcp: implement coalescing on backlog queue") Signed-off-by: Eric Dumazet Reported-by: Richard Purdie Reported-by: Bruno Prémont Signed-off-by: David S. Miller --- net/ipv4/tcp_ipv4.c | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 2f8039a26b08..a2896944aa37 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -1673,7 +1673,9 @@ bool tcp_add_backlog(struct sock *sk, struct sk_buff *skb) if (TCP_SKB_CB(tail)->end_seq != TCP_SKB_CB(skb)->seq || TCP_SKB_CB(tail)->ip_dsfield != TCP_SKB_CB(skb)->ip_dsfield || ((TCP_SKB_CB(tail)->tcp_flags | - TCP_SKB_CB(skb)->tcp_flags) & TCPHDR_URG) || + TCP_SKB_CB(skb)->tcp_flags) & (TCPHDR_SYN | TCPHDR_RST | TCPHDR_URG)) || + !((TCP_SKB_CB(tail)->tcp_flags & + TCP_SKB_CB(skb)->tcp_flags) & TCPHDR_ACK) || ((TCP_SKB_CB(tail)->tcp_flags ^ TCP_SKB_CB(skb)->tcp_flags) & (TCPHDR_ECE | TCPHDR_CWR)) || #ifdef CONFIG_TLS_DEVICE @@ -1692,6 +1694,15 @@ bool tcp_add_backlog(struct sock *sk, struct sk_buff *skb) if (after(TCP_SKB_CB(skb)->ack_seq, TCP_SKB_CB(tail)->ack_seq)) TCP_SKB_CB(tail)->ack_seq = TCP_SKB_CB(skb)->ack_seq; + /* We have to update both TCP_SKB_CB(tail)->tcp_flags and + * thtail->fin, so that the fast path in tcp_rcv_established() + * is not entered if we append a packet with a FIN. + * SYN, RST, URG are not present. + * ACK is set on both packets. + * PSH : we do not really care in TCP stack, + * at least for 'GRO' packets. + */ + thtail->fin |= th->fin; TCP_SKB_CB(tail)->tcp_flags |= TCP_SKB_CB(skb)->tcp_flags; if (TCP_SKB_CB(skb)->has_rxtstamp) { -- cgit v1.2.3 From 1d3fd8a10bedb09006cfc963bfcf051c3021f626 Mon Sep 17 00:00:00 2001 From: Stephen Suryaputra Date: Sat, 27 Apr 2019 09:14:33 -0400 Subject: vrf: Use orig netdev to count Ip6InNoRoutes and a fresh route lookup when sending dest unreach When there is no route to an IPv6 dest addr, skb_dst(skb) points to loopback dev in the case of that the IP6CB(skb)->iif is enslaved to a vrf. This causes Ip6InNoRoutes to be incremented on the loopback dev. This also causes the lookup to fail on icmpv6_send() and the dest unreachable to not sent and Ip6OutNoRoutes gets incremented on the loopback dev. To reproduce: * Gateway configuration: ip link add dev vrf_258 type vrf table 258 ip link set dev enp0s9 master vrf_258 ip addr add 66:1/64 dev enp0s9 ip -6 route add unreachable default metric 8192 table 258 sysctl -w net.ipv6.conf.all.forwarding=1 sysctl -w net.ipv6.conf.enp0s9.forwarding=1 * Sender configuration: ip addr add 66::2/64 dev enp0s9 ip -6 route add default via 66::1 and ping 67::1 for example from the sender. Fix this by counting on the original netdev and reset the skb dst to force a fresh lookup. v2: Fix typo of destination address in the repro steps. v3: Simplify the loopback check (per David Ahern) and use reverse Christmas tree format (per David Miller). Signed-off-by: Stephen Suryaputra Reviewed-by: David Ahern Tested-by: David Ahern Reviewed-by: David Ahern Signed-off-by: David S. Miller --- net/ipv6/route.c | 23 +++++++++++++++++------ 1 file changed, 17 insertions(+), 6 deletions(-) diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 7178e32eb15d..b4899f0de0d0 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -3668,23 +3668,34 @@ int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg) static int ip6_pkt_drop(struct sk_buff *skb, u8 code, int ipstats_mib_noroutes) { - int type; struct dst_entry *dst = skb_dst(skb); + struct net *net = dev_net(dst->dev); + struct inet6_dev *idev; + int type; + + if (netif_is_l3_master(skb->dev) && + dst->dev == net->loopback_dev) + idev = __in6_dev_get_safely(dev_get_by_index_rcu(net, IP6CB(skb)->iif)); + else + idev = ip6_dst_idev(dst); + switch (ipstats_mib_noroutes) { case IPSTATS_MIB_INNOROUTES: type = ipv6_addr_type(&ipv6_hdr(skb)->daddr); if (type == IPV6_ADDR_ANY) { - IP6_INC_STATS(dev_net(dst->dev), - __in6_dev_get_safely(skb->dev), - IPSTATS_MIB_INADDRERRORS); + IP6_INC_STATS(net, idev, IPSTATS_MIB_INADDRERRORS); break; } /* FALLTHROUGH */ case IPSTATS_MIB_OUTNOROUTES: - IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst), - ipstats_mib_noroutes); + IP6_INC_STATS(net, idev, ipstats_mib_noroutes); break; } + + /* Start over by dropping the dst for l3mdev case */ + if (netif_is_l3_master(skb->dev)) + skb_dst_drop(skb); + icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0); kfree_skb(skb); return 0; -- cgit v1.2.3 From 6c0afef5fb0c27758f4d52b2210c61b6bd8b4470 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sat, 27 Apr 2019 16:49:06 -0700 Subject: ipv6/flowlabel: wait rcu grace period before put_pid() syzbot was able to catch a use-after-free read in pid_nr_ns() [1] ip6fl_seq_show() seems to use RCU protection, dereferencing fl->owner.pid but fl_free() releases fl->owner.pid before rcu grace period is started. [1] BUG: KASAN: use-after-free in pid_nr_ns+0x128/0x140 kernel/pid.c:407 Read of size 4 at addr ffff888094012a04 by task syz-executor.0/18087 CPU: 0 PID: 18087 Comm: syz-executor.0 Not tainted 5.1.0-rc6+ #89 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Call Trace: __dump_stack lib/dump_stack.c:77 [inline] dump_stack+0x172/0x1f0 lib/dump_stack.c:113 print_address_description.cold+0x7c/0x20d mm/kasan/report.c:187 kasan_report.cold+0x1b/0x40 mm/kasan/report.c:317 __asan_report_load4_noabort+0x14/0x20 mm/kasan/generic_report.c:131 pid_nr_ns+0x128/0x140 kernel/pid.c:407 ip6fl_seq_show+0x2f8/0x4f0 net/ipv6/ip6_flowlabel.c:794 seq_read+0xad3/0x1130 fs/seq_file.c:268 proc_reg_read+0x1fe/0x2c0 fs/proc/inode.c:227 do_loop_readv_writev fs/read_write.c:701 [inline] do_loop_readv_writev fs/read_write.c:688 [inline] do_iter_read+0x4a9/0x660 fs/read_write.c:922 vfs_readv+0xf0/0x160 fs/read_write.c:984 kernel_readv fs/splice.c:358 [inline] default_file_splice_read+0x475/0x890 fs/splice.c:413 do_splice_to+0x12a/0x190 fs/splice.c:876 splice_direct_to_actor+0x2d2/0x970 fs/splice.c:953 do_splice_direct+0x1da/0x2a0 fs/splice.c:1062 do_sendfile+0x597/0xd00 fs/read_write.c:1443 __do_sys_sendfile64 fs/read_write.c:1498 [inline] __se_sys_sendfile64 fs/read_write.c:1490 [inline] __x64_sys_sendfile64+0x15a/0x220 fs/read_write.c:1490 do_syscall_64+0x103/0x610 arch/x86/entry/common.c:290 entry_SYSCALL_64_after_hwframe+0x49/0xbe RIP: 0033:0x458da9 Code: ad b8 fb ff c3 66 2e 0f 1f 84 00 00 00 00 00 66 90 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 0f 83 7b b8 fb ff c3 66 2e 0f 1f 84 00 00 00 00 RSP: 002b:00007f300d24bc78 EFLAGS: 00000246 ORIG_RAX: 0000000000000028 RAX: ffffffffffffffda RBX: 0000000000000004 RCX: 0000000000458da9 RDX: 00000000200000c0 RSI: 0000000000000008 RDI: 0000000000000007 RBP: 000000000073bf00 R08: 0000000000000000 R09: 0000000000000000 R10: 000000000000005a R11: 0000000000000246 R12: 00007f300d24c6d4 R13: 00000000004c5fa3 R14: 00000000004da748 R15: 00000000ffffffff Allocated by task 17543: save_stack+0x45/0xd0 mm/kasan/common.c:75 set_track mm/kasan/common.c:87 [inline] __kasan_kmalloc mm/kasan/common.c:497 [inline] __kasan_kmalloc.constprop.0+0xcf/0xe0 mm/kasan/common.c:470 kasan_slab_alloc+0xf/0x20 mm/kasan/common.c:505 slab_post_alloc_hook mm/slab.h:437 [inline] slab_alloc mm/slab.c:3393 [inline] kmem_cache_alloc+0x11a/0x6f0 mm/slab.c:3555 alloc_pid+0x55/0x8f0 kernel/pid.c:168 copy_process.part.0+0x3b08/0x7980 kernel/fork.c:1932 copy_process kernel/fork.c:1709 [inline] _do_fork+0x257/0xfd0 kernel/fork.c:2226 __do_sys_clone kernel/fork.c:2333 [inline] __se_sys_clone kernel/fork.c:2327 [inline] __x64_sys_clone+0xbf/0x150 kernel/fork.c:2327 do_syscall_64+0x103/0x610 arch/x86/entry/common.c:290 entry_SYSCALL_64_after_hwframe+0x49/0xbe Freed by task 7789: save_stack+0x45/0xd0 mm/kasan/common.c:75 set_track mm/kasan/common.c:87 [inline] __kasan_slab_free+0x102/0x150 mm/kasan/common.c:459 kasan_slab_free+0xe/0x10 mm/kasan/common.c:467 __cache_free mm/slab.c:3499 [inline] kmem_cache_free+0x86/0x260 mm/slab.c:3765 put_pid.part.0+0x111/0x150 kernel/pid.c:111 put_pid+0x20/0x30 kernel/pid.c:105 fl_free+0xbe/0xe0 net/ipv6/ip6_flowlabel.c:102 ip6_fl_gc+0x295/0x3e0 net/ipv6/ip6_flowlabel.c:152 call_timer_fn+0x190/0x720 kernel/time/timer.c:1325 expire_timers kernel/time/timer.c:1362 [inline] __run_timers kernel/time/timer.c:1681 [inline] __run_timers kernel/time/timer.c:1649 [inline] run_timer_softirq+0x652/0x1700 kernel/time/timer.c:1694 __do_softirq+0x266/0x95a kernel/softirq.c:293 The buggy address belongs to the object at ffff888094012a00 which belongs to the cache pid_2 of size 88 The buggy address is located 4 bytes inside of 88-byte region [ffff888094012a00, ffff888094012a58) The buggy address belongs to the page: page:ffffea0002500480 count:1 mapcount:0 mapping:ffff88809a483080 index:0xffff888094012980 flags: 0x1fffc0000000200(slab) raw: 01fffc0000000200 ffffea00018a3508 ffffea0002524a88 ffff88809a483080 raw: ffff888094012980 ffff888094012000 000000010000001b 0000000000000000 page dumped because: kasan: bad access detected Memory state around the buggy address: ffff888094012900: fb fb fb fb fb fb fb fb fb fb fb fc fc fc fc fc ffff888094012980: fb fb fb fb fb fb fb fb fb fb fb fc fc fc fc fc >ffff888094012a00: fb fb fb fb fb fb fb fb fb fb fb fc fc fc fc fc ^ ffff888094012a80: fb fb fb fb fb fb fb fb fb fb fb fc fc fc fc fc ffff888094012b00: fb fb fb fb fb fb fb fb fb fb fb fc fc fc fc fc Fixes: 4f82f45730c6 ("net ip6 flowlabel: Make owner a union of struct pid * and kuid_t") Signed-off-by: Eric Dumazet Cc: Eric W. Biederman Reported-by: syzbot Signed-off-by: David S. Miller --- net/ipv6/ip6_flowlabel.c | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/net/ipv6/ip6_flowlabel.c b/net/ipv6/ip6_flowlabel.c index a05036bc808d..be5f3d7ceb96 100644 --- a/net/ipv6/ip6_flowlabel.c +++ b/net/ipv6/ip6_flowlabel.c @@ -94,15 +94,21 @@ static struct ip6_flowlabel *fl_lookup(struct net *net, __be32 label) return fl; } +static void fl_free_rcu(struct rcu_head *head) +{ + struct ip6_flowlabel *fl = container_of(head, struct ip6_flowlabel, rcu); + + if (fl->share == IPV6_FL_S_PROCESS) + put_pid(fl->owner.pid); + kfree(fl->opt); + kfree(fl); +} + static void fl_free(struct ip6_flowlabel *fl) { - if (fl) { - if (fl->share == IPV6_FL_S_PROCESS) - put_pid(fl->owner.pid); - kfree(fl->opt); - kfree_rcu(fl, rcu); - } + if (fl) + call_rcu(&fl->rcu, fl_free_rcu); } static void fl_release(struct ip6_flowlabel *fl) -- cgit v1.2.3 From b13023421b5179413421333f602850914f6a7ad8 Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 30 Apr 2019 08:34:08 +0100 Subject: rxrpc: Fix net namespace cleanup In rxrpc_destroy_all_calls(), there are two phases: (1) make sure the ->calls list is empty, emitting error messages if not, and (2) wait for the RCU cleanup to happen on outstanding calls (ie. ->nr_calls becomes 0). To avoid taking the call_lock, the function prechecks ->calls and if empty, it returns to avoid taking the lock - this is wrong, however: it still needs to go and do the second phase and wait for ->nr_calls to become 0. Without this, the rxrpc_net struct may get deallocated before we get to the RCU cleanup for the last calls. This can lead to: Slab corruption (Not tainted): kmalloc-16k start=ffff88802b178000, len=16384 050: 6b 6b 6b 6b 6b 6b 6b 6b 61 6b 6b 6b 6b 6b 6b 6b kkkkkkkkakkkkkkk Note the "61" at offset 0x58. This corresponds to the ->nr_calls member of struct rxrpc_net (which is >9k in size, and thus allocated out of the 16k slab). Fix this by flipping the condition on the if-statement, putting the locked section inside the if-body and dropping the return from there. The function will then always go on to wait for the RCU cleanup on outstanding calls. Fixes: 2baec2c3f854 ("rxrpc: Support network namespacing") Signed-off-by: David Howells Signed-off-by: David S. Miller --- net/rxrpc/call_object.c | 32 ++++++++++++++++---------------- 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/net/rxrpc/call_object.c b/net/rxrpc/call_object.c index 8aa2937b069f..fe96881a334d 100644 --- a/net/rxrpc/call_object.c +++ b/net/rxrpc/call_object.c @@ -604,30 +604,30 @@ void rxrpc_destroy_all_calls(struct rxrpc_net *rxnet) _enter(""); - if (list_empty(&rxnet->calls)) - return; + if (!list_empty(&rxnet->calls)) { + write_lock(&rxnet->call_lock); - write_lock(&rxnet->call_lock); + while (!list_empty(&rxnet->calls)) { + call = list_entry(rxnet->calls.next, + struct rxrpc_call, link); + _debug("Zapping call %p", call); - while (!list_empty(&rxnet->calls)) { - call = list_entry(rxnet->calls.next, struct rxrpc_call, link); - _debug("Zapping call %p", call); + rxrpc_see_call(call); + list_del_init(&call->link); - rxrpc_see_call(call); - list_del_init(&call->link); + pr_err("Call %p still in use (%d,%s,%lx,%lx)!\n", + call, atomic_read(&call->usage), + rxrpc_call_states[call->state], + call->flags, call->events); - pr_err("Call %p still in use (%d,%s,%lx,%lx)!\n", - call, atomic_read(&call->usage), - rxrpc_call_states[call->state], - call->flags, call->events); + write_unlock(&rxnet->call_lock); + cond_resched(); + write_lock(&rxnet->call_lock); + } write_unlock(&rxnet->call_lock); - cond_resched(); - write_lock(&rxnet->call_lock); } - write_unlock(&rxnet->call_lock); - atomic_dec(&rxnet->nr_calls); wait_var_event(&rxnet->nr_calls, !atomic_read(&rxnet->nr_calls)); } -- cgit v1.2.3 From f949a12fd697479f68d99dc65e9bbab68ee49043 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Tue, 30 Apr 2019 13:44:19 +0300 Subject: net: dsa: bcm_sf2: fix buffer overflow doing set_rxnfc The "fs->location" is a u32 that comes from the user in ethtool_set_rxnfc(). We can't pass unclamped values to test_bit() or it results in an out of bounds access beyond the end of the bitmap. Fixes: 7318166cacad ("net: dsa: bcm_sf2: Add support for ethtool::rxnfc") Signed-off-by: Dan Carpenter Signed-off-by: David S. Miller --- drivers/net/dsa/bcm_sf2_cfp.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/net/dsa/bcm_sf2_cfp.c b/drivers/net/dsa/bcm_sf2_cfp.c index e6234d209787..4212bc4a5f31 100644 --- a/drivers/net/dsa/bcm_sf2_cfp.c +++ b/drivers/net/dsa/bcm_sf2_cfp.c @@ -886,6 +886,9 @@ static int bcm_sf2_cfp_rule_set(struct dsa_switch *ds, int port, fs->m_ext.data[1])) return -EINVAL; + if (fs->location != RX_CLS_LOC_ANY && fs->location >= CFP_NUM_RULES) + return -EINVAL; + if (fs->location != RX_CLS_LOC_ANY && test_bit(fs->location, priv->cfp.used)) return -EBUSY; @@ -974,6 +977,9 @@ static int bcm_sf2_cfp_rule_del(struct bcm_sf2_priv *priv, int port, u32 loc) struct cfp_rule *rule; int ret; + if (loc >= CFP_NUM_RULES) + return -EINVAL; + /* Refuse deleting unused rules, and those that are not unique since * that could leave IPv6 rules with one of the chained rule in the * table. -- cgit v1.2.3 From c93ad1337ad06a718890a89cdd85188ff9a5a5cc Mon Sep 17 00:00:00 2001 From: YueHaibing Date: Tue, 30 Apr 2019 19:34:08 +0800 Subject: appletalk: Set error code if register_snap_client failed If register_snap_client fails in atalk_init, error code should be set, otherwise it will triggers NULL pointer dereference while unloading module. Fixes: 9804501fa122 ("appletalk: Fix potential NULL pointer dereference in unregister_snap_client") Signed-off-by: YueHaibing Signed-off-by: David S. Miller --- net/appletalk/ddp.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/appletalk/ddp.c b/net/appletalk/ddp.c index 709d2542f729..dbe8b1993be9 100644 --- a/net/appletalk/ddp.c +++ b/net/appletalk/ddp.c @@ -1920,6 +1920,7 @@ static int __init atalk_init(void) ddp_dl = register_snap_client(ddp_snap_id, atalk_rcv); if (!ddp_dl) { pr_crit("Unable to register DDP with SNAP.\n"); + rc = -ENOMEM; goto out_sock; } -- cgit v1.2.3 From a622b40035d16196bf19b2b33b854862595245fc Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 30 Apr 2019 06:27:58 -0700 Subject: l2ip: fix possible use-after-free Before taking a refcount on a rcu protected structure, we need to make sure the refcount is not zero. syzbot reported : refcount_t: increment on 0; use-after-free. WARNING: CPU: 1 PID: 23533 at lib/refcount.c:156 refcount_inc_checked lib/refcount.c:156 [inline] WARNING: CPU: 1 PID: 23533 at lib/refcount.c:156 refcount_inc_checked+0x61/0x70 lib/refcount.c:154 Kernel panic - not syncing: panic_on_warn set ... CPU: 1 PID: 23533 Comm: syz-executor.2 Not tainted 5.1.0-rc7+ #93 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Call Trace: __dump_stack lib/dump_stack.c:77 [inline] dump_stack+0x172/0x1f0 lib/dump_stack.c:113 panic+0x2cb/0x65c kernel/panic.c:214 __warn.cold+0x20/0x45 kernel/panic.c:571 report_bug+0x263/0x2b0 lib/bug.c:186 fixup_bug arch/x86/kernel/traps.c:179 [inline] fixup_bug arch/x86/kernel/traps.c:174 [inline] do_error_trap+0x11b/0x200 arch/x86/kernel/traps.c:272 do_invalid_op+0x37/0x50 arch/x86/kernel/traps.c:291 invalid_op+0x14/0x20 arch/x86/entry/entry_64.S:973 RIP: 0010:refcount_inc_checked lib/refcount.c:156 [inline] RIP: 0010:refcount_inc_checked+0x61/0x70 lib/refcount.c:154 Code: 1d 98 2b 2a 06 31 ff 89 de e8 db 2c 40 fe 84 db 75 dd e8 92 2b 40 fe 48 c7 c7 20 7a a1 87 c6 05 78 2b 2a 06 01 e8 7d d9 12 fe <0f> 0b eb c1 90 90 90 90 90 90 90 90 90 90 90 55 48 89 e5 41 57 41 RSP: 0018:ffff888069f0fba8 EFLAGS: 00010286 RAX: 0000000000000000 RBX: 0000000000000000 RCX: 0000000000000000 RDX: 000000000000f353 RSI: ffffffff815afcb6 RDI: ffffed100d3e1f67 RBP: ffff888069f0fbb8 R08: ffff88809b1845c0 R09: ffffed1015d23ef1 R10: ffffed1015d23ef0 R11: ffff8880ae91f787 R12: ffff8880a8f26968 R13: 0000000000000004 R14: dffffc0000000000 R15: ffff8880a49a6440 l2tp_tunnel_inc_refcount net/l2tp/l2tp_core.h:240 [inline] l2tp_tunnel_get+0x250/0x580 net/l2tp/l2tp_core.c:173 pppol2tp_connect+0xc00/0x1c70 net/l2tp/l2tp_ppp.c:702 __sys_connect+0x266/0x330 net/socket.c:1808 __do_sys_connect net/socket.c:1819 [inline] __se_sys_connect net/socket.c:1816 [inline] __x64_sys_connect+0x73/0xb0 net/socket.c:1816 Fixes: 54652eb12c1b ("l2tp: hold tunnel while looking up sessions in l2tp_netlink") Signed-off-by: Eric Dumazet Reported-by: syzbot Cc: Guillaume Nault Signed-off-by: David S. Miller --- net/l2tp/l2tp_core.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c index aee33d132018..52b5a2797c0c 100644 --- a/net/l2tp/l2tp_core.c +++ b/net/l2tp/l2tp_core.c @@ -169,8 +169,8 @@ struct l2tp_tunnel *l2tp_tunnel_get(const struct net *net, u32 tunnel_id) rcu_read_lock_bh(); list_for_each_entry_rcu(tunnel, &pn->l2tp_tunnel_list, list) { - if (tunnel->tunnel_id == tunnel_id) { - l2tp_tunnel_inc_refcount(tunnel); + if (tunnel->tunnel_id == tunnel_id && + refcount_inc_not_zero(&tunnel->ref_count)) { rcu_read_unlock_bh(); return tunnel; @@ -190,8 +190,8 @@ struct l2tp_tunnel *l2tp_tunnel_get_nth(const struct net *net, int nth) rcu_read_lock_bh(); list_for_each_entry_rcu(tunnel, &pn->l2tp_tunnel_list, list) { - if (++count > nth) { - l2tp_tunnel_inc_refcount(tunnel); + if (++count > nth && + refcount_inc_not_zero(&tunnel->ref_count)) { rcu_read_unlock_bh(); return tunnel; } -- cgit v1.2.3 From 8449eedaa1da6a51d67190c905b1b54243e095f6 Mon Sep 17 00:00:00 2001 From: Stefan Bühler Date: Sat, 27 Apr 2019 20:34:19 +0200 Subject: io_uring: fix handling SQEs requesting NOWAIT MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Not all request types set REQ_F_FORCE_NONBLOCK when they needed async punting; reverse logic instead and set REQ_F_NOWAIT if request mustn't be punted. Signed-off-by: Stefan Bühler Merged with my previous patch for this. Signed-off-by: Jens Axboe --- fs/io_uring.c | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index 0e9fb2cb1984..d5e23a6dd6aa 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -221,7 +221,7 @@ struct io_kiocb { struct list_head list; unsigned int flags; refcount_t refs; -#define REQ_F_FORCE_NONBLOCK 1 /* inline submission attempt */ +#define REQ_F_NOWAIT 1 /* must not punt to workers */ #define REQ_F_IOPOLL_COMPLETED 2 /* polled IO has completed */ #define REQ_F_FIXED_FILE 4 /* ctx owns file */ #define REQ_F_SEQ_PREV 8 /* sequential with previous */ @@ -774,10 +774,14 @@ static int io_prep_rw(struct io_kiocb *req, const struct sqe_submit *s, ret = kiocb_set_rw_flags(kiocb, READ_ONCE(sqe->rw_flags)); if (unlikely(ret)) return ret; - if (force_nonblock) { + + /* don't allow async punt if RWF_NOWAIT was requested */ + if (kiocb->ki_flags & IOCB_NOWAIT) + req->flags |= REQ_F_NOWAIT; + + if (force_nonblock) kiocb->ki_flags |= IOCB_NOWAIT; - req->flags |= REQ_F_FORCE_NONBLOCK; - } + if (ctx->flags & IORING_SETUP_IOPOLL) { if (!(kiocb->ki_flags & IOCB_DIRECT) || !kiocb->ki_filp->f_op->iopoll) @@ -1436,8 +1440,7 @@ restart: struct sqe_submit *s = &req->submit; const struct io_uring_sqe *sqe = s->sqe; - /* Ensure we clear previously set forced non-block flag */ - req->flags &= ~REQ_F_FORCE_NONBLOCK; + /* Ensure we clear previously set non-block flag */ req->rw.ki_flags &= ~IOCB_NOWAIT; ret = 0; @@ -1623,7 +1626,7 @@ static int io_submit_sqe(struct io_ring_ctx *ctx, struct sqe_submit *s, goto out; ret = __io_submit_sqe(ctx, req, s, true); - if (ret == -EAGAIN) { + if (ret == -EAGAIN && !(req->flags & REQ_F_NOWAIT)) { struct io_uring_sqe *sqe_copy; sqe_copy = kmalloc(sizeof(*sqe_copy), GFP_KERNEL); -- cgit v1.2.3 From 1e84b97b7377bd0198f87b49ad3e396e84bf0458 Mon Sep 17 00:00:00 2001 From: Stefan Bühler Date: Wed, 24 Apr 2019 23:54:16 +0200 Subject: io_uring: fix notes on barriers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The application reading the CQ ring needs a barrier to pair with the smp_store_release in io_commit_cqring, not the barrier after it. Also a write barrier *after* writing something (but not *before* writing anything interesting) doesn't order anything, so an smp_wmb() after writing SQ tail is not needed. Additionally consider reading SQ head and writing CQ tail in the notes. Also add some clarifications how the various other fields in the ring buffers are used. Signed-off-by: Stefan Bühler Signed-off-by: Jens Axboe --- fs/io_uring.c | 119 +++++++++++++++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 110 insertions(+), 9 deletions(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index d5e23a6dd6aa..7ab93e854eb2 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -4,15 +4,28 @@ * supporting fast/efficient IO. * * A note on the read/write ordering memory barriers that are matched between - * the application and kernel side. When the application reads the CQ ring - * tail, it must use an appropriate smp_rmb() to order with the smp_wmb() - * the kernel uses after writing the tail. Failure to do so could cause a - * delay in when the application notices that completion events available. - * This isn't a fatal condition. Likewise, the application must use an - * appropriate smp_wmb() both before writing the SQ tail, and after writing - * the SQ tail. The first one orders the sqe writes with the tail write, and - * the latter is paired with the smp_rmb() the kernel will issue before - * reading the SQ tail on submission. + * the application and kernel side. + * + * After the application reads the CQ ring tail, it must use an + * appropriate smp_rmb() to pair with the smp_wmb() the kernel uses + * before writing the tail (using smp_load_acquire to read the tail will + * do). It also needs a smp_mb() before updating CQ head (ordering the + * entry load(s) with the head store), pairing with an implicit barrier + * through a control-dependency in io_get_cqring (smp_store_release to + * store head will do). Failure to do so could lead to reading invalid + * CQ entries. + * + * Likewise, the application must use an appropriate smp_wmb() before + * writing the SQ tail (ordering SQ entry stores with the tail store), + * which pairs with smp_load_acquire in io_get_sqring (smp_store_release + * to store the tail will do). And it needs a barrier ordering the SQ + * head load before writing new SQ entries (smp_load_acquire to read + * head will do). + * + * When using the SQ poll thread (IORING_SETUP_SQPOLL), the application + * needs to check the SQ flags for IORING_SQ_NEED_WAKEUP *after* + * updating the SQ tail; a full memory barrier smp_mb() is needed + * between. * * Also see the examples in the liburing library: * @@ -70,20 +83,108 @@ struct io_uring { u32 tail ____cacheline_aligned_in_smp; }; +/* + * This data is shared with the application through the mmap at offset + * IORING_OFF_SQ_RING. + * + * The offsets to the member fields are published through struct + * io_sqring_offsets when calling io_uring_setup. + */ struct io_sq_ring { + /* + * Head and tail offsets into the ring; the offsets need to be + * masked to get valid indices. + * + * The kernel controls head and the application controls tail. + */ struct io_uring r; + /* + * Bitmask to apply to head and tail offsets (constant, equals + * ring_entries - 1) + */ u32 ring_mask; + /* Ring size (constant, power of 2) */ u32 ring_entries; + /* + * Number of invalid entries dropped by the kernel due to + * invalid index stored in array + * + * Written by the kernel, shouldn't be modified by the + * application (i.e. get number of "new events" by comparing to + * cached value). + * + * After a new SQ head value was read by the application this + * counter includes all submissions that were dropped reaching + * the new SQ head (and possibly more). + */ u32 dropped; + /* + * Runtime flags + * + * Written by the kernel, shouldn't be modified by the + * application. + * + * The application needs a full memory barrier before checking + * for IORING_SQ_NEED_WAKEUP after updating the sq tail. + */ u32 flags; + /* + * Ring buffer of indices into array of io_uring_sqe, which is + * mmapped by the application using the IORING_OFF_SQES offset. + * + * This indirection could e.g. be used to assign fixed + * io_uring_sqe entries to operations and only submit them to + * the queue when needed. + * + * The kernel modifies neither the indices array nor the entries + * array. + */ u32 array[]; }; +/* + * This data is shared with the application through the mmap at offset + * IORING_OFF_CQ_RING. + * + * The offsets to the member fields are published through struct + * io_cqring_offsets when calling io_uring_setup. + */ struct io_cq_ring { + /* + * Head and tail offsets into the ring; the offsets need to be + * masked to get valid indices. + * + * The application controls head and the kernel tail. + */ struct io_uring r; + /* + * Bitmask to apply to head and tail offsets (constant, equals + * ring_entries - 1) + */ u32 ring_mask; + /* Ring size (constant, power of 2) */ u32 ring_entries; + /* + * Number of completion events lost because the queue was full; + * this should be avoided by the application by making sure + * there are not more requests pending thatn there is space in + * the completion queue. + * + * Written by the kernel, shouldn't be modified by the + * application (i.e. get number of "new events" by comparing to + * cached value). + * + * As completion events come in out of order this counter is not + * ordered with any other data. + */ u32 overflow; + /* + * Ring buffer of completion events. + * + * The kernel writes completion events fresh every time they are + * produced, so the application is allowed to modify pending + * entries. + */ struct io_uring_cqe cqes[]; }; -- cgit v1.2.3 From 4f7067c3fb7f2974363a28c597a41949d971af02 Mon Sep 17 00:00:00 2001 From: Stefan Bühler Date: Wed, 24 Apr 2019 23:54:17 +0200 Subject: io_uring: remove unnecessary barrier before wq_has_sleeper MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit wq_has_sleeper has a full barrier internally. The smp_rmb barrier in io_uring_poll synchronizes with it. Signed-off-by: Stefan Bühler Signed-off-by: Jens Axboe --- fs/io_uring.c | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index 7ab93e854eb2..bb71b7f00bb3 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -418,12 +418,6 @@ static void io_commit_cqring(struct io_ring_ctx *ctx) /* order cqe stores with ring update */ smp_store_release(&ring->r.tail, ctx->cached_cq_tail); - /* - * Write sider barrier of tail update, app has read side. See - * comment at the top of this file. - */ - smp_wmb(); - if (wq_has_sleeper(&ctx->cq_wait)) { wake_up_interruptible(&ctx->cq_wait); kill_fasync(&ctx->cq_fasync, SIGIO, POLL_IN); @@ -2677,7 +2671,10 @@ static __poll_t io_uring_poll(struct file *file, poll_table *wait) __poll_t mask = 0; poll_wait(file, &ctx->cq_wait, wait); - /* See comment at the top of this file */ + /* + * synchronizes with barrier from wq_has_sleeper call in + * io_commit_cqring + */ smp_rmb(); if (READ_ONCE(ctx->sq_ring->r.tail) - ctx->cached_sq_head != ctx->sq_ring->ring_entries) -- cgit v1.2.3 From 115e12e58dbc055e98c965e3255aed7b20214f95 Mon Sep 17 00:00:00 2001 From: Stefan Bühler Date: Wed, 24 Apr 2019 23:54:18 +0200 Subject: io_uring: remove unnecessary barrier before reading cq head MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The memory operations before reading cq head are unrelated and we don't care about their order. Document that the control dependency in combination with READ_ONCE and WRITE_ONCE forms a barrier we need. Signed-off-by: Stefan Bühler Signed-off-by: Jens Axboe --- fs/io_uring.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index bb71b7f00bb3..3671a654a146 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -431,8 +431,11 @@ static struct io_uring_cqe *io_get_cqring(struct io_ring_ctx *ctx) unsigned tail; tail = ctx->cached_cq_tail; - /* See comment at the top of the file */ - smp_rmb(); + /* + * writes to the cq entry need to come after reading head; the + * control dependency is enough as we're using WRITE_ONCE to + * fill the cq entry + */ if (tail - READ_ONCE(ring->r.head) == ring->ring_entries) return NULL; -- cgit v1.2.3 From 9e4c15a3939448d2ea9b9bf59561183bbe3fdc49 Mon Sep 17 00:00:00 2001 From: Stefan Bühler Date: Wed, 24 Apr 2019 23:54:19 +0200 Subject: io_uring: remove unnecessary barrier after updating SQ head MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit There is no operation afterwards to order with. Signed-off-by: Stefan Bühler Signed-off-by: Jens Axboe --- fs/io_uring.c | 6 ------ 1 file changed, 6 deletions(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index 3671a654a146..d3c57ee233fe 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -1798,12 +1798,6 @@ static void io_commit_sqring(struct io_ring_ctx *ctx) * write new data to them. */ smp_store_release(&ring->r.head, ctx->cached_sq_head); - - /* - * write side barrier of head update, app has read side. See - * comment at the top of this file - */ - smp_wmb(); } } -- cgit v1.2.3 From 82ab082c0e2f8592c2ff6b2ab99a92d8406c8c2c Mon Sep 17 00:00:00 2001 From: Stefan Bühler Date: Wed, 24 Apr 2019 23:54:20 +0200 Subject: io_uring: remove unnecessary barrier before reading SQ tail MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit There is no operation before to order with. Signed-off-by: Stefan Bühler Signed-off-by: Jens Axboe --- fs/io_uring.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index d3c57ee233fe..662f1c070c8c 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -1831,8 +1831,6 @@ static bool io_get_sqring(struct io_ring_ctx *ctx, struct sqe_submit *s) * though the application is the one updating it. */ head = ctx->cached_sq_head; - /* See comment at the top of this file */ - smp_rmb(); /* make sure SQ entry isn't read before tail */ if (head == smp_load_acquire(&ring->r.tail)) return false; -- cgit v1.2.3 From b841f19524a16cd93a39f9306191f85c549a2bc2 Mon Sep 17 00:00:00 2001 From: Stefan Bühler Date: Wed, 24 Apr 2019 23:54:21 +0200 Subject: io_uring: remove unnecessary barrier after incrementing dropped counter MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit smp_store_release in io_commit_sqring already orders the store to dropped before the update to SQ head. Signed-off-by: Stefan Bühler Signed-off-by: Jens Axboe --- fs/io_uring.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index 662f1c070c8c..2ebc33cc907b 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -1846,8 +1846,6 @@ static bool io_get_sqring(struct io_ring_ctx *ctx, struct sqe_submit *s) /* drop invalid entries */ ctx->cached_sq_head++; ring->dropped++; - /* See comment at the top of this file */ - smp_wmb(); return false; } -- cgit v1.2.3 From 62977281a6384d3904c02272a638cc3ac3bac54d Mon Sep 17 00:00:00 2001 From: Stefan Bühler Date: Wed, 24 Apr 2019 23:54:22 +0200 Subject: io_uring: remove unnecessary barrier after unsetting IORING_SQ_NEED_WAKEUP MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit There is no operation to order with afterwards, and removing the flag is not critical in any way. There will always be a "race condition" where the application will trigger IORING_ENTER_SQ_WAKEUP when it isn't actually needed. Signed-off-by: Stefan Bühler Signed-off-by: Jens Axboe --- fs/io_uring.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index 2ebc33cc907b..77b247b5d10b 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -1969,13 +1969,11 @@ static int io_sq_thread(void *data) finish_wait(&ctx->sqo_wait, &wait); ctx->sq_ring->flags &= ~IORING_SQ_NEED_WAKEUP; - smp_wmb(); continue; } finish_wait(&ctx->sqo_wait, &wait); ctx->sq_ring->flags &= ~IORING_SQ_NEED_WAKEUP; - smp_wmb(); } i = 0; -- cgit v1.2.3 From 2c2a2fb1e2a9256714338875bede6b7cbd4b9542 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Tue, 30 Apr 2019 11:18:21 +0200 Subject: Revert "ACPICA: Clear status of GPEs before enabling them" Revert commit c8b1917c8987 ("ACPICA: Clear status of GPEs before enabling them") that causes problems with Thunderbolt controllers to occur if a dock device is connected at init time (the xhci_hcd and thunderbolt modules crash which prevents peripherals connected through them from working). Commit c8b1917c8987 effectively causes commit ecc1165b8b74 ("ACPICA: Dispatch active GPEs at init time") to get undone, so the problem addressed by commit ecc1165b8b74 appears again as a result of it. Fixes: c8b1917c8987 ("ACPICA: Clear status of GPEs before enabling them") Link: https://lore.kernel.org/lkml/s5hy33siofw.wl-tiwai@suse.de/T/#u Link: https://bugzilla.opensuse.org/show_bug.cgi?id=1132943 Reported-by: Michael Hirmke Reported-by: Takashi Iwai Cc: 4.17+ # 4.17+ Signed-off-by: Rafael J. Wysocki --- drivers/acpi/acpica/evgpe.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/drivers/acpi/acpica/evgpe.c b/drivers/acpi/acpica/evgpe.c index 5e9d7348c16f..62d3aa74277b 100644 --- a/drivers/acpi/acpica/evgpe.c +++ b/drivers/acpi/acpica/evgpe.c @@ -81,12 +81,8 @@ acpi_status acpi_ev_enable_gpe(struct acpi_gpe_event_info *gpe_event_info) ACPI_FUNCTION_TRACE(ev_enable_gpe); - /* Clear the GPE status */ - status = acpi_hw_clear_gpe(gpe_event_info); - if (ACPI_FAILURE(status)) - return_ACPI_STATUS(status); - /* Enable the requested GPE */ + status = acpi_hw_low_set_gpe(gpe_event_info, ACPI_GPE_ENABLE); return_ACPI_STATUS(status); } -- cgit v1.2.3 From 0e2338749192ce0e52e7174c5352f627632f478a Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sun, 28 Apr 2019 12:22:25 -0700 Subject: ipv6: fix races in ip6_dst_destroy() We had many syzbot reports that seem to be caused by use-after-free of struct fib6_info. ip6_dst_destroy(), fib6_drop_pcpu_from() and rt6_remove_exception() are writers vs rt->from, and use non consistent synchronization among themselves. Switching to xchg() will solve the issues with no possible lockdep issues. BUG: KASAN: user-memory-access in atomic_dec_and_test include/asm-generic/atomic-instrumented.h:747 [inline] BUG: KASAN: user-memory-access in fib6_info_release include/net/ip6_fib.h:294 [inline] BUG: KASAN: user-memory-access in fib6_info_release include/net/ip6_fib.h:292 [inline] BUG: KASAN: user-memory-access in fib6_drop_pcpu_from net/ipv6/ip6_fib.c:927 [inline] BUG: KASAN: user-memory-access in fib6_purge_rt+0x4f6/0x670 net/ipv6/ip6_fib.c:960 Write of size 4 at addr 0000000000ffffb4 by task syz-executor.1/7649 CPU: 0 PID: 7649 Comm: syz-executor.1 Not tainted 5.1.0-rc6+ #183 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Call Trace: __dump_stack lib/dump_stack.c:77 [inline] dump_stack+0x172/0x1f0 lib/dump_stack.c:113 kasan_report.cold+0x5/0x40 mm/kasan/report.c:321 check_memory_region_inline mm/kasan/generic.c:185 [inline] check_memory_region+0x123/0x190 mm/kasan/generic.c:191 kasan_check_write+0x14/0x20 mm/kasan/common.c:108 atomic_dec_and_test include/asm-generic/atomic-instrumented.h:747 [inline] fib6_info_release include/net/ip6_fib.h:294 [inline] fib6_info_release include/net/ip6_fib.h:292 [inline] fib6_drop_pcpu_from net/ipv6/ip6_fib.c:927 [inline] fib6_purge_rt+0x4f6/0x670 net/ipv6/ip6_fib.c:960 fib6_del_route net/ipv6/ip6_fib.c:1813 [inline] fib6_del+0xac2/0x10a0 net/ipv6/ip6_fib.c:1844 fib6_clean_node+0x3a8/0x590 net/ipv6/ip6_fib.c:2006 fib6_walk_continue+0x495/0x900 net/ipv6/ip6_fib.c:1928 fib6_walk+0x9d/0x100 net/ipv6/ip6_fib.c:1976 fib6_clean_tree+0xe0/0x120 net/ipv6/ip6_fib.c:2055 __fib6_clean_all+0x118/0x2a0 net/ipv6/ip6_fib.c:2071 fib6_clean_all+0x2b/0x40 net/ipv6/ip6_fib.c:2082 rt6_sync_down_dev+0x134/0x150 net/ipv6/route.c:4057 rt6_disable_ip+0x27/0x5f0 net/ipv6/route.c:4062 addrconf_ifdown+0xa2/0x1220 net/ipv6/addrconf.c:3705 addrconf_notify+0x19a/0x2260 net/ipv6/addrconf.c:3630 notifier_call_chain+0xc7/0x240 kernel/notifier.c:93 __raw_notifier_call_chain kernel/notifier.c:394 [inline] raw_notifier_call_chain+0x2e/0x40 kernel/notifier.c:401 call_netdevice_notifiers_info+0x3f/0x90 net/core/dev.c:1753 call_netdevice_notifiers_extack net/core/dev.c:1765 [inline] call_netdevice_notifiers net/core/dev.c:1779 [inline] dev_close_many+0x33f/0x6f0 net/core/dev.c:1522 rollback_registered_many+0x43b/0xfd0 net/core/dev.c:8177 rollback_registered+0x109/0x1d0 net/core/dev.c:8242 unregister_netdevice_queue net/core/dev.c:9289 [inline] unregister_netdevice_queue+0x1ee/0x2c0 net/core/dev.c:9282 unregister_netdevice include/linux/netdevice.h:2658 [inline] __tun_detach+0xd5b/0x1000 drivers/net/tun.c:727 tun_detach drivers/net/tun.c:744 [inline] tun_chr_close+0xe0/0x180 drivers/net/tun.c:3443 __fput+0x2e5/0x8d0 fs/file_table.c:278 ____fput+0x16/0x20 fs/file_table.c:309 task_work_run+0x14a/0x1c0 kernel/task_work.c:113 exit_task_work include/linux/task_work.h:22 [inline] do_exit+0x90a/0x2fa0 kernel/exit.c:876 do_group_exit+0x135/0x370 kernel/exit.c:980 __do_sys_exit_group kernel/exit.c:991 [inline] __se_sys_exit_group kernel/exit.c:989 [inline] __x64_sys_exit_group+0x44/0x50 kernel/exit.c:989 do_syscall_64+0x103/0x610 arch/x86/entry/common.c:290 entry_SYSCALL_64_after_hwframe+0x49/0xbe RIP: 0033:0x458da9 Code: ad b8 fb ff c3 66 2e 0f 1f 84 00 00 00 00 00 66 90 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 0f 83 7b b8 fb ff c3 66 2e 0f 1f 84 00 00 00 00 RSP: 002b:00007ffeafc2a6a8 EFLAGS: 00000246 ORIG_RAX: 00000000000000e7 RAX: ffffffffffffffda RBX: 000000000000001c RCX: 0000000000458da9 RDX: 0000000000412a80 RSI: 0000000000a54ef0 RDI: 0000000000000043 RBP: 00000000004be552 R08: 000000000000000c R09: 000000000004c0d1 R10: 0000000002341940 R11: 0000000000000246 R12: 00000000ffffffff R13: 00007ffeafc2a7f0 R14: 000000000004c065 R15: 00007ffeafc2a800 Fixes: a68886a69180 ("net/ipv6: Make from in rt6_info rcu protected") Signed-off-by: Eric Dumazet Reported-by: syzbot Cc: David Ahern Reviewed-by: David Ahern Acked-by: Martin KaFai Lau Acked-by: Wei Wang Signed-off-by: David S. Miller --- net/ipv6/ip6_fib.c | 4 +--- net/ipv6/route.c | 9 ++------- 2 files changed, 3 insertions(+), 10 deletions(-) diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index 6613d8dbb0e5..91247a6fc67f 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -921,9 +921,7 @@ static void fib6_drop_pcpu_from(struct fib6_info *f6i, if (pcpu_rt) { struct fib6_info *from; - from = rcu_dereference_protected(pcpu_rt->from, - lockdep_is_held(&table->tb6_lock)); - rcu_assign_pointer(pcpu_rt->from, NULL); + from = xchg((__force struct fib6_info **)&pcpu_rt->from, NULL); fib6_info_release(from); } } diff --git a/net/ipv6/route.c b/net/ipv6/route.c index b4899f0de0d0..2cc166bc978d 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -379,11 +379,8 @@ static void ip6_dst_destroy(struct dst_entry *dst) in6_dev_put(idev); } - rcu_read_lock(); - from = rcu_dereference(rt->from); - rcu_assign_pointer(rt->from, NULL); + from = xchg((__force struct fib6_info **)&rt->from, NULL); fib6_info_release(from); - rcu_read_unlock(); } static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev, @@ -1288,9 +1285,7 @@ static void rt6_remove_exception(struct rt6_exception_bucket *bucket, /* purge completely the exception to allow releasing the held resources: * some [sk] cache may keep the dst around for unlimited time */ - from = rcu_dereference_protected(rt6_ex->rt6i->from, - lockdep_is_held(&rt6_exception_lock)); - rcu_assign_pointer(rt6_ex->rt6i->from, NULL); + from = xchg((__force struct fib6_info **)&rt6_ex->rt6i->from, NULL); fib6_info_release(from); dst_dev_put(&rt6_ex->rt6i->dst); -- cgit v1.2.3 From 5c8b0b54db22c54f2aec991b388f550d3a927f26 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Tue, 30 Apr 2019 10:16:07 -0600 Subject: io_uring: have submission side sqe errors post a cqe MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Currently we only post a cqe if we get an error OUTSIDE of submission. For submission, we return the error directly through io_uring_enter(). This is a bit awkward for applications, and it makes more sense to always post a cqe with an error, if the error happens on behalf of an sqe. This changes submission behavior a bit. io_uring_enter() returns -ERROR for an error, and > 0 for number of sqes submitted. Before this change, if you wanted to submit 8 entries and had an error on the 5th entry, io_uring_enter() would return 4 (for number of entries successfully submitted) and rewind the sqring. The application would then have to peek at the sqring and figure out what was wrong with the head sqe, and then skip it itself. With this change, we'll return 5 since we did consume 5 sqes, and the last sqe (with the error) will result in a cqe being posted with the error. This makes the logic easier to handle in the application, and it cleans up the submission part. Suggested-by: Stefan Bühler Signed-off-by: Jens Axboe --- fs/io_uring.c | 34 ++++++---------------------------- 1 file changed, 6 insertions(+), 28 deletions(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index 77b247b5d10b..0a894d7baceb 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -1801,14 +1801,6 @@ static void io_commit_sqring(struct io_ring_ctx *ctx) } } -/* - * Undo last io_get_sqring() - */ -static void io_drop_sqring(struct io_ring_ctx *ctx) -{ - ctx->cached_sq_head--; -} - /* * Fetch an sqe, if one is available. Note that s->sqe will point to memory * that is mapped by userspace. This means that care needs to be taken to @@ -2018,7 +2010,7 @@ static int io_sq_thread(void *data) static int io_ring_submit(struct io_ring_ctx *ctx, unsigned int to_submit) { struct io_submit_state state, *statep = NULL; - int i, ret = 0, submit = 0; + int i, submit = 0; if (to_submit > IO_PLUG_THRESHOLD) { io_submit_state_start(&state, ctx, to_submit); @@ -2027,6 +2019,7 @@ static int io_ring_submit(struct io_ring_ctx *ctx, unsigned int to_submit) for (i = 0; i < to_submit; i++) { struct sqe_submit s; + int ret; if (!io_get_sqring(ctx, &s)) break; @@ -2034,21 +2027,18 @@ static int io_ring_submit(struct io_ring_ctx *ctx, unsigned int to_submit) s.has_user = true; s.needs_lock = false; s.needs_fixed_file = false; + submit++; ret = io_submit_sqe(ctx, &s, statep); - if (ret) { - io_drop_sqring(ctx); - break; - } - - submit++; + if (ret) + io_cqring_add_event(ctx, s.sqe->user_data, ret, 0); } io_commit_sqring(ctx); if (statep) io_submit_state_end(statep); - return submit ? submit : ret; + return submit; } static unsigned io_cqring_events(struct io_cq_ring *ring) @@ -2779,24 +2769,12 @@ SYSCALL_DEFINE6(io_uring_enter, unsigned int, fd, u32, to_submit, mutex_lock(&ctx->uring_lock); submitted = io_ring_submit(ctx, to_submit); mutex_unlock(&ctx->uring_lock); - - if (submitted < 0) - goto out_ctx; } if (flags & IORING_ENTER_GETEVENTS) { unsigned nr_events = 0; min_complete = min(min_complete, ctx->cq_entries); - /* - * The application could have included the 'to_submit' count - * in how many events it wanted to wait for. If we failed to - * submit the desired count, we may need to adjust the number - * of events to poll/wait for. - */ - if (submitted < to_submit) - min_complete = min_t(unsigned, submitted, min_complete); - if (ctx->flags & IORING_SETUP_IOPOLL) { mutex_lock(&ctx->uring_lock); ret = io_iopoll_check(ctx, &nr_events, min_complete); -- cgit v1.2.3 From 799381e49b4e7b0177664cdbc54a2de8b41647a8 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Sun, 28 Apr 2019 18:10:39 -0700 Subject: Documentation: fix netdev-FAQ.rst markup warning Fix ReST underline warning: ./Documentation/networking/netdev-FAQ.rst:135: WARNING: Title underline too short. Q: I made changes to only a few patches in a patch series should I resend only those changed? -------------------------------------------------------------------------------------------- Fixes: ffa91253739c ("Documentation: networking: Update netdev-FAQ regarding patches") Signed-off-by: Randy Dunlap Cc: Florian Fainelli Signed-off-by: David S. Miller --- Documentation/networking/netdev-FAQ.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/networking/netdev-FAQ.rst b/Documentation/networking/netdev-FAQ.rst index 8c7a713cf657..642fa963be3c 100644 --- a/Documentation/networking/netdev-FAQ.rst +++ b/Documentation/networking/netdev-FAQ.rst @@ -132,7 +132,7 @@ version that should be applied. If there is any doubt, the maintainer will reply and ask what should be done. Q: I made changes to only a few patches in a patch series should I resend only those changed? --------------------------------------------------------------------------------------------- +--------------------------------------------------------------------------------------------- A: No, please resend the entire patch series and make sure you do number your patches such that it is clear this is the latest and greatest set of patches that can be applied. -- cgit v1.2.3 From 37e9c087c81447cebd4ca022226829e319b0e280 Mon Sep 17 00:00:00 2001 From: Jan Kiszka Date: Mon, 29 Apr 2019 07:51:44 +0200 Subject: stmmac: pci: Fix typo in IOT2000 comment Signed-off-by: Jan Kiszka Reviewed-by: Andy Shevchenko Signed-off-by: David S. Miller --- drivers/net/ethernet/stmicro/stmmac/stmmac_pci.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_pci.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_pci.c index cc1e887e47b5..26db6aa002d1 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_pci.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_pci.c @@ -160,7 +160,7 @@ static const struct dmi_system_id quark_pci_dmi[] = { .driver_data = (void *)&galileo_stmmac_dmi_data, }, /* - * There are 2 types of SIMATIC IOT2000: IOT20202 and IOT2040. + * There are 2 types of SIMATIC IOT2000: IOT2020 and IOT2040. * The asset tag "6ES7647-0AA00-0YA2" is only for IOT2020 which * has only one pci network device while other asset tags are * for IOT2040 which has two. -- cgit v1.2.3 From fbd019737d71e405f86549fd738f81e2ff3dd073 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Mon, 29 Apr 2019 14:16:19 +0800 Subject: sctp: avoid running the sctp state machine recursively Ying triggered a call trace when doing an asconf testing: BUG: scheduling while atomic: swapper/12/0/0x10000100 Call Trace: [] dump_stack+0x19/0x1b [] __schedule_bug+0x64/0x72 [] __schedule+0x9ba/0xa00 [] __cond_resched+0x26/0x30 [] _cond_resched+0x3a/0x50 [] kmem_cache_alloc_node+0x38/0x200 [] __alloc_skb+0x5d/0x2d0 [] sctp_packet_transmit+0x610/0xa20 [sctp] [] sctp_outq_flush+0x2ce/0xc00 [sctp] [] sctp_outq_uncork+0x1c/0x20 [sctp] [] sctp_cmd_interpreter.isra.22+0xc8/0x1460 [sctp] [] sctp_do_sm+0xe1/0x350 [sctp] [] sctp_primitive_ASCONF+0x3d/0x50 [sctp] [] sctp_cmd_interpreter.isra.22+0x114/0x1460 [sctp] [] sctp_do_sm+0xe1/0x350 [sctp] [] sctp_assoc_bh_rcv+0xf4/0x1b0 [sctp] [] sctp_inq_push+0x51/0x70 [sctp] [] sctp_rcv+0xa8b/0xbd0 [sctp] As it shows, the first sctp_do_sm() running under atomic context (NET_RX softirq) invoked sctp_primitive_ASCONF() that uses GFP_KERNEL flag later, and this flag is supposed to be used in non-atomic context only. Besides, sctp_do_sm() was called recursively, which is not expected. Vlad tried to fix this recursive call in Commit c0786693404c ("sctp: Fix oops when sending queued ASCONF chunks") by introducing a new command SCTP_CMD_SEND_NEXT_ASCONF. But it didn't work as this command is still used in the first sctp_do_sm() call, and sctp_primitive_ASCONF() will be called in this command again. To avoid calling sctp_do_sm() recursively, we send the next queued ASCONF not by sctp_primitive_ASCONF(), but by sctp_sf_do_prm_asconf() in the 1st sctp_do_sm() directly. Reported-by: Ying Xu Signed-off-by: Xin Long Acked-by: Neil Horman Acked-by: Marcelo Ricardo Leitner Signed-off-by: David S. Miller --- include/net/sctp/command.h | 1 - net/sctp/sm_sideeffect.c | 29 ----------------------------- net/sctp/sm_statefuns.c | 35 +++++++++++++++++++++++++++-------- 3 files changed, 27 insertions(+), 38 deletions(-) diff --git a/include/net/sctp/command.h b/include/net/sctp/command.h index 6640f84fe536..6d5beac29bc1 100644 --- a/include/net/sctp/command.h +++ b/include/net/sctp/command.h @@ -105,7 +105,6 @@ enum sctp_verb { SCTP_CMD_T1_RETRAN, /* Mark for retransmission after T1 timeout */ SCTP_CMD_UPDATE_INITTAG, /* Update peer inittag */ SCTP_CMD_SEND_MSG, /* Send the whole use message */ - SCTP_CMD_SEND_NEXT_ASCONF, /* Send the next ASCONF after ACK */ SCTP_CMD_PURGE_ASCONF_QUEUE, /* Purge all asconf queues.*/ SCTP_CMD_SET_ASOC, /* Restore association context */ SCTP_CMD_LAST diff --git a/net/sctp/sm_sideeffect.c b/net/sctp/sm_sideeffect.c index 1d143bc3f73d..4aa03588f87b 100644 --- a/net/sctp/sm_sideeffect.c +++ b/net/sctp/sm_sideeffect.c @@ -1112,32 +1112,6 @@ static void sctp_cmd_send_msg(struct sctp_association *asoc, } -/* Sent the next ASCONF packet currently stored in the association. - * This happens after the ASCONF_ACK was succeffully processed. - */ -static void sctp_cmd_send_asconf(struct sctp_association *asoc) -{ - struct net *net = sock_net(asoc->base.sk); - - /* Send the next asconf chunk from the addip chunk - * queue. - */ - if (!list_empty(&asoc->addip_chunk_list)) { - struct list_head *entry = asoc->addip_chunk_list.next; - struct sctp_chunk *asconf = list_entry(entry, - struct sctp_chunk, list); - list_del_init(entry); - - /* Hold the chunk until an ASCONF_ACK is received. */ - sctp_chunk_hold(asconf); - if (sctp_primitive_ASCONF(net, asoc, asconf)) - sctp_chunk_free(asconf); - else - asoc->addip_last_asconf = asconf; - } -} - - /* These three macros allow us to pull the debugging code out of the * main flow of sctp_do_sm() to keep attention focused on the real * functionality there. @@ -1783,9 +1757,6 @@ static int sctp_cmd_interpreter(enum sctp_event_type event_type, } sctp_cmd_send_msg(asoc, cmd->obj.msg, gfp); break; - case SCTP_CMD_SEND_NEXT_ASCONF: - sctp_cmd_send_asconf(asoc); - break; case SCTP_CMD_PURGE_ASCONF_QUEUE: sctp_asconf_queue_teardown(asoc); break; diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c index c9ae3404b1bb..713a669d2058 100644 --- a/net/sctp/sm_statefuns.c +++ b/net/sctp/sm_statefuns.c @@ -3824,6 +3824,29 @@ enum sctp_disposition sctp_sf_do_asconf(struct net *net, return SCTP_DISPOSITION_CONSUME; } +static enum sctp_disposition sctp_send_next_asconf( + struct net *net, + const struct sctp_endpoint *ep, + struct sctp_association *asoc, + const union sctp_subtype type, + struct sctp_cmd_seq *commands) +{ + struct sctp_chunk *asconf; + struct list_head *entry; + + if (list_empty(&asoc->addip_chunk_list)) + return SCTP_DISPOSITION_CONSUME; + + entry = asoc->addip_chunk_list.next; + asconf = list_entry(entry, struct sctp_chunk, list); + + list_del_init(entry); + sctp_chunk_hold(asconf); + asoc->addip_last_asconf = asconf; + + return sctp_sf_do_prm_asconf(net, ep, asoc, type, asconf, commands); +} + /* * ADDIP Section 4.3 General rules for address manipulation * When building TLV parameters for the ASCONF Chunk that will add or @@ -3915,14 +3938,10 @@ enum sctp_disposition sctp_sf_do_asconf_ack(struct net *net, SCTP_TO(SCTP_EVENT_TIMEOUT_T4_RTO)); if (!sctp_process_asconf_ack((struct sctp_association *)asoc, - asconf_ack)) { - /* Successfully processed ASCONF_ACK. We can - * release the next asconf if we have one. - */ - sctp_add_cmd_sf(commands, SCTP_CMD_SEND_NEXT_ASCONF, - SCTP_NULL()); - return SCTP_DISPOSITION_CONSUME; - } + asconf_ack)) + return sctp_send_next_asconf(net, ep, + (struct sctp_association *)asoc, + type, commands); abort = sctp_make_abort(asoc, asconf_ack, sizeof(struct sctp_errhdr)); -- cgit v1.2.3 From 975554b03eddc1df73bda3a764a09e18cadd5f1c Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Tue, 30 Apr 2019 13:34:51 +0100 Subject: io_uring: fix SQPOLL cpu validation In io_sq_offload_start(), we call cpu_possible() on an unbounded cpu value from userspace. On v5.1-rc7 on arm64 with CONFIG_DEBUG_PER_CPU_MAPS, this results in a splat: WARNING: CPU: 1 PID: 27601 at include/linux/cpumask.h:121 cpu_max_bits_warn include/linux/cpumask.h:121 [inline] There was an attempt to fix this in commit: 917257daa0fea7a0 ("io_uring: only test SQPOLL cpu after we've verified it") ... by adding a check after the cpu value had been limited to NR_CPU_IDS using array_index_nospec(). However, this left an unbound check at the start of the function, for which the warning still fires. Let's fix this correctly by checking that the cpu value is bound by nr_cpu_ids before passing it to cpu_possible(). Note that only nr_cpu_ids of a cpumask are guaranteed to exist at runtime, and nr_cpu_ids can be significantly smaller than NR_CPUs. For example, an arm64 defconfig has NR_CPUS=256, while my test VM has 4 vCPUs. Following the intent from the commit message for 917257daa0fea7a0, the check is moved under the SQ_AFF branch, which is the only branch where the cpu values is consumed. The check is performed before bounding the value with array_index_nospec() so that we don't silently accept bogus cpu values from userspace, where array_index_nospec() would force these values to 0. I suspect we can remove the array_index_nospec() call entirely, but I've conservatively left that in place, updated to use nr_cpu_ids to match the prior check. Tested on arm64 with the Syzkaller reproducer: https://syzkaller.appspot.com/bug?extid=cd714a07c6de2bc34293 https://syzkaller.appspot.com/x/repro.syz?x=15d8b397200000 Full splat from before this patch: WARNING: CPU: 1 PID: 27601 at include/linux/cpumask.h:121 cpu_max_bits_warn include/linux/cpumask.h:121 [inline] WARNING: CPU: 1 PID: 27601 at include/linux/cpumask.h:121 cpumask_check include/linux/cpumask.h:128 [inline] WARNING: CPU: 1 PID: 27601 at include/linux/cpumask.h:121 cpumask_test_cpu include/linux/cpumask.h:344 [inline] WARNING: CPU: 1 PID: 27601 at include/linux/cpumask.h:121 io_sq_offload_start fs/io_uring.c:2244 [inline] WARNING: CPU: 1 PID: 27601 at include/linux/cpumask.h:121 io_uring_create fs/io_uring.c:2864 [inline] WARNING: CPU: 1 PID: 27601 at include/linux/cpumask.h:121 io_uring_setup+0x1108/0x15a0 fs/io_uring.c:2916 Kernel panic - not syncing: panic_on_warn set ... CPU: 1 PID: 27601 Comm: syz-executor.0 Not tainted 5.1.0-rc7 #3 Hardware name: linux,dummy-virt (DT) Call trace: dump_backtrace+0x0/0x2f0 include/linux/compiler.h:193 show_stack+0x20/0x30 arch/arm64/kernel/traps.c:158 __dump_stack lib/dump_stack.c:77 [inline] dump_stack+0x110/0x190 lib/dump_stack.c:113 panic+0x384/0x68c kernel/panic.c:214 __warn+0x2bc/0x2c0 kernel/panic.c:571 report_bug+0x228/0x2d8 lib/bug.c:186 bug_handler+0xa0/0x1a0 arch/arm64/kernel/traps.c:956 call_break_hook arch/arm64/kernel/debug-monitors.c:301 [inline] brk_handler+0x1d4/0x388 arch/arm64/kernel/debug-monitors.c:316 do_debug_exception+0x1a0/0x468 arch/arm64/mm/fault.c:831 el1_dbg+0x18/0x8c cpu_max_bits_warn include/linux/cpumask.h:121 [inline] cpumask_check include/linux/cpumask.h:128 [inline] cpumask_test_cpu include/linux/cpumask.h:344 [inline] io_sq_offload_start fs/io_uring.c:2244 [inline] io_uring_create fs/io_uring.c:2864 [inline] io_uring_setup+0x1108/0x15a0 fs/io_uring.c:2916 __do_sys_io_uring_setup fs/io_uring.c:2929 [inline] __se_sys_io_uring_setup fs/io_uring.c:2926 [inline] __arm64_sys_io_uring_setup+0x50/0x70 fs/io_uring.c:2926 __invoke_syscall arch/arm64/kernel/syscall.c:35 [inline] invoke_syscall arch/arm64/kernel/syscall.c:47 [inline] el0_svc_common.constprop.0+0x148/0x2e0 arch/arm64/kernel/syscall.c:83 el0_svc_handler+0xdc/0x100 arch/arm64/kernel/syscall.c:129 el0_svc+0x8/0xc arch/arm64/kernel/entry.S:948 SMP: stopping secondary CPUs Dumping ftrace buffer: (ftrace buffer empty) Kernel Offset: disabled CPU features: 0x002,23000438 Memory Limit: none Rebooting in 1 seconds.. Fixes: 917257daa0fea7a0 ("io_uring: only test SQPOLL cpu after we've verified it") Signed-off-by: Mark Rutland Cc: Jens Axboe Cc: Alexander Viro Cc: linux-block@vger.kernel.org Cc: linux-fsdevel@vger.kernel.org Cc: linux-kernel@vger.kernel.org Simplied the logic Signed-off-by: Jens Axboe --- fs/io_uring.c | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index 0a894d7baceb..5954047ee96d 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -2319,10 +2319,6 @@ static int io_sq_offload_start(struct io_ring_ctx *ctx, mmgrab(current->mm); ctx->sqo_mm = current->mm; - ret = -EINVAL; - if (!cpu_possible(p->sq_thread_cpu)) - goto err; - if (ctx->flags & IORING_SETUP_SQPOLL) { ret = -EPERM; if (!capable(CAP_SYS_ADMIN)) @@ -2333,11 +2329,11 @@ static int io_sq_offload_start(struct io_ring_ctx *ctx, ctx->sq_thread_idle = HZ; if (p->flags & IORING_SETUP_SQ_AFF) { - int cpu; + int cpu = array_index_nospec(p->sq_thread_cpu, + nr_cpu_ids); - cpu = array_index_nospec(p->sq_thread_cpu, NR_CPUS); ret = -EINVAL; - if (!cpu_possible(p->sq_thread_cpu)) + if (!cpu_possible(cpu)) goto err; ctx->sqo_thread = kthread_create_on_cpu(io_sq_thread, -- cgit v1.2.3 From 52e04ef4c9d459cba3afd86ec335a411b40b7fd2 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Tue, 30 Apr 2019 17:30:21 +0100 Subject: io_uring: free allocated io_memory once If io_allocate_scq_urings() fails to allocate an sq_* region, it will call io_mem_free() for any previously allocated regions, but leave dangling pointers to these regions in the ctx. Any regions which have not yet been allocated are left NULL. Note that when returning -EOVERFLOW, the previously allocated sq_ring is not freed, which appears to be an unintentional leak. When io_allocate_scq_urings() fails, io_uring_create() will call io_ring_ctx_wait_and_kill(), which calls io_mem_free() on all the sq_* regions, assuming the pointers are valid and not NULL. This can result in pages being freed multiple times, which has been observed to corrupt the page state, leading to subsequent fun. This can also result in virt_to_page() on NULL, resulting in the use of bogus page addresses, and yet more subsequent fun. The latter can be detected with CONFIG_DEBUG_VIRTUAL on arm64. Adding a cleanup path to io_allocate_scq_urings() complicates the logic, so let's leave it to io_ring_ctx_free() to consistently free these pointers, and simplify the io_allocate_scq_urings() error paths. Full splats from before this patch below. Note that the pointer logged by the DEBUG_VIRTUAL "non-linear address" warning has been hashed, and is actually NULL. [ 26.098129] page:ffff80000e949a00 count:0 mapcount:-128 mapping:0000000000000000 index:0x0 [ 26.102976] flags: 0x63fffc000000() [ 26.104373] raw: 000063fffc000000 ffff80000e86c188 ffff80000ea3df08 0000000000000000 [ 26.108917] raw: 0000000000000000 0000000000000001 00000000ffffff7f 0000000000000000 [ 26.137235] page dumped because: VM_BUG_ON_PAGE(page_ref_count(page) == 0) [ 26.143960] ------------[ cut here ]------------ [ 26.146020] kernel BUG at include/linux/mm.h:547! [ 26.147586] Internal error: Oops - BUG: 0 [#1] PREEMPT SMP [ 26.149163] Modules linked in: [ 26.150287] Process syz-executor.21 (pid: 20204, stack limit = 0x000000000e9cefeb) [ 26.153307] CPU: 2 PID: 20204 Comm: syz-executor.21 Not tainted 5.1.0-rc7-00004-g7d30b2ea43d6 #18 [ 26.156566] Hardware name: linux,dummy-virt (DT) [ 26.158089] pstate: 40400005 (nZcv daif +PAN -UAO) [ 26.159869] pc : io_mem_free+0x9c/0xa8 [ 26.161436] lr : io_mem_free+0x9c/0xa8 [ 26.162720] sp : ffff000013003d60 [ 26.164048] x29: ffff000013003d60 x28: ffff800025048040 [ 26.165804] x27: 0000000000000000 x26: ffff800025048040 [ 26.167352] x25: 00000000000000c0 x24: ffff0000112c2820 [ 26.169682] x23: 0000000000000000 x22: 0000000020000080 [ 26.171899] x21: ffff80002143b418 x20: ffff80002143b400 [ 26.174236] x19: ffff80002143b280 x18: 0000000000000000 [ 26.176607] x17: 0000000000000000 x16: 0000000000000000 [ 26.178997] x15: 0000000000000000 x14: 0000000000000000 [ 26.181508] x13: 00009178a5e077b2 x12: 0000000000000001 [ 26.183863] x11: 0000000000000000 x10: 0000000000000980 [ 26.186437] x9 : ffff000013003a80 x8 : ffff800025048a20 [ 26.189006] x7 : ffff8000250481c0 x6 : ffff80002ffe9118 [ 26.191359] x5 : ffff80002ffe9118 x4 : 0000000000000000 [ 26.193863] x3 : ffff80002ffefe98 x2 : 44c06ddd107d1f00 [ 26.196642] x1 : 0000000000000000 x0 : 000000000000003e [ 26.198892] Call trace: [ 26.199893] io_mem_free+0x9c/0xa8 [ 26.201155] io_ring_ctx_wait_and_kill+0xec/0x180 [ 26.202688] io_uring_setup+0x6c4/0x6f0 [ 26.204091] __arm64_sys_io_uring_setup+0x18/0x20 [ 26.205576] el0_svc_common.constprop.0+0x7c/0xe8 [ 26.207186] el0_svc_handler+0x28/0x78 [ 26.208389] el0_svc+0x8/0xc [ 26.209408] Code: aa0203e0 d0006861 9133a021 97fcdc3c (d4210000) [ 26.211995] ---[ end trace bdb81cd43a21e50d ]--- [ 81.770626] ------------[ cut here ]------------ [ 81.825015] virt_to_phys used for non-linear address: 000000000d42f2c7 ( (null)) [ 81.827860] WARNING: CPU: 1 PID: 30171 at arch/arm64/mm/physaddr.c:15 __virt_to_phys+0x48/0x68 [ 81.831202] Modules linked in: [ 81.832212] CPU: 1 PID: 30171 Comm: syz-executor.20 Not tainted 5.1.0-rc7-00004-g7d30b2ea43d6 #19 [ 81.835616] Hardware name: linux,dummy-virt (DT) [ 81.836863] pstate: 60400005 (nZCv daif +PAN -UAO) [ 81.838727] pc : __virt_to_phys+0x48/0x68 [ 81.840572] lr : __virt_to_phys+0x48/0x68 [ 81.842264] sp : ffff80002cf67c70 [ 81.843858] x29: ffff80002cf67c70 x28: ffff800014358e18 [ 81.846463] x27: 0000000000000000 x26: 0000000020000080 [ 81.849148] x25: 0000000000000000 x24: ffff80001bb01f40 [ 81.851986] x23: ffff200011db06c8 x22: ffff2000127e3c60 [ 81.854351] x21: ffff800014358cc0 x20: ffff800014358d98 [ 81.856711] x19: 0000000000000000 x18: 0000000000000000 [ 81.859132] x17: 0000000000000000 x16: 0000000000000000 [ 81.861586] x15: 0000000000000000 x14: 0000000000000000 [ 81.863905] x13: 0000000000000000 x12: ffff1000037603e9 [ 81.866226] x11: 1ffff000037603e8 x10: 0000000000000980 [ 81.868776] x9 : ffff80002cf67840 x8 : ffff80001bb02920 [ 81.873272] x7 : ffff1000037603e9 x6 : ffff80001bb01f47 [ 81.875266] x5 : ffff1000037603e9 x4 : dfff200000000000 [ 81.876875] x3 : ffff200010087528 x2 : ffff1000059ecf58 [ 81.878751] x1 : 44c06ddd107d1f00 x0 : 0000000000000000 [ 81.880453] Call trace: [ 81.881164] __virt_to_phys+0x48/0x68 [ 81.882919] io_mem_free+0x18/0x110 [ 81.886585] io_ring_ctx_wait_and_kill+0x13c/0x1f0 [ 81.891212] io_uring_setup+0xa60/0xad0 [ 81.892881] __arm64_sys_io_uring_setup+0x2c/0x38 [ 81.894398] el0_svc_common.constprop.0+0xac/0x150 [ 81.896306] el0_svc_handler+0x34/0x88 [ 81.897744] el0_svc+0x8/0xc [ 81.898715] ---[ end trace b4a703802243cbba ]--- Fixes: 2b188cc1bb857a9d ("Add io_uring IO interface") Signed-off-by: Mark Rutland Cc: Jens Axboe Cc: Alexander Viro Cc: linux-block@vger.kernel.org Cc: linux-fsdevel@vger.kernel.org Cc: linux-kernel@vger.kernel.org Signed-off-by: Jens Axboe --- fs/io_uring.c | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index 5954047ee96d..046fc4e1e155 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -2396,8 +2396,12 @@ static int io_account_mem(struct user_struct *user, unsigned long nr_pages) static void io_mem_free(void *ptr) { - struct page *page = virt_to_head_page(ptr); + struct page *page; + + if (!ptr) + return; + page = virt_to_head_page(ptr); if (put_page_testzero(page)) free_compound_page(page); } @@ -2816,17 +2820,12 @@ static int io_allocate_scq_urings(struct io_ring_ctx *ctx, return -EOVERFLOW; ctx->sq_sqes = io_mem_alloc(size); - if (!ctx->sq_sqes) { - io_mem_free(ctx->sq_ring); + if (!ctx->sq_sqes) return -ENOMEM; - } cq_ring = io_mem_alloc(struct_size(cq_ring, cqes, p->cq_entries)); - if (!cq_ring) { - io_mem_free(ctx->sq_ring); - io_mem_free(ctx->sq_sqes); + if (!cq_ring) return -ENOMEM; - } ctx->cq_ring = cq_ring; cq_ring->ring_mask = p->cq_entries - 1; -- cgit v1.2.3 From 817869d2519f0cb7be5b3482129dadc806dfb747 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Tue, 30 Apr 2019 14:44:05 -0600 Subject: io_uring: drop req submit reference always in async punt If we don't end up actually calling submit in io_sq_wq_submit_work(), we still need to drop the submit reference to the request. If we don't, then we can leak the request. This can happen if we race with ring shutdown while flushing the workqueue for requests that require use of the mm_struct. Fixes: e65ef56db494 ("io_uring: use regular request ref counts") Signed-off-by: Jens Axboe --- fs/io_uring.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index 046fc4e1e155..18cecb6a0151 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -1568,10 +1568,11 @@ restart: break; cond_resched(); } while (1); - - /* drop submission reference */ - io_put_req(req); } + + /* drop submission reference */ + io_put_req(req); + if (ret) { io_cqring_add_event(ctx, sqe->user_data, ret, 0); io_put_req(req); -- cgit v1.2.3 From 60a27b906d1a372474669c914c10d6c993858a4a Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Fri, 26 Apr 2019 18:45:20 +0800 Subject: block: fix handling for BIO_NO_PAGE_REF Commit 399254aaf489211 ("block: add BIO_NO_PAGE_REF flag") introduces BIO_NO_PAGE_REF, and once this flag is set for one bio, all pages in the bio won't be get/put during IO. However, if one bio is submitted via __blkdev_direct_IO_simple(), even though BIO_NO_PAGE_REF is set, pages still may be put. Fixes this issue by avoiding to put pages if BIO_NO_PAGE_REF is set. Fixes: 399254aaf489211 ("block: add BIO_NO_PAGE_REF flag") Reviewed-by: Christoph Hellwig Signed-off-by: Ming Lei Signed-off-by: Jens Axboe --- fs/block_dev.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/block_dev.c b/fs/block_dev.c index 24615c76c1d0..bb28e2ead679 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -264,7 +264,8 @@ __blkdev_direct_IO_simple(struct kiocb *iocb, struct iov_iter *iter, bio_for_each_segment_all(bvec, &bio, i, iter_all) { if (should_dirty && !PageCompound(bvec->bv_page)) set_page_dirty_lock(bvec->bv_page); - put_page(bvec->bv_page); + if (!bio_flagged(&bio, BIO_NO_PAGE_REF)) + put_page(bvec->bv_page); } if (unlikely(bio.bi_status)) -- cgit v1.2.3 From f5eb4d3b92a6a1096ef3480b54782a9409281300 Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Fri, 26 Apr 2019 18:45:21 +0800 Subject: iov_iter: fix iov_iter_type Commit 875f1d0769cd ("iov_iter: add ITER_BVEC_FLAG_NO_REF flag") introduces one extra flag of ITER_BVEC_FLAG_NO_REF, and this flag is stored into iter->type. However, iov_iter_type() doesn't consider the new added flag, fix it by masking this flag in iov_iter_type(). Fixes: 875f1d0769cd ("iov_iter: add ITER_BVEC_FLAG_NO_REF flag") Reviewed-by: Christoph Hellwig Signed-off-by: Ming Lei Signed-off-by: Jens Axboe --- include/linux/uio.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/uio.h b/include/linux/uio.h index f184af1999a8..2d0131ad4604 100644 --- a/include/linux/uio.h +++ b/include/linux/uio.h @@ -60,7 +60,7 @@ struct iov_iter { static inline enum iter_type iov_iter_type(const struct iov_iter *i) { - return i->type & ~(READ | WRITE); + return i->type & ~(READ | WRITE | ITER_BVEC_FLAG_NO_REF); } static inline bool iter_is_iovec(const struct iov_iter *i) -- cgit v1.2.3 From b2cf86e1563e33a14a1c69b3e508d15dc12f804c Mon Sep 17 00:00:00 2001 From: Willem de Bruijn Date: Mon, 29 Apr 2019 11:46:55 -0400 Subject: packet: in recvmsg msg_name return at least sizeof sockaddr_ll Packet send checks that msg_name is at least sizeof sockaddr_ll. Packet recv must return at least this length, so that its output can be passed unmodified to packet send. This ceased to be true since adding support for lladdr longer than sll_addr. Since, the return value uses true address length. Always return at least sizeof sockaddr_ll, even if address length is shorter. Zero the padding bytes. Change v1->v2: do not overwrite zeroed padding again. use copy_len. Fixes: 0fb375fb9b93 ("[AF_PACKET]: Allow for > 8 byte hardware addresses.") Suggested-by: David Laight Signed-off-by: Willem de Bruijn Signed-off-by: David S. Miller --- net/packet/af_packet.c | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 9419c5cf4de5..7d361cd53ad5 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -3344,20 +3344,29 @@ static int packet_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, sock_recv_ts_and_drops(msg, sk, skb); if (msg->msg_name) { + int copy_len; + /* If the address length field is there to be filled * in, we fill it in now. */ if (sock->type == SOCK_PACKET) { __sockaddr_check_size(sizeof(struct sockaddr_pkt)); msg->msg_namelen = sizeof(struct sockaddr_pkt); + copy_len = msg->msg_namelen; } else { struct sockaddr_ll *sll = &PACKET_SKB_CB(skb)->sa.ll; msg->msg_namelen = sll->sll_halen + offsetof(struct sockaddr_ll, sll_addr); + copy_len = msg->msg_namelen; + if (msg->msg_namelen < sizeof(struct sockaddr_ll)) { + memset(msg->msg_name + + offsetof(struct sockaddr_ll, sll_addr), + 0, sizeof(sll->sll_addr)); + msg->msg_namelen = sizeof(struct sockaddr_ll); + } } - memcpy(msg->msg_name, &PACKET_SKB_CB(skb)->sa, - msg->msg_namelen); + memcpy(msg->msg_name, &PACKET_SKB_CB(skb)->sa, copy_len); } if (pkt_sk(sk)->auxdata) { -- cgit v1.2.3 From 486efdc8f6ce802b27e15921d2353cc740c55451 Mon Sep 17 00:00:00 2001 From: Willem de Bruijn Date: Mon, 29 Apr 2019 11:53:18 -0400 Subject: packet: validate msg_namelen in send directly Packet sockets in datagram mode take a destination address. Verify its length before passing to dev_hard_header. Prior to 2.6.14-rc3, the send code ignored sll_halen. This is established behavior. Directly compare msg_namelen to dev->addr_len. Change v1->v2: initialize addr in all paths Fixes: 6b8d95f1795c4 ("packet: validate address length if non-zero") Suggested-by: David Laight Signed-off-by: Willem de Bruijn Signed-off-by: David S. Miller --- net/packet/af_packet.c | 24 ++++++++++++++---------- 1 file changed, 14 insertions(+), 10 deletions(-) diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 7d361cd53ad5..9b81813dd16a 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -2602,8 +2602,8 @@ static int tpacket_snd(struct packet_sock *po, struct msghdr *msg) void *ph; DECLARE_SOCKADDR(struct sockaddr_ll *, saddr, msg->msg_name); bool need_wait = !(msg->msg_flags & MSG_DONTWAIT); + unsigned char *addr = NULL; int tp_len, size_max; - unsigned char *addr; void *data; int len_sum = 0; int status = TP_STATUS_AVAILABLE; @@ -2614,7 +2614,6 @@ static int tpacket_snd(struct packet_sock *po, struct msghdr *msg) if (likely(saddr == NULL)) { dev = packet_cached_dev_get(po); proto = po->num; - addr = NULL; } else { err = -EINVAL; if (msg->msg_namelen < sizeof(struct sockaddr_ll)) @@ -2624,10 +2623,13 @@ static int tpacket_snd(struct packet_sock *po, struct msghdr *msg) sll_addr))) goto out; proto = saddr->sll_protocol; - addr = saddr->sll_halen ? saddr->sll_addr : NULL; dev = dev_get_by_index(sock_net(&po->sk), saddr->sll_ifindex); - if (addr && dev && saddr->sll_halen < dev->addr_len) - goto out_put; + if (po->sk.sk_socket->type == SOCK_DGRAM) { + if (dev && msg->msg_namelen < dev->addr_len + + offsetof(struct sockaddr_ll, sll_addr)) + goto out_put; + addr = saddr->sll_addr; + } } err = -ENXIO; @@ -2799,7 +2801,7 @@ static int packet_snd(struct socket *sock, struct msghdr *msg, size_t len) struct sk_buff *skb; struct net_device *dev; __be16 proto; - unsigned char *addr; + unsigned char *addr = NULL; int err, reserve = 0; struct sockcm_cookie sockc; struct virtio_net_hdr vnet_hdr = { 0 }; @@ -2816,7 +2818,6 @@ static int packet_snd(struct socket *sock, struct msghdr *msg, size_t len) if (likely(saddr == NULL)) { dev = packet_cached_dev_get(po); proto = po->num; - addr = NULL; } else { err = -EINVAL; if (msg->msg_namelen < sizeof(struct sockaddr_ll)) @@ -2824,10 +2825,13 @@ static int packet_snd(struct socket *sock, struct msghdr *msg, size_t len) if (msg->msg_namelen < (saddr->sll_halen + offsetof(struct sockaddr_ll, sll_addr))) goto out; proto = saddr->sll_protocol; - addr = saddr->sll_halen ? saddr->sll_addr : NULL; dev = dev_get_by_index(sock_net(sk), saddr->sll_ifindex); - if (addr && dev && saddr->sll_halen < dev->addr_len) - goto out_unlock; + if (sock->type == SOCK_DGRAM) { + if (dev && msg->msg_namelen < dev->addr_len + + offsetof(struct sockaddr_ll, sll_addr)) + goto out_unlock; + addr = saddr->sll_addr; + } } err = -ENXIO; -- cgit v1.2.3 From 15d55bae4e3c43cd9f87fd93c73a263e172d34e1 Mon Sep 17 00:00:00 2001 From: David Ahern Date: Mon, 29 Apr 2019 10:30:09 -0700 Subject: selftests: fib_rule_tests: Fix icmp proto with ipv6 A recent commit returns an error if icmp is used as the ip-proto for IPv6 fib rules. Update fib_rule_tests to send ipv6-icmp instead of icmp. Fixes: 5e1a99eae8499 ("ipv4: Add ICMPv6 support when parse route ipproto") Signed-off-by: David Ahern Signed-off-by: David S. Miller --- tools/testing/selftests/net/fib_rule_tests.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/testing/selftests/net/fib_rule_tests.sh b/tools/testing/selftests/net/fib_rule_tests.sh index d4cfb6a7a086..5b92c1f4dc04 100755 --- a/tools/testing/selftests/net/fib_rule_tests.sh +++ b/tools/testing/selftests/net/fib_rule_tests.sh @@ -147,8 +147,8 @@ fib_rule6_test() fib_check_iproute_support "ipproto" "ipproto" if [ $? -eq 0 ]; then - match="ipproto icmp" - fib_rule6_test_match_n_redirect "$match" "$match" "ipproto icmp match" + match="ipproto ipv6-icmp" + fib_rule6_test_match_n_redirect "$match" "$match" "ipproto ipv6-icmp match" fi } -- cgit v1.2.3 From 2dcb003314032c6efb13a065ffae60d164b2dd35 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Mon, 29 Apr 2019 12:19:12 -0700 Subject: net/tls: avoid NULL pointer deref on nskb->sk in fallback update_chksum() accesses nskb->sk before it has been set by complete_skb(), move the init up. Fixes: e8f69799810c ("net/tls: Add generic NIC offload infrastructure") Signed-off-by: Jakub Kicinski Reviewed-by: Simon Horman Signed-off-by: David S. Miller --- net/tls/tls_device_fallback.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/tls/tls_device_fallback.c b/net/tls/tls_device_fallback.c index a3ebd4b02714..c3a5fe624b4e 100644 --- a/net/tls/tls_device_fallback.c +++ b/net/tls/tls_device_fallback.c @@ -201,13 +201,14 @@ static void complete_skb(struct sk_buff *nskb, struct sk_buff *skb, int headln) skb_put(nskb, skb->len); memcpy(nskb->data, skb->data, headln); - update_chksum(nskb, headln); nskb->destructor = skb->destructor; nskb->sk = sk; skb->destructor = NULL; skb->sk = NULL; + update_chksum(nskb, headln); + delta = nskb->truesize - skb->truesize; if (likely(delta < 0)) WARN_ON_ONCE(refcount_sub_and_test(-delta, &sk->sk_wmem_alloc)); -- cgit v1.2.3 From d4ef647510b1200fe1c996ff1cbf5ac47eb930cc Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Wed, 1 May 2019 16:59:16 +0100 Subject: io_uring: avoid page allocation warnings In io_sqe_buffer_register() we allocate a number of arrays based on the iov_len from the user-provided iov. While we limit iov_len to SZ_1G, we can still attempt to allocate arrays exceeding MAX_ORDER. On a 64-bit system with 4KiB pages, for an iov where iov_base = 0x10 and iov_len = SZ_1G, we'll calculate that nr_pages = 262145. When we try to allocate a corresponding array of (16-byte) bio_vecs, requiring 4194320 bytes, which is greater than 4MiB. This results in SLUB warning that we're trying to allocate greater than MAX_ORDER, and failing the allocation. Avoid this by using kvmalloc() for allocations dependent on the user-provided iov_len. At the same time, fix a leak of imu->bvec when registration fails. Full splat from before this patch: WARNING: CPU: 1 PID: 2314 at mm/page_alloc.c:4595 __alloc_pages_nodemask+0x7ac/0x2938 mm/page_alloc.c:4595 Kernel panic - not syncing: panic_on_warn set ... CPU: 1 PID: 2314 Comm: syz-executor326 Not tainted 5.1.0-rc7-dirty #4 Hardware name: linux,dummy-virt (DT) Call trace: dump_backtrace+0x0/0x2f0 include/linux/compiler.h:193 show_stack+0x20/0x30 arch/arm64/kernel/traps.c:158 __dump_stack lib/dump_stack.c:77 [inline] dump_stack+0x110/0x190 lib/dump_stack.c:113 panic+0x384/0x68c kernel/panic.c:214 __warn+0x2bc/0x2c0 kernel/panic.c:571 report_bug+0x228/0x2d8 lib/bug.c:186 bug_handler+0xa0/0x1a0 arch/arm64/kernel/traps.c:956 call_break_hook arch/arm64/kernel/debug-monitors.c:301 [inline] brk_handler+0x1d4/0x388 arch/arm64/kernel/debug-monitors.c:316 do_debug_exception+0x1a0/0x468 arch/arm64/mm/fault.c:831 el1_dbg+0x18/0x8c __alloc_pages_nodemask+0x7ac/0x2938 mm/page_alloc.c:4595 alloc_pages_current+0x164/0x278 mm/mempolicy.c:2132 alloc_pages include/linux/gfp.h:509 [inline] kmalloc_order+0x20/0x50 mm/slab_common.c:1231 kmalloc_order_trace+0x30/0x2b0 mm/slab_common.c:1243 kmalloc_large include/linux/slab.h:480 [inline] __kmalloc+0x3dc/0x4f0 mm/slub.c:3791 kmalloc_array include/linux/slab.h:670 [inline] io_sqe_buffer_register fs/io_uring.c:2472 [inline] __io_uring_register fs/io_uring.c:2962 [inline] __do_sys_io_uring_register fs/io_uring.c:3008 [inline] __se_sys_io_uring_register fs/io_uring.c:2990 [inline] __arm64_sys_io_uring_register+0x9e0/0x1bc8 fs/io_uring.c:2990 __invoke_syscall arch/arm64/kernel/syscall.c:35 [inline] invoke_syscall arch/arm64/kernel/syscall.c:47 [inline] el0_svc_common.constprop.0+0x148/0x2e0 arch/arm64/kernel/syscall.c:83 el0_svc_handler+0xdc/0x100 arch/arm64/kernel/syscall.c:129 el0_svc+0x8/0xc arch/arm64/kernel/entry.S:948 SMP: stopping secondary CPUs Dumping ftrace buffer: (ftrace buffer empty) Kernel Offset: disabled CPU features: 0x002,23000438 Memory Limit: none Rebooting in 1 seconds.. Fixes: edafccee56ff3167 ("io_uring: add support for pre-mapped user IO buffers") Signed-off-by: Mark Rutland Cc: Alexander Viro Cc: Jens Axboe Cc: linux-fsdevel@vger.kernel.org Cc: linux-block@vger.kernel.org Cc: linux-kernel@vger.kernel.org Signed-off-by: Jens Axboe --- fs/io_uring.c | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index 18cecb6a0151..84efb8956734 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -2443,7 +2443,7 @@ static int io_sqe_buffer_unregister(struct io_ring_ctx *ctx) if (ctx->account_mem) io_unaccount_mem(ctx->user, imu->nr_bvecs); - kfree(imu->bvec); + kvfree(imu->bvec); imu->nr_bvecs = 0; } @@ -2535,9 +2535,9 @@ static int io_sqe_buffer_register(struct io_ring_ctx *ctx, void __user *arg, if (!pages || nr_pages > got_pages) { kfree(vmas); kfree(pages); - pages = kmalloc_array(nr_pages, sizeof(struct page *), + pages = kvmalloc_array(nr_pages, sizeof(struct page *), GFP_KERNEL); - vmas = kmalloc_array(nr_pages, + vmas = kvmalloc_array(nr_pages, sizeof(struct vm_area_struct *), GFP_KERNEL); if (!pages || !vmas) { @@ -2549,7 +2549,7 @@ static int io_sqe_buffer_register(struct io_ring_ctx *ctx, void __user *arg, got_pages = nr_pages; } - imu->bvec = kmalloc_array(nr_pages, sizeof(struct bio_vec), + imu->bvec = kvmalloc_array(nr_pages, sizeof(struct bio_vec), GFP_KERNEL); ret = -ENOMEM; if (!imu->bvec) { @@ -2588,6 +2588,7 @@ static int io_sqe_buffer_register(struct io_ring_ctx *ctx, void __user *arg, } if (ctx->account_mem) io_unaccount_mem(ctx->user, nr_pages); + kvfree(imu->bvec); goto err; } @@ -2610,12 +2611,12 @@ static int io_sqe_buffer_register(struct io_ring_ctx *ctx, void __user *arg, ctx->nr_user_bufs++; } - kfree(pages); - kfree(vmas); + kvfree(pages); + kvfree(vmas); return 0; err: - kfree(pages); - kfree(vmas); + kvfree(pages); + kvfree(vmas); io_sqe_buffer_unregister(ctx); return ret; } -- cgit v1.2.3 From d2f0c961148f65bc73eda72b9fa3a4e80973cb49 Mon Sep 17 00:00:00 2001 From: Shmulik Ladkani Date: Mon, 29 Apr 2019 16:39:30 +0300 Subject: ipv4: ip_do_fragment: Preserve skb_iif during fragmentation Previously, during fragmentation after forwarding, skb->skb_iif isn't preserved, i.e. 'ip_copy_metadata' does not copy skb_iif from given 'from' skb. As a result, ip_do_fragment's creates fragments with zero skb_iif, leading to inconsistent behavior. Assume for example an eBPF program attached at tc egress (post forwarding) that examines __sk_buff->ingress_ifindex: - the correct iif is observed if forwarding path does not involve fragmentation/refragmentation - a bogus iif is observed if forwarding path involves fragmentation/refragmentatiom Fix, by preserving skb_iif during 'ip_copy_metadata'. Signed-off-by: Shmulik Ladkani Signed-off-by: David S. Miller --- net/ipv4/ip_output.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index c80188875f39..e8bb2e85c5a4 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -519,6 +519,7 @@ static void ip_copy_metadata(struct sk_buff *to, struct sk_buff *from) to->pkt_type = from->pkt_type; to->priority = from->priority; to->protocol = from->protocol; + to->skb_iif = from->skb_iif; skb_dst_drop(to); skb_dst_copy(to, from); to->dev = from->dev; -- cgit v1.2.3 From 6f303d60534c46aa1a239f29c321f95c83dda748 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Wed, 1 May 2019 11:05:41 -0700 Subject: gcc-9: silence 'address-of-packed-member' warning We already did this for clang, but now gcc has that warning too. Yes, yes, the address may be unaligned. And that's kind of the point. Signed-off-by: Linus Torvalds --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 2b99679148dc..633d1196bf00 100644 --- a/Makefile +++ b/Makefile @@ -678,6 +678,7 @@ KBUILD_CFLAGS += $(call cc-disable-warning,frame-address,) KBUILD_CFLAGS += $(call cc-disable-warning, format-truncation) KBUILD_CFLAGS += $(call cc-disable-warning, format-overflow) KBUILD_CFLAGS += $(call cc-disable-warning, int-in-bool-context) +KBUILD_CFLAGS += $(call cc-disable-warning, address-of-packed-member) ifdef CONFIG_CC_OPTIMIZE_FOR_SIZE KBUILD_CFLAGS += -Os @@ -719,7 +720,6 @@ ifdef CONFIG_CC_IS_CLANG KBUILD_CPPFLAGS += $(call cc-option,-Qunused-arguments,) KBUILD_CFLAGS += $(call cc-disable-warning, format-invalid-specifier) KBUILD_CFLAGS += $(call cc-disable-warning, gnu) -KBUILD_CFLAGS += $(call cc-disable-warning, address-of-packed-member) # Quiet clang warning: comparison of unsigned expression < 0 is always false KBUILD_CFLAGS += $(call cc-disable-warning, tautological-compare) # CLANG uses a _MergedGlobals as optimization, but this breaks modpost, as the -- cgit v1.2.3 From cf676908846a06443fa5e6724ca3f5dd7460eca1 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Wed, 1 May 2019 11:07:40 -0700 Subject: gcc-9: don't warn about uninitialized variable I'm not sure what made gcc warn about this code now. The 'ret' variable does end up initialized in all cases, but it's definitely not obvious, so the compiler is quite reasonable to warn about this. So just add initialization to make it all much more obvious both to compilers and to humans. Signed-off-by: Linus Torvalds --- drivers/i2c/i2c-core-base.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/i2c/i2c-core-base.c b/drivers/i2c/i2c-core-base.c index 38af18645133..c480ca385ffb 100644 --- a/drivers/i2c/i2c-core-base.c +++ b/drivers/i2c/i2c-core-base.c @@ -185,7 +185,7 @@ static int i2c_generic_bus_free(struct i2c_adapter *adap) int i2c_generic_scl_recovery(struct i2c_adapter *adap) { struct i2c_bus_recovery_info *bri = adap->bus_recovery_info; - int i = 0, scl = 1, ret; + int i = 0, scl = 1, ret = 0; if (bri->prepare_recovery) bri->prepare_recovery(adap); -- cgit v1.2.3 From 459e3a21535ae3c7a9a123650e54f5c882b8fcbf Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Wed, 1 May 2019 11:20:53 -0700 Subject: gcc-9: properly declare the {pv,hv}clock_page storage MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The pvlock_page and hvclock_page variables are (as the name implies) addresses to pages, created by the linker script. But we declared them as just "extern u8" variables, which _works_, but now that gcc does some more bounds checking, it causes warnings like warning: array subscript 1 is outside array bounds of ‘u8[1]’ when we then access more than one byte from those variables. Fix this by simply making the declaration of the variables match reality, which makes the compiler happy too. Signed-off-by: Linus Torvalds --- arch/x86/entry/vdso/vclock_gettime.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/x86/entry/vdso/vclock_gettime.c b/arch/x86/entry/vdso/vclock_gettime.c index 007b3fe9d727..98c7d12b945c 100644 --- a/arch/x86/entry/vdso/vclock_gettime.c +++ b/arch/x86/entry/vdso/vclock_gettime.c @@ -29,12 +29,12 @@ extern int __vdso_gettimeofday(struct timeval *tv, struct timezone *tz); extern time_t __vdso_time(time_t *t); #ifdef CONFIG_PARAVIRT_CLOCK -extern u8 pvclock_page +extern u8 pvclock_page[PAGE_SIZE] __attribute__((visibility("hidden"))); #endif #ifdef CONFIG_HYPERV_TSCPAGE -extern u8 hvclock_page +extern u8 hvclock_page[PAGE_SIZE] __attribute__((visibility("hidden"))); #endif -- cgit v1.2.3 From f68d7c44e76532e46f292ad941aa3706cb9e6e40 Mon Sep 17 00:00:00 2001 From: Hangbin Liu Date: Tue, 30 Apr 2019 10:46:10 +0800 Subject: selftests: fib_rule_tests: print the result and return 1 if any tests failed Fixes: 65b2b4939a64 ("selftests: net: initial fib rule tests") Signed-off-by: Hangbin Liu Reviewed-by: David Ahern Signed-off-by: David S. Miller --- tools/testing/selftests/net/fib_rule_tests.sh | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/tools/testing/selftests/net/fib_rule_tests.sh b/tools/testing/selftests/net/fib_rule_tests.sh index 5b92c1f4dc04..4b7e107865bf 100755 --- a/tools/testing/selftests/net/fib_rule_tests.sh +++ b/tools/testing/selftests/net/fib_rule_tests.sh @@ -27,6 +27,7 @@ log_test() nsuccess=$((nsuccess+1)) printf "\n TEST: %-50s [ OK ]\n" "${msg}" else + ret=1 nfail=$((nfail+1)) printf "\n TEST: %-50s [FAIL]\n" "${msg}" if [ "${PAUSE_ON_FAIL}" = "yes" ]; then @@ -245,4 +246,9 @@ setup run_fibrule_tests cleanup +if [ "$TESTS" != "none" ]; then + printf "\nTests passed: %3d\n" ${nsuccess} + printf "Tests failed: %3d\n" ${nfail} +fi + exit $ret -- cgit v1.2.3 From 7e74e235bb31a1fefc28d5303da0718b88627ea8 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Wed, 1 May 2019 12:19:20 -0700 Subject: gcc-9: don't warn about uninitialized btrfs extent_type variable The 'extent_type' variable does seem to be reliably initialized, but it's _very_ non-obvious, since there's a "goto next" case that jumps over the normal initialization. That will then always trigger the "start >= extent_end" test, which will end up never falling through to the use of that variable. But the code is certainly not obvious, and the compiler warning looks reasonable. Make 'extent_type' an int, and initialize it to an invalid negative value, which seems to be the common pattern in other places. Signed-off-by: Linus Torvalds --- fs/btrfs/inode.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 82fdda8ff5ab..2973608824ec 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -6783,7 +6783,7 @@ struct extent_map *btrfs_get_extent(struct btrfs_inode *inode, u64 extent_start = 0; u64 extent_end = 0; u64 objectid = btrfs_ino(inode); - u8 extent_type; + int extent_type = -1; struct btrfs_path *path = NULL; struct btrfs_root *root = inode->root; struct btrfs_file_extent_item *item; -- cgit v1.2.3 From 15d2aba7c602cd9005b20ff011b670547b3882c4 Mon Sep 17 00:00:00 2001 From: Alex Williamson Date: Mon, 22 Apr 2019 16:43:30 -0600 Subject: PCI/portdrv: Use shared MSI/MSI-X vector for Bandwidth Management The Interrupt Message Number in the PCIe Capabilities register (PCIe r4.0, sec 7.5.3.2) indicates which MSI/MSI-X vector is shared by interrupts related to the PCIe Capability, including Link Bandwidth Management and Link Autonomous Bandwidth Interrupts (Link Control, 7.5.3.7), Command Completed and Hot-Plug Interrupts (Slot Control, 7.5.3.10), and the PME Interrupt (Root Control, 7.5.3.12). pcie_message_numbers() checked whether we want to enable PME or Hot-Plug interrupts but neglected to check for Link Bandwidth Management, so if we only wanted the Bandwidth Management interrupts, it decided we didn't need any vectors at all. Then pcie_port_enable_irq_vec() tried to reallocate zero vectors, which failed, resulting in fallback to INTx. On some systems, e.g., an X79-based workstation, that INTx seems broken or not handled correctly, so we got spurious IRQ16 interrupts for Bandwidth Management events. Change pcie_message_numbers() so that if we want Link Bandwidth Management interrupts, we use the shared MSI/MSI-X vector from the PCIe Capabilities register. Fixes: e8303bb7a75c ("PCI/LINK: Report degraded links via link bandwidth notification") Link: https://lore.kernel.org/lkml/155597243666.19387.1205950870601742062.stgit@gimli.home Signed-off-by: Alex Williamson [bhelgaas: changelog] Signed-off-by: Bjorn Helgaas --- drivers/pci/pcie/portdrv_core.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/pci/pcie/portdrv_core.c b/drivers/pci/pcie/portdrv_core.c index 7d04f9d087a6..1b330129089f 100644 --- a/drivers/pci/pcie/portdrv_core.c +++ b/drivers/pci/pcie/portdrv_core.c @@ -55,7 +55,8 @@ static int pcie_message_numbers(struct pci_dev *dev, int mask, * 7.8.2, 7.10.10, 7.31.2. */ - if (mask & (PCIE_PORT_SERVICE_PME | PCIE_PORT_SERVICE_HP)) { + if (mask & (PCIE_PORT_SERVICE_PME | PCIE_PORT_SERVICE_HP | + PCIE_PORT_SERVICE_BWNOTIF)) { pcie_capability_read_word(dev, PCI_EXP_FLAGS, ®16); *pme = (reg16 & PCI_EXP_FLAGS_IRQ) >> 9; nvec = *pme + 1; -- cgit v1.2.3 From f3505745c07ff50c22aeca9dde98762d1c8b5898 Mon Sep 17 00:00:00 2001 From: Nicholas Mc Guire Date: Tue, 30 Apr 2019 05:12:57 +0200 Subject: rds: ib: force endiannes annotation While the endiannes is being handled correctly as indicated by the comment above the offending line - sparse was unhappy with the missing annotation as be64_to_cpu() expects a __be64 argument. To mitigate this annotation all involved variables are changed to a consistent __le64 and the conversion to uint64_t delayed to the call to rds_cong_map_updated(). Signed-off-by: Nicholas Mc Guire Acked-by: Santosh Shilimkar Signed-off-by: David S. Miller --- net/rds/ib_recv.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/net/rds/ib_recv.c b/net/rds/ib_recv.c index 70559854837e..8946c89d7392 100644 --- a/net/rds/ib_recv.c +++ b/net/rds/ib_recv.c @@ -772,7 +772,7 @@ static void rds_ib_cong_recv(struct rds_connection *conn, unsigned long frag_off; unsigned long to_copy; unsigned long copied; - uint64_t uncongested = 0; + __le64 uncongested = 0; void *addr; /* catch completely corrupt packets */ @@ -789,7 +789,7 @@ static void rds_ib_cong_recv(struct rds_connection *conn, copied = 0; while (copied < RDS_CONG_MAP_BYTES) { - uint64_t *src, *dst; + __le64 *src, *dst; unsigned int k; to_copy = min(RDS_FRAG_SIZE - frag_off, PAGE_SIZE - map_off); @@ -824,9 +824,7 @@ static void rds_ib_cong_recv(struct rds_connection *conn, } /* the congestion map is in little endian order */ - uncongested = le64_to_cpu(uncongested); - - rds_cong_map_updated(map, uncongested); + rds_cong_map_updated(map, le64_to_cpu(uncongested)); } static void rds_ib_process_recv(struct rds_connection *conn, -- cgit v1.2.3 From 886b7a50100a50f1cbd08a6f8ec5884dfbe082dc Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Tue, 30 Apr 2019 10:45:12 -0700 Subject: ipv6: A few fixes on dereferencing rt->from It is a followup after the fix in commit 9c69a1320515 ("route: Avoid crash from dereferencing NULL rt->from") rt6_do_redirect(): 1. NULL checking is needed on rt->from because a parallel fib6_info delete could happen that sets rt->from to NULL. (e.g. rt6_remove_exception() and fib6_drop_pcpu_from()). 2. fib6_info_hold() is not enough. Same reason as (1). Meaning, holding dst->__refcnt cannot ensure rt->from is not NULL or rt->from->fib6_ref is not 0. Instead of using fib6_info_hold_safe() which ip6_rt_cache_alloc() is already doing, this patch chooses to extend the rcu section to keep "from" dereference-able after checking for NULL. inet6_rtm_getroute(): 1. NULL checking is also needed on rt->from for a similar reason. Note that inet6_rtm_getroute() is using RTNL_FLAG_DOIT_UNLOCKED. Fixes: a68886a69180 ("net/ipv6: Make from in rt6_info rcu protected") Signed-off-by: Martin KaFai Lau Acked-by: Wei Wang Reviewed-by: David Ahern Reviewed-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv6/route.c | 38 ++++++++++++++++++-------------------- 1 file changed, 18 insertions(+), 20 deletions(-) diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 2cc166bc978d..0520aca3354b 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -3392,11 +3392,8 @@ static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_bu rcu_read_lock(); from = rcu_dereference(rt->from); - /* This fib6_info_hold() is safe here because we hold reference to rt - * and rt already holds reference to fib6_info. - */ - fib6_info_hold(from); - rcu_read_unlock(); + if (!from) + goto out; nrt = ip6_rt_cache_alloc(from, &msg->dest, NULL); if (!nrt) @@ -3408,10 +3405,7 @@ static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_bu nrt->rt6i_gateway = *(struct in6_addr *)neigh->primary_key; - /* No need to remove rt from the exception table if rt is - * a cached route because rt6_insert_exception() will - * takes care of it - */ + /* rt6_insert_exception() will take care of duplicated exceptions */ if (rt6_insert_exception(nrt, from)) { dst_release_immediate(&nrt->dst); goto out; @@ -3424,7 +3418,7 @@ static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_bu call_netevent_notifiers(NETEVENT_REDIRECT, &netevent); out: - fib6_info_release(from); + rcu_read_unlock(); neigh_release(neigh); } @@ -5023,16 +5017,20 @@ static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh, rcu_read_lock(); from = rcu_dereference(rt->from); - - if (fibmatch) - err = rt6_fill_node(net, skb, from, NULL, NULL, NULL, iif, - RTM_NEWROUTE, NETLINK_CB(in_skb).portid, - nlh->nlmsg_seq, 0); - else - err = rt6_fill_node(net, skb, from, dst, &fl6.daddr, - &fl6.saddr, iif, RTM_NEWROUTE, - NETLINK_CB(in_skb).portid, nlh->nlmsg_seq, - 0); + if (from) { + if (fibmatch) + err = rt6_fill_node(net, skb, from, NULL, NULL, NULL, + iif, RTM_NEWROUTE, + NETLINK_CB(in_skb).portid, + nlh->nlmsg_seq, 0); + else + err = rt6_fill_node(net, skb, from, dst, &fl6.daddr, + &fl6.saddr, iif, RTM_NEWROUTE, + NETLINK_CB(in_skb).portid, + nlh->nlmsg_seq, 0); + } else { + err = -ENETUNREACH; + } rcu_read_unlock(); if (err < 0) { -- cgit v1.2.3 From 4dd2b82d5adfbe0b1587ccad7a8f76d826120f37 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 1 May 2019 18:56:28 -0700 Subject: udp: fix GRO packet of death syzbot was able to crash host by sending UDP packets with a 0 payload. TCP does not have this issue since we do not aggregate packets without payload. Since dev_gro_receive() sets gso_size based on skb_gro_len(skb) it seems not worth trying to cope with padded packets. BUG: KASAN: slab-out-of-bounds in skb_gro_receive+0xf5f/0x10e0 net/core/skbuff.c:3826 Read of size 16 at addr ffff88808893fff0 by task syz-executor612/7889 CPU: 0 PID: 7889 Comm: syz-executor612 Not tainted 5.1.0-rc7+ #96 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Call Trace: __dump_stack lib/dump_stack.c:77 [inline] dump_stack+0x172/0x1f0 lib/dump_stack.c:113 print_address_description.cold+0x7c/0x20d mm/kasan/report.c:187 kasan_report.cold+0x1b/0x40 mm/kasan/report.c:317 __asan_report_load16_noabort+0x14/0x20 mm/kasan/generic_report.c:133 skb_gro_receive+0xf5f/0x10e0 net/core/skbuff.c:3826 udp_gro_receive_segment net/ipv4/udp_offload.c:382 [inline] call_gro_receive include/linux/netdevice.h:2349 [inline] udp_gro_receive+0xb61/0xfd0 net/ipv4/udp_offload.c:414 udp4_gro_receive+0x763/0xeb0 net/ipv4/udp_offload.c:478 inet_gro_receive+0xe72/0x1110 net/ipv4/af_inet.c:1510 dev_gro_receive+0x1cd0/0x23c0 net/core/dev.c:5581 napi_gro_frags+0x36b/0xd10 net/core/dev.c:5843 tun_get_user+0x2f24/0x3fb0 drivers/net/tun.c:1981 tun_chr_write_iter+0xbd/0x156 drivers/net/tun.c:2027 call_write_iter include/linux/fs.h:1866 [inline] do_iter_readv_writev+0x5e1/0x8e0 fs/read_write.c:681 do_iter_write fs/read_write.c:957 [inline] do_iter_write+0x184/0x610 fs/read_write.c:938 vfs_writev+0x1b3/0x2f0 fs/read_write.c:1002 do_writev+0x15e/0x370 fs/read_write.c:1037 __do_sys_writev fs/read_write.c:1110 [inline] __se_sys_writev fs/read_write.c:1107 [inline] __x64_sys_writev+0x75/0xb0 fs/read_write.c:1107 do_syscall_64+0x103/0x610 arch/x86/entry/common.c:290 entry_SYSCALL_64_after_hwframe+0x49/0xbe RIP: 0033:0x441cc0 Code: 05 48 3d 01 f0 ff ff 0f 83 9d 09 fc ff c3 66 2e 0f 1f 84 00 00 00 00 00 66 90 83 3d 51 93 29 00 00 75 14 b8 14 00 00 00 0f 05 <48> 3d 01 f0 ff ff 0f 83 74 09 fc ff c3 48 83 ec 08 e8 ba 2b 00 00 RSP: 002b:00007ffe8c716118 EFLAGS: 00000246 ORIG_RAX: 0000000000000014 RAX: ffffffffffffffda RBX: 00007ffe8c716150 RCX: 0000000000441cc0 RDX: 0000000000000001 RSI: 00007ffe8c716170 RDI: 00000000000000f0 RBP: 0000000000000000 R08: 000000000000ffff R09: 0000000000a64668 R10: 0000000020000040 R11: 0000000000000246 R12: 000000000000c2d9 R13: 0000000000402b50 R14: 0000000000000000 R15: 0000000000000000 Allocated by task 5143: save_stack+0x45/0xd0 mm/kasan/common.c:75 set_track mm/kasan/common.c:87 [inline] __kasan_kmalloc mm/kasan/common.c:497 [inline] __kasan_kmalloc.constprop.0+0xcf/0xe0 mm/kasan/common.c:470 kasan_slab_alloc+0xf/0x20 mm/kasan/common.c:505 slab_post_alloc_hook mm/slab.h:437 [inline] slab_alloc mm/slab.c:3393 [inline] kmem_cache_alloc+0x11a/0x6f0 mm/slab.c:3555 mm_alloc+0x1d/0xd0 kernel/fork.c:1030 bprm_mm_init fs/exec.c:363 [inline] __do_execve_file.isra.0+0xaa3/0x23f0 fs/exec.c:1791 do_execveat_common fs/exec.c:1865 [inline] do_execve fs/exec.c:1882 [inline] __do_sys_execve fs/exec.c:1958 [inline] __se_sys_execve fs/exec.c:1953 [inline] __x64_sys_execve+0x8f/0xc0 fs/exec.c:1953 do_syscall_64+0x103/0x610 arch/x86/entry/common.c:290 entry_SYSCALL_64_after_hwframe+0x49/0xbe Freed by task 5351: save_stack+0x45/0xd0 mm/kasan/common.c:75 set_track mm/kasan/common.c:87 [inline] __kasan_slab_free+0x102/0x150 mm/kasan/common.c:459 kasan_slab_free+0xe/0x10 mm/kasan/common.c:467 __cache_free mm/slab.c:3499 [inline] kmem_cache_free+0x86/0x260 mm/slab.c:3765 __mmdrop+0x238/0x320 kernel/fork.c:677 mmdrop include/linux/sched/mm.h:49 [inline] finish_task_switch+0x47b/0x780 kernel/sched/core.c:2746 context_switch kernel/sched/core.c:2880 [inline] __schedule+0x81b/0x1cc0 kernel/sched/core.c:3518 preempt_schedule_irq+0xb5/0x140 kernel/sched/core.c:3745 retint_kernel+0x1b/0x2d arch_local_irq_restore arch/x86/include/asm/paravirt.h:767 [inline] kmem_cache_free+0xab/0x260 mm/slab.c:3766 anon_vma_chain_free mm/rmap.c:134 [inline] unlink_anon_vmas+0x2ba/0x870 mm/rmap.c:401 free_pgtables+0x1af/0x2f0 mm/memory.c:394 exit_mmap+0x2d1/0x530 mm/mmap.c:3144 __mmput kernel/fork.c:1046 [inline] mmput+0x15f/0x4c0 kernel/fork.c:1067 exec_mmap fs/exec.c:1046 [inline] flush_old_exec+0x8d9/0x1c20 fs/exec.c:1279 load_elf_binary+0x9bc/0x53f0 fs/binfmt_elf.c:864 search_binary_handler fs/exec.c:1656 [inline] search_binary_handler+0x17f/0x570 fs/exec.c:1634 exec_binprm fs/exec.c:1698 [inline] __do_execve_file.isra.0+0x1394/0x23f0 fs/exec.c:1818 do_execveat_common fs/exec.c:1865 [inline] do_execve fs/exec.c:1882 [inline] __do_sys_execve fs/exec.c:1958 [inline] __se_sys_execve fs/exec.c:1953 [inline] __x64_sys_execve+0x8f/0xc0 fs/exec.c:1953 do_syscall_64+0x103/0x610 arch/x86/entry/common.c:290 entry_SYSCALL_64_after_hwframe+0x49/0xbe The buggy address belongs to the object at ffff88808893f7c0 which belongs to the cache mm_struct of size 1496 The buggy address is located 600 bytes to the right of 1496-byte region [ffff88808893f7c0, ffff88808893fd98) The buggy address belongs to the page: page:ffffea0002224f80 count:1 mapcount:0 mapping:ffff88821bc40ac0 index:0xffff88808893f7c0 compound_mapcount: 0 flags: 0x1fffc0000010200(slab|head) raw: 01fffc0000010200 ffffea00025b4f08 ffffea00027b9d08 ffff88821bc40ac0 raw: ffff88808893f7c0 ffff88808893e440 0000000100000001 0000000000000000 page dumped because: kasan: bad access detected Memory state around the buggy address: ffff88808893fe80: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc ffff88808893ff00: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc >ffff88808893ff80: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc ^ ffff888088940000: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ffff888088940080: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 Fixes: e20cf8d3f1f7 ("udp: implement GRO for plain UDP sockets.") Signed-off-by: Eric Dumazet Cc: Paolo Abeni Reported-by: syzbot Signed-off-by: David S. Miller --- net/ipv4/udp_offload.c | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c index d8776b2110c1..065334b41d57 100644 --- a/net/ipv4/udp_offload.c +++ b/net/ipv4/udp_offload.c @@ -352,6 +352,7 @@ static struct sk_buff *udp_gro_receive_segment(struct list_head *head, struct sk_buff *pp = NULL; struct udphdr *uh2; struct sk_buff *p; + unsigned int ulen; /* requires non zero csum, for symmetry with GSO */ if (!uh->check) { @@ -359,6 +360,12 @@ static struct sk_buff *udp_gro_receive_segment(struct list_head *head, return NULL; } + /* Do not deal with padded or malicious packets, sorry ! */ + ulen = ntohs(uh->len); + if (ulen <= sizeof(*uh) || ulen != skb_gro_len(skb)) { + NAPI_GRO_CB(skb)->flush = 1; + return NULL; + } /* pull encapsulating udp header */ skb_gro_pull(skb, sizeof(struct udphdr)); skb_gro_postpull_rcsum(skb, uh, sizeof(struct udphdr)); @@ -377,12 +384,12 @@ static struct sk_buff *udp_gro_receive_segment(struct list_head *head, /* Terminate the flow on len mismatch or if it grow "too much". * Under small packet flood GRO count could elsewhere grow a lot - * leading to execessive truesize values. + * leading to excessive truesize values. * On len mismatch merge the first packet shorter than gso_size, * otherwise complete the GRO packet. */ - if (uh->len > uh2->len || skb_gro_receive(p, skb) || - uh->len != uh2->len || + if (ulen > ntohs(uh2->len) || skb_gro_receive(p, skb) || + ulen != ntohs(uh2->len) || NAPI_GRO_CB(p)->count >= UDP_GRO_CNT_MAX) pp = p; -- cgit v1.2.3 From 2078e1e7f7e0e21bd0291908f3037c39e666d27b Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Wed, 1 May 2019 08:29:42 -0600 Subject: PCI/LINK: Add Kconfig option (default off) e8303bb7a75c ("PCI/LINK: Report degraded links via link bandwidth notification") added dmesg logging whenever a link changes speed or width to a state that is considered degraded. Unfortunately, it cannot differentiate signal integrity-related link changes from those intentionally initiated by an endpoint driver, including drivers that may live in userspace or VMs when making use of vfio-pci. Some GPU drivers actively manage the link state to save power, which generates a stream of messages like this: vfio-pci 0000:07:00.0: 32.000 Gb/s available PCIe bandwidth, limited by 2.5 GT/s x16 link at 0000:00:02.0 (capable of 64.000 Gb/s with 5 GT/s x16 link) Since we can't distinguish the intentional changes from the signal integrity issues, leave the reporting turned off by default. Add a Kconfig option to turn it on if desired. Fixes: e8303bb7a75c ("PCI/LINK: Report degraded links via link bandwidth notification") Link: https://lore.kernel.org/linux-pci/20190501142942.26972-1-keith.busch@intel.com Signed-off-by: Keith Busch Signed-off-by: Bjorn Helgaas --- drivers/pci/pcie/Kconfig | 8 ++++++++ drivers/pci/pcie/Makefile | 2 +- drivers/pci/pcie/portdrv.h | 4 ++++ 3 files changed, 13 insertions(+), 1 deletion(-) diff --git a/drivers/pci/pcie/Kconfig b/drivers/pci/pcie/Kconfig index 5cbdbca904ac..362eb8cfa53b 100644 --- a/drivers/pci/pcie/Kconfig +++ b/drivers/pci/pcie/Kconfig @@ -142,3 +142,11 @@ config PCIE_PTM This is only useful if you have devices that support PTM, but it is safe to enable even if you don't. + +config PCIE_BW + bool "PCI Express Bandwidth Change Notification" + depends on PCIEPORTBUS + help + This enables PCI Express Bandwidth Change Notification. If + you know link width or rate changes occur only to correct + unreliable links, you may answer Y. diff --git a/drivers/pci/pcie/Makefile b/drivers/pci/pcie/Makefile index f1d7bc1e5efa..efb9d2e71e9e 100644 --- a/drivers/pci/pcie/Makefile +++ b/drivers/pci/pcie/Makefile @@ -3,7 +3,6 @@ # Makefile for PCI Express features and port driver pcieportdrv-y := portdrv_core.o portdrv_pci.o err.o -pcieportdrv-y += bw_notification.o obj-$(CONFIG_PCIEPORTBUS) += pcieportdrv.o @@ -13,3 +12,4 @@ obj-$(CONFIG_PCIEAER_INJECT) += aer_inject.o obj-$(CONFIG_PCIE_PME) += pme.o obj-$(CONFIG_PCIE_DPC) += dpc.o obj-$(CONFIG_PCIE_PTM) += ptm.o +obj-$(CONFIG_PCIE_BW) += bw_notification.o diff --git a/drivers/pci/pcie/portdrv.h b/drivers/pci/pcie/portdrv.h index 1d50dc58ac40..944827a8c7d3 100644 --- a/drivers/pci/pcie/portdrv.h +++ b/drivers/pci/pcie/portdrv.h @@ -49,7 +49,11 @@ int pcie_dpc_init(void); static inline int pcie_dpc_init(void) { return 0; } #endif +#ifdef CONFIG_PCIE_BW int pcie_bandwidth_notification_init(void); +#else +static inline int pcie_bandwidth_notification_init(void) { return 0; } +#endif /* Port Type */ #define PCIE_ANY_PORT (~0) -- cgit v1.2.3